Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/illumina samplesheet v2 #77

Merged
merged 20 commits into from
Feb 23, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion clarity_epp/export/bioanalyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def samplesheet(lims, process_id, output_file):
}

# Get sample placement
for placement, artifact in process.output_containers()[0].placements.iteritems():
for placement, artifact in process.output_containers()[0].placements.items():
placement = ''.join(placement.split(':'))
plate[placement]['name'] = artifact.name
plate[placement]['comment'] = ''
Expand Down
4 changes: 2 additions & 2 deletions clarity_epp/export/hamilton.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def samplesheet_filling_out(lims, process_id, output_file):
process = Process(lims, id=process_id)
well_plate = {}

for placement, artifact in process.output_containers()[0].placements.iteritems():
for placement, artifact in process.output_containers()[0].placements.items():
placement = ''.join(placement.split(':'))
well_plate[placement] = artifact.samples[0].udf['Dx Fractienummer']

Expand All @@ -29,7 +29,7 @@ def samplesheet_purify(lims, process_id, output_file):
parent_process_barcode = process.parent_processes()[0].output_containers()[0].name
well_plate = {}

for placement, artifact in process.output_containers()[0].placements.iteritems():
for placement, artifact in process.output_containers()[0].placements.items():
placement = ''.join(placement.split(':'))
well_plate[placement] = artifact.samples[0].udf['Dx Fractienummer']

Expand Down
130 changes: 67 additions & 63 deletions clarity_epp/export/illumina.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@


def get_project(projects, urgent=False):
"""Get a project name for sample."""
"""Get a project name from projects dict ({'project_name': sample_count, ...})
If urgent is True, return the first project with < 9 samples, else return the project with the fewest samples.
"""
if urgent: # Sort projects for urgent samples on name
projects_sorted = sorted(projects.items(), key=operator.itemgetter(0))
for project in projects_sorted:
Expand All @@ -22,7 +24,7 @@ def get_project(projects, urgent=False):
return projects_sorted[0][0] # return project with least amount of samples.


def get_override_cycles(read_len, umi_len, index_len, max_index_len, index_2_orientation):
def get_override_cycles(read_len, umi_len, index_len, max_index_len, index_2_conversion_orientation):
"""Get override cycles per sample."""
read_cycles = ['', '']
index_cycles = ['', '']
Expand All @@ -38,7 +40,7 @@ def get_override_cycles(read_len, umi_len, index_len, max_index_len, index_2_ori
if index_len[idx]:
if index_len[idx] < max_index_len[idx]:
n_bases = max_index_len[idx] - index_len[idx]
if idx == 1 and index_2_orientation == 'F': # Index 2 in forward orientation (NovaSeq X Plus)
if idx == 1 and index_2_conversion_orientation == 'F': # Index 2 in forward orientation (NovaSeq X Plus)
index_cycle = f'N{n_bases}I{index_len[idx]}'
else:
index_cycle = f'I{index_len[idx]}N{n_bases}'
Expand All @@ -58,7 +60,7 @@ def get_override_cycles(read_len, umi_len, index_len, max_index_len, index_2_ori
return override_cycles


def get_samplesheet_samples(sample_artifacts, process, index_2_orientation):
def get_samplesheet_samples(sample_artifacts, process, index_2_conversion_orientation):
families = {}
samplesheet_samples = {}

Expand All @@ -72,21 +74,21 @@ def get_samplesheet_samples(sample_artifacts, process, index_2_orientation):
for sample in sample_artifact.samples:
# Dx production sample
if (
'Dx Familienummer' in list(sample.udf) and
'Dx NICU Spoed' in list(sample.udf) and
'Dx Protocolomschrijving' in list(sample.udf) and
'Dx Stoftest code' in list(sample.udf)
'Dx Familienummer' in sample.udf and
'Dx NICU Spoed' in sample.udf and
'Dx Protocolomschrijving' in sample.udf and
'Dx Stoftest code' in sample.udf
):
# Skip Mengfractie samples
if sample.udf['Dx Stoftest code'] == config.stoftestcode_wes_duplo:
continue

# Get sample conversion_settings
sample_conversion_setting = config.conversion_settings['default']
# Get sample conversion settings
sample_conversion_setting = config.sample_conversion_settings['default']
newest_protocol = sample.udf['Dx Protocolomschrijving'].split(';')[0]
for protocol_code in config.conversion_settings:
if protocol_code in newest_protocol:
sample_conversion_setting = config.conversion_settings[protocol_code]
for protocol_code in config.sample_conversion_settings:
if protocol_code in newest_protocol: # Look for protocol code (elid number) in newest protocol
sample_conversion_setting = config.sample_conversion_settings[protocol_code]
break

# Get sample override cycles
Expand All @@ -95,7 +97,7 @@ def get_samplesheet_samples(sample_artifacts, process, index_2_orientation):
umi_len=sample_conversion_setting['umi_len'],
index_len=[len(sample_index[0]), len(sample_index[1])],
max_index_len=[process.udf['Index Read 1'], process.udf['Index Read 2']],
index_2_orientation=index_2_orientation
index_2_conversion_orientation=index_2_conversion_orientation
)

# Set family and create if not exist
Expand Down Expand Up @@ -156,10 +158,10 @@ def get_samplesheet_samples(sample_artifacts, process, index_2_orientation):
else:
sample_override_cycles = get_override_cycles(
read_len=[process.udf['Read 1 Cycles'], process.udf['Read 2 Cycles']],
umi_len=config.conversion_settings['default']['umi_len'],
umi_len=config.sample_conversion_settings['default']['umi_len'],
index_len=[len(sample_index[0]), len(sample_index[1])],
max_index_len=[process.udf['Index Read 1'], process.udf['Index Read 2']],
index_2_orientation=index_2_orientation
index_2_conversion_orientation=index_2_conversion_orientation
)

# Add sample to samplesheet_samples
Expand All @@ -168,7 +170,7 @@ def get_samplesheet_samples(sample_artifacts, process, index_2_orientation):
'index_2': sample_index[1],
'override_cycles': sample_override_cycles,
}
if index_2_orientation == 'RC': # Reverse complement index 2
if index_2_conversion_orientation == 'RC': # Reverse complement index 2
samplesheet_samples[sample_sequence_name]['index_2'] = reverse_complement(
samplesheet_samples[sample_sequence_name]['index_2']
)
Expand Down Expand Up @@ -227,65 +229,67 @@ def get_samplesheet_samples(sample_artifacts, process, index_2_orientation):
def create_samplesheet(lims, process_id, output_file):
"""Create illumina samplesheet v2."""
process = Process(lims, id=process_id)
index_2_orientation = config.index_2_orientation[process.type.name]
sequencer_conversion_settings = config.sequencer_conversion_settings[process.type.name]

# Get samples per lane
samplesheet_samples = []
for lane in process.analytes()[0]:
sample_artifacts = get_sample_artifacts_from_pool(lims, process.analytes()[0][0])
samplesheet_samples.append(get_samplesheet_samples(sample_artifacts, process, index_2_orientation))
samplesheet_samples.append(
get_samplesheet_samples(
sample_artifacts, process, sequencer_conversion_settings['index_2_conversion_orientation']
)
)

# Create SampleSheet
sample_sheet = []

# Header
sample_sheet.append('[Header]')
sample_sheet.append('FileFormatVersion,2')
sample_sheet.append('RunName,{0}'.format(process.udf['Experiment Name']))

# Reads
sample_sheet.append('[Reads]')
sample_sheet.append('Read1Cycles,{0}'.format(process.udf['Read 1 Cycles']))
sample_sheet.append('Read2Cycles,{0}'.format(process.udf['Read 2 Cycles']))
sample_sheet.append('Index1Cycles,{0}'.format(process.udf['Index Read 1']))
sample_sheet.append('Index2Cycles,{0}'.format(process.udf['Index Read 2']))

# BCLConvert_Settings
sample_sheet.append('[BCLConvert_Settings]')
sample_sheet.append('AdapterRead1,{0}'.format(process.udf['Adapter']))
sample_sheet.append('AdapterRead2,{0}'.format(process.udf['Adapter Read 2']))
sample_sheet.append('FindAdaptersWithIndels,true')
sample_sheet.append('BarcodeMismatchesIndex1,0')
sample_sheet.append('BarcodeMismatchesIndex2,0')

# BCLConvert_Data
sample_sheet.append('[BCLConvert_Data]')
sample_sheet = [
# Header
"[Header]",
"FileFormatVersion,2",
f"InstrumentPlatform,{sequencer_conversion_settings['instrument_platform']}",
f"IndexOrientation,{sequencer_conversion_settings['index_orientation']}",
f"RunName,{process.udf['Experiment Name']}",
# Reads
"[Reads]",
f"Read1Cycles,{process.udf['Read 1 Cycles']}",
f"Read2Cycles,{process.udf['Read 2 Cycles']}",
f"Index1Cycles,{process.udf['Index Read 1']}",
f"Index2Cycles,{process.udf['Index Read 2']}",
# BCLConvert_Settings
"[BCLConvert_Settings]",
f"SoftwareVersion,{sequencer_conversion_settings['software_version']}",
f"FastqCompressionFormat,{sequencer_conversion_settings['fastq_compression_format']}",
f"AdapterRead1,{process.udf['Adapter']}",
f"AdapterRead2,{process.udf['Adapter Read 2']}",
"FindAdaptersWithIndels,TRUE",
"BarcodeMismatchesIndex1,0",
"BarcodeMismatchesIndex2,0",
# BCLConvert_Data
"[BCLConvert_Data]"
]

# Set header for single or multiple lanes conversion
bcl_convert_data_header = "Sample_ID,index,index2,OverrideCycles,Sample_Project"
if len(samplesheet_samples) == 1: # All samples on all lanes
lane = 0
sample_sheet.append('Sample_ID,index,index2,OverrideCycles,Sample_Project')
for sample in samplesheet_samples[lane]:
sample_sheet.append(
'{sample_name},{index_1},{index_2},{override_cycles},{project}'.format(
multiple_lanes = False
else:
multiple_lanes = True
bcl_convert_data_header = f"Lane,{bcl_convert_data_header}" # Add lane column to header if multiple lanes conversion
sample_sheet.append(bcl_convert_data_header)

# Add samples to SampleSheet
for lane, lane_samples in enumerate(samplesheet_samples):
for sample in lane_samples:
bcl_convert_data_row = "{sample_name},{index_1},{index_2},{override_cycles},{project}".format(
sample_name=sample,
index_1=samplesheet_samples[lane][sample]['index_1'],
index_2=samplesheet_samples[lane][sample]['index_2'],
override_cycles=samplesheet_samples[lane][sample]['override_cycles'],
project=samplesheet_samples[lane][sample]['project']
)
)
else: # Samples divided over lanes
sample_sheet.append('Lane,Sample_ID,index,index2,OverrideCycles,Sample_Project')
for lane, lane_samples in enumerate(samplesheet_samples):
for sample in lane_samples:
sample_sheet.append(
'{lane},{sample_name},{index_1},{index_2},{override_cycles},{project}'.format(
lane=lane+1,
sample_name=sample,
index_1=samplesheet_samples[lane][sample]['index_1'],
index_2=samplesheet_samples[lane][sample]['index_2'],
override_cycles=samplesheet_samples[lane][sample]['override_cycles'],
project=samplesheet_samples[lane][sample]['project']
)
)
if multiple_lanes: # Add lane number to row if multiple lanes conversion
bcl_convert_data_row = f"{lane+1},{bcl_convert_data_row}"
sample_sheet.append(bcl_convert_data_row)

# Write SampleSheet to file
output_file.write('\n'.join(sample_sheet))
6 changes: 4 additions & 2 deletions clarity_epp/export/manual_pipetting.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,8 @@ def samplesheet_multiplex_sequence_pool(lims, process_id, output_file):
# print header
output_file.write('Naam\tuL\n')

print(total_sample_count)
print(input_pools)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Print statement necessary?

# Last calculations and print sample
for input_pool in input_pools:
input_pool_load_pM = (float(process.udf['Dx Laadconcentratie (pM)'])/total_sample_count) * input_pool['sample_count']
Expand Down Expand Up @@ -681,9 +683,9 @@ def samplesheet_pool_samples(lims, process_id, output_file):
input_sample = input_artifact.samples[0] # Assume one sample

if 'Dx Exoomequivalent' in input_sample.udf:
volume = 5 * input_sample.udf['Dx Exoomequivalent']
volume = 4 * input_sample.udf['Dx Exoomequivalent']
else:
volume = 5
volume = 4

output_file.write(
'{sample}\t{container}\t{well}\t{pool}\t{volume}\n'.format(
Expand Down
2 changes: 1 addition & 1 deletion clarity_epp/export/tapestation.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def samplesheet(lims, process_id, output_file):
process = Process(lims, id=process_id)
well_plate = {}

for placement, artifact in process.output_containers()[0].placements.iteritems():
for placement, artifact in process.output_containers()[0].placements.items():
placement = ''.join(placement.split(':'))
well_plate[placement] = artifact.name.split('_')[0]

Expand Down
20 changes: 16 additions & 4 deletions config.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,13 +91,25 @@
]

# BCLConvert conversion settings
index_2_orientation = {
sequencer_conversion_settings = {
# Orientation options: F=forward or RC=reverse complement
# https://knowledge.illumina.com/software/general/software-general-reference_material-list/000001800
'Dx Library pool denatureren en laden (NovaSeq) v1.3': 'RC',
'Dx Library pool denatureren en laden (NovaSeqXPlus) v1.0': 'F',
'Dx Library pool denatureren en laden (NovaSeq) v1.3': {
'index_2_conversion_orientation': 'RC',
'instrument_platform': 'NovaSeq',
'index_orientation': 'Forward',
'software_version': '4.1.7',
'fastq_compression_format': 'gzip',
},
'Dx Library pool denatureren en laden (NovaSeqXPlus) v1.0': {
'index_2_conversion_orientation': 'F',
'instrument_platform': 'NovaSeqXSeries',
'index_orientation': 'Forward',
'software_version': '4.1.7',
'fastq_compression_format': 'gzip',
},
}
conversion_settings = {
sample_conversion_settings = {
'default': {
'project': 'unknown',
'split_project': False,
Expand Down
Loading