Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correctly handle underscores in sample sheet parsing #1005

Merged
merged 3 commits into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions micall/tests/test_sample_sheet_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,52 @@ def test_extra_commas(self):
ss = sample_sheet_parser(StringIO(stub_sample_sheet))
self.assertEqual(ss["Experiment Name"], "10-Jul-2014")

def test_underscores_in_sample_name(self):
    """
    Check that each data row is split into the expected filename, project,
    sample and sample number when underscores appear in the sheet's names.
    """

    # Note: the Description entries of the second row use a name with an
    # extra underscore (Sample2_Foo_Proj2), unlike its Sample_Name column.
    sheet_text = """
[Header]
IEMFileVersion,3
Investigator Name,RL
Project Name,10-Jul-2014_v1test
Experiment Name,10-Jul-2014_v1test
Date,07/10/2014
Workflow,GenerateFASTQ
Assay,Nextera
Description,Nextera
Chemistry,Amplicon
[Reads]
251
251
[Settings]
[Data]
Sample_ID,Sample_Name,Sample_Plate,Sample_Well,index,index2,Sample_Project,Description,GenomeFolder
CFE_SomeId_10-Jul-2014_N501-N701_Sample1_Proj1,Sample1_Proj1,10-Jul-2014_testing,N/A,ACGTACGT,TGCATGCA,\
10-Jul-2014_testing,Research:Sample1_Proj1:TRUE Comments:Sample1_Proj1:thisiscommentone \
Disablecontamcheck:Sample1_Proj1:FALSE,
CFE_SomeId_10-Jul-2014_N501-N702_Sample2_Proj2,Sample2_Proj2,10-Jul-2014_testing,N/A,AAAAGGGG,CCCCTTTT,\
10-Jul-2014_testing,Research:Sample2_Foo_Proj2:FALSE Comments:Sample2_Foo_Proj2:thisiscommenttwo \
Chemistry:Sample2_Foo_Proj2:BreakingBad Disablecontamcheck:Sample2_Foo_Proj2:TRUE,
"""

    parsed_sheet = sample_sheet_parser(StringIO(sheet_text))
    data_rows = parsed_sheet['DataSplit']
    assert len(data_rows) == 2

    # Expected value of every checked field, listed per row in sheet order.
    expected_by_field = (
        ('filename', ('Sample1-Proj1_S1', 'Sample2-Proj2_S2')),
        ('project', ('Proj1', 'Proj2')),
        ('sample', ('Sample1', 'Sample2')),
        ('sample_number', ('S1', 'S2')),
    )
    for field, wanted_values in expected_by_field:
        for row, wanted in zip(data_rows, wanted_values):
            assert row[field] == wanted


def test_read_sample_sheet_overrides(tmpdir):
sample_sheet_path = Path(str(tmpdir)) / 'SampleSheet.csv'
Expand Down
7 changes: 5 additions & 2 deletions micall/utils/sample_sheet_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,9 +179,12 @@ def sample_sheet_parser(handle):
samp, proj, val = None, None, None
if sample_sheet_version == 1:
sj, val = elem.split(':')
samp, proj = sj.split(project_delimiter_v1)
components = sj.split(project_delimiter_v1)
samp, proj = (project_delimiter_v1.join(components[:-1]), components[-1])
elif sample_sheet_version == 2:
samp, proj, val = elem.split(project_delimiter_v2)
components = elem.split(project_delimiter_v2)
samp, proj, val = (project_delimiter_v2.join(components[:-2]),
components[-2], components[-1])

if samp == entry['sample'] and proj == entry['project']:
if name == 'Research':
Expand Down
Loading