Skip to content

Commit 0ddae3f

Browse files
authored
fix: add DOI support + handle mixed references for sources (#28)
close #27
1 parent 37cdaeb commit 0ddae3f

3 files changed

Lines changed: 60 additions & 85 deletions

File tree

clinvar_this/io/gks_json/base.py

Lines changed: 40 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -397,31 +397,62 @@ def _get_citations(
397397
Citations may be sourced from:
398398
- `citations` extensions attached to evidence lines
399399
- PubMed IDs (`pmid`) on reported documents
400+
- DOI IDs (`doi`) on reported documents
400401
- IRI reference URLs attached to reported documents
401402
402-
If all reported documents contain PMIDs, citations are submitted using
403-
PubMed database identifiers. Otherwise, citations fall back to URL-based
404-
references.
403+
Each citation is submitted using the most specific reference available:
404+
PMID, DOI, or URL.
405405
406406
:param evidence_lines: Evidence lines that may contain supporting citation
407407
information.
408408
:return: A deduplicated list of submission citations.
409409
"""
410410

411-
citations: list[SubmissionCitation] = []
412-
reported_in_documents: list[Document | iriReference] = []
413-
414411
def add_citation(citation: SubmissionCitation) -> None:
415412
"""Append a citation if it has not already been added."""
416413
if citation not in citations:
417414
citations.append(citation)
418415

416+
def add_reference(
417+
reference: str | iriReference | Document | None,
418+
) -> None:
419+
"""Convert a reference into a SubmissionCitation."""
420+
if reference is None:
421+
return
422+
423+
if isinstance(reference, str):
424+
add_citation(SubmissionCitation(url=reference))
425+
return
426+
427+
if isinstance(reference, iriReference):
428+
add_citation(SubmissionCitation(url=reference.root))
429+
return
430+
431+
if isinstance(reference, Document):
432+
if reference.pmid:
433+
add_citation(
434+
SubmissionCitation(
435+
db=CitationDb.PUBMED,
436+
id=reference.pmid,
437+
)
438+
)
439+
elif reference.doi:
440+
add_citation(
441+
SubmissionCitation(
442+
db=CitationDb.DOI,
443+
id=reference.doi,
444+
)
445+
)
446+
447+
citations: list[SubmissionCitation] = []
448+
reported_in_documents: list[Document | iriReference] = []
449+
419450
for evidence_line in evidence_lines:
420451
# Temporary support for citations stored in extensions
421452
for ext in evidence_line.extensions or []:
422453
if ext.name == "citations" and isinstance(ext.value, list):
423-
for citation_url in ext.value:
424-
add_citation(SubmissionCitation(url=citation_url))
454+
for citation in ext.value:
455+
add_reference(citation)
425456

426457
reported_in_documents.extend(evidence_line.reportedIn or [])
427458

@@ -434,32 +465,8 @@ def add_citation(citation: SubmissionCitation) -> None:
434465

435466
reported_in_documents.extend(reported_in or [])
436467

437-
documents_have_all_pmids = all(
438-
isinstance(document, Document) and document.pmid
439-
for document in reported_in_documents
440-
)
441-
442-
if documents_have_all_pmids:
443-
for document in reported_in_documents:
444-
add_citation(
445-
SubmissionCitation(
446-
db=CitationDb.PUBMED,
447-
id=document.pmid,
448-
)
449-
)
450-
451-
return citations
452-
453468
for document in reported_in_documents:
454-
if isinstance(document, Document):
455-
if document.pmid:
456-
add_citation(
457-
SubmissionCitation(
458-
url=f"https://pubmed.ncbi.nlm.nih.gov/{document.pmid}"
459-
)
460-
)
461-
elif isinstance(document, iriReference):
462-
add_citation(SubmissionCitation(url=document.root))
469+
add_reference(document)
463470

464471
return citations
465472

@@ -525,27 +532,6 @@ def _build_shared_submission_kwargs(
525532
RecordStatus.NOVEL if clinvar_accession is None else RecordStatus.UPDATE
526533
)
527534

528-
clinvar_accession = next(
529-
(
530-
str(extension.value)
531-
for extension in statement.extensions or []
532-
if extension.name == "clinvar_accession"
533-
),
534-
None,
535-
)
536-
537-
if clinvar_accession and not CLINVAR_ACCESSION_RE.match(clinvar_accession):
538-
logger.warning(
539-
"Statement ID %s ClinVar accession %s does not match ClinVar regex",
540-
statement.id,
541-
clinvar_accession,
542-
)
543-
clinvar_accession = None
544-
545-
record_status = (
546-
RecordStatus.NOVEL if clinvar_accession is None else RecordStatus.UPDATE
547-
)
548-
549535
return {
550536
"clinvar_accession": clinvar_accession,
551537
"record_status": record_status,

docs/file_formats.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,7 @@ Supported citation sources include:
286286

287287
- ``citations`` extensions attached to evidence lines
288288
- PubMed IDs (``pmid``) on reported documents
289+
- DOI IDs (``doi``) on reported documents
289290
- IRI reference URLs attached to reported documents
290291

291292
Comment Details

tests/clinvar_this/io/gks_json/test_clinical_impact_transformer.py

Lines changed: 19 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -172,13 +172,13 @@ def civic_aid7_submission():
172172
comment="Combination treatment of BRAF inhibitor dabrafenib and MEK inhibitor trametinib is recommended for adjuvant treatment of stage III or recurrent melanoma with BRAF V600E mutation detected by the approved THxID kit, as well as first line treatment for metastatic melanoma. The treatments are FDA approved based on studies including the Phase III COMBI-V, COMBI-D and COMBI-AD Trials. Combination therapy is now recommended above BRAF inhibitor monotherapy. Cutaneous squamous-cell carcinoma and keratoacanthoma occur at lower rates with combination therapy than with BRAF inhibitor alone.",
173173
citation=[
174174
SubmissionCitation(url="https://civicdb.org/links/evidence/3758"),
175-
SubmissionCitation(url="https://pubmed.ncbi.nlm.nih.gov/25399551"),
175+
SubmissionCitation(db=CitationDb.PUBMED, id="25399551"),
176176
SubmissionCitation(url="https://civicdb.org/links/evidence/6178"),
177-
SubmissionCitation(url="https://pubmed.ncbi.nlm.nih.gov/28891408"),
177+
SubmissionCitation(db=CitationDb.PUBMED, id="28891408"),
178178
SubmissionCitation(url="https://civicdb.org/links/evidence/6940"),
179-
SubmissionCitation(url="https://pubmed.ncbi.nlm.nih.gov/23020132"),
179+
SubmissionCitation(db=CitationDb.PUBMED, id="23020132"),
180180
SubmissionCitation(url="https://civicdb.org/links/evidence/6938"),
181-
SubmissionCitation(url="https://pubmed.ncbi.nlm.nih.gov/25265492"),
181+
SubmissionCitation(db=CitationDb.PUBMED, id="25265492"),
182182
],
183183
drug_for_therapeutic_assertion="Dabrafenib;Trametinib",
184184
),
@@ -228,39 +228,27 @@ def civic_tr_submissions(civic_aid7_submission, amp_asco_cap_assertion_criteria)
228228
SubmissionCitation(
229229
url="https://civicdb.org/links/evidence/2997"
230230
),
231-
SubmissionCitation(
232-
url="https://pubmed.ncbi.nlm.nih.gov/23982599"
233-
),
231+
SubmissionCitation(db=CitationDb.PUBMED, id="23982599"),
234232
SubmissionCitation(
235233
url="https://civicdb.org/links/evidence/2629"
236234
),
237-
SubmissionCitation(
238-
url="https://pubmed.ncbi.nlm.nih.gov/18408761"
239-
),
235+
SubmissionCitation(db=CitationDb.PUBMED, id="18408761"),
240236
SubmissionCitation(
241237
url="https://civicdb.org/links/evidence/982"
242238
),
243-
SubmissionCitation(
244-
url="https://pubmed.ncbi.nlm.nih.gov/24439929"
245-
),
239+
SubmissionCitation(db=CitationDb.PUBMED, id="24439929"),
246240
SubmissionCitation(
247241
url="https://civicdb.org/links/evidence/968"
248242
),
249-
SubmissionCitation(
250-
url="https://pubmed.ncbi.nlm.nih.gov/26515464"
251-
),
243+
SubmissionCitation(db=CitationDb.PUBMED, id="26515464"),
252244
SubmissionCitation(
253245
url="https://civicdb.org/links/evidence/883"
254246
),
255-
SubmissionCitation(
256-
url="https://pubmed.ncbi.nlm.nih.gov/22452895"
257-
),
247+
SubmissionCitation(db=CitationDb.PUBMED, id="22452895"),
258248
SubmissionCitation(
259249
url="https://civicdb.org/links/evidence/879"
260250
),
261-
SubmissionCitation(
262-
url="https://pubmed.ncbi.nlm.nih.gov/23816960"
263-
),
251+
SubmissionCitation(db=CitationDb.PUBMED, id="23816960"),
264252
],
265253
drug_for_therapeutic_assertion="Afatinib",
266254
),
@@ -308,9 +296,9 @@ def civic_aid9_submission():
308296
citation=[
309297
# SubmissionCitation(url="https://identifiers.org/civic.mpid:1594"),
310298
SubmissionCitation(url="https://civicdb.org/links/evidence/4846"),
311-
SubmissionCitation(url="https://pubmed.ncbi.nlm.nih.gov/24705250"),
299+
SubmissionCitation(db=CitationDb.PUBMED, id="24705250"),
312300
SubmissionCitation(url="https://civicdb.org/links/evidence/6955"),
313-
SubmissionCitation(url="https://pubmed.ncbi.nlm.nih.gov/24705254"),
301+
SubmissionCitation(db=CitationDb.PUBMED, id="24705254"),
314302
],
315303
),
316304
)
@@ -365,17 +353,17 @@ def civic_aid20_submission():
365353
citation=[
366354
# SubmissionCitation(url="https://identifiers.org/civic.mpid:12"),
367355
SubmissionCitation(url="https://civicdb.org/links/evidence/7159"),
368-
SubmissionCitation(url="https://pubmed.ncbi.nlm.nih.gov/24112392"),
356+
SubmissionCitation(db=CitationDb.PUBMED, id="24112392"),
369357
SubmissionCitation(url="https://civicdb.org/links/evidence/7158"),
370-
SubmissionCitation(url="https://pubmed.ncbi.nlm.nih.gov/21641636"),
358+
SubmissionCitation(db=CitationDb.PUBMED, id="21641636"),
371359
SubmissionCitation(url="https://civicdb.org/links/evidence/7157"),
372-
SubmissionCitation(url="https://pubmed.ncbi.nlm.nih.gov/21502544"),
360+
SubmissionCitation(db=CitationDb.PUBMED, id="21502544"),
373361
SubmissionCitation(url="https://civicdb.org/links/evidence/7156"),
374-
SubmissionCitation(url="https://pubmed.ncbi.nlm.nih.gov/20008640"),
362+
SubmissionCitation(db=CitationDb.PUBMED, id="20008640"),
375363
SubmissionCitation(url="https://civicdb.org/links/evidence/103"),
376-
SubmissionCitation(url="https://pubmed.ncbi.nlm.nih.gov/24594804"),
364+
SubmissionCitation(db=CitationDb.PUBMED, id="24594804"),
377365
SubmissionCitation(url="https://civicdb.org/links/evidence/1552"),
378-
SubmissionCitation(url="https://pubmed.ncbi.nlm.nih.gov/27404270"),
366+
SubmissionCitation(db=CitationDb.PUBMED, id="27404270"),
379367
],
380368
),
381369
)
@@ -522,7 +510,7 @@ def test_citations(
522510
for citation in civic_aid20_submission_cpy["clinical_impact_classification"][
523511
"citation"
524512
]:
525-
if citation["url"].startswith("https://civicdb.org"):
513+
if (citation.get("url") or "").startswith("https://civicdb.org"):
526514
new_citations.append(citation)
527515
civic_aid20_submission_cpy["clinical_impact_classification"]["citation"] = (
528516
new_citations

0 commit comments

Comments
 (0)