From df3b12544d15fb26f2b1d95da16ccbc6ed3756c5 Mon Sep 17 00:00:00 2001 From: bennwei Date: Tue, 19 Oct 2021 08:10:47 -0700 Subject: [PATCH 1/5] keep negated HPO terms --- tests/test_extract.py | 30 ++++++++++++++++++++++++++++++ txt2hpo/extract.py | 6 ++++++ 2 files changed, 36 insertions(+) diff --git a/tests/test_extract.py b/tests/test_extract.py index c1db996..eb4a563 100644 --- a/tests/test_extract.py +++ b/tests/test_extract.py @@ -424,3 +424,33 @@ def test_handling_term_hyphenation(self): # replace hyphens with space hpids = extract.hpo(test[0].replace('-', ' ')).hpids self.assertEqual(hpids, [test[1]]) + + def test_negated_hpo_retention(self): + extract = Extractor(correct_spelling=False, + remove_overlapping=True, + resolve_conflicts=True, + max_neighbors=2, + phenotypes_only=False, + remove_negated=True) + + resp = extract.hpo("Patient has developmental delay but no hypotonia") + self.assertEqual(["HP:0001252"], resp.negated_hpids) + + resp = extract.hpo("developmental delay and a wide mouth") + self.assertEqual([], resp.negated_hpids) + + resp = extract.hpo("developmental delay with no wide mouth") + self.assertEqual(['HP:0000154'], resp.negated_hpids) + + resp = extract.hpo("developmental delay without a wide mouth") + self.assertEqual(['HP:0000154'], resp.negated_hpids) + + resp = extract.hpo("no developmental delay, but has a wide mouth") + self.assertEqual(['HP:0001263'], resp.negated_hpids) + + resp = extract.hpo("the patient has a wide mouth but no developmental delay.") + self.assertEqual(['HP:0001263'], resp.negated_hpids) + + resp = extract.hpo("the patient does not have either a wide mouth or developmental delay.") + self.assertEqual(set(['HP:0000154', 'HP:0001263']), set(resp.negated_hpids)) + diff --git a/txt2hpo/extract.py b/txt2hpo/extract.py index dd6304f..df70c08 100644 --- a/txt2hpo/extract.py +++ b/txt2hpo/extract.py @@ -22,6 +22,7 @@ def __init__(self, entries=None, model=None, negation_model=None): self.entries = entries self.model = model self.negation_model = negation_model + self.negated_entries = [] def add(self,entry): self.entries += entry @@ -36,6 +37,7 @@ def remove_tagged(self, tag, state=True, status=True): to_remove = [entry for entry in self.entries if entry[tag] != state] for element in to_remove: self.remove(element) + self.negated_entries.append(element) def detect_negation(self): for entry in self.entries: @@ -124,6 +126,10 @@ def resolve_conflicts(self): def hpids(self): return list(set(np.array([x['hpid'] for x in self.entries]).flatten())) + @property + def negated_hpids(self): + return list(set(np.array([x['hpid'] for x in self.negated_entries]).flatten())) + @property def json(self): result = self.entries_sans_context.copy() From 225b973a5e149c86cc53e623d17726fb8a2891a9 Mon Sep 17 00:00:00 2001 From: Eric Liao Date: Wed, 20 Oct 2021 10:01:59 -0700 Subject: [PATCH 2/5] Update README.md update the README example with remove_negated=True to show the new output of negated terms --- README.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1379c62..d45a3f3 100644 --- a/README.md +++ b/README.md @@ -52,8 +52,8 @@ print(result.hpids) ``` `txt2hpo` handles negation using [negspaCy](https://spacy.io/universe/project/negspacy). To remove negated phenotypes set `remove_negated` flag to True. + Both the extracted and negated HPO terms can be retrieved. - ```python from txt2hpo.extract import Extractor extract = Extractor(remove_negated=True) @@ -62,7 +62,10 @@ result = extract.hpo("patient has developmental delay but no hypotonia") print(result.hpids) ["HP:0001263"] - + +print(result.negated_hpids) + +["HP:0001252"] ``` From ba859b07ab7f4f5a9208c01cf472ccf7130c02e7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 20 Oct 2021 17:07:11 +0000 Subject: [PATCH 3/5] Bump nltk from 3.4.5 to 3.6.5 Bumps [nltk](https://github.com/nltk/nltk) from 3.4.5 to 3.6.5. - [Release notes](https://github.com/nltk/nltk/releases) - [Changelog](https://github.com/nltk/nltk/blob/develop/ChangeLog) - [Commits](https://github.com/nltk/nltk/compare/3.4.5...3.6.5) --- updated-dependencies: - dependency-name: nltk dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- Pipfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Pipfile b/Pipfile index a772239..25c8958 100644 --- a/Pipfile +++ b/Pipfile @@ -9,7 +9,7 @@ pytest = "*" pytest-cov = "*" [packages] -nltk = "==3.4.5" +nltk = "==3.6.5" spacy = "==2.2.4" scispacy = "==0.2.4" negspacy = "==0.1.9" From 6660f9b94b94e77ca11542e791777e516198641c Mon Sep 17 00:00:00 2001 From: rebecca810 Date: Wed, 20 Oct 2021 14:37:35 -0400 Subject: [PATCH 4/5] Update pythonpackage.yml update github workflow to use unittest instead of tests --- .github/workflows/pythonpackage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index e29f8bc..42296e1 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -26,4 +26,4 @@ jobs: - name: Test with unittest run: | - pytest tests + python -m unittest From 603e84b67335cefc172274f92c1514698184afa1 Mon Sep 17 00:00:00 2001 From: rebecca810 Date: Wed, 10 Nov 2021 09:35:22 -0500 Subject: [PATCH 5/5] version bump --- txt2hpo/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/txt2hpo/__init__.py b/txt2hpo/__init__.py index 0a99c64..c503e66 100644 --- a/txt2hpo/__init__.py +++ b/txt2hpo/__init__.py @@ -1,2 +1,2 @@ __project__ = 'txt2hpo' -__version__ = '0.2.3' \ No newline at end of file +__version__ = '2021.0'