Skip to content

Commit 02d9160

Browse files
author
Corentin
committed
Format before push
1 parent d544e32 commit 02d9160

File tree

7 files changed

+407
-417
lines changed

7 files changed

+407
-417
lines changed

app/historeport/ocr.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,10 @@ def _spacy_ngrams(self, text_section: str) -> dict:
157157
sent, flag_neg = self._detect_negation(sent_str)
158158
ngrams_generator = ngrams(sent, (1, 2, 3, 4, 5, 6), filter_punct=True)
159159
for i in ngrams_generator:
160-
full_ngrams.append((i.text.lower(), 0 if flag_neg else 1))
160+
pos_ngrams = " ".join(self.sentence_as_list).find(i.text)
161+
full_ngrams.append(
162+
(i.text.lower(), 0 if flag_neg else 1, pos_ngrams)
163+
)
161164

162165
return full_ngrams
163166

@@ -188,14 +191,15 @@ def _match_ngram_ontology(self, full_ngrams) -> list:
188191
for onto_index, j in enumerate(full_onto_processed):
189192
score = fuzz.ratio(i.lower(), j.lower())
190193
if score >= 85:
191-
# [neg_flag, ngram, match_term, node_id, score]
194+
# [neg_flag, ngram, match_term, node_id, score, match pos in string]
192195
match_list.append(
193196
[
194197
full_ngrams[n_gram_index][1],
195198
i,
196199
j,
197200
ontology_terms[onto_index][0],
198201
score,
202+
full_ngrams[n_gram_index][2],
199203
]
200204
)
201205
return match_list
@@ -218,7 +222,7 @@ def analyze_text(self) -> list:
218222
First value is the neg flag, second value is the ngram, third value
219223
is the matching term, fourth value is the node ID, fifth value is the score, last value is the match position in the text.
220224
"""
221-
full_ngrams = self._spacy_ngrams(self.raw_text.replace('\n'," "))
225+
full_ngrams = self._spacy_ngrams(self.raw_text.replace("\n", " "))
222226
match_list = self._match_ngram_ontology(full_ngrams)
223227
return match_list
224228

app/historeport/static/historeport.js

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -442,9 +442,10 @@ $(function () {
442442
let absent_feat_overview_auto = document.getElementById(
443443
"feature-absent-auto"
444444
);
445-
present_feat_overview_auto.innerHTML = `ID | Vocab. Term | Text | Score<br />`;
445+
present_feat_overview_auto.innerHTML =
446+
"ID | Vocab. Term | Pos. in Text | Text | Score<br />";
446447
absent_feat_overview_auto.innerHTML =
447-
"ID | Vocab. Term | Text | Score<br />";
448+
"ID | Vocab. Term | Pos. in Text | Text | Score<br />";
448449

449450
// For each entry in our match list, add it to the corresponding accordion
450451
for (const [key, value] of Object.entries(
@@ -457,6 +458,8 @@ $(function () {
457458
" | " +
458459
value[2] +
459460
" | " +
461+
value[5] +
462+
" | " +
460463
value[1] +
461464
" | " +
462465
value[4] +
@@ -468,6 +471,8 @@ $(function () {
468471
" | " +
469472
value[2] +
470473
" | " +
474+
value[5] +
475+
" | " +
471476
value[1] +
472477
" | " +
473478
value[4] +

data/database/app.db.demo

4 KB
Binary file not shown.

0 commit comments

Comments
 (0)