|
1 | 1 | # cython: infer_types=True, boundscheck=False
|
2 | 2 | # distutils: language=c++
|
3 |
| -""" NeuralCoref resolution spaCy v2.0 pipeline component |
| 3 | +""" NeuralCoref resolution spaCy v2.0 pipeline component |
4 | 4 | Custom pipeline components: https://spacy.io/usage/processing-pipelines#custom-components
|
5 | 5 | Compatible with: spaCy v2.0.0+
|
6 | 6 | """
|
@@ -126,7 +126,7 @@ NSUBJ_OR_DEP = ["nsubj", "dep"]
|
126 | 126 | CONJ_OR_PREP = ["conj", "prep"]
|
127 | 127 | LEAVE_DEP = ["det", "compound", "appos"]
|
128 | 128 | KEEP_DEP = ["nsubj", "dobj", "iobj", "pobj"]
|
129 |
| -REMOVE_POS = ["CCONJ", "INTJ", "ADP"] |
| 129 | +REMOVE_POS = ["CCONJ", "SCONJ", "INTJ", "ADP"] |
130 | 130 | LOWER_NOT_END = ["'s", ',', '.', '!', '?', ':', ';']
|
131 | 131 | PUNCTS = [".", "!", "?"]
|
132 | 132 | ACCEPTED_ENTS = ["PERSON", "NORP", "FACILITY", "ORG", "GPE", "LOC", "PRODUCT", "EVENT", "WORK_OF_ART", "LANGUAGE"]
|
@@ -327,7 +327,7 @@ cdef (int, int) enlarge_span(TokenC* doc_c, int i, int sent_start, int sent_end,
|
327 | 327 | maxchild_idx -= 1 # We don't want mentions finishing with 's or conjunctions/punctuation
|
328 | 328 | # if debug: print("maxchild_idx", maxchild_idx)
|
329 | 329 | while minchild_idx <= maxchild_idx and minchild_idx < sent_end - 1 \
|
330 |
| - and (inside(doc_c[minchild_idx].pos, hashes.remove_pos) |
| 330 | + and (inside(doc_c[minchild_idx].pos, hashes.remove_pos) |
331 | 331 | or inside(doc_c[minchild_idx].lex.lower, hashes.lower_not_end)):
|
332 | 332 | minchild_idx += 1 # We don't want mentions starting with 's or conjunctions/punctuation
|
333 | 333 | # if debug: print("minchild_idx", minchild_idx)
|
@@ -882,7 +882,7 @@ cdef class NeuralCoref(object):
|
882 | 882 | if tuned and hash_w in self.tuned_vectors:
|
883 | 883 | return self.tuned_vectors[hash_w]
|
884 | 884 | return self.get_static(hash_w)
|
885 |
| - |
| 885 | + |
886 | 886 | def get_word_in_sentence(self, int i, Span sent):
|
887 | 887 | if i < sent.start or i >= sent.end:
|
888 | 888 | return self.tuned_vectors[self.hashes.missing_word]
|
|
0 commit comments