Merge branch 'hotfix/0.9.1' into master

emfomy · emfomy · commit 18fc3a8c28ad · 2020-08-28T15:25:57.000+08:00
diff --git a/ckipnlp/__init__.py b/ckipnlp/__init__.py
@@ -10,7 +10,7 @@
 __copyright__ = '2018-2020 CKIP Lab'
 
 __title__ = 'CKIPNLP'
-__version__ = '0.9.0'
+__version__ = '0.9.1'
 __description__ = 'CKIP CoreNLP'
 __license__ = 'CC BY-NC-SA 4.0'
 
diff --git a/ckipnlp/container/parse.py b/ckipnlp/container/parse.py
@@ -2,7 +2,7 @@
 # -*- coding:utf-8 -*-
 
 """
-This module provides containers for parse sentences.
+This module provides containers for parsed sentences.
 """
 
 __author__ = 'Mu Yang <http://muyang.pro>'
diff --git a/ckipnlp/container/util/parse_tree.py b/ckipnlp/container/util/parse_tree.py
@@ -2,7 +2,7 @@
 # -*- coding:utf-8 -*-
 
 """
-This module provides tree containers for sentence parse.
+This module provides tree containers for parsed sentences.
 """
 
 __author__ = 'Mu Yang <http://muyang.pro>'
@@ -23,7 +23,7 @@
     Node as _Node,
 )
 
-from ckipnlp.data.constituency import (
+from ckipnlp.data.conparse import (
     SUBJECT_ROLES as _SUBJECT_ROLES,
     NEUTRAL_ROLES as _NEUTRAL_ROLES,
 )
diff --git a/ckipnlp/data/conparse.py b/ckipnlp/data/conparse.py
diff --git a/ckipnlp/driver/__init__.py b/ckipnlp/driver/__init__.py
@@ -17,7 +17,7 @@
 
 from .classic import (
     CkipClassicWordSegmenter,
-    CkipClassicConstituencyParser,
+    CkipClassicConParser,
 )
 
 from .ss import (
diff --git a/ckipnlp/driver/classic.py b/ckipnlp/driver/classic.py
@@ -130,8 +130,8 @@ def _call(self, *, _wspos):
 
 ################################################################################################################################
 
-class CkipClassicConstituencyParser(_BaseDriver):
-    """The CKIP sentence parsing driver with CkipClassic backend.
+class CkipClassicConParser(_BaseDriver):
+    """The CKIP constituency parsing driver with CkipClassic backend.
 
     Arguments
     ---------
@@ -140,17 +140,17 @@ class CkipClassicConstituencyParser(_BaseDriver):
 
     .. method:: __call__(*, ws, pos)
 
-        Apply sentence parsing.
+        Apply constituency parsing.
 
         Parameters
             - **ws** (:class:`~ckipnlp.container.text.TextParagraph`) — The word-segmented sentences.
             - **pos** (:class:`~ckipnlp.container.text.TextParagraph`) — The part-of-speech sentences.
 
         Returns
-            **constituency** (:class:`~ckipnlp.container.parse.ParseSentence`) — The constituency-parsing sentences.
+            **conparse** (:class:`~ckipnlp.container.parse.ParseParagraph`) — The constituency-parsing sentences.
     """
 
-    driver_type = 'constituncy_parser'
+    driver_type = 'con_parser'
     driver_family = 'classic'
     driver_inputs = ('ws', 'pos',)
 
@@ -169,9 +169,9 @@ def _call(self, *, ws, pos):
         assert isinstance(pos, _SegParagraph)
 
 
-        constituency_text = []
+        conparse_text = []
         for ws_sent, pos_sent in zip(ws, pos):
-            constituency_sent_text = []
+            conparse_sent_text = []
             ws_clause = []
             pos_clause = []
             for ws_token, pos_token in _chain(zip(ws_sent, pos_sent), [(None, None),]):
@@ -181,16 +181,16 @@ def _call(self, *, ws, pos):
                     continue
 
                 # Segment clauses by punctuations
-                if pos_token is None or pos_token.endswith('CATEGORY'):
+                if pos_token is None or (pos_token.endswith('CATEGORY') and pos_token != 'PAUSECATEGORY'):
                     if ws_clause:
                         wspos_clause_text = _WsPosSentence.to_text(ws_clause, pos_clause)
-                        for constituency_clause_text in self._core.apply_list([wspos_clause_text]):
-                            constituency_sent_text.append([self._normalize(constituency_clause_text), '',])
+                        for conparse_clause_text in self._core.apply_list([wspos_clause_text]):
+                            conparse_sent_text.append([self._normalize(conparse_clause_text), '',])
 
                     if ws_token:
-                        if not constituency_sent_text:
-                            constituency_sent_text.append([None, '',])
-                        constituency_sent_text[-1][1] += ws_token
+                        if not conparse_sent_text:
+                            conparse_sent_text.append([None, '',])
+                        conparse_sent_text[-1][1] += ws_token
 
                     ws_clause = []
                     pos_clause = []
@@ -199,10 +199,10 @@ def _call(self, *, ws, pos):
                     ws_clause.append(self._half2full(ws_token))
                     pos_clause.append(pos_token)
 
-            constituency_text.append(constituency_sent_text)
-        constituency = _ParseParagraph.from_list(constituency_text)
+            conparse_text.append(conparse_sent_text)
+        conparse = _ParseParagraph.from_list(conparse_text)
 
-        return constituency
+        return conparse
 
     @staticmethod
     def _half2full(text):
diff --git a/ckipnlp/driver/coref.py b/ckipnlp/driver/coref.py
@@ -25,7 +25,7 @@
     CorefParagraph as _CorefParagraph,
 )
 
-from ckipnlp.data.constituency import (
+from ckipnlp.data.conparse import (
     APPOSITION_ROLES as _APPOSITION_ROLES,
 )
 
@@ -49,12 +49,12 @@ class CkipCorefChunker(_BaseDriver):  # pylint: disable=too-few-public-methods
         lazy : bool
             Lazy initialize the driver.
 
-    .. method:: __call__(*, constituency)
+    .. method:: __call__(*, conparse)
 
         Apply coreference detection.
 
         Parameters
-            **constituency** (:class:`~ckipnlp.container.parse.ParseParagraph`) — The constituency-parsing sentences.
+            **conparse** (:class:`~ckipnlp.container.parse.ParseParagraph`) — The constituency-parsing sentences.
 
         Returns
             **coref** (:class:`~ckipnlp.container.coref.CorefParagraph`) — The coreference results.
@@ -67,15 +67,15 @@ class CkipCorefChunker(_BaseDriver):  # pylint: disable=too-few-public-methods
     def _init(self):
         pass
 
-    def _call(self, *, constituency):
-        assert isinstance(constituency, _ParseParagraph)
+    def _call(self, *, conparse):
+        assert isinstance(conparse, _ParseParagraph)
 
         # Convert to tree structure
         tree_list = [
             [
                 (clause.to_tree(), clause.delim,)
                 for clause in sent
-            ] for sent in constituency
+            ] for sent in conparse
         ]
 
         # Find coreference
diff --git a/ckipnlp/pipeline/coref.py b/ckipnlp/pipeline/coref.py
@@ -32,18 +32,18 @@ class CkipCorefDocument(_Mapping):
             The word-segmented sentences.
         pos : :class:`~ckipnlp.container.seg.SegParagraph`
             The part-of-speech sentences.
-        constituency : :class:`~ckipnlp.container.constituency.ParseParagraph`
+        conparse : :class:`~ckipnlp.container.parse.ParseParagraph`
             The constituency-parsing sentences.
         coref : :class:`~ckipnlp.container.coref.CorefParagraph`
             The coreference resolution results.
     """
 
-    __keys = ('ws', 'pos', 'constituency', 'coref',)
+    __keys = ('ws', 'pos', 'conparse', 'coref',)
 
-    def __init__(self, *, ws=None, pos=None, constituency=None, coref=None):
+    def __init__(self, *, ws=None, pos=None, conparse=None, coref=None):
         self.ws = ws
         self.pos = pos
-        self.constituency = constituency
+        self.conparse = conparse
         self.coref = coref
 
     def __len__(self):
@@ -74,8 +74,8 @@ class CkipCorefPipeline(_CkipPipeline):
         ner_chunker : str
             The type of named-entity recognition chunker.
 
-        sentence_parser : str
-            The type of sentence parser.
+        con_parser : str
+            The type of constituency parser.
 
         coref_chunker : str
             The type of coreference resolution chunker.
@@ -171,10 +171,10 @@ def get_coref(self, doc, corefdoc):
             )
 
         # Do parsing
-        if corefdoc.constituency is None:
-            corefdoc.constituency = self.get_constituency(corefdoc)
+        if corefdoc.conparse is None:
+            corefdoc.conparse = self.get_conparse(corefdoc)
 
         # Do coreference resolution
-        corefdoc.coref = self._coref_chunker(constituency=corefdoc.constituency)
+        corefdoc.coref = self._coref_chunker(conparse=corefdoc.conparse)
 
         return corefdoc.coref
diff --git a/ckipnlp/pipeline/kernel.py b/ckipnlp/pipeline/kernel.py
@@ -34,19 +34,19 @@ class CkipDocument(_Mapping):
             The part-of-speech sentences.
         ner : :class:`~ckipnlp.container.ner.NerParagraph`
             The named-entity recognition results.
-        constituency : :class:`~ckipnlp.container.parse.ParseParagraph`
+        conparse : :class:`~ckipnlp.container.parse.ParseParagraph`
             The constituency-parsing sentences.
     """
 
-    __keys = ('raw', 'text', 'ws', 'pos', 'ner', 'constituency',)
+    __keys = ('raw', 'text', 'ws', 'pos', 'ner', 'conparse',)
 
-    def __init__(self, *, raw=None, text=None, ws=None, pos=None, ner=None, constituency=None):
+    def __init__(self, *, raw=None, text=None, ws=None, pos=None, ner=None, conparse=None):
         self.raw = raw
         self.text = text
         self.ws = ws
         self.pos = pos
         self.ner = ner
-        self.constituency = constituency
+        self.conparse = conparse
 
         self._wspos = None
 
@@ -78,8 +78,8 @@ class CkipPipeline:
         ner_chunker : str
             The type of named-entity recognition chunker.
 
-        sentence_parser : str
-            The type of sentence parser.
+        con_parser : str
+            The type of constituency parser.
 
     Other Parameters
     ----------------
@@ -94,7 +94,7 @@ def __init__(self, *,
             sentence_segmenter='default',
             word_segmenter='tagger',
             pos_tagger='tagger',
-            sentence_parser='classic',
+            con_parser='classic',
             ner_chunker='tagger',
             lazy=True,
             opts={},
@@ -125,8 +125,8 @@ def __init__(self, *,
         self._pos_tagger = _DriverRegister.get('pos_tagger', pos_tagger)(
             lazy=lazy, **opts.get('pos_tagger', {}),
         )
-        self._constituency_parser = _DriverRegister.get('constituncy_parser', sentence_parser)(
-            lazy=lazy, **opts.get('sentence_parser', {}),
+        self._con_parser = _DriverRegister.get('con_parser', con_parser)(
+            lazy=lazy, **opts.get('con_parser', {}),
         )
         self._ner_chunker = _DriverRegister.get('ner_tagger', ner_chunker)(
             lazy=lazy, **opts.get('ner_chunker', {}),
@@ -151,8 +151,8 @@ def _get(self, key, doc):
             'pos': (
                 self._pos_tagger, 'part-of-speech tagging',
             ),
-            'constituency': (
-                self._constituency_parser, 'constituency parsing',
+            'conparse': (
+                self._con_parser, 'constituency parsing',
             ),
             'ner': (
                 self._ner_chunker, 'named-entity recognition',
@@ -261,7 +261,7 @@ def get_ner(self, doc):
 
     ########################################################################################################################
 
-    def get_constituency(self, doc):
+    def get_conparse(self, doc):
         """Apply constituency parsing.
 
         Arguments
@@ -271,11 +271,11 @@ def get_constituency(self, doc):
 
         Returns
         -------
-            doc.constituency : :class:`~ckipnlp.container.parse.ParseParagraph`
+            doc.conparse : :class:`~ckipnlp.container.parse.ParseParagraph`
                 The constituency parsing sentences.
 
         .. note::
 
             This routine modifies **doc** in place.
         """
-        return self._get('constituency', doc)
+        return self._get('conparse', doc)
diff --git a/docs/main/_defn.rst b/docs/main/_defn.rst
@@ -12,7 +12,7 @@
 .. Driver
 
 .. |CkipClassicWordSegmenter| replace:: :class:`~ckipnlp.driver.classic.CkipClassicWordSegmenter`
-.. |CkipClassicConstituencyParser| replace:: :class:`~ckipnlp.driver.classic.CkipClassicConstituencyParser`
+.. |CkipClassicConParser| replace:: :class:`~ckipnlp.driver.classic.CkipClassicConParser`
 
 .. |CkipTaggerWordSegmenter| replace:: :class:`~ckipnlp.driver.tagger.CkipTaggerWordSegmenter`
 .. |CkipTaggerPosTagger| replace:: :class:`~ckipnlp.driver.tagger.CkipTaggerPosTagger`
diff --git a/docs/main/tag.rst b/docs/main/tag.rst
@@ -13,14 +13,14 @@ Constituency Parsing Tags
 -------------------------
 
 .. csv-table::
-   :file: ./tag/constituency_pos.csv
+   :file: ./tag/conparse_pos.csv
    :widths: 50 50
    :header-rows: 1
 
 Constituency Parsing Roles
 --------------------------
 
 .. csv-table::
-   :file: ./tag/constituency_role.csv
+   :file: ./tag/conparse_role.csv
    :widths: 50 50
    :header-rows: 1
diff --git a/docs/main/tag/conparse_pos.csv b/docs/main/tag/conparse_pos.csv
diff --git a/docs/main/tag/conparse_role.csv b/docs/main/tag/conparse_role.csv
diff --git a/docs/main/usage/driver.rst b/docs/main/usage/driver.rst
@@ -42,7 +42,7 @@ Sentence Segmenter                |CkipSentenceSegmenter|
 Word Segmenter                                                      |CkipTaggerWordSegmenter|         |CkipClassicWordSegmenter|†
 Pos Tagger                                                          |CkipTaggerPosTagger|             |CkipClassicWordSegmenter|†
 Ner Chunker                                                         |CkipTaggerNerChunker|
-Constituncy Parser                                                                                    |CkipClassicConstituencyParser|
+Constituency Parser                                                                                   |CkipClassicConParser|
 Coref Chunker                     |CkipCorefChunker|
 ================================  ================================  ================================  ================================
 
diff --git a/docs/main/usage/pipeline.rst b/docs/main/usage/pipeline.rst
@@ -38,8 +38,8 @@ The |CkipPipeline| will compute all necessary dependencies. For example, if one
    print(doc.ner)
 
    # Constituency Parsing
-   pipeline.get_constituency(doc)
-   print(doc.constituency)
+   pipeline.get_conparse(doc)
+   print(doc.conparse)
 
    ################################################################
 
diff --git a/test/script/pipeline/_base.py b/test/script/pipeline/_base.py
@@ -36,7 +36,7 @@
     [ [ '中文字', 'LANGUAGE', (0, 3), ], ],
     [ [ '畢卡索', 'PERSON', (6, 9), ], ],
 ]
-constituency = [
+conparse = [
     [
         [ 'S(Head:Nab:中文字|particle:Td:耶)', '，', ],
         [ '%(particle:I:啊|manner:Dh:哈|manner:D:哈哈)', '。', ],
diff --git a/test/script/pipeline/run_classic_con_parser.py b/test/script/pipeline/run_classic_con_parser.py
@@ -7,8 +7,8 @@
 
 from _base import *
 
-def test_classic_constituency_parser():
-    obj = CkipPipeline(sentence_parser='classic')
+def test_classic_con_parser():
+    obj = CkipPipeline(con_parser='classic')
     doc = CkipDocument(ws=SegParagraph.from_list(ws), pos=SegParagraph.from_list(pos))
-    obj.get_constituency(doc)
-    assert doc.constituency.to_list() == constituency
+    obj.get_conparse(doc)
+    assert doc.conparse.to_list() == conparse
diff --git a/test/tox.ini b/test/tox.ini
@@ -82,4 +82,4 @@ commands_pre =
 commands =
 	pytest {toxinidir}/script/pipeline/run_classic_word_segmenter.py {env:NO_COV:--cov=ckipnlp.pipeline --cov=ckipnlp.driver} {posargs}
 	pytest {toxinidir}/script/pipeline/run_classic_word_segmenter_pos_tagger.py {env:NO_COV:--cov=ckipnlp.pipeline --cov=ckipnlp.driver} {posargs}
-	pytest {toxinidir}/script/pipeline/run_classic_constituency_parser.py {env:NO_COV:--cov=ckipnlp.pipeline --cov=ckipnlp.driver} {posargs}
+	pytest {toxinidir}/script/pipeline/run_classic_con_parser.py {env:NO_COV:--cov=ckipnlp.pipeline --cov=ckipnlp.driver} {posargs}

Original file line number	Diff line number	Diff line change
`@@ -2,7 +2,7 @@`
`2`	`2`	`# -*- coding:utf-8 -*-`
`3`	`3`
`4`	`4`	`"""`
`5`		`-This module provides tree containers for sentence parse.`
	`5`	`+This module provides tree containers for parsed sentences.`
`6`	`6`	`"""`
`7`	`7`
`8`	`8`	`__author__ = 'Mu Yang <http://muyang.pro>'`
`@@ -23,7 +23,7 @@`
`23`	`23`	`Node as _Node,`
`24`	`24`	`)`
`25`	`25`
`26`		`-from ckipnlp.data.constituency import (`
	`26`	`+from ckipnlp.data.conparse import (`
`27`	`27`	`SUBJECT_ROLES as _SUBJECT_ROLES,`
`28`	`28`	`NEUTRAL_ROLES as _NEUTRAL_ROLES,`
`29`	`29`	`)`
Original file line number	Diff line number	Diff line change
`@@ -17,7 +17,7 @@`
`17`	`17`
`18`	`18`	`from .classic import (`
`19`	`19`	`CkipClassicWordSegmenter,`
`20`		`- CkipClassicConstituencyParser,`
	`20`	`+ CkipClassicConParser,`
`21`	`21`	`)`
`22`	`22`
`23`	`23`	`from .ss import (`