Skip to content

Commit 1c069c2

Browse files
committed
Merge branch 'release/0.8.4'
2 parents d5cb33e + 29fd145 commit 1c069c2

File tree

10 files changed

+197
-52
lines changed

10 files changed

+197
-52
lines changed

README.rst

+7-7
Original file line numberDiff line numberDiff line change
@@ -72,18 +72,18 @@ https://ckipnlp.readthedocs.io/
7272
.. |ReadTheDocs Home| image:: https://img.shields.io/website/https/ckipnlp.readthedocs.io.svg?maxAge=3600&up_message=online&down_message=offline
7373
:target: https://ckipnlp.readthedocs.io
7474

75+
Online Demo
76+
^^^^^^^^^^^^^^
77+
78+
https://ckip.iis.sinica.edu.tw/service/corenlp
79+
7580
Contributors
7681
^^^^^^^^^^^^
7782

7883
* `Mu Yang <https://muyang.pro>`_ at `CKIP <https://ckip.iis.sinica.edu.tw>`_ (Author & Maintainer)
7984
* `Wei-Yun Ma <https://www.iis.sinica.edu.tw/pages/ma/>`_ at `CKIP <https://ckip.iis.sinica.edu.tw>`_ (Maintainer)
8085
* `DouglasWu <[email protected]>`_
8186

82-
External Links
83-
^^^^^^^^^^^^^^
84-
85-
- `Online Demo <https://ckip.iis.sinica.edu.tw/service/corenlp>`_
86-
8787
Installation
8888
------------
8989

@@ -92,9 +92,9 @@ Requirements
9292

9393
* `Python <https://www.python.org>`_ 3.6+
9494
* `TreeLib <https://treelib.readthedocs.io>`_ 1.5+
95-
9695
* `CkipTagger <https://pypi.org/project/ckiptagger>`_ 0.1.1+ [Optional, Recommended]
9796
* `CkipClassic <https://ckip-classic.readthedocs.io>`_ 1.0+ [Optional]
97+
* `TensorFlow / TensorFlow-GPU <https://www.tensorflow.org/>`_ 1.13.1+, <2 [Required by CkipTagger]
9898

9999
Driver Requirements
100100
^^^^^^^^^^^^^^^^^^^
@@ -117,7 +117,7 @@ Installation via Pip
117117
^^^^^^^^^^^^^^^^^^^^
118118

119119
- No backend (not recommended): ``pip install ckipnlp``.
120-
- With CkipTagger backend (recommended): ``pip install ckipnlp[tagger]``
120+
- With CkipTagger backend (recommended): ``pip install ckipnlp[tagger]`` or ``pip install ckipnlp[tagger-gpu]``.
121121
- With CkipClassic backend: Please refer to https://ckip-classic.readthedocs.io/en/latest/main/readme.html#installation for the CkipClassic installation guide.
122122

123123
Usage

ckipnlp/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
__copyright__ = '2018-2020 CKIP Lab'
1111

1212
__title__ = 'CKIPNLP'
13-
__version__ = '0.8.3'
13+
__version__ = '0.8.4'
1414
__description__ = 'CKIP CoreNLP'
1515
__license__ = 'CC BY-NC-SA 4.0'
1616

ckipnlp/container/base.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def to_text(self):
5353
@classmethod
5454
@_abstractmethod
5555
def from_dict(cls, data):
56-
"""Construct an instance a from python built-in containers."""
56+
"""Construct an instance from python built-in containers."""
5757
return NotImplemented
5858

5959
@_abstractmethod
@@ -66,7 +66,7 @@ def to_dict(self):
6666
@classmethod
6767
@_abstractmethod
6868
def from_list(cls, data):
69-
"""Construct an instance a from python built-in containers."""
69+
"""Construct an instance from python built-in containers."""
7070
return NotImplemented
7171

7272
@_abstractmethod
@@ -187,7 +187,7 @@ def to_text(self):
187187

188188
@classmethod
189189
def from_dict(cls, data):
190-
"""Construct an instance a from python built-in containers.
190+
"""Construct an instance from python built-in containers.
191191
192192
Parameters
193193
----------
@@ -209,7 +209,7 @@ def to_dict(self):
209209

210210
@classmethod
211211
def from_list(cls, data):
212-
"""Construct an instance a from python built-in containers.
212+
"""Construct an instance from python built-in containers.
213213
214214
Parameters
215215
----------

ckipnlp/container/ner.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ class NerToken(_BaseTuple, _NerToken):
9090

9191
@classmethod
9292
def from_tagger(cls, data):
93-
"""Construct an instance a from CkipTagger format."""
93+
"""Construct an instance from CkipTagger format."""
9494
idx0, idx1, ner, word = data
9595
return cls(word=word, ner=ner, idx=(idx0, idx1,)) # pylint: disable=no-value-for-parameter
9696

@@ -148,7 +148,7 @@ class NerSentence(_BaseSentence):
148148

149149
@classmethod
150150
def from_tagger(cls, data):
151-
"""Construct an instance a from CkipTagger format."""
151+
"""Construct an instance from CkipTagger format."""
152152
return cls(map(cls.item_class.from_tagger, data))
153153

154154
def to_tagger(self):
@@ -220,7 +220,7 @@ class NerParagraph(_BaseList):
220220

221221
@classmethod
222222
def from_tagger(cls, data):
223-
"""Construct an instance a from CkipTagger format."""
223+
"""Construct an instance from CkipTagger format."""
224224
return cls(map(cls.item_class.from_tagger, data))
225225

226226
def to_tagger(self):

ckipnlp/container/util/parsed_tree.py

+101-15
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,22 @@ class ParsedTree(_Base, _Tree):
283283
284284
List format
285285
Not implemented.
286+
287+
Penn Treebank format
288+
Used for :meth:`from_penn` and :meth:`to_penn`.
289+
290+
.. code-block:: python
291+
292+
[
293+
'S',
294+
[ 'Head:Nab', '中文字', ],
295+
[ 'particle:Td', '耶', ],
296+
]
297+
298+
.. note::
299+
300+
One may use :meth:`to_penn` together with `SvgLing <https://pypi.org/project/svgling/>`_ to generate SVG tree graphs.
301+
286302
"""
287303

288304
node_class = ParsedNode
@@ -303,6 +319,8 @@ def normalize_text(tree_text):
303319
def __str__(self):
304320
self.to_text()
305321

322+
########################################################################################################################
323+
306324
@classmethod
307325
def from_text(cls, data, *, normalize=True):
308326
"""Construct an instance from text format.
@@ -319,33 +337,33 @@ def from_text(cls, data, *, normalize=True):
319337

320338
tree = cls()
321339
node_id = 0
322-
node_queue = [None]
340+
node_stack = [None]
323341
text = ''
324342
ending = True
325343

326344
for char in data:
327345
if char == '(':
328346
node_data = cls.node_class.data_class.from_text(text)
329-
tree.create_node(tag=text, identifier=node_id, parent=node_queue[-1], data=node_data)
347+
tree.create_node(tag=text, identifier=node_id, parent=node_stack[-1], data=node_data)
330348

331-
node_queue.append(node_id)
349+
node_stack.append(node_id)
332350
node_id += 1
333351
text = ''
334352

335353
elif char == ')':
336354
if not ending:
337355
node_data = cls.node_class.data_class.from_text(text)
338-
tree.create_node(tag=text, identifier=node_id, parent=node_queue[-1], data=node_data)
356+
tree.create_node(tag=text, identifier=node_id, parent=node_stack[-1], data=node_data)
339357
node_id += 1
340358

341-
node_queue.pop()
359+
node_stack.pop()
342360
text = ''
343361
ending = True
344362

345363
elif char == '|':
346364
if not ending:
347365
node_data = cls.node_class.data_class.from_text(text)
348-
tree.create_node(tag=text, identifier=node_id, parent=node_queue[-1], data=node_data)
366+
tree.create_node(tag=text, identifier=node_id, parent=node_stack[-1], data=node_data)
349367
node_id += 1
350368

351369
text = ''
@@ -383,7 +401,7 @@ def to_text(self, node_id=None):
383401

384402
@classmethod
385403
def from_dict(cls, data):
386-
"""Construct an instance a from python built-in containers.
404+
"""Construct an instance from python built-in containers.
387405
388406
Parameters
389407
----------
@@ -392,22 +410,22 @@ def from_dict(cls, data):
392410
"""
393411
tree = cls()
394412

395-
queue = _deque()
396-
queue.append((data, None,))
413+
node_queue = _deque()
414+
node_queue.append((data, None,))
397415

398-
while queue:
399-
node_dict, parent_id = queue.popleft()
416+
while node_queue:
417+
node_dict, parent_id = node_queue.popleft()
400418
node_id = node_dict['id']
401419
node_data = cls.node_class.data_class.from_dict(node_dict['data'])
402420
tree.create_node(tag=node_data.to_text(), identifier=node_id, parent=parent_id, data=node_data)
403421

404422
for child in node_dict['children']:
405-
queue.append((child, node_id,))
423+
node_queue.append((child, node_id,))
406424

407425
return tree
408426

409427
def to_dict(self, node_id=None):
410-
"""Construct an instance a from python built-in containers.
428+
"""Transform to python built-in containers.
411429
412430
Parameters
413431
----------
@@ -429,6 +447,70 @@ def to_dict(self, node_id=None):
429447

430448
return tree_dict
431449

450+
@classmethod
451+
def from_penn(cls, data):
452+
"""Construct an instance from Penn Treebank format."""
453+
tree = cls()
454+
455+
node_stack = _deque()
456+
node_stack.append((data, None,))
457+
458+
node_id = 0
459+
460+
while node_stack:
461+
penn_data, parent_id = node_stack.pop()
462+
463+
if not penn_data:
464+
raise SyntaxError(f'Empty node #{node_id}')
465+
466+
if not isinstance(penn_data[0], str):
467+
raise SyntaxError(f'First element of a node must be string, got {type(penn_data[0])}')
468+
469+
if len(penn_data) == 2 and isinstance(penn_data[-1], str):
470+
penn_data = (':'.join(penn_data),)
471+
472+
node_data = cls.node_class.data_class.from_text(penn_data[0])
473+
tree.create_node(tag=node_data.to_text(), identifier=node_id, parent=parent_id, data=node_data)
474+
475+
for child in penn_data[-1:0:-1]:
476+
node_stack.append((child, node_id,))
477+
node_id += 1
478+
479+
return tree
480+
481+
def to_penn(self, node_id=None, *, with_role=True, with_word=True, sep=':'):
482+
"""Transform to Penn Treebank format.
483+
484+
Parameters
485+
----------
486+
node_id : int
487+
Output the Penn Treebank format for the subtree under **node_id**.
488+
with_role : bool
489+
Whether to include the role-tag in the output.
490+
with_word : bool
491+
Whether to include the word in the output.
492+
sep : str
493+
The separator between the role and the POS-tag.
494+
495+
Returns
496+
-------
497+
list
498+
"""
499+
if node_id is None:
500+
node_id = self.root
501+
node = self[node_id]
502+
503+
penn_data = [f'{node.data.role}{sep}{node.data.pos}' if with_role and node.data.role else node.data.pos,]
504+
if with_word and node.data.word:
505+
penn_data.append(node.data.word)
506+
507+
for child in self.children(node_id):
508+
penn_data.append(self.to_penn(child.identifier, with_role=with_role, with_word=with_word, sep=sep))
509+
510+
return penn_data
511+
512+
########################################################################################################################
513+
432514
def show(self, *,
433515
key=lambda node: node.identifier,
434516
idhidden=False,
@@ -541,10 +623,14 @@ def get_relations(self, root_id=None, *, semantic=True):
541623
for tail in children:
542624
if tail.data.role != 'Head' and tail not in head_children:
543625
if tail.is_leaf():
544-
yield ParsedRelation(head=head_node, tail=tail, relation=tail) # pylint: disable=no-value-for-parameter
626+
yield ParsedRelation( # pylint: disable=no-value-for-parameter
627+
head=head_node, tail=tail, relation=tail,
628+
)
545629
else:
546630
for node in self.get_heads(tail.identifier, semantic=semantic):
547-
yield ParsedRelation(head=head_node, tail=node, relation=tail) # pylint: disable=no-value-for-parameter
631+
yield ParsedRelation( # pylint: disable=no-value-for-parameter
632+
head=head_node, tail=node, relation=tail,
633+
)
548634

549635
# Recursion
550636
for child in children:

docs/Makefile

+4-4
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,20 @@
33

44
# You can set these variables from the command line, and also
55
# from the environment for the first two.
6-
SPHINX = sphinx-build
7-
SPHINXOPTS =
8-
6+
SPHINX ?= sphinx-build
7+
SPHINXOPTS ?=
98
DOCDIR = .
109
BUILDDIR = _build
1110
APIDIR = _api
1211

12+
# Remember to add the environment variable to readthedocs.org!!!
1313
ENV = SPHINX_APIDOC_OPTIONS=members,show-inheritance
1414

1515
# Put it first so that "make" without argument is like "make help".
1616
help:
1717
@$(SPHINX) -M help "$(DOCDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
1818

19-
.PHONY: help Makefile api clean serve
19+
.PHONY: help Makefile clean serve
2020

2121
clean: Makefile
2222
@$(SPHINX) -M $@ "$(DOCDIR)" "$(BUILDDIR)" $(SPHINXOPTS)

0 commit comments

Comments
 (0)