@@ -283,6 +283,22 @@ class ParsedTree(_Base, _Tree):
283
283
284
284
List format
285
285
Not implemented.
286
+
287
+ Penn Treebank format
288
+ Used for :meth:`from_penn` and :meth:`to_penn`.
289
+
290
+ .. code-block:: python
291
+
292
+ [
293
+ 'S',
294
+ [ 'Head:Nab', '中文字', ],
295
+ [ 'particle:Td', '耶', ],
296
+ ]
297
+
298
+ .. note::
299
+
300
+ One may use :meth:`to_penn` together with `SvgLing <https://pypi.org/project/svgling/>`_ to generate SVG tree graphs.
301
+
286
302
"""
287
303
288
304
node_class = ParsedNode
@@ -303,6 +319,8 @@ def normalize_text(tree_text):
303
319
def __str__(self):
    """Return the text-format rendering of the tree produced by :meth:`to_text`."""
    # ``__str__`` must hand back a string; calling ``to_text`` without
    # returning its result makes ``str(tree)`` raise ``TypeError``.
    return self.to_text()
305
321
322
+ ########################################################################################################################
323
+
306
324
@classmethod
307
325
def from_text (cls , data , * , normalize = True ):
308
326
"""Construct an instance from text format.
@@ -319,33 +337,33 @@ def from_text(cls, data, *, normalize=True):
319
337
320
338
tree = cls ()
321
339
node_id = 0
322
- node_queue = [None ]
340
+ node_stack = [None ]
323
341
text = ''
324
342
ending = True
325
343
326
344
for char in data :
327
345
if char == '(' :
328
346
node_data = cls .node_class .data_class .from_text (text )
329
- tree .create_node (tag = text , identifier = node_id , parent = node_queue [- 1 ], data = node_data )
347
+ tree .create_node (tag = text , identifier = node_id , parent = node_stack [- 1 ], data = node_data )
330
348
331
- node_queue .append (node_id )
349
+ node_stack .append (node_id )
332
350
node_id += 1
333
351
text = ''
334
352
335
353
elif char == ')' :
336
354
if not ending :
337
355
node_data = cls .node_class .data_class .from_text (text )
338
- tree .create_node (tag = text , identifier = node_id , parent = node_queue [- 1 ], data = node_data )
356
+ tree .create_node (tag = text , identifier = node_id , parent = node_stack [- 1 ], data = node_data )
339
357
node_id += 1
340
358
341
- node_queue .pop ()
359
+ node_stack .pop ()
342
360
text = ''
343
361
ending = True
344
362
345
363
elif char == '|' :
346
364
if not ending :
347
365
node_data = cls .node_class .data_class .from_text (text )
348
- tree .create_node (tag = text , identifier = node_id , parent = node_queue [- 1 ], data = node_data )
366
+ tree .create_node (tag = text , identifier = node_id , parent = node_stack [- 1 ], data = node_data )
349
367
node_id += 1
350
368
351
369
text = ''
@@ -383,7 +401,7 @@ def to_text(self, node_id=None):
383
401
384
402
@classmethod
385
403
def from_dict (cls , data ):
386
- """Construct an instance a from python built-in containers.
404
+ """Construct an instance from python built-in containers.
387
405
388
406
Parameters
389
407
----------
@@ -392,22 +410,22 @@ def from_dict(cls, data):
392
410
"""
393
411
tree = cls ()
394
412
395
- queue = _deque ()
396
- queue .append ((data , None ,))
413
+ node_queue = _deque ()
414
+ node_queue .append ((data , None ,))
397
415
398
- while queue :
399
- node_dict , parent_id = queue .popleft ()
416
+ while node_queue :
417
+ node_dict , parent_id = node_queue .popleft ()
400
418
node_id = node_dict ['id' ]
401
419
node_data = cls .node_class .data_class .from_dict (node_dict ['data' ])
402
420
tree .create_node (tag = node_data .to_text (), identifier = node_id , parent = parent_id , data = node_data )
403
421
404
422
for child in node_dict ['children' ]:
405
- queue .append ((child , node_id ,))
423
+ node_queue .append ((child , node_id ,))
406
424
407
425
return tree
408
426
409
427
def to_dict (self , node_id = None ):
410
- """Construct an instance a from python built-in containers.
428
+ """Transform to python built-in containers.
411
429
412
430
Parameters
413
431
----------
@@ -429,6 +447,70 @@ def to_dict(self, node_id=None):
429
447
430
448
return tree_dict
431
449
450
@classmethod
def from_penn(cls, data):
    """Construct an instance from Penn Treebank format.

    Parameters
    ----------
        data : list or tuple
            A nested node of the form ``[tag, child, child, ...]``. A
            two-element node whose second element is a string
            (``[tag, word]``) is treated as a leaf: both parts are joined
            with ``':'`` before being parsed by the node data class.

    Returns
    -------
        An instance of this class; nodes receive consecutive integer
        identifiers starting from 0 in depth-first, left-to-right order.

    Raises
    ------
        SyntaxError
            If a node is empty, is a bare string instead of a list, or its
            first element is not a string.
    """
    tree = cls()

    # Used as a LIFO stack of (node content, parent identifier) pairs.
    node_stack = _deque()
    node_stack.append((data, None,))

    node_id = 0

    while node_stack:
        penn_data, parent_id = node_stack.pop()

        if not penn_data:
            raise SyntaxError(f'Empty node #{node_id}')

        # A bare string is itself an indexable sequence of strings, so it
        # would pass the checks below and be split into one bogus node per
        # character. Reject it explicitly instead.
        if isinstance(penn_data, str):
            raise SyntaxError(f'Node #{node_id} must be a list, got string {penn_data!r}')

        if not isinstance(penn_data[0], str):
            raise SyntaxError(f'First element of a node must be string, got {type(penn_data[0])}')

        # Leaf node ``[tag, word]``: merge into a single ``tag:word`` text.
        if len(penn_data) == 2 and isinstance(penn_data[-1], str):
            penn_data = (':'.join(penn_data),)

        node_data = cls.node_class.data_class.from_text(penn_data[0])
        tree.create_node(tag=node_data.to_text(), identifier=node_id, parent=parent_id, data=node_data)

        # Push the children (everything after the tag) in reverse so that
        # they are popped, and therefore numbered, from left to right.
        for child in penn_data[-1:0:-1]:
            node_stack.append((child, node_id,))
        node_id += 1

    return tree
480
+
481
def to_penn(self, node_id=None, *, with_role=True, with_word=True, sep=':'):
    """Transform to Penn Treebank format.

    Parameters
    ----------
        node_id : int
            Identifier of the node whose subtree is exported; the tree
            root is used when this is ``None``.
        with_role : bool
            Prefix the POS-tag with the role-tag (when the node has one).
        with_word : bool
            Include the word (when the node has one).
        sep : str
            The separator placed between the role-tag and the POS-tag.

    Returns
    -------
        list
            ``[label, word?, child, child, ...]`` where every child is
            itself a list of the same shape.
    """
    if node_id is None:
        node_id = self.root
    data = self[node_id].data

    # Label is ``role<sep>pos`` when a role is requested and present,
    # otherwise just the POS-tag.
    if with_role and data.role:
        label = f'{data.role}{sep}{data.pos}'
    else:
        label = data.pos

    result = [label]
    if with_word and data.word:
        result.append(data.word)

    # Recurse into each child with the same formatting options.
    result.extend(
        self.to_penn(child.identifier, with_role=with_role, with_word=with_word, sep=sep)
        for child in self.children(node_id)
    )

    return result
511
+
512
+ ########################################################################################################################
513
+
432
514
def show (self , * ,
433
515
key = lambda node : node .identifier ,
434
516
idhidden = False ,
@@ -541,10 +623,14 @@ def get_relations(self, root_id=None, *, semantic=True):
541
623
for tail in children :
542
624
if tail .data .role != 'Head' and tail not in head_children :
543
625
if tail .is_leaf ():
544
- yield ParsedRelation (head = head_node , tail = tail , relation = tail ) # pylint: disable=no-value-for-parameter
626
+ yield ParsedRelation ( # pylint: disable=no-value-for-parameter
627
+ head = head_node , tail = tail , relation = tail ,
628
+ )
545
629
else :
546
630
for node in self .get_heads (tail .identifier , semantic = semantic ):
547
- yield ParsedRelation (head = head_node , tail = node , relation = tail ) # pylint: disable=no-value-for-parameter
631
+ yield ParsedRelation ( # pylint: disable=no-value-for-parameter
632
+ head = head_node , tail = node , relation = tail ,
633
+ )
548
634
549
635
# Recursion
550
636
for child in children :
0 commit comments