Skip to content

Commit d721bff

Browse files
committed
Merge branch 'release/0.7.2'
2 parents f25f7d2 + 5aedec9 commit d721bff

File tree

11 files changed

+232
-174
lines changed

11 files changed

+232
-174
lines changed

ckipnlp/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
__copyright__ = '2018-2020 CKIP Lab'
77

88
__title__ = 'CKIPNLP'
9-
__version__ = '0.7.1'
9+
__version__ = '0.7.2'
1010
__description__ = 'CKIP CoreNLP Wrappers'
1111
__license__ = 'CC BY-NC-SA 4.0'
1212

ckipnlp/parser/__init__.py

+26-3
Original file line numberDiff line numberDiff line change
@@ -91,16 +91,35 @@ def CkipParser(*, _=None): pass # pylint: disable=redefined-outer-name, invalid-
9191
except: # pylint: disable=bare-except
9292
pass
9393

94+
@staticmethod
95+
def normalize_text(text):
96+
"""Text normalization output.
97+
98+
Replace the keywords ``()+-:|&#`` with their full-width equivalents.
99+
"""
100+
return (text
101+
.replace('(', '(')
102+
.replace(')', ')')
103+
.replace('+', '+')
104+
.replace('-', '-')
105+
.replace(':', ':')
106+
.replace('|', '|')
107+
.replace('&', '&') # for tree draw
108+
.replace('#', '#') # for tree draw
109+
)
110+
94111
def __call__(self, text):
95112
return self.apply(text)
96113

97-
def apply(self, text):
114+
def apply(self, text, *, normalize=True):
98115
"""Parse a sentence.
99116
100117
Parameters
101118
----------
102119
text : str
103120
the input sentence.
121+
normalize : bool
122+
do text normalization (please refer to :meth:`normalize_text`).
104123
105124
Return
106125
------
@@ -110,21 +129,25 @@ def apply(self, text):
110129
.. hint::
111130
One may also call this method as :meth:`__call__`.
112131
"""
113-
return self.apply_list([text])[0]
132+
return self.apply_list([text], normalize=normalize)[0]
114133

115-
def apply_list(self, ilist):
134+
def apply_list(self, ilist, *, normalize=True):
116135
"""Parse a list of sentences.
117136
118137
Parameters
119138
----------
120139
ilist : List[str]
121140
the list of input sentences.
141+
normalize : bool
142+
do text normalization (please refer to :meth:`normalize_text`).
122143
123144
Return
124145
------
125146
List[str]
126147
the list of output sentences.
127148
"""
149+
if normalize:
150+
ilist = list(map(self.normalize_text, ilist))
128151
return self.__core.apply_list(ilist)
129152

130153
def apply_file(self, ifile, ofile):

ckipnlp/ws/__init__.py

+22-5
Original file line numberDiff line numberDiff line change
@@ -78,16 +78,29 @@ def CkipWs(*, _=None): pass # pylint: disable=redefined-outer-name, invalid-name
7878
except: # pylint: disable=bare-except
7979
pass
8080

81+
@staticmethod
82+
def normalize_text(text):
83+
"""Text normalization output.
84+
85+
Replace the keywords ``()`` with their full-width equivalents.
86+
"""
87+
return (text
88+
.replace('(', '(')
89+
.replace(')', ')') # for tree draw
90+
)
91+
8192
def __call__(self, text):
8293
return self.apply(text)
8394

84-
def apply(self, text):
85-
"""Segment a sentence.
95+
def apply(self, text, *, normalize=True):
96+
"""Segment a sentence.
8697
8798
Parameters
8899
----------
89100
text : str
90101
the input sentence.
102+
normalize : bool
103+
do text normalization (please refer to :meth:`normalize_text`).
91104
92105
Return
93106
------
@@ -97,21 +110,25 @@ def apply(self, text):
97110
.. hint::
98111
One may also call this method as :meth:`__call__`.
99112
"""
100-
return self.apply_list([text])[0]
113+
return self.apply_list([text], normalize=normalize)[0]
101114

102-
def apply_list(self, ilist):
103-
"""Segment a list of sentences.
115+
def apply_list(self, ilist, *, normalize=True):
116+
"""Segment a list of sentences.
104117
105118
Parameters
106119
----------
107120
ilist : List[str]
108121
the list of input sentences.
122+
normalize : bool
123+
do text normalization (please refer to :meth:`normalize_text`).
109124
110125
Return
111126
------
112127
List[str]
113128
the list of output sentences.
114129
"""
130+
if normalize:
131+
ilist = list(map(self.normalize_text, ilist))
115132
return self.__core.apply_list(ilist)
116133

117134
def apply_file(self, ifile, ofile, uwfile=''):

ckipparser/__init__.py

-7
This file was deleted.

ckipws/__init__.py

-10
This file was deleted.

setup.py

+67-55
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,67 @@
3232

3333
import ckipnlp as about
3434

35-
with open('README.rst', encoding='utf-8') as fin:
36-
readme = fin.read()
35+
################################################################################
36+
37+
def main():
38+
39+
with open('README.rst', encoding='utf-8') as fin:
40+
readme = fin.read()
41+
42+
setup(
43+
name=about.__name__,
44+
version=about.__version__,
45+
author=about.__author_name__,
46+
author_email=about.__author_email__,
47+
description=about.__description__,
48+
long_description=readme,
49+
long_description_content_type='text/x-rst',
50+
url=about.__url__,
51+
download_url=about.__download_url__,
52+
platforms=['linux_x86_64'],
53+
license=about.__license__,
54+
classifiers=[
55+
'Development Status :: 4 - Beta',
56+
'Environment :: Console',
57+
'Programming Language :: Python',
58+
'Programming Language :: Python :: 3',
59+
'Programming Language :: Python :: 3.5',
60+
'Programming Language :: Python :: 3.6',
61+
'Programming Language :: Python :: 3.7',
62+
'Programming Language :: Python :: 3.8',
63+
'Programming Language :: Python :: 3.9',
64+
'Programming Language :: Python :: 3 :: Only',
65+
'Programming Language :: Cython',
66+
'License :: Free for non-commercial use',
67+
'Operating System :: POSIX :: Linux',
68+
'Natural Language :: Chinese (Traditional)',
69+
],
70+
python_requires='>=3.5',
71+
packages=find_namespace_packages(include=['ckipnlp', 'ckipnlp.*',]),
72+
install_requires=[
73+
'treelib>=1.5.5',
74+
],
75+
ext_modules=cythonize(
76+
[
77+
Extension('ckipnlp._core.ws',
78+
sources=['src/ws/ckipws.pyx'],
79+
libraries=['WordSeg'],
80+
language='c++',
81+
),
82+
Extension('ckipnlp._core.parser',
83+
sources=['src/parser/ckipparser.pyx'],
84+
libraries=['CKIPCoreNLP', 'CKIPParser', 'CKIPWS', 'CKIPSRL'],
85+
language='c++',
86+
),
87+
],
88+
build_dir='build',
89+
),
90+
data_files=[],
91+
cmdclass={
92+
'install': InstallCommand,
93+
'develop': DevelopCommand,
94+
},
95+
)
3796

3897
################################################################################
3998

@@ -97,10 +156,10 @@ def finalize_options(self):
97156

98157
# subdirectory
99158
opt_subdirectory = [
100-
('ws_lib_dir', 'ws_dir', 'lib',),
101-
('ws_share_dir', 'ws_dir', '',),
102-
('parser_lib_dir', 'parser_dir', 'lib',),
103-
('parser_share_dir','parser_dir', '',),
159+
('ws_lib_dir', 'ws_dir', 'lib',),
160+
('ws_share_dir', 'ws_dir', '',),
161+
('parser_lib_dir', 'parser_dir', 'lib',),
162+
('parser_share_dir', 'parser_dir', '',),
104163

105164
('data2_dir', 'ws_share_dir', 'Data2',),
106165
('data2_dir', 'parser_share_dir', 'Data2',),
@@ -199,52 +258,5 @@ def __init__(self, *args, **kwargs):
199258

200259
################################################################################
201260

202-
setup(
203-
name=about.__name__,
204-
version=about.__version__,
205-
author=about.__author_name__,
206-
author_email=about.__author_email__,
207-
description=about.__description__,
208-
long_description=readme,
209-
long_description_content_type='text/x-rst',
210-
url=about.__url__,
211-
download_url=about.__download_url__,
212-
platforms=['linux_x86_64'],
213-
license=about.__license__,
214-
classifiers=[
215-
'Development Status :: 4 - Beta',
216-
'Environment :: Console',
217-
'Programming Language :: Python',
218-
'Programming Language :: Python :: 3',
219-
'Programming Language :: Python :: 3.5',
220-
'Programming Language :: Python :: 3.6',
221-
'Programming Language :: Python :: 3.7',
222-
'Programming Language :: Cython',
223-
'License :: Free for non-commercial use',
224-
'Operating System :: POSIX :: Linux',
225-
'Natural Language :: Chinese (Traditional)',
226-
],
227-
python_requires='>=3.5',
228-
packages=find_namespace_packages(include=['ckipnlp', 'ckipnlp.*', 'ckipws', 'ckipparser',]),
229-
install_requires=[
230-
'treelib>=1.5.5',
231-
],
232-
ext_modules=cythonize(
233-
[
234-
Extension('ckipnlp._core.ws',
235-
sources=['src/ws/ckipws.pyx'],
236-
libraries=['WordSeg'],
237-
),
238-
Extension('ckipnlp._core.parser',
239-
sources=['src/parser/ckipparser.pyx'],
240-
libraries=['CKIPCoreNLP','CKIPParser','CKIPWS','CKIPSRL'],
241-
),
242-
],
243-
build_dir='build',
244-
),
245-
data_files=[],
246-
cmdclass={
247-
'install': InstallCommand,
248-
'develop': DevelopCommand,
249-
},
250-
)
261+
if __name__ == '__main__':
262+
main()

src/parser/cckipparser.pxd

+10-10
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,14 @@ __license__ = 'CC BY-NC-SA 4.0'
66

77
cdef extern:
88

9-
ctypedef void* corenlp_t
9+
ctypedef void* corenlp_t;
1010

11-
corenlp_t CKIPCoreNLP_New()
12-
int CKIPCoreNLP_InitData(corenlp_t obj, char *FileName);
13-
int CKIPCoreNLP_ApplyFile(corenlp_t obj, char *input, char *output);
14-
int CKIPCoreNLP_ApplyList(corenlp_t obj, int length, const Py_UNICODE **inputList);
15-
int CKIPCoreNLP_Parse(corenlp_t obj, const Py_UNICODE* pwsText, Py_UNICODE** ppwsResult);
16-
int CKIPCoreNLP_ParseFile(corenlp_t obj, char *input, char *output);
17-
const Py_UNICODE* CKIPCoreNLP_GetResultBegin(corenlp_t obj);
18-
const Py_UNICODE* CKIPCoreNLP_GetResultNext(corenlp_t obj);
19-
void CKIPCoreNLP_Destroy(corenlp_t obj);
11+
corenlp_t CKIPCoreNLP_New() nogil;
12+
int CKIPCoreNLP_InitData(corenlp_t obj, char *FileName) nogil;
13+
int CKIPCoreNLP_ApplyFile(corenlp_t obj, char *input, char *output) nogil;
14+
int CKIPCoreNLP_ApplyList(corenlp_t obj, int length, const Py_UNICODE **inputList) nogil;
15+
int CKIPCoreNLP_Parse(corenlp_t obj, const Py_UNICODE* pwsText, Py_UNICODE** ppwsResult) nogil;
16+
int CKIPCoreNLP_ParseFile(corenlp_t obj, char *input, char *output) nogil;
17+
const Py_UNICODE* CKIPCoreNLP_GetResultBegin(corenlp_t obj) nogil;
18+
const Py_UNICODE* CKIPCoreNLP_GetResultNext(corenlp_t obj) nogil;
19+
void CKIPCoreNLP_Destroy(corenlp_t obj) nogil;

src/parser/ckipparser.pyx

+12-14
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,10 @@ __copyright__ = '2018-2020 CKIP Lab'
66
__license__ = 'CC BY-NC-SA 4.0'
77

88
cimport src.parser.cckipparser as cckipparser
9-
from libc.stdlib cimport malloc, free
10-
from cpython.unicode cimport PyUnicode_AsUnicode
9+
cimport cython
10+
from libcpp.vector cimport vector
1111

12+
@cython.final
1213
cdef class CkipParserCore:
1314

1415
cdef cckipparser.corenlp_t __obj
@@ -19,31 +20,28 @@ cdef class CkipParserCore:
1920
def __dealloc__(self):
2021
if self.__obj is not NULL:
2122
cckipparser.CKIPCoreNLP_Destroy(self.__obj)
22-
pass
2323

24-
def init_data(self, inifile):
24+
def init_data(self, str inifile):
2525
ret = cckipparser.CKIPCoreNLP_InitData(self.__obj, inifile.encode())
2626
if not ret:
2727
raise IOError()
2828

29-
def apply_list(self, ilist):
30-
inum = len(ilist)
29+
# def enable_logger(self):
30+
# cckipparser.CKIPCoreNLP_EnableConsoleLogger(self.__obj)
3131

32-
iarr = <const Py_UNICODE**> malloc(sizeof(const Py_UNICODE*) * inum)
33-
for i in range(inum):
34-
iarr[i] = PyUnicode_AsUnicode(ilist[i])
35-
ret = cckipparser.CKIPCoreNLP_ApplyList(self.__obj, inum, iarr)
36-
free(iarr)
32+
def apply_list(self, vector[const Py_UNICODE*] ilist):
33+
34+
ret = cckipparser.CKIPCoreNLP_ApplyList(self.__obj, ilist.size(), ilist.data())
3735
assert ret is not None
3836

39-
olist = []
37+
cdef vector[const Py_UNICODE*] olist
4038
result = cckipparser.CKIPCoreNLP_GetResultBegin(self.__obj)
4139
while result is not NULL:
42-
olist.append(result.strip())
40+
olist.push_back(result)
4341
result = cckipparser.CKIPCoreNLP_GetResultNext(self.__obj)
4442

4543
return olist
4644

47-
def apply_file(self, ifile, ofile):
45+
def apply_file(self, str ifile, str ofile):
4846
ret = cckipparser.CKIPCoreNLP_ApplyFile(self.__obj, ifile.encode(), ofile.encode())
4947
assert ret is not None

0 commit comments

Comments
 (0)