Skip to content

Commit 316f782

Browse files
authored
Merge pull request #103 from bab2min/dev_irregular
Fixed minor bugs
2 parents b2fe5e6 + c403ab0 commit 316f782

File tree

6 files changed

+48
-32
lines changed

6 files changed

+48
-32
lines changed

ModelGenerator/combiningRule.txt

+5-5
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@ VV E
1010

1111
V,XSV,XSA E
1212
^하 어 해,하)(여
13-
^하 다$ 타,)다
14-
^하 도록$ 토)록,)도록-3
15-
^하 게$ 케),)게-3
16-
^하 지$ 치),)지-3
17-
^하 건대$ 컨)대,)건대-3
13+
^하 다$ 타-0.1,)다
14+
^하 도록$ 토)록-0.1,)도록-3
15+
^하 게$ 케)-0.1,)게-3
16+
^하 지$ 치)-0.1,)지-3
17+
^하 건대$ 컨)대-0.1,)건대-3
1818

1919
VX E
2020
ᆯ [ᆫᆸᆺᆯ나-니바-비사-시오] )\2

ModelGenerator/morphemes.txt

+10-7
Original file line numberDiff line numberDiff line change
@@ -858,7 +858,7 @@
858858
주 NNG 3300
859859
새 MM 3298
860860
옮기 VV 3296
861-
그니까 MAJ 3295
861+
그니까 MAJ 3295 =그러니까
862862
목적 NNG 3295
863863
조선 NNP 3293
864864
논의 NNG 3293
@@ -4210,7 +4210,7 @@
42104210
수지 NNG 488
42114211
각자 NNG 487
42124212
유기 NNG 487
4213-
왜냐면 MAG 487
4213+
왜냐면 MAG 487 =왜냐하면
42144214
기 XSN 487
42154215
체질 NNG 487
42164216
스승 NNG 487
@@ -7286,7 +7286,7 @@
72867286
보선 NNG 219
72877287
에요 EC 219
72887288
유쾌 XR 219
7289-
그러면은 MAJ 219
7289+
그러면은 MAJ 219 =그러면
72907290
장기간 NNG 218
72917291
체 NNG 218
72927292
달구 VV 218
@@ -8005,7 +8005,7 @@
80058005
타수 NNG 189
80068006
신흥 NNG 189
80078007
기부금 NNG 189
8008-
그쵸 IC 189
8008+
그쵸 IC 189 =그죠
80098009
실무자 NNG 188
80108010
식용유 NNG 188
80118011
닭고기 NNG 188
@@ -12043,7 +12043,7 @@ SK텔레콤 NNP 103
1204312043
사이즈 NNG 102
1204412044
기성세대 NNG 102
1204512045
악영향 NNG 102
12046-
그니깐 MAJ 102
12046+
그니깐 MAJ 102 =그러니깐
1204712047
운영비 NNG 102
1204812048
사무국 NNG 102
1204912049
소크라테스 NNP 102
@@ -12108,7 +12108,7 @@ SK텔레콤 NNP 103
1210812108
청취자 NNG 101
1210912109
그때그때 MAG 101
1211012110
골키퍼 NNG 101
12111-
긍까 MAJ 101
12111+
긍까 MAJ 101 =그러니까
1211212112
자칭 NNG 101
1211312113
남대문 NNP 101
1211412114
찬스 NNG 101
@@ -28702,7 +28702,7 @@ LG유플러스 NNP 24
2870228702
왜코벌 NNG 24
2870328703
언론업 NNG 24
2870428704
콜롯세움 NNP 24
28705-
그니까는 MAJ 24
28705+
그니까는 MAJ 24 =그러니깐
2870628706
형준 NNP 24
2870728707
어쨌든 MAJ 24
2870828708
쎄이 NNG 24
@@ -68506,3 +68506,6 @@ CNN NNP 5
6850668506
깔라만시 NNG 5
6850768507
스벅 NNP 5 =스타벅스
6850868508
짭쪼롬 XR 5 =짭짤
68509+
이르케 MAG 5 =이렇게
68510+
이케 MAG 5 =이렇게
68511+
어트게 MAG 5 =어떻게

ModelGenerator/sj.knlm

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
version https://git-lfs.github.com/spec/v1
2-
oid sha256:46c57e1aee279b9f99ffdd8f5f928091af54fd59060297aeb1176b99e6f4be1f
3-
size 35293503
2+
oid sha256:b90ac65c73daf7d27ad6b360d85cd25c0ad1a1570555c49bc1e3c7f4296462d1
3+
size 35290069

ModelGenerator/sj.morph

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
version https://git-lfs.github.com/spec/v1
2-
oid sha256:ad2ebfa617af8a426bede605035e462b00f4fc067bd2cc0e19bad6bc50ef88f7
3-
size 3183002
2+
oid sha256:ad6c9a90eae080dd1fe2201d6be2e9c808de1b6ae8422a8cac13a672f5bf8815
3+
size 3183135

src/Combiner.cpp

+9-1
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,9 @@ Vector<char16_t> RuleSet::getVocabList(const Vector<Pattern::Node>& nodes)
391391
using CRange = pair<const char16_t*, const char16_t*>;
392392
Vector<char16_t> ret;
393393
ret.emplace_back(0);
394+
ret.emplace_back(1);
395+
ret.emplace_back(2);
396+
ret.emplace_back(3);
394397
Vector<CRange> ranges;
395398

396399
for (auto& n : nodes)
@@ -1140,7 +1143,12 @@ pair<KString, size_t> CompiledRule::combineOneImpl(
11401143
{
11411144
for (auto& p : mapbox::util::apply_visitor(CombineVisitor{ leftForm, rightForm }, dfa[it->second]))
11421145
{
1143-
return make_pair(p.str, p.rightBegin);
1146+
if(p.score >= 0) return make_pair(p.str, p.rightBegin);
1147+
KString ret;
1148+
ret.reserve(leftForm.size() + rightForm.size());
1149+
ret.insert(ret.end(), leftForm.begin(), leftForm.end());
1150+
ret.insert(ret.end(), rightForm.begin(), rightForm.end());
1151+
return make_pair(ret, leftForm.size());
11441152
}
11451153
}
11461154

test/test_combiner.cpp

+20-15
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,11 @@ TEST(KiwiCppCombiner, Joiner)
6666
joiner.add(u"", POSTag::ef);
6767
EXPECT_EQ(joiner.getU16(), u"하셨다");
6868

69+
joiner = rule.newJoiner();
70+
joiner.add(u"", POSTag::vv);
71+
joiner.add(u"", POSTag::ef);
72+
EXPECT_EQ(joiner.getU16(), u"하다");
73+
6974
joiner = rule.newJoiner();
7075
joiner.add(u"", POSTag::vv);
7176
joiner.add(u"어서", POSTag::ec);
@@ -98,6 +103,21 @@ TEST(KiwiCppCombiner, Joiner)
98103
joiner.add(u"", POSTag::ef);
99104
joiner.add(u"!", POSTag::sf);
100105
EXPECT_EQ(joiner.getU16(), u"작은 소리라도 들어!");
106+
107+
joiner = rule.newJoiner();
108+
joiner.add(u"", POSTag::np);
109+
joiner.add(u"", POSTag::jks);
110+
joiner.add(u"", POSTag::vvi);
111+
joiner.add(u"", POSTag::ep);
112+
joiner.add(u"", POSTag::ef);
113+
EXPECT_EQ(joiner.getU16(), u"내가 물었다");
114+
115+
joiner = rule.newJoiner();
116+
joiner.add(u"", POSTag::vv);
117+
joiner.add(u"", POSTag::ec);
118+
joiner.add(u"", POSTag::vx);
119+
joiner.add(u"", POSTag::ef);
120+
EXPECT_EQ(joiner.getU16(), u"돼지다");
101121
}
102122

103123
TEST(KiwiCppCombiner, Allomorph)
@@ -172,19 +192,4 @@ TEST(KiwiCppCombiner, Allomorph)
172192
joiner.add(u"", POSTag::vv);
173193
joiner.add(u"", POSTag::ef);
174194
EXPECT_EQ(joiner.getU16(), u"날아");
175-
176-
joiner = rule.newJoiner();
177-
joiner.add(u"", POSTag::np);
178-
joiner.add(u"", POSTag::jks);
179-
joiner.add(u"", POSTag::vvi);
180-
joiner.add(u"", POSTag::ep);
181-
joiner.add(u"", POSTag::ef);
182-
EXPECT_EQ(joiner.getU16(), u"내가 물었다");
183-
184-
joiner = rule.newJoiner();
185-
joiner.add(u"", POSTag::vv);
186-
joiner.add(u"", POSTag::ec);
187-
joiner.add(u"", POSTag::vx);
188-
joiner.add(u"", POSTag::ef);
189-
EXPECT_EQ(joiner.getU16(), u"돼지다");
190195
}

0 commit comments

Comments
 (0)