diff --git a/src/main/java/com/orange/labs/conllparser/ConllSentence.java b/src/main/java/com/orange/labs/conllparser/ConllSentence.java index 977de70..a92bd6f 100644 --- a/src/main/java/com/orange/labs/conllparser/ConllSentence.java +++ b/src/main/java/com/orange/labs/conllparser/ConllSentence.java @@ -726,7 +726,8 @@ public String toString(boolean strict) { if (getText() != null) { sb.append("# text = ").append(getText().replaceAll("\n", " ").trim()).append('\n'); } else { - sb.append("# text = ").append(getSentence().replaceAll("\n", " ").trim()).append('\n'); + // do not add "# text = ..." if absent in file and not added by user + //sb.append("# text = ").append(getSentence().replaceAll("\n", " ").trim()).append('\n'); } if (translit != null && !translit.isEmpty()) { diff --git a/src/test/java/TestConllSentence.java b/src/test/java/TestConllSentence.java index c0cc477..90cedc3 100644 --- a/src/test/java/TestConllSentence.java +++ b/src/test/java/TestConllSentence.java @@ -68,7 +68,7 @@ public void test01readSDparse() throws IOException, ConllException { SDParse sdp = new SDParse("the little sleeps mouse\nnsubj(sleeps, mouse)\ndet(mouse, the)\namod(mouse, little)"); - String ref = "# text = the little sleeps mouse\n"+ + String ref = //"# text = the little sleeps mouse\n"+ "1 the _ _ _ _ 4 det _ _\n"+ "2 little _ _ _ _ 4 amod _ _\n"+ "3 sleeps _ _ _ _ 0 root _ _\n"+ diff --git a/src/test/resources/createsubtree10cols.json b/src/test/resources/createsubtree10cols.json index 361b4a9..b362c3e 100644 --- a/src/test/resources/createsubtree10cols.json +++ b/src/test/resources/createsubtree10cols.json @@ -1,6 +1,6 @@ { "sentenceid": 0, "maxsentence": 19, - "ok": "# global.columns \u003d ID FORM LEMMA UPOS XPOS FEATS HEAD DEPREL DEPS MISC\n# text \u003d sur la vie de Howard 
Hughes\n1\tsur\tsur\tADP\t_\t_\t3\tcase\t_\t_\n2\tla\tle\tDET\t_\tDefinite\u003dDef|Gender\u003dFem|Number\u003dSing|PronType\u003dArt\t3\tdet\t_\t_\n3\tvie\tvie\tNOUN\t_\tGender\u003dFem|Number\u003dSing\t0\troot\t_\t_\n4\tde\tde\tADP\t_\t_\t5\tcase\t_\t_\n5\tHoward\tHoward\tPROPN\t_\t_\t3\tnmod\t_\t_\n6\tHughes\tHughes\tPROPN\t_\t_\t5\tflat:name\t_\tSpaceAfter\u003dNo\n\n", + "ok": "# global.columns \u003d ID FORM LEMMA UPOS XPOS FEATS HEAD DEPREL DEPS MISC\n1\tsur\tsur\tADP\t_\t_\t3\tcase\t_\t_\n2\tla\tle\tDET\t_\tDefinite\u003dDef|Gender\u003dFem|Number\u003dSing|PronType\u003dArt\t3\tdet\t_\t_\n3\tvie\tvie\tNOUN\t_\tGender\u003dFem|Number\u003dSing\t0\troot\t_\t_\n4\tde\tde\tADP\t_\t_\t5\tcase\t_\t_\n5\tHoward\tHoward\tPROPN\t_\t_\t3\tnmod\t_\t_\n6\tHughes\tHughes\tPROPN\t_\t_\t5\tflat:name\t_\tSpaceAfter\u003dNo\n\n", "changes": 0 } \ No newline at end of file diff --git a/src/test/resources/createsubtree5cols.json b/src/test/resources/createsubtree5cols.json index 6bf2aac..b2f8ced 100644 --- a/src/test/resources/createsubtree5cols.json +++ b/src/test/resources/createsubtree5cols.json @@ -1,6 +1,6 @@ { "sentenceid": 1, "maxsentence": 19, - "ok": "# global.columns \u003d ID LEMMA UPOS HEAD DEPREL\n# text \u003d mais leur contenu diffère donc selon les Facultés\n1\tmais\tCCONJ\t4\tcc\n2\tson\tDET\t3\tnmod:poss\n3\tcontenu\tNOUN\t4\tnsubj\n4\tdifférer\tVERB\t0\troot\n5\tdonc\tADV\t4\tadvmod\n6\tselon\tADP\t8\tcase\n7\tle\tDET\t8\tdet\n8\tFacultés\tPROPN\t4\tobl\n\n", + "ok": "# global.columns \u003d ID LEMMA UPOS HEAD DEPREL\n1\tmais\tCCONJ\t4\tcc\n2\tson\tDET\t3\tnmod:poss\n3\tcontenu\tNOUN\t4\tnsubj\n4\tdifférer\tVERB\t0\troot\n5\tdonc\tADV\t4\tadvmod\n6\tselon\tADP\t8\tcase\n7\tle\tDET\t8\tdet\n8\tFacultés\tPROPN\t4\tobl\n\n", "changes": 0 } \ No newline at end of file diff --git a/src/test/resources/sdparse2.conllu b/src/test/resources/sdparse2.conllu index 2b8e62d..a832abd 100644 --- a/src/test/resources/sdparse2.conllu +++ 
b/src/test/resources/sdparse2.conllu @@ -1,4 +1,3 @@ -# text = I can can the can . 1 I _ _ _ _ 3 nsubj _ _ 2 can _ _ _ _ 3 aux _ _ 3 can _ _ _ _ 0 root _ _ diff --git a/src/test/resources/sdparse3.conllu b/src/test/resources/sdparse3.conllu index 90b1081..220d918 100644 --- a/src/test/resources/sdparse3.conllu +++ b/src/test/resources/sdparse3.conllu @@ -1,4 +1,3 @@ -# text = POS tags can be attached to ( any part of ) the sentence text . 1 POS _ NNP _ _ 2 dep _ _ 2 tags _ NNS _ _ 5 nsubjpass _ _ 3 can _ MD _ _ 5 aux _ _