diff --git a/CHANGES.md b/CHANGES.md index 21dba9a..631a963 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,8 @@ # Changes +## Version 2.25.6 +* corrected the warning shown when a feature is used with a UPOS for which it is not valid (requires `--features`) + ## Version 2.25.5 * accept sentences without `# text = ...` (but issue warning) diff --git a/README.md b/README.md index da3af49..c51a6ec 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ The editor provides the following functionalities: * adding Translit= values to the MISC column (transliterating the FORM column) see section [Transliteration](#transliteration) * finding similar or identical sentence in a list of CoNLL-U files, see section [Find Similar Sentences](#find-similar-sentences) -Current version: 2.25.4 (see [change history](CHANGES.md)) +Current version: 2.25.6 (see [change history](CHANGES.md)) ConlluEditor can also be used as front-end to display the results of dependency parsing in the same way as the editor. * dependency tree/dependency hedge @@ -223,7 +223,8 @@ together with the option `--language` * `--features ` comma separated list of files, containing valid feature=value pairs (see https://github.com/UniversalDependencies/tools/tree/master/data/feat_val.ud) in addition to feature=value pairs, a second type of lines is possible to define the list of features which are valid for a given UPOS: for instance `U:NOUN Gender Number Case` Alternatively the new (json) format can be used (https://github.com/UniversalDependencies/tools/blob/master/data/feats.json) -together with the option `--language` +together with the option `--language`. This will highlight features which are not valid for the UPOS +they are used with. * `--language ` use feature and/or deprel definitions in the json files given to the `--features` and `--deprels` options. Without `--language` only the universal features and deprels are used.
* `--include_unused` some features defined for a given languages in [feats.json](https://github.com/UniversalDependencies/tools/blob/master/data/feats.json) diff --git a/pom.xml b/pom.xml index 4a47040..547ca9e 100644 --- a/pom.xml +++ b/pom.xml @@ -32,13 +32,13 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. author Johannes Heinecke - version 2.25.5 as of 19th April 2024 + version 2.25.6 as of 29th August 2024 --> 4.0.0 com.orange.labs ConlluEditor - 2.25.5 + 2.25.6 jar diff --git a/src/main/java/com/orange/labs/conllparser/ConllWord.java b/src/main/java/com/orange/labs/conllparser/ConllWord.java index f3edb16..bdf318b 100644 --- a/src/main/java/com/orange/labs/conllparser/ConllWord.java +++ b/src/main/java/com/orange/labs/conllparser/ConllWord.java @@ -28,7 +28,7 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. @author Johannes Heinecke - @version 2.25.3 as of 16th March 2024 + @version 2.25.6 as of 29th August 2024 */ package com.orange.labs.conllparser; @@ -190,7 +190,7 @@ public ConllWord(String conllline, List lastannots, Map public ConllWord(String form) { this(form, null); - /* + /* dependents = new ArrayList<>(); depmap = new TreeMap<>(); this.form = form; @@ -1522,7 +1522,6 @@ public JsonArray getFeaturesJson(ValidFeatures validfeats, ConllSentence.Annotat // check whether feat/val is valid if (validfeats != null) { int rtc = validfeats.isValid(upostag, xpostag, f, val); - if (rtc == 1) { jfeat.addProperty("error", "name"); ae.features++; diff --git a/src/main/java/com/orange/labs/conllparser/ValidFeatures.java b/src/main/java/com/orange/labs/conllparser/ValidFeatures.java index ea27860..2e983ee 100644 --- a/src/main/java/com/orange/labs/conllparser/ValidFeatures.java +++ b/src/main/java/com/orange/labs/conllparser/ValidFeatures.java @@ -1,6 +1,6 @@ /* This library is under the 3-Clause BSD License -Copyright (c) 2020, Orange S.A. +Copyright (c) 2020-2024, Orange S.A. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -28,7 +28,7 @@ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. @author Johannes Heinecke - @version 2.9.0 as of 30th December 2020 + @version 2.25.6 as of 29th August 2024 */ package com.orange.labs.conllparser; @@ -54,9 +54,9 @@ * Loads a file which defines all valid features and optionally gives for each * UPOS the list of valid features names. Format: Feature1=Val1 Feature1=Val2 * ... U:NOUN Feature1 Feature2 ... X:NN Feature1 Feature2 - * + * * or - * + * * loads the official data/feats.json file (from https://github.com/UniversalDependencies/tools) * which defines for all UD languages valid features/values for each UPOS * if no language is given, it loads only universal feature/value pairs @@ -108,6 +108,7 @@ public ValidFeatures(List filenames, String lg, boolean include_unused) } } System.err.format("%d valid Features read from %s\n", validFeatures.size(), filenames.toString()); + System.err.println(validFeatures); System.err.println(uposFnames); System.err.println(xposFnames); } @@ -142,8 +143,8 @@ public int isValid(String upos, String xpos, String fname, String fvalue) { } } - - if (uposok || xposok) return 0; + + if (uposok && xposok) return 0; return 1; } @@ -175,12 +176,12 @@ private void readjson(String fn, String lg, boolean include_unused) throws IOExc // if no language is given, read all features of all languages which are type=universal and doc:global, read uvalues and unused_uvalues BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(fn), StandardCharsets.UTF_8)); JsonObject jfile = JsonParser.parseReader(br).getAsJsonObject(); - + JsonObject features = jfile.getAsJsonObject("features"); if (features == null) return; if (lg == null) { // get all universal define features - + for (String lgcode : features.keySet()) { JsonObject jlang = 
features.getAsJsonObject(lgcode); for (String featurename : jlang.keySet()) { @@ -189,7 +190,7 @@ private void readjson(String fn, String lg, boolean include_unused) throws IOExc String type = feature.get("type").getAsString(); String doc = feature.get("doc").getAsString(); if (!type.equals("universal") || !doc.equals("global")) { - // we only use universal features + // we only use universal features continue; } @@ -205,7 +206,7 @@ private void readjson(String fn, String lg, boolean include_unused) throws IOExc //System.err.format("%s=%s\n", featurename, val); addFvalue(featurename, val); } - + } } } else { diff --git a/src/main/java/com/orange/labs/editor/ConlluEditor.java b/src/main/java/com/orange/labs/editor/ConlluEditor.java index 9636445..64c9106 100644 --- a/src/main/java/com/orange/labs/editor/ConlluEditor.java +++ b/src/main/java/com/orange/labs/editor/ConlluEditor.java @@ -2477,7 +2477,7 @@ public static void main(String[] args) { options.addOption(language); Option validator = Option.builder("v").longOpt("validator") - .argName("file") + .argName("LANG") .hasArg() .desc("file with validator configuration") .build(); diff --git a/src/test/resources/featsvalid.json b/src/test/resources/featsvalid.json index cabede9..8c2c0ec 100644 --- a/src/test/resources/featsvalid.json +++ b/src/test/resources/featsvalid.json @@ -223,7 +223,8 @@ "feats": [ { "name": "Gender", - "val": "Masc" + "val": "Masc", + "error": "name" }, { "name": "Number", @@ -335,7 +336,7 @@ ], "info": "_", "errors": { - "invalidFeatures": 19 + "invalidFeatures": 20 }, "comments": "sentence 1\n", "canUndo": false,