Skip to content

Commit

Permalink
correction for warning if feature is used with UPOS which is not vali…
Browse files Browse the repository at this point in the history
…d (needs use of `--features`)
  • Loading branch information
Johannes Heinecke committed Aug 29, 2024
1 parent 484c772 commit 1fcd744
Show file tree
Hide file tree
Showing 7 changed files with 25 additions and 20 deletions.
3 changes: 3 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Changes

## Version 2.25.6
* corrected the warning shown when a feature is used with a UPOS for which it is not valid (requires the `--features` option)

## Version 2.25.5
* accept sentences without `# text = ...` (but issue warning)

Expand Down
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ The editor provides the following functionalities:
* adding Translit= values to the MISC column (transliterating the FORM column) see section [Transliteration](#transliteration)
* finding similar or identical sentences in a list of CoNLL-U files, see section [Find Similar Sentences](#find-similar-sentences)

Current version: 2.25.4 (see [change history](CHANGES.md))
Current version: 2.25.6 (see [change history](CHANGES.md))

ConlluEditor can also be used as front-end to display the results of dependency parsing in the same way as the editor.
* dependency tree/dependency hedge
Expand Down Expand Up @@ -223,7 +223,8 @@ together with the option `--language`
* `--features <file>` comma separated list of files, containing valid feature=value pairs (see https://github.com/UniversalDependencies/tools/tree/master/data/feat_val.ud)
in addition to feature=value pairs, a second type of line can be used to define the list of features which are valid for a given UPOS: for instance `U:NOUN Gender Number Case`
Alternatively the new (json) format can be used (https://github.com/UniversalDependencies/tools/blob/master/data/feats.json)
together with the option `--language`
together with the option `--language`. This will highlight features which are used with a UPOS for which
they are not valid.
* `--language <lg code>` use feature and/or deprel definitions in the json files given to the `--features` and `--deprels`
options. Without `--language` only the universal features and deprels are used.
* `--include_unused` some features defined for a given language in [feats.json](https://github.com/UniversalDependencies/tools/blob/master/data/feats.json)
Expand Down
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,13 @@
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
author Johannes Heinecke
version 2.25.5 as of 19th April 2024
version 2.25.6 as of 29th August 2024
-->

<modelVersion>4.0.0</modelVersion>
<groupId>com.orange.labs</groupId>
<artifactId>ConlluEditor</artifactId>
<version>2.25.5</version>
<version>2.25.6</version>
<packaging>jar</packaging>

<properties>
Expand Down
5 changes: 2 additions & 3 deletions src/main/java/com/orange/labs/conllparser/ConllWord.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@author Johannes Heinecke
@version 2.25.3 as of 16th March 2024
@version 2.25.6 as of 29th August 2024
*/
package com.orange.labs.conllparser;

Expand Down Expand Up @@ -190,7 +190,7 @@ public ConllWord(String conllline, List<String> lastannots, Map<String, Integer>

public ConllWord(String form) {
this(form, null);
/*
/*
dependents = new ArrayList<>();
depmap = new TreeMap<>();
this.form = form;
Expand Down Expand Up @@ -1522,7 +1522,6 @@ public JsonArray getFeaturesJson(ValidFeatures validfeats, ConllSentence.Annotat
// check whether feat/val is valid
if (validfeats != null) {
int rtc = validfeats.isValid(upostag, xpostag, f, val);

if (rtc == 1) {
jfeat.addProperty("error", "name");
ae.features++;
Expand Down
21 changes: 11 additions & 10 deletions src/main/java/com/orange/labs/conllparser/ValidFeatures.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* This library is under the 3-Clause BSD License
Copyright (c) 2020, Orange S.A.
Copyright (c) 2020-2024, Orange S.A.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Expand Down Expand Up @@ -28,7 +28,7 @@
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@author Johannes Heinecke
@version 2.9.0 as of 30th December 2020
@version 2.25.6 as of 29th August 2024
*/
package com.orange.labs.conllparser;

Expand All @@ -54,9 +54,9 @@
* Loads a file which defines all valid features and optionally gives for each
* UPOS the list of valid features names. Format: Feature1=Val1 Feature1=Val2
* ... U:NOUN Feature1 Feature2 ... X:NN Feature1 Feature2
*
*
* or
*
*
* loads the official data/feats.json file (from https://github.com/UniversalDependencies/tools)
* which defines for all UD languages valid features/values for each UPOS
* if no language is given, it loads only universal feature/value pairs
Expand Down Expand Up @@ -108,6 +108,7 @@ public ValidFeatures(List<String> filenames, String lg, boolean include_unused)
}
}
System.err.format("%d valid Features read from %s\n", validFeatures.size(), filenames.toString());
System.err.println(validFeatures);
System.err.println(uposFnames);
System.err.println(xposFnames);
}
Expand Down Expand Up @@ -142,8 +143,8 @@ public int isValid(String upos, String xpos, String fname, String fvalue) {
}
}

if (uposok || xposok) return 0;

if (uposok && xposok) return 0;
return 1;
}

Expand Down Expand Up @@ -175,12 +176,12 @@ private void readjson(String fn, String lg, boolean include_unused) throws IOExc
// if no language is given, read all features of all languages which are type=universal and doc:global, read uvalues and unused_uvalues
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(fn), StandardCharsets.UTF_8));
JsonObject jfile = JsonParser.parseReader(br).getAsJsonObject();

JsonObject features = jfile.getAsJsonObject("features");
if (features == null) return;
if (lg == null) {
// get all universal define features

for (String lgcode : features.keySet()) {
JsonObject jlang = features.getAsJsonObject(lgcode);
for (String featurename : jlang.keySet()) {
Expand All @@ -189,7 +190,7 @@ private void readjson(String fn, String lg, boolean include_unused) throws IOExc
String type = feature.get("type").getAsString();
String doc = feature.get("doc").getAsString();
if (!type.equals("universal") || !doc.equals("global")) {
// we only use universal features
// we only use universal features
continue;
}

Expand All @@ -205,7 +206,7 @@ private void readjson(String fn, String lg, boolean include_unused) throws IOExc
//System.err.format("%s=%s\n", featurename, val);
addFvalue(featurename, val);
}

}
}
} else {
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/orange/labs/editor/ConlluEditor.java
Original file line number Diff line number Diff line change
Expand Up @@ -2477,7 +2477,7 @@ public static void main(String[] args) {
options.addOption(language);

Option validator = Option.builder("v").longOpt("validator")
.argName("file")
.argName("LANG")
.hasArg()
.desc("file with validator configuration")
.build();
Expand Down
5 changes: 3 additions & 2 deletions src/test/resources/featsvalid.json
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,8 @@
"feats": [
{
"name": "Gender",
"val": "Masc"
"val": "Masc",
"error": "name"
},
{
"name": "Number",
Expand Down Expand Up @@ -335,7 +336,7 @@
],
"info": "_",
"errors": {
"invalidFeatures": 19
"invalidFeatures": 20
},
"comments": "sentence 1\n",
"canUndo": false,
Expand Down

0 comments on commit 1fcd744

Please sign in to comment.