From 6c40b72e66879887ce6948ed3019976c89a43c8e Mon Sep 17 00:00:00 2001 From: Eszti Date: Sat, 23 Nov 2024 22:19:48 +0100 Subject: [PATCH] Prepare arg permutation pipelines --- tuw_nlp/sem/hrg/Documentation.md | 6 +++++- .../sem/hrg/pipeline/config/kbest_100.json | 1 + .../sem/hrg/pipeline/config/kbest_100_ap.json | 21 +++++++++++++++++++ .../sem/hrg/pipeline/config/kbest_200.json | 1 + .../sem/hrg/pipeline/config/kbest_200_ap.json | 21 +++++++++++++++++++ .../sem/hrg/pipeline/config/kbest_300.json | 1 + .../sem/hrg/pipeline/config/kbest_300_ap.json | 21 +++++++++++++++++++ .../sem/hrg/pipeline/config/merge_100_ap.json | 17 +++++++++++++++ .../sem/hrg/pipeline/config/merge_200_ap.json | 17 +++++++++++++++ .../sem/hrg/pipeline/config/merge_300_ap.json | 17 +++++++++++++++ .../pipeline/config/pipeline_dev_100_ap.json | 21 +++++++++++++++++++ .../pipeline/config/pipeline_dev_200_ap.json | 20 ++++++++++++++++++ .../pipeline/config/pipeline_dev_300_ap.json | 20 ++++++++++++++++++ .../hrg/pipeline/config/predict_100_ap.json | 16 ++++++++++++++ .../hrg/pipeline/config/predict_200_ap.json | 16 ++++++++++++++ .../hrg/pipeline/config/predict_300_ap.json | 16 ++++++++++++++ 16 files changed, 231 insertions(+), 1 deletion(-) create mode 100644 tuw_nlp/sem/hrg/pipeline/config/kbest_100_ap.json create mode 100644 tuw_nlp/sem/hrg/pipeline/config/kbest_200_ap.json create mode 100644 tuw_nlp/sem/hrg/pipeline/config/kbest_300_ap.json create mode 100644 tuw_nlp/sem/hrg/pipeline/config/merge_100_ap.json create mode 100644 tuw_nlp/sem/hrg/pipeline/config/merge_200_ap.json create mode 100644 tuw_nlp/sem/hrg/pipeline/config/merge_300_ap.json create mode 100644 tuw_nlp/sem/hrg/pipeline/config/pipeline_dev_100_ap.json create mode 100644 tuw_nlp/sem/hrg/pipeline/config/pipeline_dev_200_ap.json create mode 100644 tuw_nlp/sem/hrg/pipeline/config/pipeline_dev_300_ap.json create mode 100644 tuw_nlp/sem/hrg/pipeline/config/predict_100_ap.json create mode 100644 
tuw_nlp/sem/hrg/pipeline/config/predict_200_ap.json create mode 100644 tuw_nlp/sem/hrg/pipeline/config/predict_300_ap.json diff --git a/tuw_nlp/sem/hrg/Documentation.md b/tuw_nlp/sem/hrg/Documentation.md index 15be594..9398f4e 100644 --- a/tuw_nlp/sem/hrg/Documentation.md +++ b/tuw_nlp/sem/hrg/Documentation.md @@ -71,12 +71,16 @@ python steps/predict/merge.py -d $DATA_DIR -c pipeline/config/merge_100.json ```bash # Hrg - 100 python pipeline/pipeline.py -d $DATA_DIR -c pipeline/config/pipeline_dev_100.json +# Hrg - 100 - argument permutation +python pipeline/pipeline.py -d $DATA_DIR -c pipeline/config/pipeline_dev_100_ap.json # Hrg - 200 python pipeline/pipeline.py -d $DATA_DIR -c pipeline/config/pipeline_dev_200.json +python pipeline/pipeline.py -d $DATA_DIR -c pipeline/config/pipeline_dev_200_ap.json # Hrg - 300 python pipeline/pipeline.py -d $DATA_DIR -c pipeline/config/pipeline_dev_300.json +python pipeline/pipeline.py -d $DATA_DIR -c pipeline/config/pipeline_dev_300_ap.json ``` ### Create random predictions for comparison @@ -99,7 +103,7 @@ python pipeline/pipeline.py -d $DATA_DIR -c pipeline/config/pipeline_dev_random. ### Evaluate the predictions -We [evaluate](steps/eval/eval.py) our system using a slightly modified version of the [scorer](steps/eval/wire_scorer.py) from the [WiRe paper](https://aclanthology.org/W19-4002/) (since lsoie triples does not necessarily have a second argument, common words are only needed for predicates and first arguments in order for two triplets to match). We present the results of [all](eval/reports/dev_all.md) our systems and a filtered table for the [top estimation](eval/reports/dev_best.md). 
+We [evaluate](steps/eval/eval.py) our system using a slightly modified version of the [scorer](steps/eval/wire_scorer.py) from the [WiRe paper](https://aclanthology.org/W19-4002/) (since lsoie triples do not necessarily have a second argument, common words are only needed for predicates and first arguments in order for two triples to match). We present the results of [all](pipeline/output/eval/eval_dev_all.md) our systems and a filtered table for the [top estimation](pipeline/output/eval/eval_dev_best.md). ```bash # Eval all diff --git a/tuw_nlp/sem/hrg/pipeline/config/kbest_100.json b/tuw_nlp/sem/hrg/pipeline/config/kbest_100.json index 9728821..e1aa2fa 100644 --- a/tuw_nlp/sem/hrg/pipeline/config/kbest_100.json +++ b/tuw_nlp/sem/hrg/pipeline/config/kbest_100.json @@ -1,6 +1,7 @@ { "in_dir": "dev_preproc", "out_dir": "dev_100", + "grammar_file": "hrg_100.hrg", "arg_permutation": false, "filters": { diff --git a/tuw_nlp/sem/hrg/pipeline/config/kbest_100_ap.json b/tuw_nlp/sem/hrg/pipeline/config/kbest_100_ap.json new file mode 100644 index 0000000..9fea5b8 --- /dev/null +++ b/tuw_nlp/sem/hrg/pipeline/config/kbest_100_ap.json @@ -0,0 +1,21 @@ +{ + "in_dir": "dev_preproc", + "out_dir": "dev_100_ap", + "grammar_file": "hrg_100.hrg", + "arg_permutation": true, + "filters": + { + "prec": + { + "pr_metric": "prec" + }, + "rec": + { + "pr_metric": "rec" + }, + "f1": + { + "pr_metric": "f1" + } + } +} \ No newline at end of file diff --git a/tuw_nlp/sem/hrg/pipeline/config/kbest_200.json b/tuw_nlp/sem/hrg/pipeline/config/kbest_200.json index d2c1bb1..04c07d1 100644 --- a/tuw_nlp/sem/hrg/pipeline/config/kbest_200.json +++ b/tuw_nlp/sem/hrg/pipeline/config/kbest_200.json @@ -1,6 +1,7 @@ { "in_dir": "dev_preproc", "out_dir": "dev_200", + "grammar_file": "hrg_200.hrg", "arg_permutation": false, "filters": { diff --git a/tuw_nlp/sem/hrg/pipeline/config/kbest_200_ap.json b/tuw_nlp/sem/hrg/pipeline/config/kbest_200_ap.json new file mode 100644 index 0000000..02ee88b --- 
/dev/null +++ b/tuw_nlp/sem/hrg/pipeline/config/kbest_200_ap.json @@ -0,0 +1,21 @@ +{ + "in_dir": "dev_preproc", + "out_dir": "dev_200_ap", + "grammar_file": "hrg_200.hrg", + "arg_permutation": true, + "filters": + { + "prec": + { + "pr_metric": "prec" + }, + "rec": + { + "pr_metric": "rec" + }, + "f1": + { + "pr_metric": "f1" + } + } +} \ No newline at end of file diff --git a/tuw_nlp/sem/hrg/pipeline/config/kbest_300.json b/tuw_nlp/sem/hrg/pipeline/config/kbest_300.json index ebb89cb..6801d46 100644 --- a/tuw_nlp/sem/hrg/pipeline/config/kbest_300.json +++ b/tuw_nlp/sem/hrg/pipeline/config/kbest_300.json @@ -1,6 +1,7 @@ { "in_dir": "dev_preproc", "out_dir": "dev_300", + "grammar_file": "hrg_300.hrg", "arg_permutation": false, "filters": { diff --git a/tuw_nlp/sem/hrg/pipeline/config/kbest_300_ap.json b/tuw_nlp/sem/hrg/pipeline/config/kbest_300_ap.json new file mode 100644 index 0000000..7caaf8f --- /dev/null +++ b/tuw_nlp/sem/hrg/pipeline/config/kbest_300_ap.json @@ -0,0 +1,21 @@ +{ + "in_dir": "dev_preproc", + "out_dir": "dev_300_ap", + "grammar_file": "hrg_300.hrg", + "arg_permutation": true, + "filters": + { + "prec": + { + "pr_metric": "prec" + }, + "rec": + { + "pr_metric": "rec" + }, + "f1": + { + "pr_metric": "f1" + } + } +} \ No newline at end of file diff --git a/tuw_nlp/sem/hrg/pipeline/config/merge_100_ap.json b/tuw_nlp/sem/hrg/pipeline/config/merge_100_ap.json new file mode 100644 index 0000000..49cd315 --- /dev/null +++ b/tuw_nlp/sem/hrg/pipeline/config/merge_100_ap.json @@ -0,0 +1,17 @@ +{ + "in_dir": "dev_100_ap", + "k": 10, + "bolinas_chart_filters": + [ + "max", + "basic", + "prec", + "rec", + "f1" + ], + "postprocess": + [ + "keep" + ], + "out_dir": "dev_extractions" +} \ No newline at end of file diff --git a/tuw_nlp/sem/hrg/pipeline/config/merge_200_ap.json b/tuw_nlp/sem/hrg/pipeline/config/merge_200_ap.json new file mode 100644 index 0000000..0de3251 --- /dev/null +++ b/tuw_nlp/sem/hrg/pipeline/config/merge_200_ap.json @@ -0,0 +1,17 @@ +{ + 
"in_dir": "dev_200_ap", + "k": 10, + "bolinas_chart_filters": + [ + "max", + "basic", + "prec", + "rec", + "f1" + ], + "postprocess": + [ + "keep" + ], + "out_dir": "dev_extractions" +} \ No newline at end of file diff --git a/tuw_nlp/sem/hrg/pipeline/config/merge_300_ap.json b/tuw_nlp/sem/hrg/pipeline/config/merge_300_ap.json new file mode 100644 index 0000000..f0861e1 --- /dev/null +++ b/tuw_nlp/sem/hrg/pipeline/config/merge_300_ap.json @@ -0,0 +1,17 @@ +{ + "in_dir": "dev_300_ap", + "k": 10, + "bolinas_chart_filters": + [ + "max", + "basic", + "prec", + "rec", + "f1" + ], + "postprocess": + [ + "keep" + ], + "out_dir": "dev_extractions" +} \ No newline at end of file diff --git a/tuw_nlp/sem/hrg/pipeline/config/pipeline_dev_100_ap.json b/tuw_nlp/sem/hrg/pipeline/config/pipeline_dev_100_ap.json new file mode 100644 index 0000000..3eb8e52 --- /dev/null +++ b/tuw_nlp/sem/hrg/pipeline/config/pipeline_dev_100_ap.json @@ -0,0 +1,21 @@ +{ + "last": 5, + "steps": + [ + { + "step_name": "kbest", + "script_name": "kbest", + "config": "kbest_100_ap.json" + }, + { + "step_name": "predict", + "script_name": "predict", + "config": "predict_100_ap.json" + }, + { + "step_name": "merge", + "script_name": "merge", + "config": "merge_100_ap.json" + } + ] +} \ No newline at end of file diff --git a/tuw_nlp/sem/hrg/pipeline/config/pipeline_dev_200_ap.json b/tuw_nlp/sem/hrg/pipeline/config/pipeline_dev_200_ap.json new file mode 100644 index 0000000..a180fdc --- /dev/null +++ b/tuw_nlp/sem/hrg/pipeline/config/pipeline_dev_200_ap.json @@ -0,0 +1,20 @@ +{ + "steps": + [ + { + "step_name": "kbest", + "script_name": "kbest", + "config": "kbest_200_ap.json" + }, + { + "step_name": "predict", + "script_name": "predict", + "config": "predict_200_ap.json" + }, + { + "step_name": "merge", + "script_name": "merge", + "config": "merge_200_ap.json" + } + ] +} \ No newline at end of file diff --git a/tuw_nlp/sem/hrg/pipeline/config/pipeline_dev_300_ap.json 
b/tuw_nlp/sem/hrg/pipeline/config/pipeline_dev_300_ap.json new file mode 100644 index 0000000..33d5c92 --- /dev/null +++ b/tuw_nlp/sem/hrg/pipeline/config/pipeline_dev_300_ap.json @@ -0,0 +1,20 @@ +{ + "steps": + [ + { + "step_name": "kbest", + "script_name": "kbest", + "config": "kbest_300_ap.json" + }, + { + "step_name": "predict", + "script_name": "predict", + "config": "predict_300_ap.json" + }, + { + "step_name": "merge", + "script_name": "merge", + "config": "merge_300_ap.json" + } + ] +} \ No newline at end of file diff --git a/tuw_nlp/sem/hrg/pipeline/config/predict_100_ap.json b/tuw_nlp/sem/hrg/pipeline/config/predict_100_ap.json new file mode 100644 index 0000000..038638e --- /dev/null +++ b/tuw_nlp/sem/hrg/pipeline/config/predict_100_ap.json @@ -0,0 +1,16 @@ +{ + "preproc_dir": "dev_preproc", + "in_dir": "dev_100_ap", + "bolinas_chart_filters": + [ + "basic", + "max", + "prec", + "rec", + "f1" + ], + "postprocess": + [ + "keep" + ] +} \ No newline at end of file diff --git a/tuw_nlp/sem/hrg/pipeline/config/predict_200_ap.json b/tuw_nlp/sem/hrg/pipeline/config/predict_200_ap.json new file mode 100644 index 0000000..14de2c5 --- /dev/null +++ b/tuw_nlp/sem/hrg/pipeline/config/predict_200_ap.json @@ -0,0 +1,16 @@ +{ + "preproc_dir": "dev_preproc", + "in_dir": "dev_200_ap", + "bolinas_chart_filters": + [ + "basic", + "max", + "prec", + "rec", + "f1" + ], + "postprocess": + [ + "keep" + ] +} \ No newline at end of file diff --git a/tuw_nlp/sem/hrg/pipeline/config/predict_300_ap.json b/tuw_nlp/sem/hrg/pipeline/config/predict_300_ap.json new file mode 100644 index 0000000..01244df --- /dev/null +++ b/tuw_nlp/sem/hrg/pipeline/config/predict_300_ap.json @@ -0,0 +1,16 @@ +{ + "preproc_dir": "dev_preproc", + "in_dir": "dev_300_ap", + "bolinas_chart_filters": + [ + "basic", + "max", + "prec", + "rec", + "f1" + ], + "postprocess": + [ + "keep" + ] +} \ No newline at end of file