TablewareBox
diff --git a/Diff for: ‎evals/registry/data/00_scipaper_enzyme_activate_compound/samples.jsonl
-3 b/Diff for: ‎evals/registry/data/00_scipaper_enzyme_activate_compound/samples.jsonl
-3
diff --git a/Diff for: ‎evals/registry/data/00_scipaper_enzyme_inhibitor/samples.jsonl
-3 b/Diff for: ‎evals/registry/data/00_scipaper_enzyme_inhibitor/samples.jsonl
-3
diff --git a/Diff for: ‎evals/registry/data/00_scipaper_enzyme_km/auto_add.sh
+27 b/Diff for: ‎evals/registry/data/00_scipaper_enzyme_km/auto_add.sh
+27
diff --git a/Diff for: ‎evals/registry/data/00_scipaper_enzyme_km/sample_file
+1 b/Diff for: ‎evals/registry/data/00_scipaper_enzyme_km/sample_file
+1
diff --git a/Diff for: ‎evals/registry/data/00_scipaper_enzyme_km/samples.jsonl
+3 b/Diff for: ‎evals/registry/data/00_scipaper_enzyme_km/samples.jsonl
+3
diff --git a/Diff for: ‎evals/registry/data/00_scipaper_enzyme_localization/samples.jsonl
-3 b/Diff for: ‎evals/registry/data/00_scipaper_enzyme_localization/samples.jsonl
-3
diff --git a/Diff for: ‎evals/registry/data/00_scipaper_enzyme_substrate/auto_add.sh
+27 b/Diff for: ‎evals/registry/data/00_scipaper_enzyme_substrate/auto_add.sh
+27
diff --git a/Diff for: ‎evals/registry/data/00_scipaper_enzyme_substrate/sample_file
+1 b/Diff for: ‎evals/registry/data/00_scipaper_enzyme_substrate/sample_file
+1
diff --git a/Diff for: ‎evals/registry/data/00_scipaper_enzyme_substrate/samples.jsonl
+2-2 b/Diff for: ‎evals/registry/data/00_scipaper_enzyme_substrate/samples.jsonl
+2-2
diff --git a/Diff for: ‎evals/registry/evals/00_scipaper_enzyme_activate_compound.yaml
-18 b/Diff for: ‎evals/registry/evals/00_scipaper_enzyme_activate_compound.yaml
-18
diff --git a/Diff for: ‎evals/registry/evals/00_scipaper_enzyme_inhibitor.yaml renamed to ‎evals/registry/evals/00_scipaper_enzyme_km.yaml
+11-9 b/Diff for: ‎evals/registry/evals/00_scipaper_enzyme_inhibitor.yaml renamed to ‎evals/registry/evals/00_scipaper_enzyme_km.yaml
+11-9
diff --git a/Diff for: ‎evals/registry/evals/00_scipaper_enzyme_localization.yaml
-16 b/Diff for: ‎evals/registry/evals/00_scipaper_enzyme_localization.yaml
-16
diff --git a/Diff for: ‎evals/registry/evals/00_scipaper_enzyme_substrate.yaml
+7-6 b/Diff for: ‎evals/registry/evals/00_scipaper_enzyme_substrate.yaml
+7-6
@@ -0,0 +1,27 @@
+#!/bin/bash
+target_job=$1
+if [[ ${target_job} == "" ]]
+then
+        echo ">>> Error: target_job is not define"
+        exit
+fi
+if [[ ! -f samples.jsonl ]]
+then
+        touch samples.jsonl
+fi
+for paper in /root/uni-finder/enzyme/"${target_job}"/paper/*.pdf
+do
+        echo "find file ${paper}"
+        file_name="${paper##*/}"
+        name=${file_name%.*}
+        key_word=""
+        key_word=$(grep "${name}" samples.jsonl)
+        if [[ ${key_word} == "" ]]
+        then
+                echo "add ${name} to jsonl"
+                sed 's|target_mark|'"${name}"'|g' sample_file | sed 's|target_Job|'"${target_job}"'|g' >> samples.jsonl
+        else
+                echo "${name}: was already in the jsonl"
+        fi
+done
+
@@ -0,0 +1 @@
+{"file_name": "../uni-finder/enzyme/target_Job/paper/target_mark.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/target_mark.pdf", "answerfile_name": "../uni-finder/enzyme/target_Job/answer/target_mark.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/target_mark.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Km Value (mM)"], "index": "Substrate"}
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b1d54b5a0607f2e1992cdc213309440f24bb630dc3a3b57bc939e32dd47079aa
+size 6846
@@ -0,0 +1,27 @@
+#!/bin/bash
+target_job=$1
+if [[ ${target_job} == "" ]]
+then
+        echo ">>> Error: target_job is not define"
+        exit
+fi
+if [[ ! -f samples.jsonl ]]
+then
+        touch samples.jsonl
+fi
+for paper in /root/uni-finder/enzyme/"${target_job}"/paper/*.pdf
+do
+        echo "find file ${paper}"
+        file_name="${paper##*/}"
+        name=${file_name%.*}
+        key_word=""
+        key_word=$(grep "${name}" samples.jsonl)
+        if [[ ${key_word} == "" ]]
+        then
+                echo "add ${name} to jsonl"
+                sed 's|target_mark|'"${name}"'|g' sample_file | sed 's|target_Job|'"${target_job}"'|g' >> samples.jsonl
+        else
+                echo "${name}: was already in the jsonl"
+        fi
+done
+
@@ -0,0 +1 @@
+{"file_name": "../uni-finder/enzyme/target_Job/paper/target_mark.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/target_mark.pdf", "answerfile_name": "../uni-finder/enzyme/target_Job/answer/target_mark.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/target_mark.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Products", "Comment (Product)"], "index": "Substrate"}
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6316846852a855013f98ee678e945582013c1269fcad311c8e933859ade77c68
-size 1919
+oid sha256:78a3b4fbbfdb149b3420f6aec13b8022e9becc6ea16370b5f2dbd23fd429c848
+size 7815
@@ -1,18 +1,20 @@
-scipaper_enzyme_inhibitor:
-  id: scipaper_enzyme_inhibitor.val.csv
+scipaper_enzyme_km:
+  id: scipaper_enzyme_km.val.csv
   metrics: [accuracy]
 
-scipaper_enzyme_inhibitor.val.csv:
+scipaper_enzyme_km.val.csv:
   class: evals.elsuite.rag_table_extract:TableExtract
   args:
-    samples_jsonl: 00_scipaper_enzyme_inhibitor/samples.jsonl
+    samples_jsonl: 00_scipaper_enzyme_km/samples.jsonl
     instructions: |
-      Please give a complete list of Inhibitor, Commentand Organism of all substrates in the paper. Usually the substrates' tags are numbers or IUPAC names.
+      Please give a complete list of Substrate, Comment, Organism and Km Value of all substrates in the paper. Usually the substrates' tags are numbers or IUPAC names.
       1. Output in csv format, write units not in header but in the value like "10.5 µM". Quote the value if it has comma! For example:
       ```csv
-      Inhibitor,Comment,Organism
-      ATP,"competitive inhibition of verapamil-dependent ATPase-activity",Homo sapiens
-      p-xylene,"11.4 mM, slight inhibitor",Bos taurus
-      NH4+, 0.002 mM,Bos taurus
+      Substrate,Comment,Organism,Km Value
+      ATP,"competitive inhibition of verapamil-dependent ATPase-activity",Homo sapiens, 3.5 nM
+      p-xylene,"20 mM Tris-HCl(pH 7.0), 5 mM MgCl2, at 25 ℃",Bos taurus, 12 nM
+      D-ribose 6-phosphate, - , Homo sapiens, 120 nM
       ```
       2. If there are multiple tables, concat them. Don't give me reference or using "...", give me complete table!
+      3. If no relevant information was found in the paper, use '-' to fill in the form in CSV.
+
@@ -7,13 +7,14 @@ scipaper_enzyme_substrate.val.csv:
   args:
     samples_jsonl: 00_scipaper_enzyme_substrate/samples.jsonl
     instructions: |
-      Please give a complete list of SMILES structures, Km values, Vmax values, target info (protein or cell line), and organism of all substrates in the paper. Usually the substrates' tags are numbers or IUPAC names.
+      Please give a complete list of Substrate, Comment, Organism, Products and Comment (Product) of all substrates in the paper. Usually the substrates' tags are numbers or IUPAC names.
       1. Output in csv format, write units not in header but in the value like "10.5 µM". Quote the value if it has comma! For example:
       ```csv
-      Substrate,Inhibitors, Km value,Km max,Comment,organism,Vmax value,SMILES,Target info,Activating Compound,
-      ATP,Cu2+,0.001 mM,-,-,Homo sapiens,-,-,ATP-linker aldehyde,Carboxybenzaldehyde,
-      p-xylene,NADH,0.004 mM,-,-,Homo sapiens,-,C1CCCCC1,-,Methylbenzaldehyde
-      NADPH,benzaldehyde, 0.12 mM,125 mM,enzyme form ATP,Bos taurus,-,-,NH4+
-
+      Substrate,Comment,Organism,Products,"Comment (Product)"
+      "NADH + H+ + O2","20 mM Tris-HCl(pH 7.0)",Homo sapiens,"NAD+ + H2O", -
+      "D-glucose + 6-phosphate","20 mM Tris-HCl(pH 7.0), 5 mM MgCl2, at 25 ℃",Bos taurus, -, -
+      "D-ribose 6-phosphate", - , Homo sapiens, "glycerol + phosphate", -
       ```
       2. If there are multiple tables, concat them. Don't give me reference or using "...", give me complete table!
+      3. If no relevant information was found in the paper, use '-' to fill in the form in CSV.
+
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+{"file_name": "../uni-finder/enzyme/target_Job/paper/target_mark.pdf", "file_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/target_mark.pdf", "answerfile_name": "../uni-finder/enzyme/target_Job/answer/target_mark.csv", "answerfile_link": "https://dp-filetrans-bj.oss-cn-beijing.aliyuncs.com/changjunhan/target_mark.csv", "compare_fields": ["Substrate", "Comment", "Organism", "Km Value (mM)"], "index": "Substrate"}`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+version https://git-lfs.github.com/spec/v1`
	`2`	`+oid sha256:b1d54b5a0607f2e1992cdc213309440f24bb630dc3a3b57bc939e32dd47079aa`
	`3`	`+size 6846`