From d6f72188afcd4d90b556bd339e6054b80be3d190 Mon Sep 17 00:00:00 2001 From: "louise.deleger" <louise.deleger@inra.fr> Date: Wed, 7 Apr 2021 16:09:11 +0200 Subject: [PATCH 1/4] additional blacklisted terms --- ancillaries/blacklist.txt | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/ancillaries/blacklist.txt b/ancillaries/blacklist.txt index c505a5ce..fca79323 100644 --- a/ancillaries/blacklist.txt +++ b/ancillaries/blacklist.txt @@ -19,7 +19,7 @@ flagellum harvesting antenna host industry -IV secretion system +iv secretion system light - harvesting antenna medium membrane @@ -31,8 +31,8 @@ single polar flagellum spore supply tool -type III secretion system -type IV secretion system +type iii secretion system +type iv secretion system vector antenna clinical @@ -130,4 +130,12 @@ granular community communities well -wells \ No newline at end of file +wells +phylogenetic tree +coalescent tree +isomorphic tree +phyletic tree +gene tree +eukaryotic tree +tree of life +maximum-likelihood tree -- GitLab From 07c225a1ca9d7f77f1894408d159117c88cb40d6 Mon Sep 17 00:00:00 2001 From: "louise.deleger" <louise.deleger@inra.fr> Date: Wed, 7 Apr 2021 16:09:51 +0200 Subject: [PATCH 2/4] lower case for blacklist --- plans/tomap-habitats-generic.plan | 4 ++-- plans/tomap-habitats.plan | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/plans/tomap-habitats-generic.plan b/plans/tomap-habitats-generic.plan index 7ca9a1fd..93683ca0 100644 --- a/plans/tomap-habitats-generic.plan +++ b/plans/tomap-habitats-generic.plan @@ -253,7 +253,7 @@ <!-- Remove terms with blacklisted syntactic heads --> <delete-blacklisted-heads class="Action"> - <target>documents.sections.layer:habitats[@explain_significant-head in "ancillaries/blacklisted-heads.txt" and not (@concept-name =~ " " and (str:lower(@lemma-string) ?= str:lower(@concept-name) or @score == "1.0"))]</target> + <target>documents.sections.layer:habitats[str:lower(@explain_significant-head) in "ancillaries/blacklisted-heads.txt" and not (@concept-name =~ " " and (str:lower(@lemma-string) ?= str:lower(@concept-name) or @score == "1.0"))]</target> <action>remove:habitats</action> <removeFromLayer/> </delete-blacklisted-heads> @@ -449,7 +449,7 @@ <!-- remove blacklisted terms --> <delete-blacklisted class="Action"> - <target>documents.sections.layer:habitats[@selected == "true" and @lemma-string in "ancillaries/blacklist.txt"]</target> + <target>documents.sections.layer:habitats[@selected == "true" and str:lower(@lemma-string) in "ancillaries/blacklist.txt"]</target> <action>remove:habitats</action> <removeFromLayer/> </delete-blacklisted> diff --git a/plans/tomap-habitats.plan b/plans/tomap-habitats.plan index e5dcec1c..f0f5f18e 100644 --- a/plans/tomap-habitats.plan +++ b/plans/tomap-habitats.plan @@ -449,7 +449,7 @@ <!-- remove blacklisted terms --> <delete-blacklisted class="Action"> - <target>documents.sections.layer:habitats[@selected == "true" and @lemma-string in "ancillaries/blacklist.txt"]</target> + <target>documents.sections.layer:habitats[@selected == "true" and str:lower(@lemma-string) in "ancillaries/blacklist.txt"]</target> <action>remove:habitats</action> <removeFromLayer/> </delete-blacklisted> -- GitLab From 595a2e2b7633cb90d46d252990a9477012245f32 Mon Sep 17 00:00:00 2001 From: "louise.deleger" <louise.deleger@inra.fr> Date: Wed, 7 Apr 2021 16:12:51 +0200 Subject: [PATCH 3/4] changed PUBMED_BATCHES_HOME --- config/config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/config.yaml b/config/config.yaml index 9b2c74a1..b416bf10 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -34,7 +34,7 @@ DSMZ_RESULT : "corpora/dsmz/test-3.3.txt" ## pubmed PUBMED_FOLDER_NAME : "microbes-2019" PUBMED_CORPUS_HOME : "corpora/microbes-2019" -PUBMED_BATCHES_HOME : "corpora/microbes-2019/batch" +PUBMED_BATCHES_HOME : "corpora/pubmed/batches" PUBMED_HABITAT_RESULT : "ancillaries/Florilege/2019-12-12/PubMed-Habitat-2019-12-12.txt" PUBMED_PHENOTYPE_RESULT : "ancillaries/Florilege/2019-12-12/PubMed-Phenotype-2019-12-12.txt" -- GitLab From 3d127d0a268d354823d8582a695cc367e6e65e6d Mon Sep 17 00:00:00 2001 From: Mouhamadou Ba <mouhamadou.ba@inra.fr> Date: Wed, 7 Apr 2021 17:04:36 +0200 Subject: [PATCH 4/4] Revert "changed PUBMED_BATCHES_HOME" This reverts commit 595a2e2b7633cb90d46d252990a9477012245f32. --- config/config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/config.yaml b/config/config.yaml index b416bf10..9b2c74a1 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -34,7 +34,7 @@ DSMZ_RESULT : "corpora/dsmz/test-3.3.txt" ## pubmed PUBMED_FOLDER_NAME : "microbes-2019" PUBMED_CORPUS_HOME : "corpora/microbes-2019" -PUBMED_BATCHES_HOME : "corpora/pubmed/batches" +PUBMED_BATCHES_HOME : "corpora/microbes-2019/batch" PUBMED_HABITAT_RESULT : "ancillaries/Florilege/2019-12-12/PubMed-Habitat-2019-12-12.txt" PUBMED_PHENOTYPE_RESULT : "ancillaries/Florilege/2019-12-12/PubMed-Phenotype-2019-12-12.txt" -- GitLab