From 030ff98a90b4f01984bd496751f0a172b7b3eff4 Mon Sep 17 00:00:00 2001 From: Robert Bossy <Robert.Bossy@inra.fr> Date: Fri, 25 Mar 2022 10:37:51 +0100 Subject: [PATCH 1/2] parametrize taxa.plan --- plans/entities.plan | 9 +++++---- plans/taxa.plan | 18 ++++++++++++++++-- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/plans/entities.plan b/plans/entities.plan index 8523ddb1..b54e298c 100644 --- a/plans/entities.plan +++ b/plans/entities.plan @@ -131,12 +131,12 @@ </param--> <param name="taxid_microorganisms"> - <alias module="taxa.microorganisms.taxids" param="mappingFile"/> + <alias module="taxa" param="taxid_microorganisms"/> <alias module="microorganisms-after-strains.taxids" param="mappingFile"/> </param> <param name="taxa+id_full"> - <alias module="taxa.dict" param="dictFile"/> + <alias module="taxa" param="taxaDict"/> </param> @@ -236,8 +236,9 @@ <setFeatures/> </set-year> - <taxa file="plans/taxa.plan"/> - + <taxa file="plans/taxa.plan"> + <sectionFilter>@name == "title" or @name == "abstract" or @name == "text"</sectionFilter> + </taxa> <!-- Project stopwords --> <stopwordsprojector class="TabularProjector"> diff --git a/plans/taxa.plan b/plans/taxa.plan index df5d51b3..a250a16d 100644 --- a/plans/taxa.plan +++ b/plans/taxa.plan @@ -1,9 +1,23 @@ <alvisnlp-plan id="taxa"> + <param name="taxaDict"> + <alias module="dict" param="dictFile"/> + </param> + + <param name="compiledDict"> + <alias module="dict" param="trieSource"/> + </param> + + <param name="sectionFilter"> + <alias module="dict" param="sectionFilter"/> + </param> + + <param name="taxid_microorganisms"> + <alias module="microorganisms.taxids" param="mappingFile"/> + </param> <dict class="TabularProjector"> - <sectionFilter>@name == "title" or @name == "abstract" or @name == "text"</sectionFilter> <targetLayerName>taxa</targetLayerName> - <dictFile>ancillaries/extended-microorganisms-taxonomy/taxa+id_full.txt</dictFile> + 
<!--<dictFile>ancillaries/extended-microorganisms-taxonomy/taxa+id_full.txt</dictFile>--> <!--<trieSource>ancillaries/extended-microorganisms-taxonomy/taxa+id_full.trie</trieSource>--> <matchStartCaseInsensitive/> <valueFeatures>,taxid,canonical-name,path,pos,rank,species-taxid,species-name</valueFeatures> -- GitLab From 8a1b552577ab89a0b42bd5c035cd8c210cf51aca Mon Sep 17 00:00:00 2001 From: Robert Bossy <Robert.Bossy@inra.fr> Date: Fri, 25 Mar 2022 11:59:32 +0100 Subject: [PATCH 2/2] all plans reference taxa.plan. removed taxa_generic.plan --- plans/map_microorganisms.plan | 12 ++--- plans/taxa_generic.plan | 85 ----------------------------------- 2 files changed, 4 insertions(+), 93 deletions(-) delete mode 100644 plans/taxa_generic.plan diff --git a/plans/map_microorganisms.plan b/plans/map_microorganisms.plan index 06d8a3c4..c4d29456 100644 --- a/plans/map_microorganisms.plan +++ b/plans/map_microorganisms.plan @@ -8,19 +8,15 @@ <alias module="print-mapping" param="corpusFile"/> </param> - <param name="taxid_microorganisms"> + <param name="taxid_microorganisms"> <alias module="microorganisms-after-strains.taxids" param="mappingFile"/> - <alias module="taxa.microorganisms.taxids" param="mappingFile"/> + <alias module="taxa" param="taxid_microorganisms"/> </param> <param name="taxa+id_full"> - <alias module="taxa.dict" param="dictFile"/> + <alias module="taxa" param="taxaDict"/> </param> - <param name="NCBI_taxa_ontobiotope"> - <alias module="taxa.dict" param="dictFile"/> - </param> - <read class="TextFileReader"> @@ -28,7 +24,7 @@ <linesLimit>1</linesLimit> </read> - <taxa file="plans/taxa_generic.plan"/> + <taxa file="plans/taxa.plan"/> <syntax file="plans/syntax.plan"/> diff --git a/plans/taxa_generic.plan b/plans/taxa_generic.plan deleted file mode 100644 index c7b8d74c..00000000 --- a/plans/taxa_generic.plan +++ /dev/null @@ -1,85 +0,0 @@ -<alvisnlp-plan id="taxa"> - <dict class="TabularProjector"> - <targetLayerName>taxa</targetLayerName> - 
<matchStartCaseInsensitive/> - <valueFeatures>,taxid,canonical-name,path,pos,rank,species-taxid,species-name</valueFeatures> - <constantAnnotationFeatures>source=NCBI</constantAnnotationFeatures> - </dict> - - <disambiguate> - <not-ambiguous class="Action"> - <target>documents.sections.layer:taxa[not span:taxa]</target> - <action>set:feat:not-ambiguous("yes")</action> - <setFeatures/> - </not-ambiguous> - - <disambiguate-coreferences class="Action"> - <target>documents.sections.layer:taxa[@not-ambiguous != "yes" and $ as x.(section.nav:sections-before.layer:taxa|before:taxa)[@not-ambiguous == "yes" and @taxid == x.@taxid]]</target> - <action>set:feat:not-ambiguous("yes")</action> - <setFeatures/> - </disambiguate-coreferences> - - <disambiguate-coreferences-more-specific class="Action"> - <target>documents.sections.layer:taxa[@not-ambiguous != "yes" and $ as x.(section.nav:sections-before.layer:taxa|before:taxa)[@not-ambiguous == "yes" and x.@path ^= @path]]</target> - <action>set:feat:not-ambiguous("yes")</action> - <setFeatures/> - </disambiguate-coreferences-more-specific> - - <disambiguate-coreference-more-general class="Action"> - <target>documents.sections.layer:taxa[@not-ambiguous != "yes" and $ as x.(section.nav:sections-before.layer:taxa|before:taxa)[@not-ambiguous == "yes" and @path ^= x.@path]]</target> - <action>set:feat:not-ambiguous("yes")</action> - <setFeatures/> - </disambiguate-coreference-more-general> - - <remove-alternative-to-not-ambiguous class="Action"> - <target>documents.sections.layer:taxa[@not-ambiguous == "yes"].span:taxa[@not-ambiguous != "yes"]</target> - <action>remove:taxa</action> - <removeFromLayer/> - </remove-alternative-to-not-ambiguous> - </disambiguate> - - <!-- - --> - - <!-- tag microorganisms --> - <microorganisms> - <taxids class="FileMapper"> - <target>documents.sections.layer:taxa</target> - <form>@taxid</form> - <targetFeatures>microorganism</targetFeatures> - </taxids> - - <layer class="Action"> - 
<target>documents.sections.layer:taxa[@microorganism]</target> - <action>add:microorganism</action> - <addToLayer/> - </layer> - - <overlaps class="RemoveOverlaps"> - <layerName>microorganism</layerName> - </overlaps> - </microorganisms> - - <!-- tag bacteria --> - <bacteria> - <tag class="Action"> - <target>documents.sections.layer:taxa[@microorganism and @path ^= "/ncbi:1/ncbi:131567/ncbi:2/"]</target> - <action>set:feat:bacteria("true")|add:bacteria</action> - <setFeatures/> - <addToLayer/> - </tag> - - <check class="Assert"> - <target>$</target> - <assertion>documents.sections.layer:bacteria</assertion> - <message>"no bacteria, maybe the Eubacteria path is wrong"</message> - <severe>false</severe> - </check> - - <overlaps class="RemoveOverlaps"> - <layerName>bacteria</layerName> - </overlaps> - </bacteria> - - <strains file="plans/strains-1.plan"/> -</alvisnlp-plan> -- GitLab