Mercurial > repos > kkonganti > cfsan_lexmapr2
diff lexmapr/definitions.py @ 0:f5c39d0447be
"planemo upload"
author | kkonganti |
---|---|
date | Wed, 31 Aug 2022 14:32:07 -0400 |
parents | |
children |
line wrap: on
line diff
"""Static definitions.

Module-level constants only: resource URLs, file locations, the list of
ontologies to load, terms treated as "no metadata given", and the bin
definitions exposed through ``arg_bins``.

Every bin entry is a single string of the form ``'<label>:<ONTOLOGY>_<id>'``.
"""

import os


# root path (directory that contains this module)
ROOT = os.path.dirname(__file__)

# URL to list of OLS ontologies where download link is given
embl_ontologies = 'https://www.ebi.ac.uk/ols/ontologies'

# beginning of URL to ontology PURL
purl_link = 'http://purl.obolibrary.org/obo/'

# NOTE(review): the three locations below are relative paths and do not use
# ROOT; presumably they are resolved against the working directory -- confirm
# with the callers before changing them.

# directory for downloaded OWL files
owl_dir = 'lexmapr/owl_files'

# path to database with synonyms from predefined resources and from OWL files
synonym_db = 'lexmapr/owl_files/label_synonyms.db'

# path to database with all ontologies of interest
ontol_db = 'lexmapr/owl_files/ontol_table.db'


# ontologies of interest; commented-out entries are currently disabled
ontol_interest = [
    #'BFO',
    #'CHEBI',
    #'ENVO',
    'FOODON',
    #'GENEPIO',
    'NCBITAXON',  # NCBITaxon is not valid as of April 2022
    #'OGMS',
    #'PATO',
    #'PCO',
    #'UBERON',
]

# ontology accessions that do not have labels or are placeholders as of April 2022
# will skip in database building
missing_ontol_labels = [
    'GENEPIO_0001367',
    'GENEPIO_0001368',
    'GENEPIO_0001369',
    'GENEPIO_0001370',
    'GENEPIO_0001372',
    'GENEPIO_0001373',
    '_MIAA_0000021',
]

# terms indicating that the metadata was not given/collected; will output empty results
# Misspelled variants ('unamed', 'mising', 'misng') are intentional match targets.
# Each term is listed exactly once.
not_provided = [
    'not applicable',
    'unknown',
    'n a',
    'not provided',
    'not available',
    'miscellaneous',
    'not collected',
    'missing',
    'unidentified',
    'none',
    'unamed',
    'other',
    'undetermined',
    'not known',
    'no history given',
    'no source specified',
    'null',
    'unspecified',
    'not reported',
    'not available not collected',
    'not isolated',
    'xxx',
    'mising',
    'misng',
    'not determined other',
    'reported later',
    'intact unknown',
    'not determined',
    'not ascertained',
    'unk',
    'nd',
    'nd others',
    'nd other',
    'etc',
    'na',
    '',
    ' ',
    'not supplied',
    'not specified',
]

# below are bin definitions
# TODO: food consumer group:FOODON_03510136 changed, formatted as '* as food consumer'
# can collect as 'for *' in text?
#fo_consumer = []

fo_product = [
    'algal food product:FOODON_00001184',
    'amphibian:FOODON_03411624',
    'amphibian or reptile food product:FOODON_00002200',
    'animal based refined or partially-refined food product:FOODON_00001595',
    'avian egg food product:FOODON_00001105',
    # FOODON accessions are 8-digit, zero-padded like every other entry here.
    'avian food product:FOODON_00001251',
    'bakery food product:FOODON_00001626',
    'cell-based technology food product:FOODON_00003376',
    'dairy food product:FOODON_00001256',
    'dietary supplement:FOODON_03401298',
    'fish egg food product:FOODON_00001250',
    'fish food product:FOODON_00001248',
    'food product analog:FOODON_00001871',
    'food product component:FOODON_00001714',
    'fungus food product:FOODON_00001143',
    # NOTE(review): same accession as 'meat food product' below; one of the
    # two is probably wrong -- verify against FoodOn.
    'game animal food product:FOODON_00002477',
    'meat food product:FOODON_00002477',
    'microbial food product:FOODON_00001145',
    'plant food product:FOODON_00001015',
    'poultry food product:FOODON_00001283',
    'prepared food product:FOODON_00001180',
    'processed food product:FOODON_03311737',
    'reptile egg food product:FOODON_00002199',
    'seafood product:FOODON_00001046',
    'shellfish food product:FOODON_00001293',
    'soup food product:FOODON_00002257',
    'sustainable agriculture food product:FOODON_00003375',
    'vegetarian food product:FOODON_00003194',
    'vertebrate animal food product:FOODON_00001092',
]

fo_quality = [
    'food (acidified):FOODON_03301625',
    'food (adulterated):FOODON_00003367',
    'food (baked):FOODON_00002456',
    'food (batter-coated):FOODON_00002662',
    'food (blanched):FOODON_00002767',
    'food (blend):FOODON_00003889',
    'food (boiled):FOODON_00002688',
    'food (breaded):FOODON_00002661',
    'food (broiled or grilled):FOODON_00002647',
    'food (canned):FOODON_00002418',
    'food (chilled):FOODON_00002642',
    'food (chopped):FOODON_00002777',
    'food (cleaned):FOODON_00002708',
    'food (colored):FOODON_00002650',
    'food (comminuted):FOODON_00002754',
    'food (cooked):FOODON_00001181',
    'food (deep-fried):FOODON_03307052',
    'food (dehydrated):FOODON_00002643',
    'food (dried):FOODON_03307539',
    'food (fat or oil coated):FOODON_03460233',
    'food (fermented):FOODON_00001258',
    'food (filled):FOODON_00002644',
    'food (flavored):FOODON_00002646',
    'food (freeze-dried):FOODON_03301752',
    'food (fresh):FOODON_00002457',
    'food (fried):FOODON_00002660',
    'food (frozen):FOODON_03302148',
    'food (genetically-modified):FOODON_03530251',
    'food (ground):FOODON_00002713',
    'food (harvested):FOODON_00003398',
    'food (heat treated):FOODON_03316043',
    'food (hulled):FOODON_00002720',
    'food (hydrolized):FOODON_00002653',
    'food (irradiated):FOODON_03305364',
    'food (juiced):FOODON_00003499',
    'food (liquid):FOODON_03430130',
    'food (milled):FOODON_00002649',
    'food (not genetically-modified):FOODON_00003379',
    'food (organically grown):FOODON_03306690',
    'food (packaged):FOODON_00002739',
    'food (packed in high pressurised containers):FOODON_03317139',
    'food (pan-fried):FOODON_00002463',
    'food (paste):FOODON_00003887',
    'food (pasteurized):FOODON_00002654',
    'food (peeled):FOODON_00002655',
    'food (pickled):FOODON_00001079',
    'food (powdered):FOODON_00002976',
    'food (precooked):FOODON_00002971',
    'food (precooked, frozen):FOODON_03305323',
    'food (preserved):FOODON_00002158',
    'food (puffed):FOODON_00002656',
    'food (raw):FOODON_03311126',
    'food (rehydrated):FOODON_00002755',
    'food (roasted):FOODON_00002744',
    'food (salted):FOODON_03460173',
    'food (seasoned):FOODON_00002733',
    'fruit (seedless):FOODON_00003461',
    'food (semiliquid):FOODON_03430103',
    'food (semisolid):FOODON_03430144',
    'food (sliced):FOODON_00002455',
    'food (smoked or smoke-flavored):FOODON_03460172',
    'food (solid):FOODON_03430151',
    'food (spoiled):FOODON_00003366',
    'food (starch or flour thickened):FOODON_03315268',
    'food (steamed):FOODON_00002657',
    'food (sugar-free):FOODON_03315838',
    'food (textured):FOODON_00002658',
    'food (toasted):FOODON_00002659',
    'food (unprocessed):FOODON_03316056',
    'food (unstandardized):FOODON_03315636',
]

fo_organism = [
    'algae:FOODON_03411301',
    'animal:FOODON_00003004',
    'fungus:FOODON_03411261',
    'lichen:FOODON_03412345',
    'whole plant:PO_0000003',
]

ncbi_taxon = [
    'Actinopterygii:NCBITaxon_7898',  #mix of taxon types
    'Ecdysozoa:NCBITaxon_1206794',
    'Echinodermata:NCBITaxon_7586',
    'Fungi:NCBITaxon_4751',
    'Spiralia:NCBITaxon_2697495',
    'Viridiplantae:NCBITaxon_33090',
    'Amphibia:NCBITaxon_8292',
    #'Sauropsida:NCBITaxon_8457',
    'Aves:NCBITaxon_8782',
    'Crocodylia:NCBITaxon_1294634',
    'Testudinata:NCBITaxon_2841271',
    'Lepidosauria:NCBITaxon_8504',
    #'Mammalia:NCBITaxon_40674',
    'Artiodactyla:NCBITaxon_91561',
    'Carnivora:NCBITaxon_33554',
    'Chiroptera:NCBITaxon_9397',
    'Chrysochloridae:NCBITaxon_9389',
    'Eulipotyphla:NCBITaxon_9362',
    'Hyracoidea:NCBITaxon_9810',
    'Macroscelidea:NCBITaxon_28734',
    'Metatheria:NCBITaxon_9263',
    'Ornithorhynchidae:NCBITaxon_9256',
    'Perissodactyla:NCBITaxon_9787',
    'Pholidota:NCBITaxon_9971',
    'Primates:NCBITaxon_9443',
    'Proboscidea:NCBITaxon_9779',
    'Rodentia:NCBITaxon_9989',
    'Sirenia:NCBITaxon_9774',
    'Tachyglossidae:NCBITaxon_9259',
    'Tenrecidae:NCBITaxon_9369',
    'Tubulidentata:NCBITaxon_9815',
    'Xenarthra:NCBITaxon_9348',
]

# bin name -> list of 'label:accession' entries defined above
arg_bins = {
    #'fo_consumer':fo_consumer,
    'fo_product': fo_product,
    'fo_quality': fo_quality,
    'fo_organism': fo_organism,
    'ncbi_taxon': ncbi_taxon,
}