Mercurial > repos > kkonganti > cfsan_lexmapr2
view lexmapr/definitions.py @ 1:5244e7465767
"planemo upload"
author | kkonganti |
---|---|
date | Wed, 31 Aug 2022 14:32:14 -0400 |
parents | f5c39d0447be |
children |
line wrap: on
line source
"""Static definitions""" import os # root path ROOT = os.path.dirname(__file__) # URL to list of OLS ontologies where download link is given embl_ontologies = 'https://www.ebi.ac.uk/ols/ontologies' # beginning of URL to ontology PURL purl_link = 'http://purl.obolibrary.org/obo/' # directory for downloaded OWL files owl_dir = 'lexmapr/owl_files' # path to database with synonyms from predefined resources and from OWL files synonym_db = 'lexmapr/owl_files/label_synonyms.db' # path to database with all ontologies of interest ontol_db = 'lexmapr/owl_files/ontol_table.db' # ontologies of interest ontol_interest = [#'BFO', #'CHEBI', #'ENVO', 'FOODON', #'GENEPIO', 'NCBITAXON', # NCBITaxon is not valid as of April 2022 #'OGMS', #'PATO', #'PCO', #'UBERON', ] # ontology accessions that do not have labels or are placeholders as of April 2022 # will skip in database building missing_ontol_labels = ['GENEPIO_0001367','GENEPIO_0001368','GENEPIO_0001369','GENEPIO_0001370', 'GENEPIO_0001372','GENEPIO_0001373','_MIAA_0000021', ] # terms indicating that the metadata was not given/collected; will output empty results not_provided = ['not applicable','unknown','n a','not provided','not available','miscellaneous', 'not collected','missing','unidentified','unknown','none','unamed','other', 'undetermined','not known','no history given','no source specified','null', 'unspecified','not reported','not available not collected','not isolated', 'not available','not provided','xxx','mising','misng','other','unidentified', 'not determined other','reported later','intact unknown','not determined', 'not ascertained','unk','nd','nd others','nd other','etc','na','',' ', 'not supplied','not specified', ] # below are bin definitions # TODO: food consumer group:FOODON_03510136 changed, formatted as '* as food consumer' # can collect as 'for *' in text? 
#fo_consumer = []

# Each bin entry is a '<label>:<ontology accession>' string.
fo_product = [
    'algal food product:FOODON_00001184',
    'amphibian:FOODON_03411624',
    'amphibian or reptile food product:FOODON_00002200',
    'animal based refined or partially-refined food product:FOODON_00001595',
    'avian egg food product:FOODON_00001105',
    # NOTE(review): was 'FOODON_001251' (6 digits); zero-padded to the 8-digit
    # form used by every other FOODON accession in this file - confirm against FoodOn.
    'avian food product:FOODON_00001251',
    'bakery food product:FOODON_00001626',
    'cell-based technology food product:FOODON_00003376',
    'dairy food product:FOODON_00001256',
    'dietary supplement:FOODON_03401298',
    'fish egg food product:FOODON_00001250',
    'fish food product:FOODON_00001248',
    'food product analog:FOODON_00001871',
    'food product component:FOODON_00001714',
    'fungus food product:FOODON_00001143',
    # NOTE(review): same accession as 'meat food product' below; one of the two
    # is probably wrong - verify against FoodOn before relying on this bin.
    'game animal food product:FOODON_00002477',
    'insect food product:FOODON_00001177',
    'meat food product:FOODON_00002477',
    'microbial food product:FOODON_00001145',
    'plant food product:FOODON_00001015',
    'poultry food product:FOODON_00001283',
    'prepared food product:FOODON_00001180',
    'processed food product:FOODON_03311737',
    'reptile egg food product:FOODON_00002199',
    'seafood product:FOODON_00001046',
    'shellfish food product:FOODON_00001293',
    'soup food product:FOODON_00002257',
    'sustainable agriculture food product:FOODON_00003375',
    'vegetarian food product:FOODON_00003194',
    'vertebrate animal food product:FOODON_00001092',
]

fo_quality = [
    'food (acidified):FOODON_03301625',
    'food (adulterated):FOODON_00003367',
    'food (baked):FOODON_00002456',
    'food (batter-coated):FOODON_00002662',
    'food (blanched):FOODON_00002767',
    'food (blend):FOODON_00003889',
    'food (boiled):FOODON_00002688',
    'food (breaded):FOODON_00002661',
    'food (broiled or grilled):FOODON_00002647',
    'food (canned):FOODON_00002418',
    'food (chilled):FOODON_00002642',
    'food (chopped):FOODON_00002777',
    'food (cleaned):FOODON_00002708',
    'food (colored):FOODON_00002650',
    'food (comminuted):FOODON_00002754',
    'food (cooked):FOODON_00001181',
    'food (deep-fried):FOODON_03307052',
    'food (dehydrated):FOODON_00002643',
    'food (dried):FOODON_03307539',
    'food (fat or oil coated):FOODON_03460233',
    'food (fermented):FOODON_00001258',
    'food (filled):FOODON_00002644',
    'food (flavored):FOODON_00002646',
    'food (freeze-dried):FOODON_03301752',
    'food (fresh):FOODON_00002457',
    'food (fried):FOODON_00002660',
    'food (frozen):FOODON_03302148',
    'food (genetically-modified):FOODON_03530251',
    'food (ground):FOODON_00002713',
    'food (harvested):FOODON_00003398',
    'food (heat treated):FOODON_03316043',
    'food (hulled):FOODON_00002720',
    'food (hydrolized):FOODON_00002653',
    'food (irradiated):FOODON_03305364',
    'food (juiced):FOODON_00003499',
    'food (liquid):FOODON_03430130',
    'food (milled):FOODON_00002649',
    'food (not genetically-modified):FOODON_00003379',
    'food (organically grown):FOODON_03306690',
    'food (packaged):FOODON_00002739',
    'food (packed in high pressurised containers):FOODON_03317139',
    'food (pan-fried):FOODON_00002463',
    'food (paste):FOODON_00003887',
    'food (pasteurized):FOODON_00002654',
    'food (peeled):FOODON_00002655',
    'food (pickled):FOODON_00001079',
    'food (powdered):FOODON_00002976',
    'food (precooked):FOODON_00002971',
    'food (precooked, frozen):FOODON_03305323',
    'food (preserved):FOODON_00002158',
    'food (puffed):FOODON_00002656',
    'food (raw):FOODON_03311126',
    'food (rehydrated):FOODON_00002755',
    'food (roasted):FOODON_00002744',
    'food (salted):FOODON_03460173',
    'food (seasoned):FOODON_00002733',
    'fruit (seedless):FOODON_00003461',
    'food (semiliquid):FOODON_03430103',
    'food (semisolid):FOODON_03430144',
    'food (sliced):FOODON_00002455',
    'food (smoked or smoke-flavored):FOODON_03460172',
    'food (solid):FOODON_03430151',
    'food (spoiled):FOODON_00003366',
    'food (starch or flour thickened):FOODON_03315268',
    'food (steamed):FOODON_00002657',
    'food (sugar-free):FOODON_03315838',
    'food (textured):FOODON_00002658',
    'food (toasted):FOODON_00002659',
    'food (unprocessed):FOODON_03316056',
    'food (unstandardized):FOODON_03315636',
]

fo_organism = [
    'algae:FOODON_03411301',
    'animal:FOODON_00003004',
    'fungus:FOODON_03411261',
    'lichen:FOODON_03412345',
    'whole plant:PO_0000003',
]

ncbi_taxon = [
    'Actinopterygii:NCBITaxon_7898',  #mix of taxon types
    'Ecdysozoa:NCBITaxon_1206794',
    'Echinodermata:NCBITaxon_7586',
    'Fungi:NCBITaxon_4751',
    'Spiralia:NCBITaxon_2697495',
    'Viridiplantae:NCBITaxon_33090',
    'Amphibia:NCBITaxon_8292',
    #'Sauropsida:NCBITaxon_8457',
    'Aves:NCBITaxon_8782',
    'Crocodylia:NCBITaxon_1294634',
    'Testudinata:NCBITaxon_2841271',
    'Lepidosauria:NCBITaxon_8504',
    #'Mammalia:NCBITaxon_40674',
    'Artiodactyla:NCBITaxon_91561',
    'Carnivora:NCBITaxon_33554',
    'Chiroptera:NCBITaxon_9397',
    'Chrysochloridae:NCBITaxon_9389',
    'Eulipotyphla:NCBITaxon_9362',
    'Hyracoidea:NCBITaxon_9810',
    'Macroscelidea:NCBITaxon_28734',
    'Metatheria:NCBITaxon_9263',
    'Ornithorhynchidae:NCBITaxon_9256',
    'Perissodactyla:NCBITaxon_9787',
    'Pholidota:NCBITaxon_9971',
    'Primates:NCBITaxon_9443',
    'Proboscidea:NCBITaxon_9779',
    'Rodentia:NCBITaxon_9989',
    'Sirenia:NCBITaxon_9774',
    'Tachyglossidae:NCBITaxon_9259',
    'Tenrecidae:NCBITaxon_9369',
    'Tubulidentata:NCBITaxon_9815',
    'Xenarthra:NCBITaxon_9348',
]

# Maps each bin name to its list of '<label>:<accession>' entries.
arg_bins = {
    #'fo_consumer':fo_consumer,
    'fo_product': fo_product,
    'fo_quality': fo_quality,
    'fo_organism': fo_organism,
    'ncbi_taxon': ncbi_taxon,
}