"""Static definitions.

Module-level constants: resource URLs, relative paths for downloaded OWL
files and the databases built from them, the ontologies of interest, terms
treated as "metadata not provided", and the bin definitions exposed through
``arg_bins``.

Bin entries are strings formatted as ``'<label>:<ontology accession>'``.
"""

import os


# root path (directory containing this module)
ROOT = os.path.dirname(__file__)

# URL to list of OLS ontologies where download link is given
embl_ontologies = 'https://www.ebi.ac.uk/ols/ontologies'

# beginning of URL to ontology PURL
purl_link = 'http://purl.obolibrary.org/obo/'

# directory for downloaded OWL files
owl_dir = 'lexmapr/owl_files'

# path to database with synonyms from predefined resources and from OWL files
synonym_db = 'lexmapr/owl_files/label_synonyms.db'

# path to database with all ontologies of interest
ontol_db = 'lexmapr/owl_files/ontol_table.db'


# ontologies of interest; commented-out entries are currently disabled
ontol_interest = [
    #'BFO',
    #'CHEBI',
    #'ENVO',
    'FOODON',
    #'GENEPIO',
    'NCBITAXON',  # NCBITaxon is not valid as of April 2022
    #'OGMS',
    #'PATO',
    #'PCO',
    #'UBERON',
]

# ontology accessions that do not have labels or are placeholders as of April 2022
# will skip in database building
missing_ontol_labels = [
    'GENEPIO_0001367', 'GENEPIO_0001368', 'GENEPIO_0001369', 'GENEPIO_0001370',
    'GENEPIO_0001372', 'GENEPIO_0001373', '_MIAA_0000021',
]

# terms indicating that the metadata was not given/collected; will output empty results
# Each term is listed once. Misspelled entries ('unamed', 'mising', 'misng') are
# presumably there to match raw input as written - do not "correct" them.
not_provided = [
    'not applicable', 'unknown', 'n a', 'not provided', 'not available',
    'miscellaneous', 'not collected', 'missing', 'unidentified', 'none',
    'unamed', 'other', 'undetermined', 'not known', 'no history given',
    'no source specified', 'null', 'unspecified', 'not reported',
    'not available not collected', 'not isolated', 'xxx', 'mising', 'misng',
    'not determined other', 'reported later', 'intact unknown',
    'not determined', 'not ascertained', 'unk', 'nd', 'nd others', 'nd other',
    'etc', 'na', '', ' ', 'not supplied', 'not specified',
]

# below are bin definitions
# TODO: food consumer group:FOODON_03510136 changed, formatted as '* as food consumer'
#       can collect as 'for *' in text?
#fo_consumer = []

fo_product = [
    'algal food product:FOODON_00001184',
    'amphibian:FOODON_03411624',
    'amphibian or reptile food product:FOODON_00002200',
    'animal based refined or partially-refined food product:FOODON_00001595',
    'avian egg food product:FOODON_00001105',
    # Accession zero-padded to the 8-digit form used by every other FOODON
    # entry (it was previously written with only 6 digits).
    # NOTE(review): confirm FOODON_00001251 against the FoodOn release in use.
    'avian food product:FOODON_00001251',
    'bakery food product:FOODON_00001626',
    'cell-based technology food product:FOODON_00003376',
    'dairy food product:FOODON_00001256',
    'dietary supplement:FOODON_03401298',
    'fish egg food product:FOODON_00001250',
    'fish food product:FOODON_00001248',
    'food product analog:FOODON_00001871',
    'food product component:FOODON_00001714',
    'fungus food product:FOODON_00001143',
    # NOTE(review): this accession is identical to the one listed for
    # 'meat food product' below; one of the two is probably wrong - verify.
    'game animal food product:FOODON_00002477',
    'insect food product:FOODON_00001177',
    'meat food product:FOODON_00002477',
    'microbial food product:FOODON_00001145',
    'plant food product:FOODON_00001015',
    'poultry food product:FOODON_00001283',
    'prepared food product:FOODON_00001180',
    'processed food product:FOODON_03311737',
    'reptile egg food product:FOODON_00002199',
    'seafood product:FOODON_00001046',
    'shellfish food product:FOODON_00001293',
    'soup food product:FOODON_00002257',
    'sustainable agriculture food product:FOODON_00003375',
    'vegetarian food product:FOODON_00003194',
    'vertebrate animal food product:FOODON_00001092',
]

fo_quality = [
    'food (acidified):FOODON_03301625',
    'food (adulterated):FOODON_00003367',
    'food (baked):FOODON_00002456',
    'food (batter-coated):FOODON_00002662',
    'food (blanched):FOODON_00002767',
    'food (blend):FOODON_00003889',
    'food (boiled):FOODON_00002688',
    'food (breaded):FOODON_00002661',
    'food (broiled or grilled):FOODON_00002647',
    'food (canned):FOODON_00002418',
    'food (chilled):FOODON_00002642',
    'food (chopped):FOODON_00002777',
    'food (cleaned):FOODON_00002708',
    'food (colored):FOODON_00002650',
    'food (comminuted):FOODON_00002754',
    'food (cooked):FOODON_00001181',
    'food (deep-fried):FOODON_03307052',
    'food (dehydrated):FOODON_00002643',
    'food (dried):FOODON_03307539',
    'food (fat or oil coated):FOODON_03460233',
    'food (fermented):FOODON_00001258',
    'food (filled):FOODON_00002644',
    'food (flavored):FOODON_00002646',
    'food (freeze-dried):FOODON_03301752',
    'food (fresh):FOODON_00002457',
    'food (fried):FOODON_00002660',
    'food (frozen):FOODON_03302148',
    'food (genetically-modified):FOODON_03530251',
    'food (ground):FOODON_00002713',
    'food (harvested):FOODON_00003398',
    'food (heat treated):FOODON_03316043',
    'food (hulled):FOODON_00002720',
    'food (hydrolized):FOODON_00002653',
    'food (irradiated):FOODON_03305364',
    'food (juiced):FOODON_00003499',
    'food (liquid):FOODON_03430130',
    'food (milled):FOODON_00002649',
    'food (not genetically-modified):FOODON_00003379',
    'food (organically grown):FOODON_03306690',
    'food (packaged):FOODON_00002739',
    'food (packed in high pressurised containers):FOODON_03317139',
    'food (pan-fried):FOODON_00002463',
    'food (paste):FOODON_00003887',
    'food (pasteurized):FOODON_00002654',
    'food (peeled):FOODON_00002655',
    'food (pickled):FOODON_00001079',
    'food (powdered):FOODON_00002976',
    'food (precooked):FOODON_00002971',
    'food (precooked, frozen):FOODON_03305323',
    'food (preserved):FOODON_00002158',
    'food (puffed):FOODON_00002656',
    'food (raw):FOODON_03311126',
    'food (rehydrated):FOODON_00002755',
    'food (roasted):FOODON_00002744',
    'food (salted):FOODON_03460173',
    'food (seasoned):FOODON_00002733',
    'fruit (seedless):FOODON_00003461',
    'food (semiliquid):FOODON_03430103',
    'food (semisolid):FOODON_03430144',
    'food (sliced):FOODON_00002455',
    'food (smoked or smoke-flavored):FOODON_03460172',
    'food (solid):FOODON_03430151',
    'food (spoiled):FOODON_00003366',
    'food (starch or flour thickened):FOODON_03315268',
    'food (steamed):FOODON_00002657',
    'food (sugar-free):FOODON_03315838',
    'food (textured):FOODON_00002658',
    'food (toasted):FOODON_00002659',
    'food (unprocessed):FOODON_03316056',
    'food (unstandardized):FOODON_03315636',
]

fo_organism = [
    'algae:FOODON_03411301',
    'animal:FOODON_00003004',
    'fungus:FOODON_03411261',
    'lichen:FOODON_03412345',
    'whole plant:PO_0000003',
]

ncbi_taxon = [
    'Actinopterygii:NCBITaxon_7898',  # mix of taxon types
    'Ecdysozoa:NCBITaxon_1206794',
    'Echinodermata:NCBITaxon_7586',
    'Fungi:NCBITaxon_4751',
    'Spiralia:NCBITaxon_2697495',
    'Viridiplantae:NCBITaxon_33090',
    'Amphibia:NCBITaxon_8292',
    #'Sauropsida:NCBITaxon_8457',
    'Aves:NCBITaxon_8782',
    'Crocodylia:NCBITaxon_1294634',
    'Testudinata:NCBITaxon_2841271',
    'Lepidosauria:NCBITaxon_8504',
    #'Mammalia:NCBITaxon_40674',
    'Artiodactyla:NCBITaxon_91561',
    'Carnivora:NCBITaxon_33554',
    'Chiroptera:NCBITaxon_9397',
    'Chrysochloridae:NCBITaxon_9389',
    'Eulipotyphla:NCBITaxon_9362',
    'Hyracoidea:NCBITaxon_9810',
    'Macroscelidea:NCBITaxon_28734',
    'Metatheria:NCBITaxon_9263',
    'Ornithorhynchidae:NCBITaxon_9256',
    'Perissodactyla:NCBITaxon_9787',
    'Pholidota:NCBITaxon_9971',
    'Primates:NCBITaxon_9443',
    'Proboscidea:NCBITaxon_9779',
    'Rodentia:NCBITaxon_9989',
    'Sirenia:NCBITaxon_9774',
    'Tachyglossidae:NCBITaxon_9259',
    'Tenrecidae:NCBITaxon_9369',
    'Tubulidentata:NCBITaxon_9815',
    'Xenarthra:NCBITaxon_9348',
]

# maps each bin name to its list of '<label>:<accession>' entries
arg_bins = {
    #'fo_consumer':fo_consumer,
    'fo_product': fo_product,
    'fo_quality': fo_quality,
    'fo_organism': fo_organism,
    'ncbi_taxon': ncbi_taxon,
}