"""Static definitions.

Module-level constants only: resource URLs, paths for downloaded OWL files
and the derived databases, the ontologies of interest, accessions to skip,
"metadata not provided" terms, and the bin definitions exposed via
``arg_bins``.

Bin entries are strings of the form ``'<label>:<accession>'``.
"""

import os


# root path (directory containing this module)
ROOT = os.path.dirname(__file__)

# URL to list of OLS ontologies where download link is given
embl_ontologies = 'https://www.ebi.ac.uk/ols/ontologies'

# beginning of URL to ontology PURL
purl_link = 'http://purl.obolibrary.org/obo/'

# directory for downloaded OWL files
# NOTE(review): this and the two database paths below are relative and are
# not joined with ROOT -- presumably resolved against the working directory.
owl_dir = 'lexmapr/owl_files'

# path to database with synonyms from predefined resources and from OWL files
synonym_db = 'lexmapr/owl_files/label_synonyms.db'

# path to database with all ontologies of interest
ontol_db = 'lexmapr/owl_files/ontol_table.db'


# ontologies of interest; commented-out entries are currently disabled
ontol_interest = [
    #'BFO',
    #'CHEBI',
    #'ENVO',
    'FOODON',
    #'GENEPIO',
    'NCBITAXON',  # NCBITaxon is not valid as of April 2022
    #'OGMS',
    #'PATO',
    #'PCO',
    #'UBERON',
]

# ontology accessions that do not have labels or are placeholders as of
# April 2022; will skip in database building
missing_ontol_labels = [
    'GENEPIO_0001367',
    'GENEPIO_0001368',
    'GENEPIO_0001369',
    'GENEPIO_0001370',
    'GENEPIO_0001372',
    'GENEPIO_0001373',
    '_MIAA_0000021',
]

# terms indicating that the metadata was not given/collected; will output
# empty results.
# Each term is listed once (first-occurrence order kept). Misspelled variants
# such as 'unamed', 'mising' and 'misng' are kept as-is -- presumably they
# mirror misspellings seen in real input.
not_provided = [
    'not applicable', 'unknown', 'n a', 'not provided', 'not available',
    'miscellaneous',
    'not collected', 'missing', 'unidentified', 'none', 'unamed', 'other',
    'undetermined', 'not known', 'no history given', 'no source specified',
    'null',
    'unspecified', 'not reported', 'not available not collected',
    'not isolated',
    'xxx', 'mising', 'misng',
    'not determined other', 'reported later', 'intact unknown',
    'not determined',
    'not ascertained', 'unk', 'nd', 'nd others', 'nd other', 'etc', 'na',
    '', ' ',
    'not supplied', 'not specified',
]

# below are bin definitions
# TODO: food consumer group:FOODON_03510136 changed, formatted as
#       '* as food consumer'; can collect as 'for *' in text?
#fo_consumer = []

fo_product = [
    'algal food product:FOODON_00001184',
    'amphibian:FOODON_03411624',
    'amphibian or reptile food product:FOODON_00002200',
    'animal based refined or partially-refined food product:FOODON_00001595',
    'avian egg food product:FOODON_00001105',
    # accession zero-padded to the 8-digit form used by every other FOODON
    # entry in this module (was 'FOODON_001251')
    'avian food product:FOODON_00001251',
    'bakery food product:FOODON_00001626',
    'cell-based technology food product:FOODON_00003376',
    'dairy food product:FOODON_00001256',
    'dietary supplement:FOODON_03401298',
    'fish egg food product:FOODON_00001250',
    'fish food product:FOODON_00001248',
    'food product analog:FOODON_00001871',
    'food product component:FOODON_00001714',
    'fungus food product:FOODON_00001143',
    # NOTE(review): same accession as 'meat food product' below -- confirm
    'game animal food product:FOODON_00002477',
    'insect food product:FOODON_00001177',
    'meat food product:FOODON_00002477',
    'microbial food product:FOODON_00001145',
    'plant food product:FOODON_00001015',
    'poultry food product:FOODON_00001283',
    'prepared food product:FOODON_00001180',
    'processed food product:FOODON_03311737',
    'reptile egg food product:FOODON_00002199',
    'seafood product:FOODON_00001046',
    'shellfish food product:FOODON_00001293',
    'soup food product:FOODON_00002257',
    'sustainable agriculture food product:FOODON_00003375',
    'vegetarian food product:FOODON_00003194',
    'vertebrate animal food product:FOODON_00001092',
]

fo_quality = [
    'food (acidified):FOODON_03301625',
    'food (adulterated):FOODON_00003367',
    'food (baked):FOODON_00002456',
    'food (batter-coated):FOODON_00002662',
    'food (blanched):FOODON_00002767',
    'food (blend):FOODON_00003889',
    'food (boiled):FOODON_00002688',
    'food (breaded):FOODON_00002661',
    'food (broiled or grilled):FOODON_00002647',
    'food (canned):FOODON_00002418',
    'food (chilled):FOODON_00002642',
    'food (chopped):FOODON_00002777',
    'food (cleaned):FOODON_00002708',
    'food (colored):FOODON_00002650',
    'food (comminuted):FOODON_00002754',
    'food (cooked):FOODON_00001181',
    'food (deep-fried):FOODON_03307052',
    'food (dehydrated):FOODON_00002643',
    'food (dried):FOODON_03307539',
    'food (fat or oil coated):FOODON_03460233',
    'food (fermented):FOODON_00001258',
    'food (filled):FOODON_00002644',
    'food (flavored):FOODON_00002646',
    'food (freeze-dried):FOODON_03301752',
    'food (fresh):FOODON_00002457',
    'food (fried):FOODON_00002660',
    'food (frozen):FOODON_03302148',
    'food (genetically-modified):FOODON_03530251',
    'food (ground):FOODON_00002713',
    'food (harvested):FOODON_00003398',
    'food (heat treated):FOODON_03316043',
    'food (hulled):FOODON_00002720',
    'food (hydrolized):FOODON_00002653',
    'food (irradiated):FOODON_03305364',
    'food (juiced):FOODON_00003499',
    'food (liquid):FOODON_03430130',
    'food (milled):FOODON_00002649',
    'food (not genetically-modified):FOODON_00003379',
    'food (organically grown):FOODON_03306690',
    'food (packaged):FOODON_00002739',
    'food (packed in high pressurised containers):FOODON_03317139',
    'food (pan-fried):FOODON_00002463',
    'food (paste):FOODON_00003887',
    'food (pasteurized):FOODON_00002654',
    'food (peeled):FOODON_00002655',
    'food (pickled):FOODON_00001079',
    'food (powdered):FOODON_00002976',
    'food (precooked):FOODON_00002971',
    'food (precooked, frozen):FOODON_03305323',
    'food (preserved):FOODON_00002158',
    'food (puffed):FOODON_00002656',
    'food (raw):FOODON_03311126',
    'food (rehydrated):FOODON_00002755',
    'food (roasted):FOODON_00002744',
    'food (salted):FOODON_03460173',
    'food (seasoned):FOODON_00002733',
    'fruit (seedless):FOODON_00003461',
    'food (semiliquid):FOODON_03430103',
    'food (semisolid):FOODON_03430144',
    'food (sliced):FOODON_00002455',
    'food (smoked or smoke-flavored):FOODON_03460172',
    'food (solid):FOODON_03430151',
    'food (spoiled):FOODON_00003366',
    'food (starch or flour thickened):FOODON_03315268',
    'food (steamed):FOODON_00002657',
    'food (sugar-free):FOODON_03315838',
    'food (textured):FOODON_00002658',
    'food (toasted):FOODON_00002659',
    'food (unprocessed):FOODON_03316056',
    'food (unstandardized):FOODON_03315636',
]

fo_organism = [
    'algae:FOODON_03411301',
    'animal:FOODON_00003004',
    'fungus:FOODON_03411261',
    'lichen:FOODON_03412345',
    'whole plant:PO_0000003',
]

# mix of taxon types; the commented-out entries ('Sauropsida', 'Mammalia')
# are each followed by a run of narrower taxa
ncbi_taxon = [
    'Actinopterygii:NCBITaxon_7898',
    'Ecdysozoa:NCBITaxon_1206794',
    'Echinodermata:NCBITaxon_7586',
    'Fungi:NCBITaxon_4751',
    'Spiralia:NCBITaxon_2697495',
    'Viridiplantae:NCBITaxon_33090',
    'Amphibia:NCBITaxon_8292',
    #'Sauropsida:NCBITaxon_8457',
    'Aves:NCBITaxon_8782',
    'Crocodylia:NCBITaxon_1294634',
    'Testudinata:NCBITaxon_2841271',
    'Lepidosauria:NCBITaxon_8504',
    #'Mammalia:NCBITaxon_40674',
    'Artiodactyla:NCBITaxon_91561',
    'Carnivora:NCBITaxon_33554',
    'Chiroptera:NCBITaxon_9397',
    'Chrysochloridae:NCBITaxon_9389',
    'Eulipotyphla:NCBITaxon_9362',
    'Hyracoidea:NCBITaxon_9810',
    'Macroscelidea:NCBITaxon_28734',
    'Metatheria:NCBITaxon_9263',
    'Ornithorhynchidae:NCBITaxon_9256',
    'Perissodactyla:NCBITaxon_9787',
    'Pholidota:NCBITaxon_9971',
    'Primates:NCBITaxon_9443',
    'Proboscidea:NCBITaxon_9779',
    'Rodentia:NCBITaxon_9989',
    'Sirenia:NCBITaxon_9774',
    'Tachyglossidae:NCBITaxon_9259',
    'Tenrecidae:NCBITaxon_9369',
    'Tubulidentata:NCBITaxon_9815',
    'Xenarthra:NCBITaxon_9348',
]

# maps each bin name to its list of '<label>:<accession>' entries
arg_bins = {
    #'fo_consumer': fo_consumer,
    'fo_product': fo_product,
    'fo_quality': fo_quality,
    'fo_organism': fo_organism,
    'ncbi_taxon': ncbi_taxon,
}