annotate lexmapr/definitions.py @ 4:819eff1bd7ac tip

"planemo upload"
author cstrittmatter
date Wed, 29 Jun 2022 15:30:52 -0400
parents f298f3e5c515
children
rev   line source
cstrittmatter@0 1 """Static definitions"""
cstrittmatter@0 2
cstrittmatter@0 3 import os
cstrittmatter@0 4
cstrittmatter@0 5
# Directory that contains this module.
ROOT = os.path.dirname(__file__)

# OLS page listing the ontologies; the download link for each is given there.
embl_ontologies = "https://www.ebi.ac.uk/ols/ontologies"

# Prefix shared by ontology PURLs.
purl_link = "http://purl.obolibrary.org/obo/"

# Where downloaded OWL files are stored.
owl_dir = "lexmapr/owl_files"

# Database of synonyms gathered from predefined resources and from OWL files.
synonym_db = "lexmapr/owl_files/label_synonyms.db"

# Database holding all ontologies of interest.
ontol_db = "lexmapr/owl_files/ontol_table.db"
cstrittmatter@0 23
cstrittmatter@0 24
# Ontologies of interest. Only the uncommented prefixes are in the list;
# the commented-out prefixes are kept here so they can be switched back on.
ontol_interest = [
    # "BFO",
    # "CHEBI",
    # "ENVO",
    "FOODON",
    # "GENEPIO",
    "NCBITAXON",  # NCBITaxon is not valid as of April 2022
    # "OGMS",
    # "PATO",
    # "PCO",
    # "UBERON",
]
cstrittmatter@0 37
# Ontology accessions that had no label, or were placeholders, as of
# April 2022. They are skipped when the database is built.
missing_ontol_labels = [
    "GENEPIO_0001367",
    "GENEPIO_0001368",
    "GENEPIO_0001369",
    "GENEPIO_0001370",
    "GENEPIO_0001372",
    "GENEPIO_0001373",
    "_MIAA_0000021",
]
cstrittmatter@0 43
# Terms indicating that the metadata was not given/collected; inputs equal to
# one of these will output empty results.
#
# Each term appears exactly once (earlier revisions repeated 'unknown',
# 'not provided', 'not available', 'other' and 'unidentified'); the order of
# first occurrence is preserved, so membership tests behave as before.
# NOTE(review): the misspelled variants ('mising', 'misng', 'unamed') look
# deliberate -- presumably to catch typos in the input -- so they are kept.
not_provided = [
    "not applicable",
    "unknown",
    "n a",
    "not provided",
    "not available",
    "miscellaneous",
    "not collected",
    "missing",
    "unidentified",
    "none",
    "unamed",
    "other",
    "undetermined",
    "not known",
    "no history given",
    "no source specified",
    "null",
    "unspecified",
    "not reported",
    "not available not collected",
    "not isolated",
    "xxx",
    "mising",
    "misng",
    "not determined other",
    "reported later",
    "intact unknown",
    "not determined",
    "not ascertained",
    "unk",
    "nd",
    "nd others",
    "nd other",
    "etc",
    "na",
    "",  # empty value
    " ",  # single space
    "not supplied",
    "not specified",
]
cstrittmatter@0 54
cstrittmatter@0 55 # below are bin definitions
cstrittmatter@0 56 # TODO: food consumer group:FOODON_03510136 changed, formatted as '* as food consumer'
cstrittmatter@0 57 # can collect as 'for *' in text?
cstrittmatter@0 58 #fo_consumer = []
cstrittmatter@0 59
# Food product bin. Each entry is a 'label:accession' string.
fo_product = [
    "algal food product:FOODON_00001184",
    "amphibian:FOODON_03411624",
    "amphibian or reptile food product:FOODON_00002200",
    "animal based refined or partially-refined food product:FOODON_00001595",
    "avian egg food product:FOODON_00001105",
    # Accession corrected from the malformed 6-digit 'FOODON_001251'; every
    # other FOODON accession in this file has 8 digits.
    "avian food product:FOODON_00001251",
    "bakery food product:FOODON_00001626",
    "cell-based technology food product:FOODON_00003376",
    "dairy food product:FOODON_00001256",
    "dietary supplement:FOODON_03401298",
    "fish egg food product:FOODON_00001250",
    "fish food product:FOODON_00001248",
    "food product analog:FOODON_00001871",
    "food product component:FOODON_00001714",
    "fungus food product:FOODON_00001143",
    # NOTE(review): same accession as 'meat food product' below -- one of the
    # two is probably wrong; confirm against FoodOn before changing.
    "game animal food product:FOODON_00002477",
    "insect food product:FOODON_00001177",
    "meat food product:FOODON_00002477",
    "microbial food product:FOODON_00001145",
    "plant food product:FOODON_00001015",
    "poultry food product:FOODON_00001283",
    "prepared food product:FOODON_00001180",
    "processed food product:FOODON_03311737",
    "reptile egg food product:FOODON_00002199",
    "seafood product:FOODON_00001046",
    "shellfish food product:FOODON_00001293",
    "soup food product:FOODON_00002257",
    "sustainable agriculture food product:FOODON_00003375",
    "vegetarian food product:FOODON_00003194",
    "vertebrate animal food product:FOODON_00001092",
]
cstrittmatter@0 91
# Food quality bin. Each entry is a 'label:accession' string.
fo_quality = [
    "food (acidified):FOODON_03301625",
    "food (adulterated):FOODON_00003367",
    "food (baked):FOODON_00002456",
    "food (batter-coated):FOODON_00002662",
    "food (blanched):FOODON_00002767",
    "food (blend):FOODON_00003889",
    "food (boiled):FOODON_00002688",
    "food (breaded):FOODON_00002661",
    "food (broiled or grilled):FOODON_00002647",
    "food (canned):FOODON_00002418",
    "food (chilled):FOODON_00002642",
    "food (chopped):FOODON_00002777",
    "food (cleaned):FOODON_00002708",
    "food (colored):FOODON_00002650",
    "food (comminuted):FOODON_00002754",
    "food (cooked):FOODON_00001181",
    "food (deep-fried):FOODON_03307052",
    "food (dehydrated):FOODON_00002643",
    "food (dried):FOODON_03307539",
    "food (fat or oil coated):FOODON_03460233",
    "food (fermented):FOODON_00001258",
    "food (filled):FOODON_00002644",
    "food (flavored):FOODON_00002646",
    "food (freeze-dried):FOODON_03301752",
    "food (fresh):FOODON_00002457",
    "food (fried):FOODON_00002660",
    "food (frozen):FOODON_03302148",
    "food (genetically-modified):FOODON_03530251",
    "food (ground):FOODON_00002713",
    "food (harvested):FOODON_00003398",
    "food (heat treated):FOODON_03316043",
    "food (hulled):FOODON_00002720",
    "food (hydrolized):FOODON_00002653",
    "food (irradiated):FOODON_03305364",
    "food (juiced):FOODON_00003499",
    "food (liquid):FOODON_03430130",
    "food (milled):FOODON_00002649",
    "food (not genetically-modified):FOODON_00003379",
    "food (organically grown):FOODON_03306690",
    "food (packaged):FOODON_00002739",
    "food (packed in high pressurised containers):FOODON_03317139",
    "food (pan-fried):FOODON_00002463",
    "food (paste):FOODON_00003887",
    "food (pasteurized):FOODON_00002654",
    "food (peeled):FOODON_00002655",
    "food (pickled):FOODON_00001079",
    "food (powdered):FOODON_00002976",
    "food (precooked):FOODON_00002971",
    "food (precooked, frozen):FOODON_03305323",
    "food (preserved):FOODON_00002158",
    "food (puffed):FOODON_00002656",
    "food (raw):FOODON_03311126",
    "food (rehydrated):FOODON_00002755",
    "food (roasted):FOODON_00002744",
    "food (salted):FOODON_03460173",
    "food (seasoned):FOODON_00002733",
    "fruit (seedless):FOODON_00003461",
    "food (semiliquid):FOODON_03430103",
    "food (semisolid):FOODON_03430144",
    "food (sliced):FOODON_00002455",
    "food (smoked or smoke-flavored):FOODON_03460172",
    "food (solid):FOODON_03430151",
    "food (spoiled):FOODON_00003366",
    "food (starch or flour thickened):FOODON_03315268",
    "food (steamed):FOODON_00002657",
    "food (sugar-free):FOODON_03315838",
    "food (textured):FOODON_00002658",
    "food (toasted):FOODON_00002659",
    "food (unprocessed):FOODON_03316056",
    "food (unstandardized):FOODON_03315636",
]
cstrittmatter@0 163
# Organism bin. Each entry is a 'label:accession' string; note that the last
# accession uses the PO prefix rather than FOODON.
fo_organism = [
    "algae:FOODON_03411301",
    "animal:FOODON_00003004",
    "fungus:FOODON_03411261",
    "lichen:FOODON_03412345",
    "whole plant:PO_0000003",
]
cstrittmatter@0 170
# NCBI Taxonomy bin (a mix of taxon types). Each entry is a 'label:accession'
# string. The two commented-out taxa are not part of the list.
# NOTE(review): they are presumably replaced by the narrower groups that
# follow each of them -- confirm before re-enabling.
ncbi_taxon = [
    "Actinopterygii:NCBITaxon_7898",
    "Ecdysozoa:NCBITaxon_1206794",
    "Echinodermata:NCBITaxon_7586",
    "Fungi:NCBITaxon_4751",
    "Spiralia:NCBITaxon_2697495",
    "Viridiplantae:NCBITaxon_33090",
    "Amphibia:NCBITaxon_8292",
    # "Sauropsida:NCBITaxon_8457",
    "Aves:NCBITaxon_8782",
    "Crocodylia:NCBITaxon_1294634",
    "Testudinata:NCBITaxon_2841271",
    "Lepidosauria:NCBITaxon_8504",
    # "Mammalia:NCBITaxon_40674",
    "Artiodactyla:NCBITaxon_91561",
    "Carnivora:NCBITaxon_33554",
    "Chiroptera:NCBITaxon_9397",
    "Chrysochloridae:NCBITaxon_9389",
    "Eulipotyphla:NCBITaxon_9362",
    "Hyracoidea:NCBITaxon_9810",
    "Macroscelidea:NCBITaxon_28734",
    "Metatheria:NCBITaxon_9263",
    "Ornithorhynchidae:NCBITaxon_9256",
    "Perissodactyla:NCBITaxon_9787",
    "Pholidota:NCBITaxon_9971",
    "Primates:NCBITaxon_9443",
    "Proboscidea:NCBITaxon_9779",
    "Rodentia:NCBITaxon_9989",
    "Sirenia:NCBITaxon_9774",
    "Tachyglossidae:NCBITaxon_9259",
    "Tenrecidae:NCBITaxon_9369",
    "Tubulidentata:NCBITaxon_9815",
    "Xenarthra:NCBITaxon_9348",
]
cstrittmatter@0 204
# Lookup from bin name to the corresponding list of 'label:accession' terms.
# The fo_consumer bin is disabled until its list is defined.
arg_bins = {
    # "fo_consumer": fo_consumer,
    "fo_product": fo_product,
    "fo_quality": fo_quality,
    "fo_organism": fo_organism,
    "ncbi_taxon": ncbi_taxon,
}