"""Static definitions"""

import os


# root path: directory containing this module
ROOT = os.path.dirname(__file__)

# URL to list of OLS ontologies where download link is given
embl_ontologies = 'https://www.ebi.ac.uk/ols/ontologies'

# beginning of URL to ontology PURL
purl_link = 'http://purl.obolibrary.org/obo/'

# directory for downloaded OWL files
# NOTE(review): this is a relative path and is not joined with ROOT here;
# presumably it is resolved against the working directory -- confirm with callers.
owl_dir = 'lexmapr/owl_files'

# path to database with synonyms from predefined resources and from OWL files
# (kept inside owl_dir; value is 'lexmapr/owl_files/label_synonyms.db')
synonym_db = owl_dir + '/label_synonyms.db'

# path to database with all ontologies of interest
# (kept inside owl_dir; value is 'lexmapr/owl_files/ontol_table.db')
ontol_db = owl_dir + '/ontol_table.db'

# ontologies of interest
# Only the uncommented entries are active; the commented-out names are kept
# so they can be re-enabled without looking them up again.
ontol_interest = [
    #'BFO',
    #'CHEBI',
    #'ENVO',
    'FOODON',
    #'GENEPIO',
    'NCBITAXON',  # NCBITaxon is not valid as of April 2022
    #'OGMS',
    #'PATO',
    #'PCO',
    #'UBERON',
]

# ontology accessions that do not have labels or are placeholders as of April 2022
# will skip in database building
missing_ontol_labels = [
    'GENEPIO_0001367', 'GENEPIO_0001368', 'GENEPIO_0001369', 'GENEPIO_0001370',
    'GENEPIO_0001372', 'GENEPIO_0001373', '_MIAA_0000021',
]

# terms indicating that the metadata was not given/collected; will output empty results
# The list is kept exactly as authored: a few terms appear twice ('unknown',
# 'not provided', 'not available', 'other', 'unidentified'), which does not
# affect membership tests.
# NOTE(review): misspellings such as 'unamed', 'mising' and 'misng' look
# deliberate (matching raw input text) -- confirm before "correcting" them.
not_provided = [
    'not applicable', 'unknown', 'n a', 'not provided', 'not available', 'miscellaneous',
    'not collected', 'missing', 'unidentified', 'unknown', 'none', 'unamed', 'other',
    'undetermined', 'not known', 'no history given', 'no source specified', 'null',
    'unspecified', 'not reported', 'not available not collected', 'not isolated',
    'not available', 'not provided', 'xxx', 'mising', 'misng', 'other', 'unidentified',
    'not determined other', 'reported later', 'intact unknown', 'not determined',
    'not ascertained', 'unk', 'nd', 'nd others', 'nd other', 'etc', 'na', '', ' ',
    'not supplied', 'not specified',
]

# below are bin definitions
# TODO: food consumer group:FOODON_03510136 changed, formatted as '* as food consumer'
#       can collect as 'for *' in text?
#fo_consumer = []

# Food product bins; each entry is a 'label:accession' string.
# NOTE(review): 'avian food product' uses the accession 'FOODON_001251', which
# has fewer digits than every other FOODON accession here -- verify against the
# ontology before changing it.
# NOTE(review): 'game animal food product' and 'meat food product' share the
# accession 'FOODON_00002477' -- verify which label it belongs to.
fo_product = [
    'algal food product:FOODON_00001184',
    'amphibian:FOODON_03411624',
    'amphibian or reptile food product:FOODON_00002200',
    'animal based refined or partially-refined food product:FOODON_00001595',
    'avian egg food product:FOODON_00001105',
    'avian food product:FOODON_001251',
    'bakery food product:FOODON_00001626',
    'cell-based technology food product:FOODON_00003376',
    'dairy food product:FOODON_00001256',
    'dietary supplement:FOODON_03401298',
    'fish egg food product:FOODON_00001250',
    'fish food product:FOODON_00001248',
    'food product analog:FOODON_00001871',
    'food product component:FOODON_00001714',
    'fungus food product:FOODON_00001143',
    'game animal food product:FOODON_00002477',
    'insect food product:FOODON_00001177',
    'meat food product:FOODON_00002477',
    'microbial food product:FOODON_00001145',
    'plant food product:FOODON_00001015',
    'poultry food product:FOODON_00001283',
    'prepared food product:FOODON_00001180',
    'processed food product:FOODON_03311737',
    'reptile egg food product:FOODON_00002199',
    'seafood product:FOODON_00001046',
    'shellfish food product:FOODON_00001293',
    'soup food product:FOODON_00002257',
    'sustainable agriculture food product:FOODON_00003375',
    'vegetarian food product:FOODON_00003194',
    'vertebrate animal food product:FOODON_00001092',
]

# Food quality/processing bins; each entry is a 'label:accession' string.
# All labels have the form 'food (<quality>)' except 'fruit (seedless)'.
fo_quality = [
    'food (acidified):FOODON_03301625',
    'food (adulterated):FOODON_00003367',
    'food (baked):FOODON_00002456',
    'food (batter-coated):FOODON_00002662',
    'food (blanched):FOODON_00002767',
    'food (blend):FOODON_00003889',
    'food (boiled):FOODON_00002688',
    'food (breaded):FOODON_00002661',
    'food (broiled or grilled):FOODON_00002647',
    'food (canned):FOODON_00002418',
    'food (chilled):FOODON_00002642',
    'food (chopped):FOODON_00002777',
    'food (cleaned):FOODON_00002708',
    'food (colored):FOODON_00002650',
    'food (comminuted):FOODON_00002754',
    'food (cooked):FOODON_00001181',
    'food (deep-fried):FOODON_03307052',
    'food (dehydrated):FOODON_00002643',
    'food (dried):FOODON_03307539',
    'food (fat or oil coated):FOODON_03460233',
    'food (fermented):FOODON_00001258',
    'food (filled):FOODON_00002644',
    'food (flavored):FOODON_00002646',
    'food (freeze-dried):FOODON_03301752',
    'food (fresh):FOODON_00002457',
    'food (fried):FOODON_00002660',
    'food (frozen):FOODON_03302148',
    'food (genetically-modified):FOODON_03530251',
    'food (ground):FOODON_00002713',
    'food (harvested):FOODON_00003398',
    'food (heat treated):FOODON_03316043',
    'food (hulled):FOODON_00002720',
    'food (hydrolized):FOODON_00002653',
    'food (irradiated):FOODON_03305364',
    'food (juiced):FOODON_00003499',
    'food (liquid):FOODON_03430130',
    'food (milled):FOODON_00002649',
    'food (not genetically-modified):FOODON_00003379',
    'food (organically grown):FOODON_03306690',
    'food (packaged):FOODON_00002739',
    'food (packed in high pressurised containers):FOODON_03317139',
    'food (pan-fried):FOODON_00002463',
    'food (paste):FOODON_00003887',
    'food (pasteurized):FOODON_00002654',
    'food (peeled):FOODON_00002655',
    'food (pickled):FOODON_00001079',
    'food (powdered):FOODON_00002976',
    'food (precooked):FOODON_00002971',
    'food (precooked, frozen):FOODON_03305323',
    'food (preserved):FOODON_00002158',
    'food (puffed):FOODON_00002656',
    'food (raw):FOODON_03311126',
    'food (rehydrated):FOODON_00002755',
    'food (roasted):FOODON_00002744',
    'food (salted):FOODON_03460173',
    'food (seasoned):FOODON_00002733',
    'fruit (seedless):FOODON_00003461',
    'food (semiliquid):FOODON_03430103',
    'food (semisolid):FOODON_03430144',
    'food (sliced):FOODON_00002455',
    'food (smoked or smoke-flavored):FOODON_03460172',
    'food (solid):FOODON_03430151',
    'food (spoiled):FOODON_00003366',
    'food (starch or flour thickened):FOODON_03315268',
    'food (steamed):FOODON_00002657',
    'food (sugar-free):FOODON_03315838',
    'food (textured):FOODON_00002658',
    'food (toasted):FOODON_00002659',
    'food (unprocessed):FOODON_03316056',
    'food (unstandardized):FOODON_03315636',
]

# Organism bins; each entry is a 'label:accession' string.
# 'whole plant' is the only entry whose accession has a PO_ prefix rather
# than FOODON_.
fo_organism = [
    'algae:FOODON_03411301',
    'animal:FOODON_00003004',
    'fungus:FOODON_03411261',
    'lichen:FOODON_03412345',
    'whole plant:PO_0000003',
]

# NCBI Taxonomy bins (mix of taxon types); each entry is a 'name:accession' string.
# The commented-out entries (Sauropsida, Mammalia) are disabled and kept here
# for reference.
ncbi_taxon = [
    'Actinopterygii:NCBITaxon_7898',
    'Ecdysozoa:NCBITaxon_1206794',
    'Echinodermata:NCBITaxon_7586',
    'Fungi:NCBITaxon_4751',
    'Spiralia:NCBITaxon_2697495',
    'Viridiplantae:NCBITaxon_33090',
    'Amphibia:NCBITaxon_8292',
    #'Sauropsida:NCBITaxon_8457',
    'Aves:NCBITaxon_8782',
    'Crocodylia:NCBITaxon_1294634',
    'Testudinata:NCBITaxon_2841271',
    'Lepidosauria:NCBITaxon_8504',
    #'Mammalia:NCBITaxon_40674',
    'Artiodactyla:NCBITaxon_91561',
    'Carnivora:NCBITaxon_33554',
    'Chiroptera:NCBITaxon_9397',
    'Chrysochloridae:NCBITaxon_9389',
    'Eulipotyphla:NCBITaxon_9362',
    'Hyracoidea:NCBITaxon_9810',
    'Macroscelidea:NCBITaxon_28734',
    'Metatheria:NCBITaxon_9263',
    'Ornithorhynchidae:NCBITaxon_9256',
    'Perissodactyla:NCBITaxon_9787',
    'Pholidota:NCBITaxon_9971',
    'Primates:NCBITaxon_9443',
    'Proboscidea:NCBITaxon_9779',
    'Rodentia:NCBITaxon_9989',
    'Sirenia:NCBITaxon_9774',
    'Tachyglossidae:NCBITaxon_9259',
    'Tenrecidae:NCBITaxon_9369',
    'Tubulidentata:NCBITaxon_9815',
    'Xenarthra:NCBITaxon_9348',
]

# Maps each bin name to the module-level list of the same name.
# The 'fo_consumer' bin is disabled, matching the commented-out fo_consumer list.
arg_bins = {
    #'fo_consumer': fo_consumer,
    'fo_product': fo_product,
    'fo_quality': fo_quality,
    'fo_organism': fo_organism,
    'ncbi_taxon': ncbi_taxon,
}