comparison 0.2.0/bin/microreact_post.py @ 11:a5f31c44f8c9

planemo upload
author kkonganti
date Mon, 15 Jul 2024 16:11:44 -0400
parents
children
comparison
equal deleted inserted replaced
10:ddf7a172bf30 11:a5f31c44f8c9
1 #!/usr/bin/env python3
2
3 import argparse
4 import base64
5 import csv
6 import inspect
7 import json
8 import logging
9 import os
10 import pprint
11 import secrets
12 import string
13
14 import requests
15 from requests.packages import urllib3
16
17
18 # Multiple inheritance for pretty printing of help text.
class MultiArgFormatClasses(
    argparse.RawTextHelpFormatter,
    argparse.ArgumentDefaultsHelpFormatter,
):
    """Help formatter combining raw-text layout with default-value display."""
23
24
25 # Basic checks
def check_file_extension(folder_path) -> os.PathLike:
    """
    Validate the input folder and return the path of its Newick tree file.

    The folder must contain exactly one entry ending in ``.nwk`` and exactly
    one entry ending in ``.csv``, and both must be regular files.

    Args:
        folder_path: Directory expected to hold the tree and metadata files.

    Returns:
        ``folder_path`` joined with the name of the ``.nwk`` file.

    Raises:
        SystemExit: With status 1, after logging an error, when the folder
            does not exist or does not hold the expected pair of files.
    """
    if not os.path.isdir(folder_path):
        logging.error(f"The provided path: '{folder_path}' is not a valid directory.")
        raise SystemExit(1)

    entries = os.listdir(folder_path)
    nwk_files = [name for name in entries if name.endswith(".nwk")]
    csv_files = [name for name in entries if name.endswith(".csv")]

    # Count each extension on its own: a combined "two matching files" count
    # would accept two .csv files (or two .nwk files) and leave no tree to
    # return.
    if len(nwk_files) != 1 or len(csv_files) != 1:
        logging.error(
            "We need exactly one .nwk file and one metadata "
            + "file in CSV (.csv) format."
        )
        raise SystemExit(1)

    # A directory that merely carries the right suffix cannot be read later.
    for name in (nwk_files[0], csv_files[0]):
        if not os.path.isfile(os.path.join(folder_path, name)):
            logging.error(f"{name} is not a regular file.")
            raise SystemExit(1)

    return os.path.join(folder_path, nwk_files[0])
57
58
59 # Checking the CSV file
def uppercase_headers(folder_path) -> list:
    """
    Upper-case the header row of every ``.csv`` file found in ``folder_path``.

    A file is rewritten in place only when upper-casing actually changes at
    least one header; otherwise it is left untouched.

    Args:
        folder_path: Directory to scan for files ending in ``.csv``.

    Returns:
        The (upper-cased) header row of the last ``.csv`` file processed; an
        empty list if that file is empty.

    Raises:
        SystemExit: With status 1, after logging an error, when the folder
            contains no ``.csv`` file.
    """
    csv_names = [name for name in os.listdir(folder_path) if name.endswith(".csv")]
    if not csv_names:
        logging.error(f"No .csv file found in '{folder_path}'.")
        raise SystemExit(1)

    headers = []
    for filename in csv_names:
        filepath = os.path.join(folder_path, filename)

        # Read the whole file before reopening it for writing: mode "w"
        # truncates immediately, so rows still unread at that point would be
        # lost.
        with open(filepath, "r", newline="") as in_fh:
            reader = csv.reader(in_fh)
            headers = next(reader, [])
            rows = list(reader)

        upper_headers = [header.upper() for header in headers]
        if upper_headers == headers:
            # Nothing to change; skip the rewrite so the file stays as-is.
            continue

        headers = upper_headers
        with open(filepath, "w", newline="") as out_fh:
            writer = csv.writer(out_fh)
            writer.writerow(headers)
            writer.writerows(rows)

    return headers
79
80
def check_csv(folder_path) -> os.PathLike:
    """
    Validate every ``.csv`` file in ``folder_path`` and return its path.

    Each file must have a header row whose first column is ``ID``, unique
    (whitespace-stripped) values in that first column, and the same number of
    columns in every data row. Completely blank rows are ignored.

    Args:
        folder_path: Directory to scan for files ending in ``.csv``.

    Returns:
        Path of the last ``.csv`` file validated.

    Raises:
        SystemExit: With status 1, after logging an error, when no ``.csv``
            file exists or a file fails validation.
    """
    csv_names = [name for name in os.listdir(folder_path) if name.endswith(".csv")]
    if not csv_names:
        logging.error(f"No .csv file found in '{folder_path}'.")
        raise SystemExit(1)

    filepath = None
    for filename in csv_names:
        filepath = os.path.join(folder_path, filename)
        _validate_csv(filepath)
    return filepath


def _validate_csv(filepath) -> None:
    """Check header, ID uniqueness and column counts of one CSV file."""
    with open(filepath, "r", newline="") as csv_fh:
        reader = csv.reader(csv_fh)

        # Checking for headers and first column named "ID"
        headers = next(reader, None)
        if not headers:
            # Covers both an empty file and a blank first line.
            logging.error("Error: CSV file has no column headers.")
            raise SystemExit(1)
        if headers[0] != "ID":
            logging.error("Error: First column header is not 'ID'.")
            raise SystemExit(1)

        # Both row checks share one pass: a csv.reader can only be consumed
        # once, so a second loop over it would never see any rows.
        seen_ids = set()
        num_columns = None
        for i, row in enumerate(reader):
            if not row:
                continue

            id_value = row[0].strip()
            if id_value in seen_ids:
                logging.error(f"Duplicate ID found: {id_value}")
                raise SystemExit(1)
            seen_ids.add(id_value)

            # The first data row sets the expected width for the rest.
            if num_columns is None:
                num_columns = len(row)
            elif len(row) != num_columns:
                logging.error(f"Error: Unequal number of columns in row {i + 1}")
                raise SystemExit(1)
118
119
120 # Encode files to base64 for uploading
def encode_file(file) -> str:
    """
    Return the base64 encoding of a file's exact on-disk bytes.

    The file is read in binary mode so that no text decoding or newline
    translation alters the payload; the encoded content therefore matches the
    byte count reported by ``os.path.getsize`` for the same file.

    Args:
        file: Path of the file to encode.

    Returns:
        The base64 text (ASCII) of the file content.
    """
    with open(file, "rb") as fh:
        return base64.b64encode(fh.read()).decode("ascii")
127
128
129 # Creating the .microreact JSON file
def gen_ran_string(length=4) -> str:
    """Return ``length`` ASCII letters, each picked with ``secrets.choice``."""
    alphabet = string.ascii_letters
    picked = []
    for _ in range(length):
        picked.append(secrets.choice(alphabet))
    return "".join(picked)
133
134
135 # Microreact JSON template
def create_json(
    metadata_csv, tree_path, metadata_64, tree_64, ProjectName, folder_path
) -> os.PathLike:
    """
    Write the ``<ProjectName>.microreact`` project file and return its path.

    Args:
        metadata_csv: Path of the metadata CSV (used for its name and size).
        tree_path: Path of the tree file (used for its name and size).
        metadata_64: Base64 text embedded as the CSV blob.
        tree_64: Base64 text embedded as the tree blob.
        ProjectName: Project name; also the stem of the output file name.
        folder_path: Folder handed to ``uppercase_headers`` to get the column
            names for the metadata table.

    Returns:
        Path of the JSON file written in the current working directory.
    """
    csv_file_id = gen_ran_string()
    tree_file_id = gen_ran_string()

    # Sizes are taken before uppercase_headers() runs.
    # NOTE(review): csv_size and the caller-supplied metadata_64 both predate
    # the call below, which may rewrite the CSV on disk - confirm the embedded
    # blob is meant to keep the original header case.
    csv_size = os.path.getsize(metadata_csv)
    tree_size = os.path.getsize(tree_path)
    headers = uppercase_headers(folder_path)

    # First column is always reported as "ID"; the rest follow the CSV header.
    columns = [{"field": "ID", "fixed": False}] + [
        {"field": name, "fixed": False} for name in headers[1:]
    ]

    csv_entry = {
        "blob": f"data:text/csv;base64,{metadata_64}",
        "format": "text/csv",
        "id": csv_file_id,
        "name": os.path.basename(metadata_csv),
        "size": csv_size,
        "type": "data",
    }
    tree_entry = {
        "blob": f"data:application/octet-stream;base64,{tree_64}",
        "format": "text/x-nh",
        "id": tree_file_id,
        "name": os.path.basename(tree_path),
        "size": tree_size,
        "type": "tree",
    }
    filter_settings = {
        "dataFilters": [],
        "chartFilters": [],
        "searchOperator": "includes",
        "searchValue": "",
        "selection": [],
        "selectionBreakdownField": None,
    }
    tree_settings = {
        "alignLabels": False,
        "blockHeaderFontSize": 13,
        "blockPadding": 0,
        "blocks": ["MLST_SEQUENCE_TYPE", "ISOLATION_SOURCE"],
        "blockSize": 14,
        "branchLengthsDigits": 4,
        "controls": True,
        "fontSize": 16,
        "hideOrphanDataRows": False,
        "ids": None,
        "internalLabelsFilterRange": [0, 100],
        "internalLabelsFontSize": 13,
        "lasso": False,
        "nodeSize": 14,
        "path": None,
        "roundBranchLengths": True,
        "scaleLineAlpha": True,
        "showBlockHeaders": True,
        "showBlockLabels": False,
        "showBranchLengths": False,
        "showEdges": True,
        "showInternalLabels": False,
        "showLabels": True,
        "showLeafLabels": True,
        "showPiecharts": True,
        "showShapeBorders": True,
        "showShapes": True,
        "styleLeafLabels": False,
        "styleNodeEdges": False,
        "subtreeIds": None,
        "type": "rc",
        "title": "Tree",
        "labelField": "ID",
        "file": tree_file_id,
    }
    table_settings = {
        "displayMode": "cosy",
        "hideUnselected": False,
        "title": "Metadata",
        "paneId": "table-1",
        "columns": columns,
        "file": csv_file_id,
    }

    # Key order below fixes the key order of the JSON written to disk.
    microreact_data = {
        "charts": {},
        "datasets": {
            "dataset-1": {"id": "dataset-1", "file": csv_file_id, "idFieldName": "ID"}
        },
        "files": {csv_file_id: csv_entry, tree_file_id: tree_entry},
        "filters": filter_settings,
        "maps": {},
        "meta": {"name": ProjectName},
        "trees": {"tree-1": tree_settings},
        "tables": {"table-1": table_settings},
        "views": [],
        "schema": "https://microreact.org/schema/v1.json",
    }

    micro_path = os.path.join(os.getcwd(), ProjectName + ".microreact")
    with open(micro_path, "w") as out_fh:
        json.dump(microreact_data, out_fh, indent=2)

    return micro_path
241
242
243 # Main
def main() -> None:
    """
    Will take as input a folder containing 2 files, a tree file and a metadata CSV file
    and upload it to a new project named "Cronology" and get a publicly shareable link from
    microreact.org once the upload is successful

    """
    # NOTE: the docstring above is handed to argparse as the program
    # description, so it is user-facing help text.

    # Set logging.
    logging.basicConfig(
        format="\n"
        + "=" * 55
        + "\n%(asctime)s - %(levelname)s\n"
        + "=" * 55
        + "\n%(message)s\n\n",
        level=logging.DEBUG,
    )

    # Turn off SSL warnings
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    prog_name = os.path.basename(inspect.stack()[0].filename)

    parser = argparse.ArgumentParser(
        prog=prog_name, description=main.__doc__, formatter_class=MultiArgFormatClasses
    )

    # Add required arguments
    required = parser.add_argument_group("required arguments")

    required.add_argument(
        "-dir",
        dest="dir",
        default=False,
        required=True,
        help="UNIX path to diretory containing the tree and all other\ndataset and annotation files."
        + " Your tree file and metadata files must\nhave the extension .nwk and .csv",
    )
    required.add_argument(
        "-atp",
        dest="AccessTokenPath",
        default=False,
        required=True,
        help="The path to your API Access Token needed for uploading.\n"
        + "File must be a .txt file.",
    )
    parser.add_argument(
        "-name",
        dest="ProjectName",
        default="Project",
        required=False,
        help="Name for the project you want to upload",
    )

    # Define defaults
    args = parser.parse_args()
    upload_url = "https://microreact.org/api/projects/create"
    folder_path = args.dir
    ProjectName = args.ProjectName
    micro_url_info_path = os.path.join(os.getcwd(), "microreact_url.txt")

    # Strip the line ending: the token is sent as an HTTP header value, which
    # must not contain a newline.
    with open(args.AccessTokenPath, "r") as token:
        atp = token.readline().strip()
    if not atp:
        logging.error(f"No access token found in '{args.AccessTokenPath}'.")
        raise SystemExit(1)

    tree_path = check_file_extension(folder_path)
    metadata_csv = check_csv(folder_path)
    # NOTE(review): both files are encoded here, before create_json() calls
    # uppercase_headers(), so the uploaded CSV blob keeps the original header
    # case - confirm that is intended.
    metadata_64 = encode_file(metadata_csv)
    tree_64 = encode_file(tree_path)

    # Prepare the data to be sent in the request
    micro_path = create_json(
        metadata_csv, tree_path, metadata_64, tree_64, ProjectName, folder_path
    )

    with open(micro_path) as micro_fh:
        data = json.load(micro_fh)

    # Additional parameters, including the MicroReact API key
    headers = {"Content-type": "application/json; charset=UTF-8", "Access-Token": atp}

    # Make the POST request to Microreact
    # SECURITY NOTE(review): verify=False disables TLS certificate checks
    # while the access token is being transmitted. Left unchanged because it
    # looks deliberate (warnings are silenced above) - confirm it is required.
    r = requests.post(upload_url, json=data, headers=headers, verify=False)

    if not r.ok:
        if r.status_code == 400:
            logging.error("Microreact API call failed with response " + r.text + "\n")
        else:
            logging.error(
                "Microreact API call failed with unknown response code "
                + str(r.status_code)
                + "\n"
            )
        raise SystemExit(1)

    if r.status_code == 200:
        r_json = json.loads(r.text)
        with open(micro_url_info_path, "w") as out_fh:
            out_fh.write(
                f"Uploaded successfully!\n\nYour project URL:\n{r_json['url']}"
            )
348
349
# Run the uploader only when this file is executed as a script.
if __name__ == "__main__":
    main()