annotate 0.2.0/bin/microreact_post.py @ 11:a5f31c44f8c9

planemo upload
author kkonganti
date Mon, 15 Jul 2024 16:11:44 -0400
parents
children
rev   line source
kkonganti@11 1 #!/usr/bin/env python3
kkonganti@11 2
kkonganti@11 3 import argparse
kkonganti@11 4 import base64
kkonganti@11 5 import csv
kkonganti@11 6 import inspect
kkonganti@11 7 import json
kkonganti@11 8 import logging
kkonganti@11 9 import os
kkonganti@11 10 import pprint
kkonganti@11 11 import secrets
kkonganti@11 12 import string
kkonganti@11 13
kkonganti@11 14 import requests
kkonganti@11 15 from requests.packages import urllib3
kkonganti@11 16
kkonganti@11 17
kkonganti@11 18 # Multiple inheritance for pretty printing of help text.
class MultiArgFormatClasses(
    argparse.RawTextHelpFormatter, argparse.ArgumentDefaultsHelpFormatter
):
    """Help formatter combining raw-text layout with default-value display.

    Inherits from both argparse formatters and adds nothing of its own.
    """
kkonganti@11 23
kkonganti@11 24
kkonganti@11 25 # Basic checks
def check_file_extension(folder_path) -> os.PathLike:
    """Validate the input folder and return the path of its Newick tree file.

    The folder must contain exactly one entry ending in ``.nwk`` and exactly
    one entry ending in ``.csv``, and both must be regular files.

    Args:
        folder_path: Path of the directory holding the tree and metadata files.

    Returns:
        ``folder_path`` joined with the name of the single ``.nwk`` file.

    Raises:
        SystemExit: With exit code 1 (after logging an error) when the path
            is not a directory, when the folder does not hold exactly one
            ``.nwk`` and one ``.csv`` entry, or when either entry is not a
            regular file.
    """
    if not os.path.isdir(folder_path):
        logging.error(f"The provided path: '{folder_path}' is not a valid directory.")
        raise SystemExit(1)

    entries = os.listdir(folder_path)
    nwk_names = [name for name in entries if name.endswith(".nwk")]
    csv_names = [name for name in entries if name.endswith(".csv")]

    # Count each extension separately: two .csv files (or two .nwk files)
    # must be rejected instead of slipping through a combined "== 2" check.
    if len(nwk_names) != 1 or len(csv_names) != 1:
        logging.error(
            "We need exactly one .nwk file and one metadata "
            + "file in CSV (.csv) format."
        )
        raise SystemExit(1)

    nwk_file = os.path.join(folder_path, nwk_names[0])
    csv_file = os.path.join(folder_path, csv_names[0])

    # A directory that merely has the right suffix cannot be uploaded.
    for candidate in (nwk_file, csv_file):
        if not os.path.isfile(candidate):
            logging.error(f"'{candidate}' is not a regular file.")
            raise SystemExit(1)

    return nwk_file
kkonganti@11 57
kkonganti@11 58
kkonganti@11 59 # Checking the CSV file
def uppercase_headers(folder_path) -> list:
    """Upper-case the header row of every ``.csv`` file in a folder, in place.

    Each CSV is read completely into memory before it is rewritten, so that
    truncating the file for writing cannot lose rows that have not been read
    yet. A file whose headers are already all upper-case is left untouched.

    Args:
        folder_path: Directory to scan for files ending in ``.csv``.

    Returns:
        The (upper-cased) header list of the last CSV file processed, or an
        empty list when the folder has no CSV file or the file is empty.
    """
    headers = []
    for filename in os.listdir(folder_path):
        if not filename.endswith(".csv"):
            continue

        filepath = os.path.join(folder_path, filename)
        with open(filepath, "r", newline="") as source:
            rows = list(csv.reader(source))

        if not rows:
            # Empty file: nothing to upper-case and no headers to report.
            headers = []
            continue

        headers = rows[0]
        if all(header.isupper() for header in headers):
            continue

        headers = [header.upper() for header in headers]
        # The input handle is closed by now, so reopening for writing is safe.
        with open(filepath, "w", newline="") as target:
            writer = csv.writer(target)
            writer.writerow(headers)
            writer.writerows(rows[1:])

    return headers
kkonganti@11 79
kkonganti@11 80
def check_csv(folder_path) -> os.PathLike:
    """Validate the metadata CSV file(s) in a folder and return the CSV path.

    For every file ending in ``.csv`` the following is verified in a single
    pass over the rows:

    * a header row exists and its first column is named exactly ``ID``;
    * the stripped values of the first column are unique;
    * every data row has the same number of columns as the first data row.

    Completely blank lines are skipped.

    Args:
        folder_path: Directory to scan for files ending in ``.csv``.

    Returns:
        Path of the last CSV file that was checked.

    Raises:
        SystemExit: With exit code 1 (after logging an error) when any check
            fails or when the folder contains no CSV file.
    """
    filepath = None
    for filename in os.listdir(folder_path):
        if not filename.endswith(".csv"):
            continue

        filepath = os.path.join(folder_path, filename)
        with open(filepath, "r", newline="") as file:
            reader = csv.reader(file)

            # Checking for headers and first column named "ID".
            # An empty file gives None, a blank first line gives [].
            headers = next(reader, None)
            if not headers:
                logging.error("Error: CSV file has no column headers.")
                raise SystemExit(1)
            if headers[0] != "ID":
                logging.error("Error: First column header is not 'ID'.")
                raise SystemExit(1)

            # Both row-level checks share one loop: a csv.reader can only be
            # consumed once, so a second loop over it would never execute.
            seen_ids = set()
            num_columns = None
            for i, row in enumerate(reader):
                if not row:
                    # csv.reader yields [] for a blank line.
                    continue

                id_value = row[0].strip()
                if id_value in seen_ids:
                    logging.error(f"Duplicate ID found: {id_value}")
                    raise SystemExit(1)
                seen_ids.add(id_value)

                if num_columns is None:
                    num_columns = len(row)
                elif len(row) != num_columns:
                    logging.error(
                        f"Error: Unequal number of columns in row {i + 1}"
                    )
                    raise SystemExit(1)

    if filepath is None:
        logging.error(f"No metadata file in CSV (.csv) format found in '{folder_path}'.")
        raise SystemExit(1)

    return filepath
kkonganti@11 118
kkonganti@11 119
kkonganti@11 120 # Encode files to base64 for uploading
def encode_file(file) -> str:
    """Return the base64 encoding of a file's contents as an ASCII string.

    The file is read in binary mode so the encoded payload is exactly the
    bytes on disk. A text-mode round trip would depend on the locale's
    default encoding and would translate line endings.

    Args:
        file: Path of the file to encode.

    Returns:
        The base64 text of the file's raw bytes.
    """
    with open(file, "rb") as handle:
        raw = handle.read()
    return base64.b64encode(raw).decode("ascii")
kkonganti@11 127
kkonganti@11 128
kkonganti@11 129 # Creating the .microreact JSON file
def gen_ran_string(length=4) -> str:
    """Return a random string of ASCII letters.

    Args:
        length: Number of characters to generate.

    Returns:
        ``length`` letters, each drawn from ``string.ascii_letters`` with
        ``secrets.choice``.
    """
    alphabet = string.ascii_letters
    picked = [secrets.choice(alphabet) for _ in range(length)]
    return "".join(picked)
kkonganti@11 133
kkonganti@11 134
kkonganti@11 135 # Microreact JSON template
def create_json(
    metadata_csv, tree_path, metadata_64, tree_64, ProjectName, folder_path
) -> os.PathLike:
    """Write the ``.microreact`` project file and return its path.

    Args:
        metadata_csv: Path of the metadata CSV file (used for name and size).
        tree_path: Path of the Newick tree file (used for name and size).
        metadata_64: Base64 text of the metadata CSV, embedded as a data URI.
        tree_64: Base64 text of the tree file, embedded as a data URI.
        ProjectName: Project name; also the stem of the output file name.
        folder_path: Folder passed to ``uppercase_headers`` to obtain the
            table column names.

    Returns:
        Path of ``<ProjectName>.microreact`` in the current working directory.
    """
    # Random keys under which the two files are registered in the project.
    csv_key = gen_ran_string()
    tree_key = gen_ran_string()

    # Sizes are taken before uppercase_headers() may rewrite the CSV.
    csv_bytes = os.path.getsize(metadata_csv)
    tree_bytes = os.path.getsize(tree_path)

    headers = uppercase_headers(folder_path)
    # The first column is always registered as "ID"; the rest follow the CSV.
    table_columns = [{"field": "ID", "fixed": False}] + [
        {"field": column_name, "fixed": False} for column_name in headers[1:]
    ]

    csv_entry = {
        "blob": f"data:text/csv;base64,{metadata_64}",
        "format": "text/csv",
        "id": csv_key,
        "name": os.path.basename(metadata_csv),
        "size": csv_bytes,
        "type": "data",
    }
    tree_entry = {
        "blob": f"data:application/octet-stream;base64,{tree_64}",
        "format": "text/x-nh",
        "id": tree_key,
        "name": os.path.basename(tree_path),
        "size": tree_bytes,
        "type": "tree",
    }
    filter_settings = {
        "dataFilters": [],
        "chartFilters": [],
        "searchOperator": "includes",
        "searchValue": "",
        "selection": [],
        "selectionBreakdownField": None,
    }
    tree_view = {
        "alignLabels": False,
        "blockHeaderFontSize": 13,
        "blockPadding": 0,
        "blocks": ["MLST_SEQUENCE_TYPE", "ISOLATION_SOURCE"],
        "blockSize": 14,
        "branchLengthsDigits": 4,
        "controls": True,
        "fontSize": 16,
        "hideOrphanDataRows": False,
        "ids": None,
        "internalLabelsFilterRange": [0, 100],
        "internalLabelsFontSize": 13,
        "lasso": False,
        "nodeSize": 14,
        "path": None,
        "roundBranchLengths": True,
        "scaleLineAlpha": True,
        "showBlockHeaders": True,
        "showBlockLabels": False,
        "showBranchLengths": False,
        "showEdges": True,
        "showInternalLabels": False,
        "showLabels": True,
        "showLeafLabels": True,
        "showPiecharts": True,
        "showShapeBorders": True,
        "showShapes": True,
        "styleLeafLabels": False,
        "styleNodeEdges": False,
        "subtreeIds": None,
        "type": "rc",
        "title": "Tree",
        "labelField": "ID",
        "file": tree_key,
    }
    table_view = {
        "displayMode": "cosy",
        "hideUnselected": False,
        "title": "Metadata",
        "paneId": "table-1",
        "columns": table_columns,
        "file": csv_key,
    }

    # Key order matters for the serialized output, so it mirrors the template.
    microreact_data = {
        "charts": {},
        "datasets": {
            "dataset-1": {"id": "dataset-1", "file": csv_key, "idFieldName": "ID"}
        },
        "files": {csv_key: csv_entry, tree_key: tree_entry},
        "filters": filter_settings,
        "maps": {},
        "meta": {"name": ProjectName},
        "trees": {"tree-1": tree_view},
        "tables": {"table-1": table_view},
        "views": [],
        "schema": "https://microreact.org/schema/v1.json",
    }

    micro_path = os.path.join(os.getcwd(), ProjectName + ".microreact")
    with open(micro_path, "w") as out_handle:
        json.dump(microreact_data, out_handle, indent=2)

    return micro_path
kkonganti@11 241
kkonganti@11 242
kkonganti@11 243 # Main
def main() -> None:
    """
    Will take as input a folder containing 2 files, a tree file and a metadata CSV file
    and upload it to a new project named "Cronology" and get a publicly shareable link from
    microreact.org once the upload is successful

    """
    # NOTE: the docstring above is passed to argparse as the --help
    # description (description=main.__doc__), so it is user-facing text.
    #
    # Flow: parse arguments -> read the API token -> validate the folder and
    # CSV -> normalise CSV headers -> base64-encode both files -> write the
    # .microreact JSON -> POST it -> store the returned project URL in
    # microreact_url.txt in the current working directory.
    # Exits with status 1 (SystemExit) on any validation or upload failure.

    # Set logging.
    logging.basicConfig(
        format="\n"
        + "=" * 55
        + "\n%(asctime)s - %(levelname)s\n"
        + "=" * 55
        + "\n%(message)s\n\n",
        level=logging.DEBUG,
    )

    # Turn off SSL warnings
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    prog_name = os.path.basename(inspect.stack()[0].filename)

    parser = argparse.ArgumentParser(
        prog=prog_name, description=main.__doc__, formatter_class=MultiArgFormatClasses
    )

    # Add required arguments
    required = parser.add_argument_group("required arguments")

    required.add_argument(
        "-dir",
        dest="dir",
        default=False,
        required=True,
        help="UNIX path to diretory containing the tree and all other\ndataset and annotation files."
        + " Your tree file and metadata files must\nhave the extension .nwk and .csv",
    )
    required.add_argument(
        "-atp",
        dest="AccessTokenPath",
        default=False,
        required=True,
        help="The path to your API Access Token needed for uploading.\n"
        + "File must be a .txt file.",
    )
    parser.add_argument(
        "-name",
        dest="ProjectName",
        default="Project",
        required=False,
        help="Name for the project you want to upload",
    )

    # Define defaults
    args = parser.parse_args()
    upload_url = "https://microreact.org/api/projects/create"
    folder_path = args.dir
    ProjectName = args.ProjectName
    micro_url_info_path = os.path.join(os.getcwd(), "microreact_url.txt")

    # Strip the line terminator: a header value containing a newline is
    # rejected by the HTTP client before the request is even sent.
    with open(args.AccessTokenPath, "r") as token:
        atp = token.readline().strip()
    if not atp:
        logging.error(
            f"No access token found on the first line of '{args.AccessTokenPath}'."
        )
        raise SystemExit(1)

    tree_path = check_file_extension(folder_path)
    metadata_csv = check_csv(folder_path)

    # Upper-case the CSV headers BEFORE encoding, so the uploaded blob uses
    # the same field names as the column definitions that create_json()
    # builds from the upper-cased headers.
    uppercase_headers(folder_path)

    metadata_64 = encode_file(metadata_csv)
    tree_64 = encode_file(tree_path)

    # Prepare the data to be sent in the request
    micro_path = create_json(
        metadata_csv, tree_path, metadata_64, tree_64, ProjectName, folder_path
    )

    with open(micro_path) as micro_fh:
        data = json.load(micro_fh)

    # Additional parameters, including the MicroReact API key
    headers = {"Content-type": "application/json; charset=UTF-8", "Access-Token": atp}

    # Make the POST request to Microreact
    # NOTE(review): verify=False disables TLS certificate verification while
    # the access token is being transmitted. Kept to preserve existing
    # behaviour; confirm whether it is still required in the deployment.
    r = requests.post(upload_url, json=data, headers=headers, verify=False)

    if not r.ok:
        if r.status_code == 400:
            logging.error("Microreact API call failed with response " + r.text + "\n")
        else:
            logging.error(
                "Microreact API call failed with unknown response code "
                + str(r.status_code)
                + "\n"
            )
        raise SystemExit(1)

    if r.status_code == 200:
        r_json = json.loads(r.text)
        with open(micro_url_info_path, "w") as out_fh:
            out_fh.write(
                f"Uploaded successfully!\n\nYour project URL:\n{r_json['url']}"
            )
    else:
        # A successful status other than 200 is not expected; make it visible
        # instead of finishing silently without writing the URL file.
        logging.warning(
            "Microreact API call returned status code "
            + str(r.status_code)
            + "; no project URL was written.\n"
        )
kkonganti@11 348
kkonganti@11 349
# Script entry point: run the upload only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()