#!/usr/bin/env python3
"""Validate a tree + metadata folder and upload it to microreact.org as a project."""

import argparse
import base64
import csv
import inspect
import json
import logging
import os
import pprint  # kept: file-level import retained for ad-hoc debugging
import secrets
import string
import sys

# Seconds to wait for the Microreact server before giving up on the upload.
_REQUEST_TIMEOUT_SECONDS = 300


# Multiple inheritance for pretty printing of help text.
class MultiArgFormatClasses(
    argparse.RawTextHelpFormatter, argparse.ArgumentDefaultsHelpFormatter
):
    """Help formatter combining raw help-text layout with default-value display."""


def _fail(message) -> None:
    """Log *message* as an error and terminate the script with exit status 1."""
    logging.error(message)
    sys.exit(1)


# Basic checks
def check_file_extension(folder_path) -> str:
    """Verify the folder holds exactly one .nwk and one .csv file.

    Args:
        folder_path: Directory expected to contain the tree and the metadata.

    Returns:
        Path to the single .nwk (tree) file.

    Exits with status 1 when the path is not a directory, or when it does not
    contain exactly one regular .nwk file and exactly one regular .csv file.
    """
    if not os.path.isdir(folder_path):
        _fail(f"The provided path: '{folder_path}' is not a valid directory.")

    entries = os.listdir(folder_path)
    nwk_paths = [
        os.path.join(folder_path, name) for name in entries if name.endswith(".nwk")
    ]
    csv_paths = [
        os.path.join(folder_path, name) for name in entries if name.endswith(".csv")
    ]

    # Counting each extension separately rejects e.g. two .csv files and no
    # tree, which a combined "two files in total" check would let through.
    # Requiring regular files keeps directories named like "x.csv" out of the
    # later open() calls.
    exactly_one_each = len(nwk_paths) == 1 and len(csv_paths) == 1
    if not exactly_one_each or not all(
        os.path.isfile(path) for path in nwk_paths + csv_paths
    ):
        _fail(
            "We need exactly one .nwk file and one metadata "
            + "file in CSV (.csv) format."
        )

    return nwk_paths[0]


# Checking the CSV file
def uppercase_headers(folder_path) -> list:
    """Upper-case the header row of every .csv file in *folder_path* in place.

    A file is rewritten only when upper-casing actually changes its header
    row. All rows are loaded before the file is reopened for writing, because
    opening in "w" mode truncates the file and would otherwise drop any rows
    the reader had not consumed yet.

    Args:
        folder_path: Directory to scan for .csv files.

    Returns:
        The upper-cased header row of the last .csv file processed, or an
        empty list when no .csv file exists or the file is empty.
    """
    headers = []
    for filename in os.listdir(folder_path):
        if not filename.endswith(".csv"):
            continue

        filepath = os.path.join(folder_path, filename)
        with open(filepath, "r", newline="") as handle:
            rows = list(csv.reader(handle))

        if not rows:
            headers = []
            continue

        original_headers = rows[0]
        headers = [header.upper() for header in original_headers]
        if headers != original_headers:
            with open(filepath, "w", newline="") as handle:
                writer = csv.writer(handle)
                writer.writerow(headers)
                writer.writerows(rows[1:])

    return headers


def check_csv(folder_path) -> str:
    """Validate the metadata CSV found in *folder_path*.

    Checks, in a single pass over the file, that:
      * a header row exists and its first column is named "ID";
      * every data row has as many columns as the first data row;
      * the (whitespace-stripped) values of the first column are unique.
    Completely blank lines are skipped.

    Args:
        folder_path: Directory containing the metadata .csv file.

    Returns:
        Path to the first .csv file found (it is the one that was validated).

    Exits with status 1 on any validation failure or when no .csv file exists.
    """
    for filename in os.listdir(folder_path):
        if not filename.endswith(".csv"):
            continue

        filepath = os.path.join(folder_path, filename)
        with open(filepath, "r", newline="") as handle:
            reader = csv.reader(handle)

            # Checking for headers and first column named "ID"
            headers = next(reader, None)
            if headers is None:
                _fail("Error: CSV file has no column headers.")
            if not headers or headers[0] != "ID":
                _fail("Error: First column header is not 'ID'.")

            seen_ids = set()
            num_columns = None
            # Both checks share one loop: csv.reader can only be consumed
            # once, so a second loop over it would never see any row.
            for i, row in enumerate(reader):
                if not row:
                    continue

                # Checking that columns are equal across all rows
                if num_columns is None:
                    num_columns = len(row)
                elif len(row) != num_columns:
                    _fail(f"Error: Unequal number of columns in row {i + 1}")

                # Check if all values in "ID" column are unique
                id_value = row[0].strip()
                if id_value in seen_ids:
                    _fail(f"Duplicate ID found: {id_value}")
                seen_ids.add(id_value)

        return filepath

    _fail(f"No metadata CSV (.csv) file found in '{folder_path}'.")


# Encode files to base64 for uploading
def encode_file(file) -> str:
    """Return the base64 text of the exact bytes stored in *file*.

    The file is read in binary mode so the encoded payload matches the bytes
    on disk (and the size reported by os.path.getsize) regardless of locale
    encoding or newline translation.
    """
    with open(file, "rb") as handle:
        return base64.b64encode(handle.read()).decode("ascii")


# Creating the .microreact JSON file
def gen_ran_string(length=4) -> str:
    """Return a random string of *length* ASCII letters."""
    letters = string.ascii_letters
    return "".join(secrets.choice(letters) for _ in range(length))


# Microreact JSON template
def create_json(
    metadata_csv, tree_path, metadata_64, tree_64, ProjectName, folder_path
) -> str:
    """Write the ``<ProjectName>.microreact`` project file to the working directory.

    Args:
        metadata_csv: Path to the metadata CSV (used for its name and size).
        tree_path: Path to the Newick tree (used for its name and size).
        metadata_64: Base64 text of the metadata CSV.
        tree_64: Base64 text of the tree file.
        ProjectName: Project name; also the stem of the output file name.
        folder_path: Folder holding the CSV; its headers are upper-cased in
            place and become the table columns.

    Returns:
        Path of the JSON file that was written.
    """
    file_csv = gen_ran_string()
    file_tree = gen_ran_string()
    # Both ids are keys of the same "files" mapping, so they must differ.
    while file_tree == file_csv:
        file_tree = gen_ran_string()

    csv_size = os.path.getsize(metadata_csv)
    tree_size = os.path.getsize(tree_path)
    headers = uppercase_headers(folder_path)
    columns = [{"field": "ID", "fixed": False}]
    columns.extend({"field": header, "fixed": False} for header in headers[1:])

    microreact_data = {
        "charts": {},
        "datasets": {
            "dataset-1": {"id": "dataset-1", "file": file_csv, "idFieldName": "ID"}
        },
        "files": {
            file_csv: {
                "blob": f"data:text/csv;base64,{metadata_64}",
                "format": "text/csv",
                "id": file_csv,
                "name": os.path.basename(metadata_csv),
                "size": csv_size,
                "type": "data",
            },
            file_tree: {
                "blob": f"data:application/octet-stream;base64,{tree_64}",
                "format": "text/x-nh",
                "id": file_tree,
                "name": os.path.basename(tree_path),
                "size": tree_size,
                "type": "tree",
            },
        },
        "filters": {
            "dataFilters": [],
            "chartFilters": [],
            "searchOperator": "includes",
            "searchValue": "",
            "selection": [],
            "selectionBreakdownField": None,
        },
        "maps": {},
        "meta": {"name": ProjectName},
        "trees": {
            "tree-1": {
                "alignLabels": False,
                "blockHeaderFontSize": 13,
                "blockPadding": 0,
                "blocks": ["MLST_SEQUENCE_TYPE", "ISOLATION_SOURCE"],
                "blockSize": 14,
                "branchLengthsDigits": 4,
                "controls": True,
                "fontSize": 16,
                "hideOrphanDataRows": False,
                "ids": None,
                "internalLabelsFilterRange": [0, 100],
                "internalLabelsFontSize": 13,
                "lasso": False,
                "nodeSize": 14,
                "path": None,
                "roundBranchLengths": True,
                "scaleLineAlpha": True,
                "showBlockHeaders": True,
                "showBlockLabels": False,
                "showBranchLengths": False,
                "showEdges": True,
                "showInternalLabels": False,
                "showLabels": True,
                "showLeafLabels": True,
                "showPiecharts": True,
                "showShapeBorders": True,
                "showShapes": True,
                "styleLeafLabels": False,
                "styleNodeEdges": False,
                "subtreeIds": None,
                "type": "rc",
                "title": "Tree",
                "labelField": "ID",
                "file": file_tree,
            }
        },
        "tables": {
            "table-1": {
                "displayMode": "cosy",
                "hideUnselected": False,
                "title": "Metadata",
                "paneId": "table-1",
                "columns": columns,
                "file": file_csv,
            }
        },
        "views": [],
        "schema": "https://microreact.org/schema/v1.json",
    }
    micro_path = os.path.join(os.getcwd(), ProjectName + ".microreact")

    with open(micro_path, "w") as microreact_file:
        json.dump(microreact_data, microreact_file, indent=2)

    return micro_path


# Main
def main() -> None:
    """
    Will take as input a folder containing 2 files, a tree file and a metadata CSV file
    and upload it to a new project named "Cronology" and get a publicly shareable link from
    microreact.org once the upload is successful

    """
    # NOTE: the docstring above is also the argparse description shown by
    # --help, so its text is runtime output and is left untouched.

    # Imported here rather than at module level so the validation helpers
    # above can be imported and tested without the HTTP stack installed.
    import requests
    from requests.packages import urllib3

    # Set logging.
    logging.basicConfig(
        format="\n"
        + "=" * 55
        + "\n%(asctime)s - %(levelname)s\n"
        + "=" * 55
        + "\n%(message)s\n\n",
        level=logging.DEBUG,
    )

    # Turn off SSL warnings
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    prog_name = os.path.basename(inspect.stack()[0].filename)

    parser = argparse.ArgumentParser(
        prog=prog_name, description=main.__doc__, formatter_class=MultiArgFormatClasses
    )

    # Add required arguments
    required = parser.add_argument_group("required arguments")

    required.add_argument(
        "-dir",
        dest="dir",
        default=False,
        required=True,
        help="UNIX path to diretory containing the tree and all other\ndataset and annotation files."
        + " Your tree file and metadata files must\nhave the extension .nwk and .csv",
    )
    required.add_argument(
        "-atp",
        dest="AccessTokenPath",
        default=False,
        required=True,
        help="The path to your API Access Token needed for uploading.\n"
        + "File must be a .txt file.",
    )
    parser.add_argument(
        "-name",
        dest="ProjectName",
        default="Project",
        required=False,
        help="Name for the project you want to upload",
    )

    # Define defaults
    args = parser.parse_args()
    upload_url = "https://microreact.org/api/projects/create"
    folder_path = args.dir
    ProjectName = args.ProjectName
    micro_url_info_path = os.path.join(os.getcwd(), "microreact_url.txt")

    with open(args.AccessTokenPath, "r") as token:
        # Strip the line terminator: a newline is not valid in a header value.
        atp = token.readline().strip()

    tree_path = check_file_extension(folder_path)
    metadata_csv = check_csv(folder_path)
    # Normalise the headers before encoding so the uploaded CSV carries the
    # same upper-case field names that create_json declares as table columns.
    uppercase_headers(folder_path)
    metadata_64 = encode_file(metadata_csv)
    tree_64 = encode_file(tree_path)

    # Prepare the data to be sent in the request
    micro_path = create_json(
        metadata_csv, tree_path, metadata_64, tree_64, ProjectName, folder_path
    )

    with open(micro_path) as project_file:
        data = json.load(project_file)

    # Additional parameters, including the MicroReact API key
    headers = {"Content-type": "application/json; charset=UTF-8", "Access-Token": atp}

    # Make the POST request to Microreact
    # NOTE(review): verify=False disables TLS certificate verification, so the
    # access token is sent without authenticating the server. Presumably this
    # is needed behind an intercepting proxy -- confirm, and prefer passing a
    # CA bundle path via `verify=` instead.
    try:
        r = requests.post(
            upload_url,
            json=data,
            headers=headers,
            verify=False,
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
    except requests.exceptions.RequestException as err:
        _fail(f"Microreact API call could not be completed: {err}\n")

    if not r.ok:
        if r.status_code == 400:
            logging.error("Microreact API call failed with response " + r.text + "\n")
        else:
            logging.error(
                "Microreact API call failed with unknown response code "
                + str(r.status_code)
                + "\n"
            )
        sys.exit(1)
    if r.status_code == 200:
        r_json = json.loads(r.text)
        with open(micro_url_info_path, "w") as out_fh:
            out_fh.write(
                f"Uploaded successfully!\n\nYour project URL:\n{r_json['url']}"
            )


if __name__ == "__main__":
    main()