CSP2/bin/compileMUMmer.py @ 28:893a6993efe3
"planemo upload" (rliterman, Wed, 04 Dec 2024 13:48:13 -0500; parent 01431fa12065)
#!/usr/bin/env python3

# compileMUMmer.py: compiles MUMmer output for a query/reference pair (.report, .1coords,
# and .snps files) into a single .snpdiffs file for CSP2, along with assembly statistics
# and kmer-based similarity estimates.

import sys
import os
import pandas as pd
import re
from pybedtools import BedTool,helpers,Interval
import warnings
import numpy as np
import hashlib
from Bio import SeqIO
import subprocess
import uuid
import traceback
import shutil
import time
import argparse

warnings.filterwarnings("ignore")

# Parse the MUMmer .report file and return assembly, alignment, SNP, and rearrangement statistics
def parseMUmmerReport(mum_report_dir,report_id):
    report_data = []

    with open(mum_report_dir+"/"+report_id+".report", 'r') as report_file:
        for line in report_file.readlines():
            split_line = re.split(r'\s+', line.strip())
            if len(split_line) == 3:
                report_data.append(
                    {
                        'Measure': split_line[0].split("(", 1)[0],
                        'Ref_Value':split_line[1].split("(", 1)[0],
                        'Query_Value':split_line[2].split("(", 1)[0]
                    })

    report_data = pd.DataFrame(report_data)
    report_data = report_data[report_data['Measure'].isin(['TotalSeqs','TotalBases','AlignedBases',
                                                           'TotalSNPs','TotalGSNPs','TotalIndels','TotalGIndels',
                                                           'Breakpoints','Relocations','Translocations','Inversions',
                                                           'Insertions','TandemIns'])]
    report_data = report_data.drop_duplicates(subset=['Measure']) # Drop M-to-M

    report_data = report_data.astype({"Ref_Value":"int","Query_Value":"int"})

    # Fetch assembly data
    ref_bases = report_data[report_data['Measure'] == 'TotalBases']['Ref_Value'].iloc[0]
    query_bases = report_data[report_data['Measure'] == 'TotalBases']['Query_Value'].iloc[0]

    # Fetch alignment data
    ref_aligned = report_data[report_data['Measure'] == 'AlignedBases']['Ref_Value'].iloc[0]
    query_aligned = report_data[report_data['Measure'] == 'AlignedBases']['Query_Value'].iloc[0]

    percent_ref_aligned = 100*(float(ref_aligned)/ref_bases)
    percent_query_aligned = 100*(float(query_aligned)/query_bases)

    # Fetch SNP data
    g_snps = report_data[report_data['Measure'] == 'TotalGSNPs']['Ref_Value'].iloc[0]
    g_indels = report_data[report_data['Measure'] == 'TotalGIndels']['Ref_Value'].iloc[0]

    ref_breakpoints = report_data[report_data['Measure'] == 'Breakpoints']['Ref_Value'].iloc[0]
    query_breakpoints = report_data[report_data['Measure'] == 'Breakpoints']['Query_Value'].iloc[0]

    ref_relocations = report_data[report_data['Measure'] == 'Relocations']['Ref_Value'].iloc[0]
    query_relocations = report_data[report_data['Measure'] == 'Relocations']['Query_Value'].iloc[0]

    ref_translocations = report_data[report_data['Measure'] == 'Translocations']['Ref_Value'].iloc[0]
    query_translocations = report_data[report_data['Measure'] == 'Translocations']['Query_Value'].iloc[0]

    ref_inversions = report_data[report_data['Measure'] == 'Inversions']['Ref_Value'].iloc[0]
    query_inversions = report_data[report_data['Measure'] == 'Inversions']['Query_Value'].iloc[0]

    ref_insertions = report_data[report_data['Measure'] == 'Insertions']['Ref_Value'].iloc[0]
    query_insertions = report_data[report_data['Measure'] == 'Insertions']['Query_Value'].iloc[0]

    ref_tandem = report_data[report_data['Measure'] == 'TandemIns']['Ref_Value'].iloc[0]
    query_tandem = report_data[report_data['Measure'] == 'TandemIns']['Query_Value'].iloc[0]

    return [ref_bases,percent_ref_aligned,
            query_bases,percent_query_aligned,
            g_snps,g_indels,
            ref_breakpoints,query_breakpoints,
            ref_relocations,query_relocations,
            ref_translocations,query_translocations,
            ref_inversions,query_inversions,
            ref_insertions,query_insertions,
            ref_tandem,query_tandem]

# Parse the MUMmer .1coords file into 0-based alignment intervals and append any
# reference/query regions that were not covered by an alignment
def parseMUmmerCoords(mum_coords_dir,report_id,reference_chr_bed,query_chr_bed):

    coords_file = pd.read_csv(mum_coords_dir+"/"+report_id+".1coords",sep="\t",index_col=False,
                              names=['Ref_Start','Ref_End','Query_Start','Query_End',
                                     'Ref_Aligned','Query_Aligned','Perc_Iden',
                                     'Ref_Length','Query_Length','Ref_Cov',
                                     'Query_Cov','Ref_Contig','Query_Contig'],
                              dtype={
                                  'Ref_Start': int, 'Ref_End': int,
                                  'Query_Start': int, 'Query_End': int,
                                  'Ref_Aligned': int, 'Query_Aligned': int,
                                  'Perc_Iden': float,
                                  'Ref_Length': int, 'Query_Length': int,
                                  'Ref_Cov': float, 'Query_Cov': float,
                                  'Ref_Contig': str, 'Query_Contig': str
                              })

    if coords_file.shape[0] > 0:
        # Convert to 0-based starts and normalize query coordinates
        coords_file['Ref_Start'] = (coords_file['Ref_Start'] - 1)
        coords_file = coords_file.apply(adjust_query_row, axis=1)

        merged_ref_bed = BedTool.from_dataframe(coords_file[['Ref_Contig','Ref_Start','Ref_End']]).sort().merge()
        merged_query_bed = BedTool.from_dataframe(coords_file[['Query_Contig','Query_Start','Query_End']]).sort().merge()

        covered_ref_regions = reference_chr_bed.intersect(merged_ref_bed)
        uncovered_ref_regions = reference_chr_bed.subtract(merged_ref_bed)

        covered_query_regions = query_chr_bed.intersect(merged_query_bed)
        uncovered_query_regions = query_chr_bed.subtract(merged_query_bed)
    else:
        merged_ref_bed = BedTool([])
        merged_query_bed = BedTool([])

        covered_ref_regions = BedTool([])
        uncovered_ref_regions = BedTool([])

        covered_query_regions = BedTool([])
        uncovered_query_regions = BedTool([])

    ref_genome_size = calculate_total_length(reference_chr_bed)
    ref_covered_size = calculate_total_length(covered_ref_regions)
    ref_uncovered_size = calculate_total_length(uncovered_ref_regions)

    query_genome_size = calculate_total_length(query_chr_bed)
    query_covered_size = calculate_total_length(covered_query_regions)
    query_uncovered_size = calculate_total_length(uncovered_query_regions)

    assert ref_covered_size + ref_uncovered_size == ref_genome_size
    assert query_covered_size + query_uncovered_size == query_genome_size

    # Create return_df
    return_columns = ['Ref_Contig','Ref_Start','Ref_End','Ref_Length','Ref_Aligned',
                      'Query_Contig','Query_Start','Query_End','Query_Length','Query_Aligned','Perc_Iden']
    return_df = pd.DataFrame(columns=return_columns)

    if coords_file.shape[0] > 0:
        return_df = pd.concat([return_df,coords_file[return_columns]],ignore_index=True)

    if uncovered_ref_regions.count() > 0:
        return_uncovered_df = uncovered_ref_regions.to_dataframe().sort_values(['chrom','start']).rename(columns={'chrom':'Ref_Contig','start':'Ref_Start','end':'Ref_End'}).assign(Ref_Aligned = ".",Query_Aligned = ".",Query_Contig=".",Query_Start=".",Query_End=".",Ref_Length = ".",Query_Length=".",Perc_Iden = ".")
        return_df = pd.concat([return_df,return_uncovered_df[return_columns]],ignore_index=True)

    if uncovered_query_regions.count() > 0:
        return_uncovered_df = uncovered_query_regions.to_dataframe().sort_values(['chrom','start']).rename(columns={'chrom':'Query_Contig','start':'Query_Start','end':'Query_End'}).assign(Ref_Aligned = ".",Query_Aligned = ".",Ref_Contig=".",Ref_Start=".",Ref_End=".",Ref_Length = ".",Query_Length=".",Perc_Iden = ".")
        return_df = pd.concat([return_df,return_uncovered_df[return_columns]],ignore_index=True)

    return return_df

# Convert query coordinates to 0-based intervals, flipping reverse-strand alignments
# so that Query_Start is always less than Query_End
def adjust_query_row(row):
    if set(['Query_Start', 'Query_End', 'Query_Contig']).issubset(row.index):
        if row['Query_Start'] > row['Query_End']:
            row['Query_Start'], row['Query_End'] = row['Query_End'] - 1, row['Query_Start']
        else:
            row['Query_Start'] = row['Query_Start'] - 1
    return row

# Parse the MUMmer .snps file against the alignment coordinates, classifying each
# position as a SNP, an indel, or an invalid (non-ACGT) call
def parseMUmmerSNPs(mum_snps_dir,report_id,coords_file):

    return_columns = ['Ref_Contig','Start_Ref','Ref_Pos',
                      'Query_Contig','Start_Query','Query_Pos',
                      'Ref_Loc','Query_Loc',
                      'Ref_Start','Ref_End',
                      'Query_Start','Query_End',
                      'Ref_Base','Query_Base',
                      'Dist_to_Ref_End','Dist_to_Query_End',
                      'Ref_Aligned','Query_Aligned',
                      'Query_Direction','Perc_Iden','Cat']

    snp_file = pd.read_csv(mum_snps_dir+"/"+report_id+".snps",sep="\t",index_col=False,
                           names=['Ref_Pos','Ref_Base','Query_Base','Query_Pos',
                                  'SNP_Buffer','Dist_to_End','Ref_Length','Query_Length',
                                  'Ref_Direction','Query_Direction','Ref_Contig','Query_Contig'])

    int_columns = ['Ref_Pos', 'Query_Pos', 'Dist_to_End','Ref_Length', 'Query_Length']
    contig_columns = ['Ref_Contig','Query_Contig']

    if snp_file.shape[0] == 0:
        return pd.DataFrame(columns=return_columns)
    else:
        for col in int_columns:
            snp_file.loc[:, col] = snp_file.loc[:, col].astype(float).astype(int)

        for col in contig_columns:
            snp_file.loc[:, col] = snp_file.loc[:, col].astype(str)
            coords_file.loc[:, col] = coords_file.loc[:, col].astype(str)

        snp_file['Start_Ref'] = snp_file['Ref_Pos'] - 1
        snp_file['Start_Query'] = snp_file['Query_Pos'] - 1

        snp_file['Dist_to_Ref_End'] = [min([x,y]) for x,y in zip(snp_file['Ref_Pos'], snp_file['Ref_Length'] - snp_file['Ref_Pos'])]
        snp_file['Dist_to_Query_End'] = [min([x,y]) for x,y in zip(snp_file['Query_Pos'],snp_file['Query_Length'] - snp_file['Query_Pos'])]

        snp_file['Ref_Loc'] = ["/".join([str(x[0]),str(x[1])]) for x in list(zip(snp_file.Ref_Contig, snp_file.Ref_Pos))]
        snp_file['Query_Loc'] = ["/".join([str(x[0]),str(x[1])]) for x in list(zip(snp_file.Query_Contig, snp_file.Query_Pos))]

        total_snp_count = snp_file.shape[0]

        # Split positions into valid SNPs, indels, and invalid (non-ACGT) calls
        valid_bases = ['a', 'A', 'c', 'C', 'g', 'G', 't', 'T',"."]

        invalid_file = snp_file[(~snp_file['Ref_Base'].isin(valid_bases)) | (~snp_file['Query_Base'].isin(valid_bases))]
        snp_file = snp_file[(snp_file['Ref_Base'].isin(valid_bases)) & (snp_file['Query_Base'].isin(valid_bases))]

        indel_file = snp_file[(snp_file['Query_Base'] == ".") | (snp_file['Ref_Base'] == ".")]
        snp_file = snp_file[~((snp_file['Query_Base'] == ".") | (snp_file['Ref_Base'] == "."))]

        # If there are 10,000 or more sites, return counts only rather than per-site coordinates
        if total_snp_count >= 10000:
            return (snp_file.shape[0],indel_file.shape[0],invalid_file.shape[0])
        else:
            if snp_file.shape[0] == 0:
                snp_coords = pd.DataFrame(columns=return_columns)
            else:
                snp_coords = makeSNPCoords(snp_file,coords_file,snp_file.shape[0])
                snp_coords['Cat'] = "SNP"

            if indel_file.shape[0] == 0:
                indel_coords = pd.DataFrame(columns=return_columns)
            else:
                indel_coords = makeSNPCoords(indel_file,coords_file,indel_file.shape[0])
                indel_coords['Cat'] = "Indel"

            if invalid_file.shape[0] == 0:
                invalid_coords = pd.DataFrame(columns=return_columns)
            else:
                invalid_coords = makeSNPCoords(invalid_file,coords_file,invalid_file.shape[0])
                invalid_coords['Cat'] = "Invalid"

            return_df = pd.concat([snp_coords,indel_coords,invalid_coords],ignore_index=True)[return_columns]

            assert return_df.shape[0] == total_snp_count
            return return_df

# Attach alignment coordinates to each variant site; if a site falls in more than one
# alignment, keep the hit from the longest, most identical alignment
def makeSNPCoords(snp_df, coords_df, row_count):
    snp_coords = pd.merge(snp_df, coords_df, how='left').dropna()

    condition = ((snp_coords['Ref_Start'] <= snp_coords['Ref_Pos']) &
                 (snp_coords['Ref_Pos'] <= snp_coords['Ref_End']) &
                 (snp_coords['Query_Start'] <= snp_coords['Query_Pos']) &
                 (snp_coords['Query_Pos'] <= snp_coords['Query_End']))
    snp_coords = snp_coords[condition]

    if row_count != snp_coords.shape[0]:
        snp_coords.sort_values(by=['Ref_Aligned', 'Perc_Iden'], ascending=False, inplace=True)
        snp_coords.drop_duplicates(subset=['Ref_Loc','Query_Loc'], keep='first', inplace=True)

    return snp_coords

# Return [path, contig count, assembly length, N50, N90, L50, L90, SHA256] for a FASTA file
def fasta_info(file_path):
    records = list(SeqIO.parse(file_path, 'fasta'))
    contig_count = int(len(records))
    lengths = sorted([len(record) for record in records], reverse=True)
    assembly_bases = sum(lengths)

    with open(file_path, 'rb') as file:
        sha256 = hashlib.sha256(file.read()).hexdigest()
    # Compute N50/N90 and L50/L90 from the sorted contig lengths
    cumulative_length = 0
    n50 = None
    n90 = None
    l50 = None
    l90 = None

    for i, length in enumerate(lengths, start=1):
        cumulative_length += length
        if cumulative_length >= assembly_bases * 0.5 and n50 is None:
            n50 = length
            l50 = i
        if cumulative_length >= assembly_bases * 0.9 and n90 is None:
            n90 = length
            l90 = i
        if n50 is not None and n90 is not None:
            break

    return [file_path,contig_count,assembly_bases,n50,n90,l50,l90,sha256]

# Build a BedTool with one full-length interval per contig in a FASTA file
def fasta_to_bedtool(fasta_file):
    intervals = []
    for record in SeqIO.parse(fasta_file, "fasta"):
        chrom_name = record.id
        chrom_length = len(record.seq)
        interval = Interval(chrom_name, 0, chrom_length)
        intervals.append(interval)
    bedtool = BedTool(intervals).sort()
    return bedtool

def calculate_total_length(bedtool):
    return sum(len(interval) for interval in bedtool)

# Run a kmer-counting command, retrying on timeouts or errors, and return its output lines as a set
def get_kmers(command, log_file, retries=10, delay=5, timeout=30):
    kmer_set = set()

    for attempt in range(retries):
        try:
            process = subprocess.run(command, capture_output=True, text=True, timeout=timeout)
            kmer_set.update(process.stdout.strip().split('\n'))
            if len(kmer_set) > 1:
                return kmer_set
        except subprocess.TimeoutExpired:
            with open(log_file, "a") as log:
                log.write(f"Kmer command timed out after {timeout} seconds\n")
            time.sleep(delay)
        except subprocess.CalledProcessError as e:
            with open(log_file, "a") as log:
                log.write(f"Kmer command errored out: {e}\n")
            time.sleep(delay)

        # Wait before retrying if this is not the last attempt
        if attempt < retries - 1:
            time.sleep(delay)
        else:
            raise RuntimeError(f"Command failed after {retries} attempts: {command}")

    return None

# Estimate genome similarity by comparing kmercountexact.sh (BBTools) kmer sets
# from the query and reference assemblies
def compare_kmers(query_file,reference_file,log_file):

    ref_command = ["kmercountexact.sh", f"in={reference_file}",
                   "threads=1", "fastadump=f", "out=stdout", "|", "cut", "-f1"]
    query_command = ["kmercountexact.sh", f"in={query_file}",
                     "threads=1", "fastadump=f", "out=stdout", "|", "cut", "-f1"]

    ref_kmers = get_kmers(ref_command,log_file)
    with open(log_file, "a") as log:
        log.write(f"Fetched reference kmers...\n")

    query_kmers = get_kmers(query_command,log_file)
    with open(log_file, "a") as log:
        log.write(f"Fetched query kmers...\n")

    intersection = len(ref_kmers.intersection(query_kmers))
    similarity = 100*(intersection/(len(ref_kmers) + len(query_kmers) - intersection))
    unique_ref = len(ref_kmers.difference(query_kmers))
    unique_query = len(query_kmers.difference(ref_kmers))

    return [len(ref_kmers),len(query_kmers),
            unique_ref,unique_query,
            intersection,similarity]

#### 01: Read in arguments ####
run_failed = False

parser = argparse.ArgumentParser(description='CSP2 MUMmer Compilation')
parser.add_argument('--query', type=str, help='Query')
parser.add_argument('--query_fasta', type=str, help='Query Fasta')
parser.add_argument('--reference', type=str, help='Reference')
parser.add_argument('--reference_fasta', type=str, help='Reference Fasta')
parser.add_argument('--mummer_dir', type=str, help='MUMmer Directory')
parser.add_argument('--snpdiffs_dir', type=str, help='snpdiffs Directory')
parser.add_argument('--temp_dir', type=str, default='', help='Temporary Directory')
parser.add_argument('--log_file', type=str, help='Log File')
args = parser.parse_args()

query = args.query
query_fasta = args.query_fasta
reference = args.reference
reference_fasta = args.reference_fasta

mummer_dir = os.path.normpath(os.path.abspath(args.mummer_dir))
snpdiffs_dir = os.path.normpath(os.path.abspath(args.snpdiffs_dir))
log_file = os.path.normpath(os.path.abspath(args.log_file))

with open(log_file, "w") as log:
    log.write(f"CSP2 MUMmer Compilation Log: {query}__vs__{reference}\n")
log.write(f"\t- Query Fasta: {query_fasta}\n") log.write(f"\t- Reference Fasta: {reference_fasta}\n") log.write(f"\t - MUMmer Directory: {mummer_dir}\n") log.write(f"\t - snpdiffs Directory: {snpdiffs_dir}\n") # Set TMP global temp_dir if args.temp_dir != "": random_temp_id = str(uuid.uuid4()) temp_dir = f"{os.path.normpath(os.path.abspath(args.temp_dir))}/{random_temp_id}" try: os.mkdir(temp_dir) helpers.set_tempdir(temp_dir) with open(log_file, "a") as log: log.write(f"\t - Temporary Directory: {temp_dir}\n") except OSError as e: run_failed = True print(f"Error: Failed to create directory '{temp_dir}': {e}") else: temp_dir = "" with open(log_file, "a") as log: log.write("-------------------------------------------------------\n\n") try: # Get query data query_data = [query] + fasta_info(query_fasta) query_string = [x+":"+str(y) for x,y in zip(['Query_ID','Query_Assembly','Query_Contig_Count','Query_Assembly_Bases', 'Query_N50','Query_N90','Query_L50','Query_L90','Query_SHA256'],query_data)] with open(log_file, "a") as log: log.write("Fetched Query Data...\n") # Get reference data reference_data = [reference] + fasta_info(reference_fasta) reference_string = [x+":"+str(y) for x,y in zip(['Reference_ID','Reference_Assembly','Reference_Contig_Count','Reference_Assembly_Bases', 'Reference_N50','Reference_N90','Reference_L50','Reference_L90','Reference_SHA256'],reference_data)] with open(log_file, "a") as log: log.write("Fetched Reference Data...\n") # Get kmer distance [ref_kmers,query_kmers, unique_ref_kmers,unique_query_kmers, kmer_intersection,kmer_similarity] = compare_kmers(query_fasta,reference_fasta,log_file) with open(log_file, "a") as log: log.write("Fetched Kmer Data...\n") # Create reference BED file using fasta seq lengths query_chr_bed = fasta_to_bedtool(query_fasta) reference_chr_bed = fasta_to_bedtool(reference_fasta) with open(log_file, "a") as log: log.write("Created BED files...\n") # Set report ID report_id = query + "__vs__" + reference snpdiffs_file = snpdiffs_dir + "/" + report_id + ".snpdiffs" # Create NA variables for all downstream options median_percent_identity = "NA" median_alignment_length = "NA" total_snp_count = "NA" total_indel_count = "NA" total_invalid_count = "NA" #### 02: Read in MUMmer report data #### [ref_bases,percent_ref_aligned, query_bases,percent_query_aligned, g_snps,g_indels, ref_breakpoints,query_breakpoints, ref_relocations,query_relocations, ref_translocations,query_translocations, ref_inversions,query_inversions, ref_insertions,query_insertions, ref_tandem,query_tandem] = parseMUmmerReport(mummer_dir,report_id) with open(log_file, "a") as log: log.write("Parsed MUMmer report...\n") if percent_ref_aligned > 0: #### 03: Process MUMmer coords file #### coords_file = parseMUmmerCoords(mummer_dir,report_id,reference_chr_bed,query_chr_bed) with open(log_file, "a") as log: log.write("Parsed MUMmer coords...\n") aligned_coords = coords_file[~((coords_file['Query_Contig'] == ".") | (coords_file['Ref_Contig'] == "."))] if aligned_coords.shape[0] > 0: aligned_coords['Perc_Iden'] = aligned_coords['Perc_Iden'].astype(float) aligned_coords['Ref_Aligned'] = aligned_coords['Ref_Aligned'].astype(int) median_percent_identity = np.median(aligned_coords['Perc_Iden']) median_alignment_length = np.median(aligned_coords['Ref_Aligned']) ##### 04: Process MUMmer SNP file #### processed_snps = parseMUmmerSNPs(mummer_dir,report_id,aligned_coords) with open(log_file, "a") as log: log.write("Parsed MUMmer SNPs...\n") # Check if processed_snps is a df or a tuple if 
        if isinstance(processed_snps, tuple):
            total_snp_count,total_indel_count,total_invalid_count = processed_snps
        else:
            total_snp_count = processed_snps[processed_snps['Cat'] == "SNP"].shape[0]
            total_indel_count = processed_snps[processed_snps['Cat'] == "Indel"].shape[0]
            total_invalid_count = processed_snps[processed_snps['Cat'] == "Invalid"].shape[0]

    # Clean up pybedtools temp
    helpers.cleanup(verbose=False,remove_all = False)
    with open(log_file, "a") as log:
        log.write("Cleaned up TMP...\n")

    # Create header
    percent_ref_aligned = f"{percent_ref_aligned:.2f}" if percent_ref_aligned != "NA" else percent_ref_aligned
    percent_query_aligned = f"{percent_query_aligned:.2f}" if percent_query_aligned != "NA" else percent_query_aligned
    median_percent_identity = f"{median_percent_identity:.2f}" if median_percent_identity != "NA" else median_percent_identity
    median_alignment_length = f"{median_alignment_length:.2f}" if median_alignment_length != "NA" else median_alignment_length
    total_snp_count = f"{total_snp_count:.0f}" if total_snp_count != "NA" else total_snp_count
    total_indel_count = f"{total_indel_count:.0f}" if total_indel_count != "NA" else total_indel_count
    total_invalid_count = f"{total_invalid_count:.0f}" if total_invalid_count != "NA" else total_invalid_count

    ref_breakpoints = f"{ref_breakpoints:.0f}"
    ref_relocations = f"{ref_relocations:.0f}"
    ref_translocations = f"{ref_translocations:.0f}"
    ref_inversions = f"{ref_inversions:.0f}"
    ref_insertions = f"{ref_insertions:.0f}"
    ref_tandem = f"{ref_tandem:.0f}"

    query_breakpoints = f"{query_breakpoints:.0f}"
    query_relocations = f"{query_relocations:.0f}"
    query_translocations = f"{query_translocations:.0f}"
    query_inversions = f"{query_inversions:.0f}"
    query_insertions = f"{query_insertions:.0f}"
    query_tandem = f"{query_tandem:.0f}"

    g_snps = f"{g_snps:.0f}"
    g_indels = f"{g_indels:.0f}"

    ref_kmers = f"{ref_kmers:.0f}"
    query_kmers = f"{query_kmers:.0f}"
    unique_ref_kmers = f"{unique_ref_kmers:.0f}"
    unique_query_kmers = f"{unique_query_kmers:.0f}"
    kmer_intersection = f"{kmer_intersection:.0f}"
    kmer_similarity = f"{kmer_similarity:.2f}"

    snpdiffs_header=[]
    snpdiffs_header.append("#\t" +
                           "\t".join(query_string) + "\t" +
                           "\t".join(reference_string) + "\t" +
                           "\t".join([
                               "SNPs:"+total_snp_count,
                               "Reference_Percent_Aligned:"+percent_ref_aligned,
                               "Query_Percent_Aligned:"+percent_query_aligned,
                               "Median_Percent_Identity:"+median_percent_identity,
                               "Median_Alignment_Length:"+median_alignment_length,
                               "Kmer_Similarity:"+kmer_similarity,
                               "Shared_Kmers:"+kmer_intersection,
                               "Reference_Unique_Kmers:"+unique_ref_kmers,
                               "Query_Unique_Kmers:"+unique_query_kmers,
                               "Reference_Breakpoints:"+ref_breakpoints,
                               "Query_Breakpoints:"+query_breakpoints,
                               "Reference_Relocations:"+ref_relocations,
                               "Query_Relocations:"+query_relocations,
                               "Reference_Translocations:"+ref_translocations,
                               "Query_Translocations:"+query_translocations,
                               "Reference_Inversions:"+ref_inversions,
                               "Query_Inversions:"+query_inversions,
                               "Reference_Insertions:"+ref_insertions,
                               "Query_Insertions:"+query_insertions,
                               "Reference_Tandem:"+ref_tandem,
                               "Query_Tandem:"+query_tandem,
                               "Indels:"+total_indel_count,
                               "Invalid:"+total_invalid_count,
                               "gSNPs:"+g_snps,
                               "gIndels:"+g_indels]))

    with open(log_file, "a") as log:
        log.write("Prepped snpdiffs output...\n")

    with open(snpdiffs_file,"w") as file:
        file.write("\n".join(snpdiffs_header) + "\n")
        for index, row in coords_file.iterrows():
            file.write("##\t" + "\t".join(map(str, row))+"\n")
        if isinstance(processed_snps, tuple):
            pass
        else:
            for index, row in processed_snps.iterrows():
file.write("\t".join(map(str, row))+"\n") with open(log_file, "a") as log: log.write("Saved snpdiffs file...\n") print(",".join([query,reference,snpdiffs_file])) except: run_failed = True print("Exception occurred:\n", traceback.format_exc()) finally: helpers.cleanup(verbose=False, remove_all=False) if temp_dir != "": shutil.rmtree(temp_dir) if run_failed: sys.exit(1)