Mercurial > repos > rliterman > csp2
comparison CSP2/bin/compileSNPResults.py @ 39:93393808f415
"planemo upload"
author | rliterman |
---|---|
date | Thu, 12 Dec 2024 13:53:15 -0500 |
parents | 01431fa12065 |
children |
comparison
equal
deleted
inserted
replaced
38:ee512a230a1e | 39:93393808f415 |
---|---|
465 log.write("-There were no QC warnings or failures\n") | 465 log.write("-There were no QC warnings or failures\n") |
466 | 466 |
467 # Output data | 467 # Output data |
468 | 468 |
469 # Mean assembly stats | 469 # Mean assembly stats |
470 isolate_mean_df.reset_index().to_csv(mean_isolate_file,sep='\t',index=False) | 470 with open(mean_isolate_file, 'w') as f: |
| | 471 isolate_mean_df.reset_index().to_csv(f,sep='\t',index=False) |
471 | 472 |
472 # Isolate assembly stats | 473 # Isolate assembly stats |
473 isolate_assembly_stats = isolate_stats.loc[isolate_stats['Measure'].isin(['Contig_Count','Assembly_Bases','L50','L90','N50','N90'])].drop(['Min','Max','StdDev','Count'],axis=1).rename(columns = {'Mean':'Value'}) | 474 isolate_assembly_stats = isolate_stats.loc[isolate_stats['Measure'].isin(['Contig_Count','Assembly_Bases','L50','L90','N50','N90'])].drop(['Min','Max','StdDev','Count'],axis=1).rename(columns = {'Mean':'Value'}) |
474 isolate_assembly_stats.to_csv(isolate_assembly_stats_file,sep='\t',index=False) | 475 with open(isolate_assembly_stats_file,'w') as f: |
| | 476 isolate_assembly_stats.to_csv(f,sep='\t',index=False) |
475 | 477 |
476 # Isolate alignment stats | 478 # Isolate alignment stats |
477 isolate_align_stats = pd.concat([align_stats,isolate_cocalled_stats,isolate_snp_stats,isolate_stdev_stats]).reset_index(drop=True) | 479 isolate_align_stats = pd.concat([align_stats,isolate_cocalled_stats,isolate_snp_stats,isolate_stdev_stats]).reset_index(drop=True) |
478 for col in ['Min', 'Mean', 'Max', 'StdDev', 'Zscore']: | 480 for col in ['Min', 'Mean', 'Max', 'StdDev', 'Zscore']: |
479 isolate_align_stats[col] = isolate_align_stats[col].astype("float").round(3) | 481 isolate_align_stats[col] = isolate_align_stats[col].astype("float").round(3) |
480 isolate_align_stats.to_csv(align_stats_file,sep='\t',index=False) | 482 with open(align_stats_file,'w') as f: |
| | 483 isolate_align_stats.to_csv(f,sep='\t',index=False) |
481 | 484 |
482 # Reference Assembly Stats | 485 # Reference Assembly Stats |
483 ref_align_summary_df = ref_summary_df.loc[(~ref_summary_df['Measure'].isin(['Contig_Count','Assembly_Bases','L50','L90','N50','N90'])) & (~pd.isna(ref_summary_df['Zscore']))] | 486 ref_align_summary_df = ref_summary_df.loc[(~ref_summary_df['Measure'].isin(['Contig_Count','Assembly_Bases','L50','L90','N50','N90'])) & (~pd.isna(ref_summary_df['Zscore']))] |
484 ref_mean_summary_df = ref_summary_df.loc[(~ref_summary_df['Measure'].isin(['Contig_Count','Assembly_Bases','L50','L90','N50','N90'])) & (pd.isna(ref_summary_df['Zscore']))].drop(['Zscore','QC'],axis =1) | 487 ref_mean_summary_df = ref_summary_df.loc[(~ref_summary_df['Measure'].isin(['Contig_Count','Assembly_Bases','L50','L90','N50','N90'])) & (pd.isna(ref_summary_df['Zscore']))].drop(['Zscore','QC'],axis =1) |
485 ref_mean_summary_df['Zscore'] = np.nan | 488 ref_mean_summary_df['Zscore'] = np.nan |
490 ref_mean_summary_df = pd.concat([ref_mean_summary_df,ref_summary_preserved_df]) | 493 ref_mean_summary_df = pd.concat([ref_mean_summary_df,ref_summary_preserved_df]) |
491 | 494 |
492 ref_isolate_align_stats = align_stats.loc[(align_stats['Isolate_Type'] == "Reference") & (align_stats['Measure'].isin(['Self_Aligned','Compare_Aligned']))].drop(['Isolate_Type'],axis=1).rename(columns = {'Isolate_ID':'Reference_ID'})[['Reference_ID','Measure','Mean','StdDev','Min','Max','Count','Zscore','QC']] | 495 ref_isolate_align_stats = align_stats.loc[(align_stats['Isolate_Type'] == "Reference") & (align_stats['Measure'].isin(['Self_Aligned','Compare_Aligned']))].drop(['Isolate_Type'],axis=1).rename(columns = {'Isolate_ID':'Reference_ID'})[['Reference_ID','Measure','Mean','StdDev','Min','Max','Count','Zscore','QC']] |
493 | 496 |
494 ref_mean_summary_stats = pd.concat([ref_mean_summary_df,ref_isolate_align_stats]) | 497 ref_mean_summary_stats = pd.concat([ref_mean_summary_df,ref_isolate_align_stats]) |
495 ref_mean_summary_stats.to_csv(ref_mean_summary_file,sep='\t',index=False) | 498 |
| | 499 with open(ref_mean_summary_file,'w') as f: |
| | 500 ref_mean_summary_stats.to_csv(f,sep='\t',index=False) |
496 | 501 |
497 end_time = time.time() | 502 end_time = time.time() |
498 | 503 |
499 with open(log_file,"a+") as log: | 504 with open(log_file,"a+") as log: |
500 log.write(f"\n- Completed compilation in {end_time - start_time:.2f} seconds\n") | 505 log.write(f"\n- Completed compilation in {end_time - start_time:.2f} seconds\n") |
503 log.write(f"\t- Saved isolate alignment data to {align_stats_file}\n") | 508 log.write(f"\t- Saved isolate alignment data to {align_stats_file}\n") |
504 log.write(f"\t- Saved reference summary data to {ref_mean_summary_file}\n") | 509 log.write(f"\t- Saved reference summary data to {ref_mean_summary_file}\n") |
505 | 510 |
506 # Comparisons if multiple refs | 511 # Comparisons if multiple refs |
507 if len(reference_ids) > 1: | 512 if len(reference_ids) > 1: |
508 comparison_df.to_csv(snp_comparison_file,sep="\t",index = False) | 513 with open(snp_comparison_file,"w") as f: |
| | 514 comparison_df.to_csv(f,sep="\t",index = False) |
509 log.write(f"\t- Saved SNP distance comparisons across references to {snp_comparison_file}\n") | 515 log.write(f"\t- Saved SNP distance comparisons across references to {snp_comparison_file}\n") |
510 | 516 |
511 # Failures/warnings | 517 # Failures/warnings |
512 if warn_fail_df.shape[0] > 0: | 518 if warn_fail_df.shape[0] > 0: |
513 warn_fail_df.to_csv(qc_file,sep="\t",index=False) | 519 with open(qc_file,"w") as f: |
| | 520 warn_fail_df.to_csv(f,sep="\t",index=False) |
514 log.write(f"\t- Saved QC warnings/failures to {qc_file}\n") | 521 log.write(f"\t- Saved QC warnings/failures to {qc_file}\n") |