Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/pytools/lib/common.py @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 #! /usr/bin/env python | |
2 # -*- coding: utf-8 -*- | |
3 """ | |
4 Function definitions common to all programs. | |
5 | |
6 | |
7 """ | |
8 | |
9 ## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
10 ## libraries to use | |
11 | |
12 #import re | |
13 import os | |
14 import time | |
15 import sys | |
16 #import getpass | |
17 import logging | |
18 #from colorlog import ColoredFormatter | |
19 # import EnvironmentModules # get_read_count_fastq | |
20 from subprocess import Popen, PIPE | |
21 from email.mime.text import MIMEText | |
22 | |
23 | |
24 ## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
25 ## function definitions | |
26 | |
27 | |
28 ''' | |
29 creates a logging instance | |
30 https://docs.python.org/2/howto/logging.html | |
31 https://pypi.python.org/pypi/colorlog | |
32 ''' | |
33 def get_logger(log_name, log_file, log_level = "INFO", stdout = False, color = False): | |
34 log = logging.getLogger(log_name) | |
35 handler = None | |
36 if stdout: | |
37 handler = logging.StreamHandler(sys.stdout) | |
38 else: | |
39 handler = logging.FileHandler(log_file) | |
40 | |
41 formatter = logging.Formatter('%(filename)-15s:%(process)d %(asctime)s %(levelname)s: %(message)s') | |
42 | |
43 if color and 1==2: | |
44 """ | |
45 formatter = ColoredFormatter("%(filename)-15s:%(process)d %(asctime)s %(log_color)s%(levelname)s: %(message)s", datefmt=None, reset=True, | |
46 log_colors={ | |
47 'DEBUG': 'blue', | |
48 'INFO': 'green', | |
49 'WARNING': 'yellow', | |
50 'ERROR': 'red', | |
51 'CRITICAL': 'red, bg_white', | |
52 }, | |
53 secondary_log_colors={}, | |
54 style='%') | |
55 Not working in conda - 2017-04-29 | |
56 """ | |
57 handler.setFormatter(formatter) | |
58 | |
59 log.addHandler(handler) | |
60 log.setLevel(log_level) | |
61 | |
62 return log | |
63 | |
64 | |
65 ''' | |
66 Checkpoint the status plus a timestamp | |
67 - appends the status | |
68 | |
69 @param status_log: /path/to/status.log (or whatever you name it) | |
70 @param status: status to append to status.log | |
71 | |
72 ''' | |
73 def checkpoint_step(status_log, status): | |
74 status_line = "%s,%s\n" % (status, time.strftime("%Y-%m-%d %H:%M:%S")) | |
75 | |
76 with open(status_log, "a") as myfile: | |
77 myfile.write(status_line) | |
78 | |
79 ''' | |
80 returns the last step (status) from the pipeline | |
81 @param status_log: /path/to/status.log (or whatever you name it) | |
82 @param log: logger object | |
83 | |
84 @return last status in the status log, "start" if nothing there | |
85 ''' | |
86 def get_status(status_log, log = None): | |
87 #status_log = "%s/%s" % (output_path, "test_status.log") | |
88 | |
89 status = "start" | |
90 timestamp = str(time.strftime("%Y-%m-%d %H:%M:%S")) | |
91 | |
92 if os.path.isfile(status_log): | |
93 fh = open(status_log, 'r') | |
94 lines = fh.readlines() | |
95 fh.close() | |
96 | |
97 for line in lines: | |
98 if line.startswith('#'): continue | |
99 line_list = line.split(",") | |
100 assert len(line_list) == 2 | |
101 status = str(line_list[0]).strip() | |
102 timestamp = str(line_list[1]).strip() | |
103 | |
104 if not status: | |
105 status = "start" | |
106 | |
107 if log: | |
108 log.info("Last checkpointed step: %s (%s)", status, timestamp) | |
109 | |
110 else: | |
111 if log: | |
112 log.info("Cannot find status.log (%s), assuming new run", status_log) | |
113 | |
114 status = status.strip().lower() | |
115 | |
116 return status | |
117 | |
118 | |
119 | |
120 ''' | |
121 run a command from python | |
122 | |
123 @param cmd: command to run | |
124 @param live: False = run in dry mode (print command), True = run normally | |
125 @param log: logger object | |
126 | |
127 @return std_out, std_err, exit_code | |
128 ''' | |
129 def run_command(cmd, live=False, log=None): | |
130 stdOut = None | |
131 stdErr = None | |
132 exitCode = None | |
133 #end = 0 | |
134 #elapsedSec = 0 | |
135 | |
136 if cmd: | |
137 if not live: | |
138 stdOut = "Not live: cmd = '%s'" % (cmd) | |
139 exitCode = 0 | |
140 else: | |
141 if log: log.info("cmd: %s" % (cmd)) | |
142 | |
143 p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True) | |
144 stdOut, stdErr = p.communicate() | |
145 exitCode = p.returncode | |
146 | |
147 if log: | |
148 log.info("Return values: exitCode=" + str(exitCode) + ", stdOut=" + str(stdOut) + ", stdErr=" + str(stdErr)) | |
149 if exitCode != 0: | |
150 log.warn("- The exit code has non-zero value.") | |
151 | |
152 else: | |
153 if log: | |
154 log.error("- No command to run.") | |
155 return None, None, -1 | |
156 | |
157 return stdOut, stdErr, exitCode | |
158 | |
159 | |
160 ''' | |
161 replacement for run_command | |
162 - includes logging, convert_cmd & post_mortem | |
163 ''' | |
164 | |
165 def run_cmd(cmd, log=None): | |
166 | |
167 std_out = None | |
168 std_err = None | |
169 exit_code = 0 | |
170 | |
171 if cmd: | |
172 # convert to work on genepool/denovo | |
173 cmd = convert_cmd(cmd) | |
174 | |
175 if log: | |
176 log.info("- cmd: %s", cmd) | |
177 | |
178 p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True) | |
179 std_out, std_err = p.communicate() | |
180 exit_code = p.returncode | |
181 | |
182 post_mortem_cmd(cmd, exit_code, std_out, std_err, log) | |
183 | |
184 return std_out, std_err, exit_code | |
185 | |
186 | |
187 ''' | |
188 Simple function to output to the log what happened only if exit code > 0 | |
189 | |
190 Typical usage: | |
191 std_out, std_err, exit_code = run_command(cmd, True) | |
192 post_mortem_cmd(cmd, exit_code, std_out, std_err) | |
193 | |
194 ''' | |
195 def post_mortem_cmd(cmd, exit_code, std_out, std_err, log = None): | |
196 if exit_code > 0: | |
197 if log: | |
198 log.error("- cmd failed: %s", cmd) | |
199 log.error("- exit code: %s", exit_code) | |
200 | |
201 else: | |
202 print "- cmd failed: %s" % (cmd) | |
203 print "- exit code: %s" % (exit_code) | |
204 | |
205 | |
206 if std_out: | |
207 if log: | |
208 log.error("- std_out: %s", std_out) | |
209 else: | |
210 print "- std_out: %s" % (std_out) | |
211 | |
212 if std_err: | |
213 if log: | |
214 log.error("- std_err: %s", std_err) | |
215 else: | |
216 print "- std_err: %s" % (std_err) | |
217 | |
218 | |
219 ''' | |
220 Convert command to use genepool or denovo (shifter) to run | |
221 replace #placeholder; with shifter or module load command | |
222 #placeholder.v; should specify the version to use | |
223 | |
224 This should be the only place in the pipelines that specifies the images/modules translation | |
225 ''' | |
226 def convert_cmd(cmd): | |
227 new_cmd = cmd | |
228 | |
229 shifter_img = { | |
230 "#bbtools" : "shifter --image=bryce911/bbtools ", | |
231 "#pigz" : "module load pigz;", | |
232 "#jamo" : "shifter --image=registry.services.nersc.gov/htandra/jamo_dev:1.0 ", # works, but would like simple module to use - have one on Denovo but not Cori | |
233 "#gnuplot" : "shifter --image=bryce911/bbtools ", # (1) | |
234 "#spades/3.9.0" : "shifter --image=bryce911/spades3.9.0 ", | |
235 "#spades/3.10.1" : "shifter --image=bryce911/spades3.10.1 ", | |
236 "#spades/3.11.0" : "shifter --image=bryce911/spades-3.11.0 ", # GAA-3383 | |
237 "#spades/3.11.1-check" : "shifter --image=bryce911/spades3.11.1-check ", # development | |
238 "#prodigal/2.6.3" : "shifter --image=registry.services.nersc.gov/jgi/prodigal ", # RQCSUPPORT-1318 | |
239 "#prodigal/2.5.0" : "shifter --image=registry.services.nersc.gov/jgi/prodigal ", | |
240 "#prodigal/2.50" : "shifter --image=registry.services.nersc.gov/jgi/prodigal ", | |
241 "#lastal/869" : "shifter --image=bryce911/lastal:869 ", | |
242 "#lastal/828" : "shifter --image=bryce911/lastal:869 ", | |
243 #"#lastal" : "shifter --image=bryce911/lastal:869 ", | |
244 "#R/3.3.2" : "module load R/3.3.2;", | |
245 "#texlive" : "shifter --image=bryce911/bbtools ", # (1) | |
246 "#java" : "shifter --image=bryce911/bbtools ", # (1) | |
247 "#blast+/2.6.0" : "shifter --image=sulsj/ncbi-blastplus:2.6.0 ", | |
248 "#blast" : "shifter --image=sulsj/ncbi-blastplus:2.7.0 ", | |
249 "#megahit-1.1.1" : "shifter --image=foster505/megahit:v1.1.1-2-g02102e1 ", | |
250 "#smrtanalysis/2.3.0_p5" : "shifter --image=registry.services.nersc.gov/jgi/smrtanalysis:2.3.0_p5 ", # meth - need more memory | |
251 "#mummer/3.23" : "shifter --image=bryce911/mummer3.23 ", # 3.23 | |
252 "#hmmer" : "shifter --image=registry.services.nersc.gov/jgi/hmmer:latest ", # 3.1b2 | |
253 "#samtools/1.4" : "shifter --image=rmonti/samtools ", | |
254 "#mothur/1.39.5" : "shifter --image=bryce911/mothur1.39.5 ", | |
255 "#vsearch/2.4.3" : "shifter --image=bryce911/vsearch2.4.3 ", | |
256 "#graphviz" : "shifter --image=bryce911/bbtools ", | |
257 "#ssu-align/0.1.1" : "shifter --image=bryce911/ssu-align0.1.1 ", # openmpi/1.10 included in docker container | |
258 "#smrtlink/4.0.0.190159" : "shifter --image=registry.services.nersc.gov/jgi/smrtlink:4.0.0.190159 /smrtlink/smrtcmds/bin/", # progs not in path | |
259 "#smrtlink/5.0.1.9585" : "shifter --image=registry.services.nersc.gov/jgi/smrtlink:5.0.1.9585 /smrtlink/smrtcmds/bin/", # progs not in path, Tony created 2017-10-16 | |
260 "#smrtlink" : "shifter --image=registry.services.nersc.gov/jgi/smrtlink:5.0.1.9585 /smrtlink/smrtcmds/bin/", # progs not in path | |
261 "#prodege" : "shifter --image=bryce911/prodege ", # 2.2.1 | |
262 #"#hmmer" : "shifter --image=registry.services.nersc.gov/jgi/hmmer ", # 3.1b2 - Feb 2015, latest as of Oct 2017 | |
263 "#checkm" : "shifter --image=registry.services.nersc.gov/jgi/checkm ", | |
264 } | |
265 | |
266 | |
267 # (1) - added as part of the bryce911 bbtools package | |
268 | |
269 #cmd = "#bbtools-shijie;bbmap...." | |
270 # this dict will be deprecated as of March 2018 when genepool passes into legend | |
271 genepool_mod = { | |
272 "#bbtools" : "module load bbtools", | |
273 "#pigz" : "module load pigz", | |
274 "#jamo" : "module load jamo", | |
275 "#gnuplot" : "module load gnuplot/4.6.2", # sag,iso,sps,ce:gc_cov, gc_histogram, contig_gc | |
276 "#spades/3.9.0" : "module load spades/3.9.0", | |
277 "#spades/3.10.1" : "module load spades/3.10.1", | |
278 "#spades/3.11.1" : "module load spades/3.11.1-check", | |
279 "#prodigal/2.6.3" : "module load prodigal/2.50", # aka 2.50, also 2.60 is available | |
280 "#prodigal/2.5.0" : "module load prodigal/2.50", | |
281 "#prodigal/2.50" : "module load prodigal/2.50", | |
282 #"#lastal" : "module load last/828", | |
283 "#lastal/828" : "module load last/828", | |
284 "#R/3.3.2" : "module unload R;module load R/3.3.1", # 3.3.2 not on genepool - RQCSUPPORT-1516 unload R for Kecia | |
285 "#texlive" : "module load texlive", | |
286 "#blast+/2.6.0" : "module load blast+/2.6.0", | |
287 #"#blast+/2.7.0" : "module load blast+/2.7.0", # not created | |
288 "#blast" : "module load blast+/2.6.0", | |
289 "#java" : "", # java runs natively on genepool | |
290 "#megahit-1.1.1" : "module load megahit/1.1.1", | |
291 "#smrtanalysis/2.3.0_p5" : "module load smrtanalysis/2.3.0_p5", | |
292 "#smrtanalysis/2.3.0_p5_xmx32g" : "module load smrtanalysis/2.3.0_p5;export _JAVA_OPTIONS='-Xmx32g'", | |
293 "#mummer/3.23" : "module load mummer/3.23", | |
294 "#hmmer" : "module load hmmer/3.1b2", | |
295 "#samtools/1.4" : "module load samtools/1.4", | |
296 "#mothur/1.39.5" : "module load mothur/1.32.1", # 1.26.0 default, 1.32.1 | |
297 "#vsearch/2.4.3" : "module load vsearch/2.3.0", # 2.3.0 | |
298 "#graphviz" : "module load graphviz", | |
299 "#ssu-align/0.1.1" : "module load ssu-align", | |
300 "#smrtlink/4.0.0.190159" : "module load smrtlink/4.0.0.190159", | |
301 "#smrtlink" : "module load smrtlink/5.0.1.9585", | |
302 "#smrtlink/5.0.1.9585" : "module load smrtlink/5.0.1.9585", | |
303 "#prodege" : "module load R;/projectb/sandbox/rqc/prod/pipelines/external_tools/sag_decontam/prodege-2.2/bin/", | |
304 "#checkm" : "module load hmmer prodigal pplacer", # checkm installed in python by default on genepool | |
305 } | |
306 | |
307 #bbtools;stats.sh | |
308 | |
309 if cmd.startswith("#"): | |
310 | |
311 cluster = "genepool" | |
312 # any other env ids to use? | |
313 | |
314 # cori, denovo, genepool | |
315 cluster = os.environ.get('NERSC_HOST', 'unknown') | |
316 | |
317 | |
318 f = cmd.find(";") | |
319 mod = "" # command to replace | |
320 if f > -1: | |
321 mod = cmd[0:f] | |
322 | |
323 if mod: | |
324 | |
325 | |
326 # use module load jamo on denovo | |
327 if mod == "#jamo" and cluster == "denovo": | |
328 shifter_img[mod] = "module load jamo;" | |
329 | |
330 if cluster in ("denovo", "cori"): | |
331 | |
332 if mod in shifter_img: | |
333 new_cmd = new_cmd.replace(mod + ";", shifter_img[mod]) | |
334 | |
335 else: | |
336 if mod in genepool_mod: | |
337 if genepool_mod[mod] == "": | |
338 new_cmd = new_cmd.replace(mod + ";", "") | |
339 else: | |
340 new_cmd = new_cmd.replace(mod, genepool_mod[mod]) | |
341 | |
342 if new_cmd.startswith("#"): | |
343 print "Command not found! %s" % new_cmd | |
344 sys.exit(18) | |
345 | |
346 #print new_cmd | |
347 return new_cmd | |
348 | |
349 | |
350 ''' | |
351 returns human readable file size | |
352 @param num = file size (e.g. 1000) | |
353 | |
354 @return: readable float e.g. 1.5 KB | |
355 ''' | |
356 def human_size(num): | |
357 if not num: | |
358 num = 0.0 | |
359 | |
360 for x in ['bytes', 'KB', 'MB', 'GB', 'TB', 'PB', 'XB']: | |
361 if num < 1024.0: | |
362 return "%3.1f %s" % (num, x) | |
363 num /= 1024.0 | |
364 | |
365 return "%3.1f %s" % (num, 'ZB') | |
366 | |
367 | |
368 | |
369 ''' | |
370 send out email | |
371 @param emailTo: email receipient (e.g. bryce@lbl.gov) | |
372 @param emailSubject: subject line for the email | |
373 @param emailBody: content of the email | |
374 @param emailFrom: optional email from | |
375 | |
376 ''' | |
377 def send_email(email_to, email_subject, email_body, email_from = 'rqc@jgi-psf.org', log = None): | |
378 msg = "" | |
379 err_flag = 0 | |
380 | |
381 if not email_to: | |
382 msg = "- send_email: email_to parameter missing!" | |
383 | |
384 if not email_subject: | |
385 msg = "- send_email: email_subject parameter missing!" | |
386 | |
387 if not email_body: | |
388 msg = "- send_email: email_body parameter missing!" | |
389 | |
390 | |
391 if err_flag == 0: | |
392 msg = "- sending email to: %s" % (email_to) | |
393 | |
394 if log: | |
395 log.info(msg) | |
396 else: | |
397 print msg | |
398 | |
399 if err_flag == 1: | |
400 return 0 | |
401 | |
402 # assume html | |
403 email_msg = MIMEText(email_body, "html") # vs "plain" | |
404 email_msg['Subject'] = email_subject | |
405 email_msg['From'] = email_from | |
406 email_msg['To'] = email_to | |
407 | |
408 p = Popen(["/usr/sbin/sendmail", "-t"], stdin = PIPE) | |
409 p.communicate(email_msg.as_string()) | |
410 | |
411 return err_flag | |
412 | |
413 ''' | |
414 Write to rqc_file (e.g. rqc-files.tmp) the file_key and file_value | |
415 | |
416 @param rqc_file_log: full path to file containing key=file | |
417 @param file_key: key for the entry | |
418 @param file_value: value for the entry | |
419 | |
420 ''' | |
421 def append_rqc_file(rqc_file_log, file_key, file_value, log=None): | |
422 if file_key: | |
423 | |
424 buffer = "%s = %s\n" % (file_key, file_value) | |
425 | |
426 with open(rqc_file_log, "a") as myfile: | |
427 myfile.write(buffer) | |
428 | |
429 if log: log.info("append_rqc_file: %s:%s" % (file_key, file_value)) | |
430 | |
431 else: | |
432 | |
433 if log: log.warning("key or value error: %s:%s" % (file_key, file_value)) | |
434 | |
435 ''' | |
436 Write to rqc_stats (e.g. rqc-stats.tmp) the stats_key and stats_value | |
437 @param rqc_file_log: full path to file containing key=file | |
438 @param file_key: key for the entry | |
439 @param file_value: value for the entry | |
440 | |
441 ''' | |
442 def append_rqc_stats(rqc_stats_log, stats_key, stats_value, log=None): | |
443 if stats_key: | |
444 | |
445 buffer = "%s = %s\n" % (stats_key, stats_value) | |
446 | |
447 with open(rqc_stats_log, "a") as myfile: | |
448 myfile.write(buffer) | |
449 | |
450 if log: log.info("append_rqc_stats: %s:%s" % (stats_key, stats_value)) | |
451 | |
452 else: | |
453 | |
454 if log: log.warning("key or value error: %s:%s" % (stats_key, stats_value)) | |
455 | |
456 ''' | |
457 Return the file system path to jgi-rqc-pipeline so we can use */tools and */lib | |
458 | |
459 @return /path/to/jgi-rqc-pipelines | |
460 ''' | |
461 def get_run_path(): | |
462 current_path = os.path.dirname(os.path.abspath(__file__)) | |
463 run_path = os.path.abspath(os.path.join(current_path, os.pardir)) | |
464 | |
465 return run_path | |
466 | |
467 | |
468 | |
469 ''' | |
470 Simple read count using bbtools n_contigs field | |
471 - slightly different than in rqc_utility | |
472 n_scaffolds n_contigs scaf_bp contig_bp gap_pct scaf_N50 scaf_L50 ctg_N50 ctg_L50 scaf_N90 scaf_L90 ctg_N90 ctg_L90 scaf_max ctg_max scaf_n_gt50K scaf_pct_gt50K gc_avg gc_std | |
473 1346616 1346616 405331416 405331415 0.000 1346616 301 1346615 301 1346616 301 1346615 301 301 301 0 0.000 0.44824 0.02675 | |
474 | |
475 ''' | |
476 def get_read_count_fastq(fastq, log = None): | |
477 read_cnt = 0 | |
478 | |
479 if os.path.isfile(fastq): | |
480 | |
481 # EnvironmentModules.module(["load", "bbtools"]) | |
482 # bbtools faster than zcat | wc because bbtools uses pigz | |
483 # cmd = "stats.sh format=3 in=%s" % fastq | |
484 cmd = "#bbtools;stats.sh format=3 in=%s" % fastq | |
485 cmd = convert_cmd(cmd) | |
486 | |
487 | |
488 if log: | |
489 log.info("- cmd: %s", cmd) | |
490 | |
491 std_out, std_err, exit_code = run_command(cmd, True) | |
492 | |
493 # EnvironmentModules.module(["unload", "bbtools"]) | |
494 | |
495 if exit_code == 0 and std_out: | |
496 | |
497 line_list = std_out.split("\n") | |
498 #print line_list | |
499 val_list = str(line_list[1]).split() #.split('\t') | |
500 #print "v = %s" % val_list | |
501 | |
502 read_cnt = int(val_list[1]) | |
503 | |
504 if log: | |
505 log.info("- read count: %s", read_cnt) | |
506 | |
507 else: | |
508 if log: | |
509 post_mortem_cmd(cmd, exit_code, std_out, std_err, log) | |
510 | |
511 | |
512 else: | |
513 log.error("- fastq: %s does not exist!", fastq) | |
514 | |
515 return read_cnt | |
516 | |
517 | |
518 | |
519 ''' | |
520 Subsampling calculation | |
521 0 .. 250k reads = 100% | |
522 250k .. 25m = 100% to 1% | |
523 25m .. 600m = 1% | |
524 600m+ .. oo < 1% | |
525 | |
526 July 2014 - 15 runs > 600m (HiSeq-2500 Rapid) - 4 actual libraries / 85325 seq units | |
527 - returns new subsampling rate | |
528 ''' | |
529 def get_subsample_rate(read_count): | |
530 subsample = 0 | |
531 subsample_rate = 0.01 | |
532 max_subsample = 6000000 # 4 hours of blast time | |
533 | |
534 new_subsample_rate = 250000.0/read_count | |
535 subsample_rate = max(new_subsample_rate, subsample_rate) | |
536 subsample_rate = min(1, subsample_rate) # if subsample_rate > 1, then set to 1 | |
537 | |
538 subsample = int(read_count * subsample_rate) | |
539 | |
540 if subsample > max_subsample: | |
541 subsample = max_subsample | |
542 | |
543 subsample_rate = subsample / float(read_count) | |
544 | |
545 return subsample_rate | |
546 | |
547 | |
548 ''' | |
549 Set color hash | |
550 - need to update to remove "c" parameter - used in too many places | |
551 ''' | |
552 def set_colors(c, use_color = False): | |
553 | |
554 if use_color == False: | |
555 | |
556 color = { | |
557 'black' : "", | |
558 'red' : "", | |
559 'green' : "", | |
560 'yellow' : "", | |
561 'blue' : "", | |
562 'pink' : "", | |
563 'cyan' : "", | |
564 'white' : "", | |
565 '' : "" | |
566 } | |
567 | |
568 else: | |
569 | |
570 color = { | |
571 'black' : "\033[1;30m", | |
572 'red' : "\033[1;31m", | |
573 'green' : "\033[1;32m", | |
574 'yellow' : "\033[1;33m", | |
575 'blue' : "\033[1;34m", | |
576 'pink' : "\033[1;35m", | |
577 'cyan' : "\033[1;36m", | |
578 'white' : "\033[1;37m", | |
579 '' : "\033[m" | |
580 } | |
581 | |
582 | |
583 return color | |
584 | |
585 ''' | |
586 New function that just returns colors | |
587 ''' | |
588 def get_colors(): | |
589 | |
590 color = { | |
591 'black' : "\033[1;30m", | |
592 'red' : "\033[1;31m", | |
593 'green' : "\033[1;32m", | |
594 'yellow' : "\033[1;33m", | |
595 'blue' : "\033[1;34m", | |
596 'pink' : "\033[1;35m", | |
597 'cyan' : "\033[1;36m", | |
598 'white' : "\033[1;37m", | |
599 '' : "\033[m" | |
600 } | |
601 | |
602 | |
603 return color | |
604 | |
605 | |
606 | |
607 ''' | |
608 Returns msg_ok, msg_fail, msg_warn colored or not colored | |
609 ''' | |
610 def get_msg_settings(color): | |
611 | |
612 msg_ok = "[ "+color['green']+"OK"+color['']+" ]" | |
613 msg_fail = "[ "+color['red']+"FAIL"+color['']+" ]" | |
614 msg_warn = "[ "+color['yellow']+"WARN"+color['']+" ]" | |
615 | |
616 return msg_ok, msg_fail, msg_warn | |
617 | |
618 | |
619 ''' | |
620 Use RQC's ap_tool to get the status | |
621 set mode = "-sa" to show all, even completed | |
622 ''' | |
623 def get_analysis_project_id(seq_proj_id, target_analysis_project_id, target_analysis_task_id, output_path, log = None, mode = ""): | |
624 | |
625 if log: | |
626 log.info("get_analysis_project_id: spid = %s, tapid = %s, tatid = %s", seq_proj_id, target_analysis_project_id, target_analysis_task_id) | |
627 | |
628 analysis_project_id = 0 | |
629 analysis_task_id = 0 | |
630 project_type = None | |
631 task_type = None | |
632 | |
633 ap_list = os.path.join(output_path, "ap-info.txt") | |
634 AP_TOOL = "/global/dna/projectdirs/PI/rqc/prod/jgi-rqc-pipeline/tools/ap_tool.py" | |
635 #AP_TOOL = "/global/homes/b/brycef/git/jgi-rqc-pipeline/tools/ap_tool.py" | |
636 cmd = "%s -spid %s -m psv -tapid %s -tatid %s %s > %s 2>&1" % (AP_TOOL, seq_proj_id, target_analysis_project_id, target_analysis_task_id, mode, ap_list) | |
637 if log: | |
638 log.info("- cmd: %s", cmd) | |
639 else: | |
640 print "- cmd: %s" % cmd | |
641 std_out, std_err, exit_code = run_command(cmd, True) | |
642 | |
643 post_mortem_cmd(cmd, exit_code, std_out, std_err, log) | |
644 | |
645 if os.path.isfile(ap_list): | |
646 | |
647 ap_dict = {} # header = value | |
648 cnt = 0 | |
649 fh = open(ap_list, "r") | |
650 for line in fh: | |
651 arr = line.strip().split("|") | |
652 if cnt == 0: | |
653 c2 = 0 # position of title in header | |
654 for a in arr: | |
655 ap_dict[a.lower()] = c2 | |
656 c2 += 1 | |
657 | |
658 else: | |
659 | |
660 for a in ap_dict: | |
661 | |
662 if ap_dict[a] + 1 > len(arr): | |
663 pass | |
664 else: | |
665 ap_dict[a] = arr[ap_dict[a]] | |
666 | |
667 | |
668 cnt += 1 | |
669 | |
670 fh.close() | |
671 | |
672 | |
673 analysis_project_id = ap_dict.get("analysis project id") | |
674 analysis_task_id = ap_dict.get("analysis task id") | |
675 project_type = ap_dict.get("analysis product name") | |
676 task_type = ap_dict.get("analysis task name") | |
677 | |
678 # nno such project | |
679 if cnt == 1: | |
680 analysis_project_id = 0 | |
681 analysis_task_id = 0 | |
682 | |
683 if log: | |
684 log.info("- project type: %s, task type: %s", project_type, task_type) | |
685 log.info("- analysis_project_id: %s, analysis_task_id: %s", analysis_project_id, analysis_task_id) | |
686 | |
687 try: | |
688 analysis_project_id = int(analysis_project_id) | |
689 analysis_task_id = int(analysis_task_id) | |
690 except: | |
691 analysis_project_id = 0 | |
692 analysis_task_id = 0 | |
693 | |
694 | |
695 # ap = 4, at = 8 means its using the column names but didn't find anything | |
696 if analysis_project_id < 100: | |
697 analysis_project_id = 0 | |
698 if analysis_task_id < 100: | |
699 analysis_task_id = 0 | |
700 | |
701 return analysis_project_id, analysis_task_id | |
702 | |
703 | |
704 ''' | |
705 For creating a dot file from the pipeline flow | |
706 ''' | |
707 def append_flow(flow_file, orig_node, orig_label, next_node, next_label, link_label): | |
708 | |
709 fh = open(flow_file, "a") | |
710 fh.write("%s|%s|%s|%s|%s\n" % (orig_node, orig_label, next_node, next_label, link_label)) | |
711 fh.close() | |
712 | |
713 ''' | |
714 Flow file format: | |
715 # comment | |
716 *label|PBMID Pipeline run for BTXXY<br><font point-size="10">Run Date: 2017-09-28 14:22:50</font> | |
717 # origin node, origin label, next node, next label, link label | |
718 input_h5|BTXXY H5<br><font point-size="10">3 smrtcells</font>|assembly|HGAP Assembly<FONT POINT-SIZE="10"><br>3 contigs, 13,283,382bp</FONT>|HGAP v4.0.1 | |
719 | |
720 nodes should be the output of the transformation between the nodes | |
721 e.g. input fastq (25m reads) --[ bbtools subsampling ]--> subsampled fastq (10m reads) | |
722 | |
723 creates a dot file, to convert to png use: | |
724 $ module load graphviz | |
725 $ dot -T png (dot file) > (png file) | |
726 | |
727 More info on formatting the labels | |
728 http://www.graphviz.org/content/node-shapes#html | |
729 ''' | |
730 def dot_flow(flow_file, dot_file, log = None): | |
731 | |
732 if not os.path.isfile(flow_file): | |
733 if log: | |
734 log.info("- cannot find flow file: %s", flow_file) | |
735 else: | |
736 print "Cannot find flow file: %s" % flow_file | |
737 return | |
738 | |
739 | |
740 fhw = open(dot_file, "w") | |
741 | |
742 fhw.write("// dot file\n") | |
743 fhw.write("digraph rqc {\n") # directed graph | |
744 fhw.write(" node [shape=box];\n") | |
745 fhw.write(" rankdir=LR;\n") | |
746 | |
747 fh = open(flow_file, "r") | |
748 | |
749 for line in fh: | |
750 line = line.strip() | |
751 | |
752 if not line: | |
753 continue | |
754 | |
755 if line.startswith("#"): | |
756 continue | |
757 | |
758 # graph label | |
759 if line.startswith("*label"): | |
760 arr = line.split("|") | |
761 label = flow_replace(str(arr[1])) | |
762 | |
763 fhw.write(" label=<%s>;\n" % label) | |
764 fhw.write(" labelloc=top;\n") | |
765 | |
766 else: | |
767 | |
768 arr = line.split("|") | |
769 #print arr | |
770 | |
771 if len(arr) == 5: | |
772 | |
773 org_node = arr[0] | |
774 org_label = str(arr[1]) | |
775 next_node = arr[2] | |
776 next_label = str(arr[3]) | |
777 link_label = str(arr[4]) | |
778 | |
779 # must be <br/> in the dot file, I have a habit of using <br> | |
780 org_label = flow_replace(org_label) | |
781 next_label = flow_replace(next_label) | |
782 link_label = flow_replace(link_label) | |
783 | |
784 # label are enclosed by < > instead of " " to handle html-ish markups | |
785 | |
786 if next_node: | |
787 link = " %s -> %s;\n" % (org_node, next_node) | |
788 if link_label: | |
789 link = " %s -> %s [label=<%s>];\n" % (org_node, next_node, link_label) | |
790 fhw.write(link) | |
791 | |
792 if org_label: | |
793 label = " %s [label=<%s>];\n" % (org_node, org_label) | |
794 fhw.write(label) | |
795 | |
796 if next_label: | |
797 label = " %s [label=<%s>];\n" % (next_node, next_label) | |
798 fhw.write(label) | |
799 | |
800 | |
801 | |
802 fh.close() | |
803 | |
804 fhw.write("}\n") | |
805 | |
806 fhw.close() | |
807 if log: | |
808 log.info("- created dot file: %s", dot_file) | |
809 | |
810 return dot_file | |
811 | |
812 ''' | |
813 simple replacements | |
814 ''' | |
815 def flow_replace(my_str): | |
816 | |
817 new_str = my_str.replace("<br>", "<br/>").replace("<smf>", "<font point-size=\"10\">").replace("</f>", "</font>") | |
818 | |
819 return new_str | |
820 | |
821 | |
822 ## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
823 ## main program | |
824 | |
825 | |
826 if __name__ == "__main__": | |
827 # unit tests | |
828 | |
829 print human_size(102192203) | |
830 print human_size(250000000000) | |
831 #print get_read_count_fastq("/global/projectb/scratch/brycef/sag/phix/11185.1.195330.UNKNOWN_matched.fastq.gz") | |
832 | |
833 cmd = "#bbtools;bbduk.sh in=/global/dna/dm_archive/sdm/illumina//01/14/88/11488.1.208132.UNKNOWN.fastq.gz ref=/global/dna/shared/rqc/ref_databases/qaqc/databases/phix174_ill.ref.fa outm=/global/projectb/scratch/brycef/phix/11488/11488.1.208132.UNKNOWN_matched.fastq.gz outu=/global/projectb/scratch/brycef/phix/11488/11488.1.208132.UNKNOWN_unmatched.fastq.gz" | |
834 print convert_cmd(cmd) | |
835 cmd = "#pigz;pigz /global/projectb/scratch/brycef/align/BTOYH/genome/11463.6.208000.CAAGGTC-AGACCTT.filter-RNA.fastq.gz-genome.sam" | |
836 print convert_cmd(cmd) | |
837 | |
838 cmd = "#java;java -version" | |
839 print convert_cmd(cmd) | |
840 | |
841 dot_flow("/global/projectb/scratch/brycef/pbmid/BWOAU/f2.flow", "/global/projectb/scratch/brycef/pbmid/BWOAU/BWOUAx.dot") | |
842 | |
843 sys.exit(0) | |
844 | |
845 |