Mercurial > repos > rliterman > csp2
comparison CSP2/bin/fetchReads.py @ 0:01431fa12065
"planemo upload"
author | rliterman |
---|---|
date | Mon, 02 Dec 2024 10:40:55 -0500 |
parents | |
children | 792274118b2e |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:01431fa12065 |
---|---|
#!/usr/bin/env python3
"""List the read files in a directory as paired-end or single-end samples.

One comma-separated line is printed to stdout per sample:

    <sample>,Paired,<forward path>;<reverse path>
    <sample>,Single,<path>

Files whose name starts with "Undetermined_" are ignored.
"""

import os
import sys
from glob import escape, glob
import argparse


def strip_suffix(text, suffix):
    """Return *text* without a trailing *suffix*.

    Only a match at the very end is removed; *text* is returned unchanged when
    *suffix* is empty/None or *text* does not end with it.
    """
    if suffix and text.endswith(suffix):
        return text[:-len(suffix)]
    return text


def normalize_filetype(read_filetype):
    """Return *read_filetype* with a leading "." added if it has none."""
    if not read_filetype.startswith("."):
        return "." + read_filetype
    return read_filetype


def list_read_files(read_dir, read_filetype):
    """Return the sorted paths in *read_dir* whose names end with *read_filetype*.

    Files whose basename starts with "Undetermined_" are dropped.
    """
    # Escape the directory so glob metacharacters in its name ("[", "*", "?")
    # are matched literally; only the "*<filetype>" part acts as a pattern.
    pattern = escape(read_dir) + "/*" + read_filetype
    return [
        path for path in sorted(glob(pattern))
        if not os.path.basename(path).startswith("Undetermined_")
    ]


def classify_reads(read_files, read_filetype, forward_suffix, reverse_suffix,
                   trim_name):
    """Build the output rows for *read_files*.

    A forward file and a reverse file form a pair when their paths are equal
    once the respective suffix is removed from the end. Every file that is not
    part of a pair is reported as single-end.

    Args:
        read_files: iterable of read file paths.
        read_filetype: file extension including the leading ".".
        forward_suffix: ending of forward read files; None/empty matches nothing.
        reverse_suffix: ending of reverse read files; None/empty matches nothing.
        trim_name: text removed from every sample name; None/empty removes nothing.

    Returns:
        List of row strings: paired samples first (sorted by path), then
        single-end files in the order they appear in *read_files*.
    """
    read_files = list(read_files)
    # argparse leaves an omitted --trim_name as None, which str.replace rejects.
    trim_name = trim_name or ""

    forward_stems = {
        strip_suffix(path, forward_suffix) for path in read_files
        if forward_suffix and path.endswith(forward_suffix)
    }
    reverse_stems = {
        strip_suffix(path, reverse_suffix) for path in read_files
        if reverse_suffix and path.endswith(reverse_suffix)
    }

    rows = []
    paired_paths = set()
    # Sorted so the output order does not depend on set iteration order.
    for stem in sorted(forward_stems & reverse_stems):
        left = stem + forward_suffix
        right = stem + reverse_suffix
        paired_paths.update((left, right))
        sample = strip_suffix(os.path.basename(left), forward_suffix)
        sample = sample.replace(trim_name, "")
        rows.append(",".join([sample, "Paired", ";".join([left, right])]))

    for path in read_files:
        if path in paired_paths:
            continue
        name = os.path.basename(path)
        # An unpaired forward file loses its forward suffix; any other file
        # only loses the file extension.
        if forward_suffix and path.endswith(forward_suffix):
            sample = strip_suffix(name, forward_suffix)
        else:
            sample = strip_suffix(name, read_filetype)
        rows.append(",".join([sample.replace(trim_name, ""), "Single", path]))

    return rows


def main(argv=None):
    """Parse the command line and print one row per sample.

    Args:
        argv: argument list; None makes argparse read sys.argv.

    Exits with a message when --read_dir is not a directory or when it
    contains no matching read files.
    """
    # Parse args
    parser = argparse.ArgumentParser(description='Fetch Reads')
    parser.add_argument('--read_dir', type=str, required=True, help='path to directory containing read files')
    parser.add_argument('--read_filetype', type=str, required=True, help='read filetype information')
    parser.add_argument('--forward_suffix', type=str, help='forward suffix')
    parser.add_argument('--reverse_suffix', type=str, help='reverse suffix')
    parser.add_argument('--trim_name', type=str, help='trim name')
    args = parser.parse_args(argv)

    # Get path to directory containing read files
    read_dir = os.path.abspath(args.read_dir)
    if not os.path.isdir(read_dir):
        sys.exit("--read_dir is not a valid path: " + str(read_dir))

    # Get read filetype information
    read_filetype = normalize_filetype(args.read_filetype)

    # Check if sequence files exist in directory, ignoring undetermined reads
    read_files = list_read_files(read_dir, read_filetype)
    if not read_files:
        sys.exit("No "+read_filetype +" files detected in "+read_dir)

    # Get data for paired-end and single-end reads
    rows = classify_reads(read_files, read_filetype, args.forward_suffix,
                          args.reverse_suffix, args.trim_name)
    for row in rows:
        print(row)


if __name__ == "__main__":
    main()