Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/pileup.sh @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 17:55:14 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 69:33d812a61356 |
---|---|
1 #!/bin/bash | |
2 | |
3 usage(){ | |
4 echo " | |
5 Written by Brian Bushnell | |
6 Last modified April 30, 2020 | |
7 | |
8 Description: Calculates per-scaffold or per-base coverage information from an unsorted sam or bam file. | |
9 Supports SAM/BAM format for reads and FASTA for reference. | |
10 Sorting is not needed, so output may be streamed directly from a mapping program. | |
11 Requires a minimum of 1 bit per reference base plus 100 bytes per scaffold (even if no reference is specified). | |
12 If per-base coverage is needed (including for stdev and median), at least 4 bytes per base is needed. | |
13 | |
14 Usage: pileup.sh in=<input> out=<output> | |
15 | |
16 Input Parameters: | |
17 in=<file> The input sam file; this is the only required parameter. | |
18 ref=<file> Scans a reference fasta for per-scaffold GC counts, not otherwise needed. | |
19 fastaorf=<file> An optional fasta file with ORF header information in PRODIGAL's output format. Must also specify 'outorf'. | |
20 unpigz=t Decompress with pigz for faster decompression. | |
21 addfromref=t Allow ref scaffolds not present in sam header to be added from the reference. | |
22 addfromreads=f Allow ref scaffolds not present in sam header to be added from the reads. | |
23 Note that in this case the ref scaffold lengths will be inaccurate. | |
24 | |
25 Output Parameters: | |
26 out=<file> (covstats) Per-scaffold coverage info. | |
27 rpkm=<file> Per-scaffold RPKM/FPKM counts. | |
28 twocolumn=f Change to true to print only ID and Avg_fold instead of all 6 columns. | |
29 countgc=t Enable/disable counting of read GC content. | |
30 outorf=<file> Per-orf coverage info to this file (only if 'fastaorf' is specified). | |
31 outsam=<file> Print the input sam stream to this file (or stdout). Useful for piping data. | |
32 hist=<file> Histogram of # occurrences of each depth level. | |
33 basecov=<file> Coverage per base location. | |
34 bincov=<file> Binned coverage per location (one line per X bases). | |
35 binsize=1000 Binsize for binned coverage output. | |
36 keepshortbins=t (ksb) Keep residual bins shorter than binsize. | |
37 normcov=<file> Normalized coverage by normalized location (X lines per scaffold). | |
38 normcovo=<file> Overall normalized coverage by normalized location (X lines for the entire assembly). | |
39 normb=-1 If positive, use a fixed number of bins per scaffold; affects 'normcov' and 'normcovo'. | |
40 normc=f Normalize coverage to fraction of max per scaffold; affects 'normcov' and 'normcovo'. | |
41 delta=f Only print base coverage lines when the coverage differs from the previous base. | |
42 nzo=f Only print scaffolds with nonzero coverage. | |
43 concise=f Write 'basecov' in a more concise format. | |
44 header=t (hdr) Include headers in output files. | |
45 headerpound=t (#) Prepend header lines with '#' symbol. | |
46 stdev=t Calculate coverage standard deviation. | |
47 covminscaf=0 (minscaf) Don't print coverage for scaffolds shorter than this. | |
48 covwindow=0 Calculate how many bases are in windows of this size with | |
49 low average coverage. Produces an extra stats column. | |
50 covwindowavg=5 Average coverage below this will be classified as low. | |
51 k=0 If positive, calculate kmer coverage statstics for this kmer length. | |
52 keyvalue=f Output statistics to screen as key=value pairs. | |
53 mincov=1 When calculating percent covered, ignore bases under this depth. | |
54 | |
55 Processing Parameters: | |
56 strandedcov=f Track coverage for plus and minus strand independently. | |
57 startcov=f Only track start positions of reads. | |
58 stopcov=f Only track stop positions of reads. | |
59 secondary=t Use secondary alignments, if present. | |
60 softclip=f Include soft-clipped bases in coverage. | |
61 minmapq=0 (minq) Ignore alignments with mapq below this. | |
62 physical=f (physcov) Calculate physical coverage for paired reads. This includes the unsequenced bases. | |
63 tlen=t Track physical coverage from the tlen field rather than recalculating it. | |
64 arrays=auto Set to t/f to manually force the use of coverage arrays. Arrays and bitsets are mutually exclusive. | |
65 bitsets=auto Set to t/f to manually force the use of coverage bitsets. | |
66 32bit=f Set to true if you need per-base coverage over 64k; does not affect per-scaffold coverage precision. | |
67 This option will double RAM usage (when calculating per-base coverage). | |
68 delcoverage=t (delcov) Count bases covered by deletions or introns as covered. | |
69 True is faster than false. | |
70 dupecoverage=t (dupes) Include reads flagged as duplicates in coverage. | |
71 samstreamer=t (ss) Load reads multithreaded to increase speed. | |
72 | |
73 Trimming Parameters: | |
74 ** NOTE: These are applied before adding coverage, to allow mimicking ** | |
75 ** tools like CallVariants, which uses 'qtrim=r trimq=10 border=5' ** | |
76 qtrim=f Quality-trim. May be set to: | |
77 f (false): Don't quality-trim. | |
78 r (right): Trim right (3') end only. | |
79 l (left): Trim right (5') end only. | |
80 rl (both): Trim both ends. | |
81 trimq=-1 If positive, quality-trim to this threshold. | |
82 border=0 Ignore this many bases on the left and right end. | |
83 | |
84 Output format (tab-delimited): | |
85 ID, Avg_fold, Length, Ref_GC, Covered_percent, Covered_bases, Plus_reads, Minus_reads, Read_GC, Median_fold, Std_Dev | |
86 | |
87 ID: Scaffold ID | |
88 Length: Scaffold length | |
89 Ref_GC: GC ratio of reference | |
90 Avg_fold: Average fold coverage of this scaffold | |
91 Covered_percent: Percent of scaffold with any coverage (only if arrays or bitsets are used) | |
92 Covered_bases: Number of bases with any coverage (only if arrays or bitsets are used) | |
93 Plus_reads: Number of reads mapped to plus strand | |
94 Minus_reads: Number of reads mapped to minus strand | |
95 Read_GC: Average GC ratio of reads mapped to this scaffold | |
96 Median_fold: Median fold coverage of this scaffold (only if arrays are used) | |
97 Std_Dev: Standard deviation of coverage (only if arrays are used) | |
98 | |
99 Java Parameters: | |
100 | |
101 -Xmx This will set Java's memory usage, overriding | |
102 autodetection. -Xmx20g will | |
103 specify 20 gigs of RAM, and -Xmx200m will specify 200 megs. | |
104 The max is typically 85% of physical memory. | |
105 -eoom This flag will cause the process to exit if an out-of-memory | |
106 exception occurs. Requires Java 8u92+. | |
107 -da Disable assertions. | |
108 | |
109 Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems. | |
110 " | |
111 } | |
112 | |
113 #This block allows symlinked shellscripts to correctly set classpath. | |
114 pushd . > /dev/null | |
115 DIR="${BASH_SOURCE[0]}" | |
116 while [ -h "$DIR" ]; do | |
117 cd "$(dirname "$DIR")" | |
118 DIR="$(readlink "$(basename "$DIR")")" | |
119 done | |
120 cd "$(dirname "$DIR")" | |
121 DIR="$(pwd)/" | |
122 popd > /dev/null | |
123 | |
124 #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" | |
125 CP="$DIR""current/" | |
126 | |
127 z="-Xmx1g" | |
128 z2="-Xms1g" | |
129 set=0 | |
130 | |
131 if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then | |
132 usage | |
133 exit | |
134 fi | |
135 | |
136 calcXmx () { | |
137 source "$DIR""/calcmem.sh" | |
138 setEnvironment | |
139 parseXmx "$@" | |
140 if [[ $set == 1 ]]; then | |
141 return | |
142 fi | |
143 freeRam 3200m 84 | |
144 z="-Xmx${RAM}m" | |
145 z2="-Xms${RAM}m" | |
146 } | |
147 calcXmx "$@" | |
148 | |
149 pileup() { | |
150 local CMD="java $EA $EOOM $z -cp $CP jgi.CoveragePileup $@" | |
151 echo $CMD >&2 | |
152 eval $CMD | |
153 } | |
154 | |
155 pileup "$@" |