comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/opt/bbmap-39.01-1/pileup.sh @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 69:33d812a61356
1 #!/bin/bash
2
3 usage(){
4 echo "
5 Written by Brian Bushnell
6 Last modified April 30, 2020
7
8 Description: Calculates per-scaffold or per-base coverage information from an unsorted sam or bam file.
9 Supports SAM/BAM format for reads and FASTA for reference.
10 Sorting is not needed, so output may be streamed directly from a mapping program.
11 Requires a minimum of 1 bit per reference base plus 100 bytes per scaffold (even if no reference is specified).
12 If per-base coverage is needed (including for stdev and median), at least 4 bytes per base is needed.
13
14 Usage: pileup.sh in=<input> out=<output>
15
16 Input Parameters:
17 in=<file> The input sam file; this is the only required parameter.
18 ref=<file> Scans a reference fasta for per-scaffold GC counts, not otherwise needed.
19 fastaorf=<file> An optional fasta file with ORF header information in PRODIGAL's output format. Must also specify 'outorf'.
20 unpigz=t Decompress with pigz for faster decompression.
21 addfromref=t Allow ref scaffolds not present in sam header to be added from the reference.
22 addfromreads=f Allow ref scaffolds not present in sam header to be added from the reads.
23 Note that in this case the ref scaffold lengths will be inaccurate.
24
25 Output Parameters:
26 out=<file> (covstats) Per-scaffold coverage info.
27 rpkm=<file> Per-scaffold RPKM/FPKM counts.
28 twocolumn=f Change to true to print only ID and Avg_fold instead of all 6 columns.
29 countgc=t Enable/disable counting of read GC content.
30 outorf=<file> Per-orf coverage info to this file (only if 'fastaorf' is specified).
31 outsam=<file> Print the input sam stream to this file (or stdout). Useful for piping data.
32 hist=<file> Histogram of # occurrences of each depth level.
33 basecov=<file> Coverage per base location.
34 bincov=<file> Binned coverage per location (one line per X bases).
35 binsize=1000 Binsize for binned coverage output.
36 keepshortbins=t (ksb) Keep residual bins shorter than binsize.
37 normcov=<file> Normalized coverage by normalized location (X lines per scaffold).
38 normcovo=<file> Overall normalized coverage by normalized location (X lines for the entire assembly).
39 normb=-1 If positive, use a fixed number of bins per scaffold; affects 'normcov' and 'normcovo'.
40 normc=f Normalize coverage to fraction of max per scaffold; affects 'normcov' and 'normcovo'.
41 delta=f Only print base coverage lines when the coverage differs from the previous base.
42 nzo=f Only print scaffolds with nonzero coverage.
43 concise=f Write 'basecov' in a more concise format.
44 header=t (hdr) Include headers in output files.
45 headerpound=t (#) Prepend header lines with '#' symbol.
46 stdev=t Calculate coverage standard deviation.
47 covminscaf=0 (minscaf) Don't print coverage for scaffolds shorter than this.
48 covwindow=0 Calculate how many bases are in windows of this size with
49 low average coverage. Produces an extra stats column.
50 covwindowavg=5 Average coverage below this will be classified as low.
51 k=0 If positive, calculate kmer coverage statstics for this kmer length.
52 keyvalue=f Output statistics to screen as key=value pairs.
53 mincov=1 When calculating percent covered, ignore bases under this depth.
54
55 Processing Parameters:
56 strandedcov=f Track coverage for plus and minus strand independently.
57 startcov=f Only track start positions of reads.
58 stopcov=f Only track stop positions of reads.
59 secondary=t Use secondary alignments, if present.
60 softclip=f Include soft-clipped bases in coverage.
61 minmapq=0 (minq) Ignore alignments with mapq below this.
62 physical=f (physcov) Calculate physical coverage for paired reads. This includes the unsequenced bases.
63 tlen=t Track physical coverage from the tlen field rather than recalculating it.
64 arrays=auto Set to t/f to manually force the use of coverage arrays. Arrays and bitsets are mutually exclusive.
65 bitsets=auto Set to t/f to manually force the use of coverage bitsets.
66 32bit=f Set to true if you need per-base coverage over 64k; does not affect per-scaffold coverage precision.
67 This option will double RAM usage (when calculating per-base coverage).
68 delcoverage=t (delcov) Count bases covered by deletions or introns as covered.
69 True is faster than false.
70 dupecoverage=t (dupes) Include reads flagged as duplicates in coverage.
71 samstreamer=t (ss) Load reads multithreaded to increase speed.
72
73 Trimming Parameters:
74 ** NOTE: These are applied before adding coverage, to allow mimicking **
75 ** tools like CallVariants, which uses 'qtrim=r trimq=10 border=5' **
76 qtrim=f Quality-trim. May be set to:
77 f (false): Don't quality-trim.
78 r (right): Trim right (3') end only.
79 l (left): Trim right (5') end only.
80 rl (both): Trim both ends.
81 trimq=-1 If positive, quality-trim to this threshold.
82 border=0 Ignore this many bases on the left and right end.
83
84 Output format (tab-delimited):
85 ID, Avg_fold, Length, Ref_GC, Covered_percent, Covered_bases, Plus_reads, Minus_reads, Read_GC, Median_fold, Std_Dev
86
87 ID: Scaffold ID
88 Length: Scaffold length
89 Ref_GC: GC ratio of reference
90 Avg_fold: Average fold coverage of this scaffold
91 Covered_percent: Percent of scaffold with any coverage (only if arrays or bitsets are used)
92 Covered_bases: Number of bases with any coverage (only if arrays or bitsets are used)
93 Plus_reads: Number of reads mapped to plus strand
94 Minus_reads: Number of reads mapped to minus strand
95 Read_GC: Average GC ratio of reads mapped to this scaffold
96 Median_fold: Median fold coverage of this scaffold (only if arrays are used)
97 Std_Dev: Standard deviation of coverage (only if arrays are used)
98
99 Java Parameters:
100
101 -Xmx This will set Java's memory usage, overriding
102 autodetection. -Xmx20g will
103 specify 20 gigs of RAM, and -Xmx200m will specify 200 megs.
104 The max is typically 85% of physical memory.
105 -eoom This flag will cause the process to exit if an out-of-memory
106 exception occurs. Requires Java 8u92+.
107 -da Disable assertions.
108
109 Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems.
110 "
111 }
112
113 #This block allows symlinked shellscripts to correctly set classpath.
114 pushd . > /dev/null
115 DIR="${BASH_SOURCE[0]}"
116 while [ -h "$DIR" ]; do
117 cd "$(dirname "$DIR")"
118 DIR="$(readlink "$(basename "$DIR")")"
119 done
120 cd "$(dirname "$DIR")"
121 DIR="$(pwd)/"
122 popd > /dev/null
123
124 #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"
125 CP="$DIR""current/"
126
127 z="-Xmx1g"
128 z2="-Xms1g"
129 set=0
130
131 if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then
132 usage
133 exit
134 fi
135
136 calcXmx () {
137 source "$DIR""/calcmem.sh"
138 setEnvironment
139 parseXmx "$@"
140 if [[ $set == 1 ]]; then
141 return
142 fi
143 freeRam 3200m 84
144 z="-Xmx${RAM}m"
145 z2="-Xms${RAM}m"
146 }
147 calcXmx "$@"
148
149 pileup() {
150 local CMD="java $EA $EOOM $z -cp $CP jgi.CoveragePileup $@"
151 echo $CMD >&2
152 eval $CMD
153 }
154
155 pileup "$@"