// Include any necessary methods and modules
include { stopNow; validateParamsForFASTQ } from "${params.routines}"
include { GEN_SAMPLESHEET                 } from "${params.modules}${params.fs}gen_samplesheet${params.fs}main"
include { SAMPLESHEET_CHECK               } from "${params.modules}${params.fs}samplesheet_check${params.fs}main"
include { CAT_FASTQ                       } from "${params.modules}${params.fs}cat${params.fs}fastq${params.fs}main"
include { SEQKIT_SEQ                      } from "${params.modules}${params.fs}seqkit${params.fs}seq${params.fs}main"

// Validate the required workflow parameters when FASTQ files
// are the input for the entry point.
validateParamsForFASTQ()

/*
 * Subworkflow: PROCESS_FASTQ
 *
 * Builds a channel of [ meta, [ fastq files ] ] from either
 *   - a directory of FASTQ files (--input), for which a sample sheet is
 *     generated with GEN_SAMPLESHEET, or
 *   - an existing metadata CSV (--metadata).
 *
 * The sample sheet is passed through SAMPLESHEET_CHECK, rows sharing the
 * same meta map are grouped, groups with more than one FASTQ entry are
 * concatenated with CAT_FASTQ, and the reads are optionally run through
 * SEQKIT_SEQ when --fq_filter_by_len is greater than 0.
 *
 * Emits:
 *   processed_reads : channel of reads after the steps above
 *   versions        : mixed channel of the versions outputs of the
 *                     processes that were invoked
 */
workflow PROCESS_FASTQ {
    main:
        versions = Channel.empty()
        input_ch = Channel.empty()
        reads    = Channel.empty()

        // --input takes precedence over --metadata when both are given.
        def input = file( (params.input ?: params.metadata) )

        if (params.input) {
            def fastq_files = []

            if (params.fq_suffix == null) {
                stopNow("We need to know what suffix the FASTQ files ends with inside the\n" +
                    "directory. Please use the --fq_suffix option to indicate the file\n" +
                    "suffix by which the files are to be collected to run the pipeline on.")
            }

            if (params.fq_strandedness == null) {
                stopNow("We need to know if the FASTQ files inside the directory\n" +
                    "are sequenced using stranded or non-stranded sequencing. This is generally\n" +
                    "required if the sequencing experiment is RNA-SEQ. For almost all of the other\n" +
                    "cases, you can probably use the --fq_strandedness unstranded option to indicate\n" +
                    "that the reads are unstranded.")
            }

            if (params.fq_filename_delim == null || params.fq_filename_delim_idx == null) {
                stopNow("We need to know the delimiter of the filename of the FASTQ files.\n" +
                    "By default the filename delimiter is _ (underscore). This delimiter character\n" +
                    "is used to split and assign a group name. The group name can be controlled by\n" +
                    "using the --fq_filename_delim_idx option (1-based). For example, if the FASTQ\n" +
                    "filename is WT_REP1_001.fastq, then to create a group WT, use the following\n" +
                    "options: --fq_filename_delim _ --fq_filename_delim_idx 1")
            }

            if (!input.exists()) {
                stopNow("The input directory,\n${params.input}\ndoes not exist!")
            }

            // Collect only the files that carry the requested suffix
            // (no placeholder entries for non-matching files).
            def fq_suffix = "${params.fq_suffix}".toString()
            input.eachFileRecurse {
                if (it.name.endsWith(fq_suffix)) {
                    fastq_files << it
                }
            }

            if (fastq_files.isEmpty()) {
                stopNow("The input directory,\n${params.input}\nis empty! or does not " +
                    "have FASTQ files ending with the suffix: ${params.fq_suffix}")
            }

            GEN_SAMPLESHEET( Channel.fromPath(params.input, type: 'dir') )
            GEN_SAMPLESHEET.out.csv.set{ input_ch }
            versions.mix( GEN_SAMPLESHEET.out.versions )
                .set { versions }
        } else if (params.metadata) {
            if (!input.exists()) {
                stopNow("The metadata CSV file,\n${params.metadata}\ndoes not exist!")
            }

            if (input.size() <= 0) {
                stopNow("The metadata CSV file,\n${params.metadata}\nis empty!")
            }

            Channel.fromPath(params.metadata, type: 'file')
                .set { input_ch }
        }

        // Parse the checked sample sheet, group rows by their meta map and
        // separate groups that need concatenation from those that do not.
        SAMPLESHEET_CHECK( input_ch )
            .csv
            .splitCsv( header: true, sep: ',')
            .map { create_fastq_channel(it) }
            .groupTuple(by: [0])
            .branch {
                meta, fastq ->
                    single   : fastq.size() == 1
                        return [ meta, fastq.flatten() ]
                    multiple : fastq.size() > 1
                        return [ meta, fastq.flatten() ]
            }
            .set { reads }

        CAT_FASTQ( reads.multiple )
            .catted_reads
            .mix( reads.single )
            .set { processed_reads }

        // Null-safe: an unset --fq_filter_by_len is treated as 0 (no filtering)
        // instead of failing on null.toInteger().
        if ((params.fq_filter_by_len ?: 0).toInteger() > 0) {
            SEQKIT_SEQ( processed_reads )
                .fastx
                .set { processed_reads }

            versions.mix( SEQKIT_SEQ.out.versions.first().ifEmpty(null) )
                .set { versions }
        }

        versions.mix(
            SAMPLESHEET_CHECK.out.versions,
            CAT_FASTQ.out.versions.first().ifEmpty(null)
        )
        .set { versions }

    emit:
        processed_reads
        versions
}

/*
 * Convert one parsed sample sheet row into [ meta, [ fq1 ] ] for
 * single-end rows or [ meta, [ fq1, fq2 ] ] otherwise.
 *
 * row : map with the keys sample, single_end, strandedness, fq1 and
 *       (when single_end is false) fq2, as read below.
 *
 * meta.id is derived from row.sample by keeping the first
 * --fq_filename_delim_idx parts after splitting on --fq_filename_delim,
 * then dropping everything from the first '.' onwards.
 *
 * Calls stopNow() when a referenced FASTQ file is missing.
 */
def create_fastq_channel(LinkedHashMap row) {

    def meta = [:]
    meta.id           = row.sample
    meta.single_end   = row.single_end.toBoolean()
    meta.strandedness = row.strandedness

    // String.split() takes a regular expression, so the delimiter is quoted
    // to make characters such as '.', '|' or '+' split literally, which is
    // consistent with the literal join() below.
    def delim    = params.fq_filename_delim.toString()
    def id_parts = meta.id.split(java.util.regex.Pattern.quote(delim))

    // Clamp the upper bound so that an index larger than the number of parts
    // keeps the whole name instead of throwing an out-of-bounds exception.
    // Smaller values yield the same range as before.
    def last_idx = Math.min(params.fq_filename_delim_idx.toInteger(), id_parts.size()) - 1
    meta.id = id_parts[0..last_idx].join(delim)

    // Drop everything from the first dot onwards (e.g. file extensions).
    if (meta.id.contains('.')) {
        meta.id = meta.id.take(meta.id.indexOf('.'))
    }

    def array = []

    // A blank cell is reported through the same message as a missing file
    // rather than failing inside file().
    if (!row.fq1 || !file(row.fq1).exists()) {
        stopNow("Please check input metadata CSV. The following Read 1 FASTQ file does not exist!" +
            "\n${row.fq1}")
    }

    if (meta.single_end) {
        array = [ meta, [ file(row.fq1) ] ]
    } else {
        if (!row.fq2 || !file(row.fq2).exists()) {
            stopNow("Please check input metadata CSV. The following Read 2 FASTQ file does not exist!" +
                "\n${row.fq2}")
        }
        array = [ meta, [ file(row.fq1), file(row.fq2) ] ]
    }

    return array
}