diff --git a/conf/modules.config b/conf/modules.config index 3293e4bf..b3e62bee 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -274,4 +274,28 @@ process { ] } + withName: 'COBALT_PANEL_NORMALISATION' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "panel_resources/${filename}" }, + ] + } + + withName: 'PAVE_PON_PANEL_CREATION' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "panel_resources/${filename}" }, + ] + } + + withName: 'ISOFOX_PANEL_NORMALISATION' { + publishDir = [ + path: { "${params.outdir}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : "panel_resources/${filename}" }, + ] + } + } diff --git a/conf/panel_resource_creation_parameters.config b/conf/panel_resource_creation_parameters.config new file mode 100644 index 00000000..f02ff12b --- /dev/null +++ b/conf/panel_resource_creation_parameters.config @@ -0,0 +1,5 @@ +process { + withName: '^.*:AMBER' { + ext.args = '-tumor_min_depth 2' + } +} diff --git a/lib/Constants.groovy b/lib/Constants.groovy index 32decfee..89d7ecc4 100644 --- a/lib/Constants.groovy +++ b/lib/Constants.groovy @@ -25,6 +25,7 @@ class Constants { static enum RunMode { + PANEL_RESOURCE_CREATION, TARGETED, WGTS, } diff --git a/lib/Utils.groovy b/lib/Utils.groovy index e2ef6d4b..0de90408 100644 --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -408,6 +408,15 @@ class Utils { Nextflow.exit(1) } + // Require --isofox_gene_ids argument to be provided in PANEL_RESOURCE_CREATION when RNA inputs are present + if (run_config.mode === Constants.RunMode.PANEL_RESOURCE_CREATION && run_config.has_rna && !params.isofox_gene_ids) { + log.error "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Running the panel resource creation workflow with RNA requires that the\n" + + " --isofox_gene_ids argument is set with an appropriate input file.\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + Nextflow.exit(1) + } + } static public getEnumFromString(s, e) { diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 8cf99e57..39ef380b 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -16,7 +16,6 @@ class WorkflowMain { def default_invalid = false // Set defaults common to all run configuration - if (!params.containsKey('genome_version')) { if (Constants.GENOMES_VERSION_37.contains(params.genome)) { params.genome_version = '37' @@ -200,7 +199,7 @@ class WorkflowMain { def panels = Constants.PANELS_DEFINED.join('\n - ') log.error "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " A panel is required to be set using the --panel CLI argument or in a\n" + - " configuration file when running in targeted mode.\n" + + " configuration file when running in targeted mode or panel resource creation mode.\n" + " Currently, the available built-in panels are:\n" + " - ${panels}\n" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" diff --git a/main.nf b/main.nf index facb872d..b6d2e022 100644 --- a/main.nf +++ b/main.nf @@ -58,8 +58,9 @@ if (workflow.stubRun && params.create_stub_placeholders) { ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { TARGETED } from './workflows/targeted' -include { 
WGTS } from './workflows/wgts'
+include { PANEL_RESOURCE_CREATION } from './workflows/panel_resource_creation'
+include { TARGETED } from './workflows/targeted'
+include { WGTS } from './workflows/wgts'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -78,6 +79,8 @@ workflow NFCORE_ONCOANALYSER {
         WGTS()
     } else if (run_mode === Constants.RunMode.TARGETED) {
         TARGETED()
+    } else if (run_mode === Constants.RunMode.PANEL_RESOURCE_CREATION) {
+        PANEL_RESOURCE_CREATION()
     } else {
         log.error("received bad run mode: ${run_mode}")
         Nextflow.exit(1)
diff --git a/modules/local/amber/main.nf b/modules/local/amber/main.nf
index aa2e314c..3c63c46e 100644
--- a/modules/local/amber/main.nf
+++ b/modules/local/amber/main.nf
@@ -11,7 +11,7 @@ process AMBER {
     tuple val(meta), path(tumor_bam), path(normal_bam), path(donor_bam), path(tumor_bai), path(normal_bai), path(donor_bai)
     val genome_ver
     path heterozygous_sites
-    path target_region_bed
+    path target_regions_bed
 
     output:
     tuple val(meta), path('amber/'), emit: amber_dir
@@ -35,7 +35,7 @@ process AMBER {
     if (donor_bam) reference_bams.add(donor_bam.toString())
     def reference_bam_arg = reference_bams.size() > 0 ? "-reference_bam ${String.join(",", reference_bams)}" : ''
 
-    def target_regions_bed_arg = target_region_bed ? "-target_regions_bed ${target_region_bed}" : ''
+    def target_regions_bed_arg = target_regions_bed ? "-target_regions_bed ${target_regions_bed}" : ''
 
     """
     amber \\
diff --git a/modules/local/amber/meta.yml b/modules/local/amber/meta.yml
index 476550ea..f0533531 100644
--- a/modules/local/amber/meta.yml
+++ b/modules/local/amber/meta.yml
@@ -46,19 +46,15 @@ input:
       type: file
       description: AMBER heterozygous sites file
       pattern: "*.{vcf.gz}"
-  - target_region_bed:
+  - target_regions_bed:
       type: file
-      description: Target region BED file (optional)
+      description: Target regions BED file (optional)
       pattern: "*.{bed}"
 output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [id: 'sample_id', tumor_id: 'tumor_name', normal_id: 'normal_name']
   - amber_dir:
       type: directory
       description: AMBER output directory
+      pattern: "amber"
   - versions:
       type: file
       description: File containing software versions
diff --git a/modules/local/cobalt/panel_normalisation/environment.yml b/modules/local/cobalt/panel_normalisation/environment.yml
new file mode 100644
index 00000000..8b6b6235
--- /dev/null
+++ b/modules/local/cobalt/panel_normalisation/environment.yml
@@ -0,0 +1,7 @@
+name: cobalt_panel_normalisation
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::hmftools-cobalt=2.0
diff --git a/modules/local/cobalt/panel_normalisation/main.nf b/modules/local/cobalt/panel_normalisation/main.nf
new file mode 100644
index 00000000..5527ae80
--- /dev/null
+++ b/modules/local/cobalt/panel_normalisation/main.nf
@@ -0,0 +1,62 @@
+process COBALT_PANEL_NORMALISATION {
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/hmftools-cobalt:2.0--hdfd78af_0' : + 'biocontainers/hmftools-cobalt:2.0--hdfd78af_0' }" + + input: + tuple path('amber_dir.*'), path('cobalt_dir.*') + val genome_ver + path gc_profile + path target_regions_bed + + output: + path 'cobalt.region_normalisation.*.tsv', emit: cobalt_normalisation + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + mkdir -p inputs/ + + for fp in \$(find -L amber_dir.* cobalt_dir.* -type f ! -name '*.version'); do + ln -sf ../\${fp} inputs/\${fp##*/}; + done + + ( + echo SampleId; + find -L inputs/ -type f -name '*.amber.baf.tsv.gz' | sed 's#inputs/##; s#\\.amber\\..*\$##; s#\\.cobalt\\..*\$##' | sort -V | uniq; + ) > sample_ids.txt + + java -cp /usr/local/share/hmftools-cobalt-2.0-0/cobalt.jar \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + com.hartwig.hmftools.cobalt.norm.NormalisationFileBuilder \\ + ${args} \\ + -sample_id_file sample_ids.txt \\ + -amber_dir inputs/ \\ + -cobalt_dir inputs/ \\ + -ref_genome_version ${genome_ver} \\ + -gc_profile ${gc_profile} \\ + -target_regions_bed ${target_regions_bed} \\ + -output_file cobalt.region_normalisation.${genome_ver}.tsv \\ + -log_level ${params.module_log_level} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cobalt_panel_normalisation: \$(cobalt -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + touch cobalt.region_normalisation.${genome_ver}.tsv + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/cobalt/panel_normalisation/meta.yml b/modules/local/cobalt/panel_normalisation/meta.yml new file mode 100644 index 00000000..79c43110 --- /dev/null +++ b/modules/local/cobalt/panel_normalisation/meta.yml @@ -0,0 +1,41 @@ +name: cobalt_panel_normalisation +description: Count bam lines determines the read depth ratios of the supplied tumor and reference genomes +keywords: + - cobalt + - read depth ratios + - cnv +tools: + - cobalt: + description: Count bam lines determines the read depth ratios of the supplied tumor and reference genomes. 
+ homepage: https://github.com/hartwigmedical/hmftools/tree/master/cobalt + documentation: https://github.com/hartwigmedical/hmftools/tree/master/cobalt + licence: ["GPL v3"] +input: + - amber_dirs: + type: directory + description: List of AMBER output directories + - cobalt_dirs: + type: directory + description: List of COBALT output directories + - genome_ver: + type: string + description: Reference genome version + - gc_profile: + type: file + description: GC profile file + pattern: "*.{cnp}" + - target_regions_bed: + type: file + description: Target regions BED file + pattern: "*.{bed}" +output: + - cobalt_normalisation: + type: file + description: COBALT normalisation file + pattern: "*.{tsv}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/cobalt/environment.yml b/modules/local/cobalt/run/environment.yml similarity index 85% rename from modules/local/cobalt/environment.yml rename to modules/local/cobalt/run/environment.yml index 8b4e2bc1..b08a9947 100644 --- a/modules/local/cobalt/environment.yml +++ b/modules/local/cobalt/run/environment.yml @@ -1,4 +1,4 @@ -name: cobalt +name: cobalt_run channels: - conda-forge - bioconda diff --git a/modules/local/cobalt/main.nf b/modules/local/cobalt/run/main.nf similarity index 96% rename from modules/local/cobalt/main.nf rename to modules/local/cobalt/run/main.nf index c4bbc8e6..0c818ff0 100644 --- a/modules/local/cobalt/main.nf +++ b/modules/local/cobalt/run/main.nf @@ -52,7 +52,7 @@ process COBALT { cat <<-END_VERSIONS > versions.yml "${task.process}": - cobalt: \$(cobalt -version | sed -n '/^Cobalt version/ { s/^.* //p }') + cobalt_run: \$(cobalt -version | sed -n '/^Cobalt version/ { s/^.* //p }') END_VERSIONS """ diff --git a/modules/local/cobalt/meta.yml b/modules/local/cobalt/run/meta.yml similarity index 99% rename from modules/local/cobalt/meta.yml rename to modules/local/cobalt/run/meta.yml index 61812410..74ee055f 100644 --- a/modules/local/cobalt/meta.yml +++ b/modules/local/cobalt/run/meta.yml @@ -1,4 +1,4 @@ -name: cobalt +name: cobalt_run description: Count bam lines determines the read depth ratios of the supplied tumor and reference genomes keywords: - cobalt diff --git a/modules/local/isofox/panel_normalisation/environment.yml b/modules/local/isofox/panel_normalisation/environment.yml new file mode 100644 index 00000000..1d6af676 --- /dev/null +++ b/modules/local/isofox/panel_normalisation/environment.yml @@ -0,0 +1,7 @@ +name: isofox_panel_normalisation +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::hmftools-isofox=1.7.1 diff --git a/modules/local/isofox/panel_normalisation/main.nf b/modules/local/isofox/panel_normalisation/main.nf new file mode 100644 index 00000000..de940f4e --- /dev/null +++ b/modules/local/isofox/panel_normalisation/main.nf @@ -0,0 +1,60 @@ +process ISOFOX_PANEL_NORMALISATION { + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/hmftools-isofox:1.7.1--hdfd78af_0' : + 'biocontainers/hmftools-isofox:1.7.1--hdfd78af_0' }" + + input: + path 'isofox_dirs.*' + val genome_ver + path gene_ids + path gene_distribution + + output: + path 'isofox.gene_normalisation.*.csv', emit: isofox_normalisation + path 'versions.yml' , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + mkdir -p inputs/ + for fp in \$(find -L isofox_dirs.* -name '*.gene_data.csv'); do ln -sf ../\${fp} inputs/; done + + ( + echo SampleId; + find inputs/ -name '*csv' | sed 's#^.*/\\(.*\\).isf.gene_data.csv#\\1#'; + ) > sample_ids.txt + + java -cp /usr/local/share/hmftools-isofox-1.7.1-0/isofox.jar \\ + -Xmx${Math.round(task.memory.bytes * 0.95)} \\ + com.hartwig.hmftools.isofox.cohort.CohortAnalyser \\ + ${args} \\ + -sample_data_file sample_ids.txt \\ + -root_data_dir inputs/ \\ + -analyses PANEL_TPM_NORMALISATION \\ + -gene_id_file ${gene_ids} \\ + -gene_distribution_file ${gene_distribution} \\ + -output_dir ./ \\ + -log_level ${params.module_log_level} + + mv isofox.panel_gene_normalisation.csv isofox.gene_normalisation.${genome_ver}.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + isofox: \$(isofox -version | sed 's/^.* //') + END_VERSIONS + """ + + stub: + """ + touch isofox.gene_normalisation.${genome_ver}.csv + + echo -e '${task.process}:\\n stub: noversions\\n' > versions.yml + """ +} diff --git a/modules/local/isofox/panel_normalisation/meta.yml b/modules/local/isofox/panel_normalisation/meta.yml new file mode 100644 index 00000000..9db20ab5 --- /dev/null +++ b/modules/local/isofox/panel_normalisation/meta.yml @@ -0,0 +1,37 @@ +name: isofox_panel_normalisation +description: Characterise and count gene, transcript features +keywords: + - rna + - rnaseq +tools: + - isofox: + description: Characterises and counts gene, transcript features + homepage: https://github.com/hartwigmedical/hmftools/tree/master/isofox + documentation: https://github.com/hartwigmedical/hmftools/tree/master/isofox + licence: ["GPL v3"] +input: + - isofox_dirs: + type: directory + description: List of Isofox directories + - genome_ver: + type: string + description: Reference genome version + - gene_ids: + type: file + description: Isofox gene ID file (optional) + pattern: "*.{csv}" + - gene_distribution: + type: file + description: Isofox cohort gene expression file + pattern: "*.{csv}" +output: + - isofox_normalisation: + type: file + description: Isofox normalisation file + pattern: "versions.yml" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/isofox/environment.yml b/modules/local/isofox/run/environment.yml similarity index 85% rename from modules/local/isofox/environment.yml rename to modules/local/isofox/run/environment.yml index d4251c57..c14c7cd4 100644 --- a/modules/local/isofox/environment.yml +++ b/modules/local/isofox/run/environment.yml @@ -1,4 +1,4 @@ -name: isofox +name: isofox_run channels: - conda-forge - bioconda diff --git a/modules/local/isofox/main.nf b/modules/local/isofox/run/main.nf similarity index 100% rename from modules/local/isofox/main.nf rename to modules/local/isofox/run/main.nf diff --git a/modules/local/isofox/meta.yml b/modules/local/isofox/run/meta.yml similarity index 99% rename from modules/local/isofox/meta.yml rename to modules/local/isofox/run/meta.yml index 5ccf224e..716ed7d6 100644 --- 
a/modules/local/isofox/meta.yml
+++ b/modules/local/isofox/run/meta.yml
@@ -1,4 +1,4 @@
-name: isofox
+name: isofox_run
 description: Characterise and count gene, transcript features
 keywords:
   - rna
diff --git a/modules/local/pave/pon_creation/environment.yml b/modules/local/pave/pon_creation/environment.yml
new file mode 100644
index 00000000..79a69628
--- /dev/null
+++ b/modules/local/pave/pon_creation/environment.yml
@@ -0,0 +1,7 @@
+name: pave_pon_creation
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::hmftools-pave=1.7
diff --git a/modules/local/pave/pon_creation/main.nf b/modules/local/pave/pon_creation/main.nf
new file mode 100644
index 00000000..eec144a6
--- /dev/null
+++ b/modules/local/pave/pon_creation/main.nf
@@ -0,0 +1,52 @@
+process PAVE_PON_PANEL_CREATION {
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/hmftools-pave:1.7--hdfd78af_0' :
+        'biocontainers/hmftools-pave:1.7--hdfd78af_0' }"
+
+    input:
+    tuple path(sage_vcf), path(sage_tbi)
+    val genome_ver
+
+    output:
+    path 'pave.somatic_artefacts.*.tsv', emit: pave_artefacts
+    path 'versions.yml'                , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+
+    """
+    (
+        echo SampleId;
+        find ${sage_vcf} | sed 's#.sage.somatic.vcf.gz##';
+    ) > sample_ids.txt
+
+    java -cp /usr/local/share/hmftools-pave-1.7-0/pave.jar \\
+        -Xmx${Math.round(task.memory.bytes * 0.95)} \\
+        com.hartwig.hmftools.pave.pon_gen.PonBuilder \\
+        ${args} \\
+        -sample_id_file sample_ids.txt \\
+        -vcf_path '*.sage.somatic.vcf.gz' \\
+        -ref_genome_version ${genome_ver} \\
+        -output_pon_file pave.somatic_artefacts.${genome_ver}.tsv \\
+        -log_level ${params.module_log_level}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        pave: \$(pave -version | sed 's/^.* //')
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    touch pave.somatic_artefacts.${genome_ver}.tsv
+
+    echo -e '${task.process}:\\n  stub: noversions\\n' > versions.yml
+    """
+}
+
diff --git a/modules/local/pave/pon_creation/meta.yml b/modules/local/pave/pon_creation/meta.yml
new file mode 100644
index 00000000..cc20b117
--- /dev/null
+++ b/modules/local/pave/pon_creation/meta.yml
@@ -0,0 +1,38 @@
+name: pave_pon_creation
+description: Annotate small variant VCF with gene, transcript coding and protein effects
+keywords:
+  - pave
+  - annotation
+  - gene
+  - transcript
+  - protein
+  - vcf
+tools:
+  - pave:
+      description: Annotates small variant VCF with gene, transcript coding and protein effects.
+ homepage: https://github.com/hartwigmedical/hmftools/tree/master/pave + documentation: https://github.com/hartwigmedical/hmftools/tree/master/pave + licence: ["GPL v3"] +input: + - sage_vcf: + type: file + description: SAGE VCF file + pattern: "*.{vcf.gz}" + - sage_tbi: + type: file + description: SAGE VCF index file + pattern: "*.{tbi}" + - genome_ver: + type: string + description: Reference genome version +output: + - pave_artefacts: + type: file + description: PAVE artefacts file + pattern: "*.{tsv}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@scwatts" diff --git a/modules/local/sage/somatic/main.nf b/modules/local/sage/somatic/main.nf index 07667eae..dd5bc57f 100644 --- a/modules/local/sage/somatic/main.nf +++ b/modules/local/sage/somatic/main.nf @@ -46,6 +46,8 @@ process SAGE_SOMATIC { def ref_sample_count_arg = "-ref_sample_count ${reference_ids.size()}" + def coverage_bed_arg = sage_coverage_panel ? "-coverage_bed ${sage_coverage_panel}" : '' + def run_mode = Utils.getEnumFromString(params.mode, Constants.RunMode) def high_depth_mode_arg = (run_mode === Constants.RunMode.TARGETED) ? '-high_depth_mode' : '' @@ -65,7 +67,7 @@ process SAGE_SOMATIC { -ref_genome_version ${genome_ver} \\ -hotspots ${sage_known_hotspots_somatic} \\ -panel_bed ${sage_actionable_panel} \\ - -coverage_bed ${sage_coverage_panel} \\ + ${coverage_bed_arg} \\ -high_confidence_bed ${sage_highconf_regions} \\ -ensembl_data_dir ${ensembl_data_resources} \\ ${high_depth_mode_arg} \\ diff --git a/nextflow.config b/nextflow.config index 6faf1e38..96b0849e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -43,6 +43,10 @@ params { isofox_read_length = null isofox_functions = 'TRANSCRIPT_COUNTS;ALT_SPLICE_JUNCTIONS;FUSIONS;RETAINED_INTRONS' + // NOTE(SW): used only for panel resource creation + driver_gene_panel = null + target_regions_bed = null + gridss_config = null prepare_reference_only = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 82eed696..10565a75 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -48,7 +48,7 @@ "type": "string", "description": "Workflow run mode.", "fa_icon": "fas fa-diagram-project", - "pattern": "^(wgts|targeted)" + "pattern": "^(wgts|targeted|panel_resource_creation)" }, "panel": { "type": "string", @@ -169,6 +169,16 @@ "description": "Semicolon-separated list of Isofox functions to run", "default": "TRANSCRIPT_COUNTS;ALT_SPLICE_JUNCTIONS;FUSIONS;RETAINED_INTRONS", "fa_icon": "fas fa-cog" + }, + "driver_gene_panel": { + "type": "string", + "description": "User defined driver gene panel used in panel resource creation.", + "fa_icon": "fas fa-cog" + }, + "target_regions_bed": { + "type": "string", + "description": "User defined target regions BED used in panel resource creation.", + "fa_icon": "fas fa-cog" } } }, diff --git a/subworkflows/local/amber_profiling/main.nf b/subworkflows/local/amber_profiling/main.nf index 47d5048d..f09c8b74 100644 --- a/subworkflows/local/amber_profiling/main.nf +++ b/subworkflows/local/amber_profiling/main.nf @@ -18,7 +18,7 @@ workflow AMBER_PROFILING { // Reference data genome_version // channel: [mandatory] genome version heterozygous_sites // channel: [optional] /path/to/heterozygous_sites - target_region_bed // channel: [optional] /path/to/target_region_bed + target_regions_bed // channel: [optional] /path/to/target_regions_bed main: // Channel for version.yml files @@ -80,7 +80,7 @@ workflow AMBER_PROFILING { ch_amber_inputs, genome_version, 
heterozygous_sites, - target_region_bed, + target_regions_bed, ) ch_versions = ch_versions.mix(AMBER.out.versions) diff --git a/subworkflows/local/cobalt_normalisation/main.nf b/subworkflows/local/cobalt_normalisation/main.nf new file mode 100644 index 00000000..4f233cf8 --- /dev/null +++ b/subworkflows/local/cobalt_normalisation/main.nf @@ -0,0 +1,54 @@ +// +// COBALT normalisation prepares the panel-specific target region normalisation resource +// + +import Constants +import Utils + +include { COBALT_PANEL_NORMALISATION } from '../../../modules/local/cobalt/panel_normalisation/main' + +workflow COBALT_NORMALISATION { + take: + // Sample data + ch_amber // channel: [mandatory] [ meta, amber_dir ] + ch_cobalt // channel: [mandatory] [ meta, cobalt_dir ] + + // Reference data + genome_version // channel: [mandatory] genome version + gc_profile // channel: [mandatory] /path/to/gc_profile + target_region_bed // channel: [mandatory] /path/to/target_region_bed + + main: + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Create process input channel + // channel: [ [amber_dir, ...], [cobalt_dir, ...] ] + ch_cobalt_inputs = WorkflowOncoanalyser.groupByMeta( + ch_amber, + ch_cobalt, + ) + .map { meta, amber_dir, cobalt_dir -> + return [ + Utils.selectCurrentOrExisting(amber_dir, meta, Constants.INPUT.AMBER_DIR), + Utils.selectCurrentOrExisting(cobalt_dir, meta, Constants.INPUT.COBALT_DIR), + ] + } + .collect(flat: false) + .map { d -> d.transpose() } + + + // Run process + COBALT_PANEL_NORMALISATION( + ch_cobalt_inputs, + genome_version, + gc_profile, + target_region_bed, + ) + + ch_versions = ch_versions.mix(COBALT_PANEL_NORMALISATION.out.versions) + + emit: + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/cobalt_profiling/main.nf b/subworkflows/local/cobalt_profiling/main.nf index fb5a3b77..3a877288 100644 --- a/subworkflows/local/cobalt_profiling/main.nf +++ b/subworkflows/local/cobalt_profiling/main.nf @@ -5,7 +5,7 @@ import Constants import Utils -include { COBALT } from '../../../modules/local/cobalt/main' +include { COBALT } from '../../../modules/local/cobalt/run/main' workflow COBALT_PROFILING { take: diff --git a/subworkflows/local/isofox_normalisation/main.nf b/subworkflows/local/isofox_normalisation/main.nf new file mode 100644 index 00000000..3f873d47 --- /dev/null +++ b/subworkflows/local/isofox_normalisation/main.nf @@ -0,0 +1,45 @@ +// +// ISOFOX normalisation prepares panel-specific TPM normalisation resource +// + +import Constants +import Utils + +include { ISOFOX_PANEL_NORMALISATION } from '../../../modules/local/isofox/panel_normalisation/main' + +workflow ISOFOX_NORMALISATION { + take: + // Sample data + ch_isofox // channel: [mandatory] [ meta, isofox_dir ] + + // Reference data + genome_version // channel: [mandatory] genome version + isofox_gene_ids // channel: [mandatory] /path/to/gene_ids + isofox_gene_distribution // channel: [mandatory] /path/to/isofox_gene_distribution + + main: + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Create process input channel + // channel: [ [isofox_dir, ...] 
] + ch_isofox_inputs = ch_isofox + .map { meta, isofox_dir -> + return Utils.selectCurrentOrExisting(isofox_dir, meta, Constants.INPUT.ISOFOX_DIR) + } + .collect() + + // Run process + ISOFOX_PANEL_NORMALISATION( + ch_isofox_inputs, + genome_version, + isofox_gene_ids, + isofox_gene_distribution, + ) + + ch_versions = ch_versions.mix(ISOFOX_PANEL_NORMALISATION.out.versions) + + emit: + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/isofox_quantification/main.nf b/subworkflows/local/isofox_quantification/main.nf index 51d2641a..111c2078 100644 --- a/subworkflows/local/isofox_quantification/main.nf +++ b/subworkflows/local/isofox_quantification/main.nf @@ -5,7 +5,7 @@ import Constants import Utils -include { ISOFOX } from '../../../modules/local/isofox/main' +include { ISOFOX } from '../../../modules/local/isofox/run/main' workflow ISOFOX_QUANTIFICATION { take: diff --git a/subworkflows/local/pave_pon_creation/main.nf b/subworkflows/local/pave_pon_creation/main.nf new file mode 100644 index 00000000..b27400b7 --- /dev/null +++ b/subworkflows/local/pave_pon_creation/main.nf @@ -0,0 +1,46 @@ +// +// PAVE PON creation prepares the panel-specific small variant artefact resource +// + +import Constants +import Utils + +include { PAVE_PON_PANEL_CREATION } from '../../../modules/local/pave/pon_creation/main' + + +workflow PAVE_PON_CREATION { + take: + // Sample data + ch_sage_somatic_vcf // channel: [mandatory] [ meta, sage_somatic_vcf, sage_somatic_tbi ] + + // Reference data + genome_version // channel: [mandatory] genome version + + main: + // Channel for version.yml files + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Create process input channel + // channel: [ [sage_vcf, ...], [sage_tbi, ...] 
] + ch_pave_inputs = ch_sage_somatic_vcf + .map { meta, sage_vcf, sage_tbi -> + return [ + Utils.selectCurrentOrExisting(sage_vcf, meta, Constants.INPUT.SAGE_VCF_TUMOR), + Utils.selectCurrentOrExisting(sage_tbi, meta, Constants.INPUT.SAGE_VCF_TBI_TUMOR), + ] + } + .collect(flat: false) + .map { d -> d.transpose() } + + // Run process + PAVE_PON_PANEL_CREATION( + ch_pave_inputs, + genome_version, + ) + + ch_versions = ch_versions.mix(PAVE_PON_PANEL_CREATION.out.versions) + + emit: + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/sage_append/main.nf b/subworkflows/local/sage_append/main.nf index 6a8fbe46..ae53b1a5 100644 --- a/subworkflows/local/sage_append/main.nf +++ b/subworkflows/local/sage_append/main.nf @@ -22,7 +22,7 @@ workflow SAGE_APPEND { genome_dict // channel: [mandatory] /path/to/genome_dict // Params - run_germline // boolean: [mandatory] Run germline flag + enable_germline // boolean: [mandatory] Enable germline main: // Channel for version.yml files diff --git a/subworkflows/local/sage_calling/main.nf b/subworkflows/local/sage_calling/main.nf index 62ea4fad..c4d3998f 100644 --- a/subworkflows/local/sage_calling/main.nf +++ b/subworkflows/local/sage_calling/main.nf @@ -34,6 +34,7 @@ workflow SAGE_CALLING { segment_mappability // channel: [mandatory] /path/to/segment_mappability driver_gene_panel // channel: [mandatory] /path/to/driver_gene_panel ensembl_data_resources // channel: [mandatory] /path/to/ensembl_data_resources/ + enable_germline // boolean: [mandatory] Enable germline mode main: // Channel for version.yml files @@ -108,7 +109,7 @@ workflow SAGE_CALLING { def has_tumor_normal = tumor_bam && normal_bam def has_existing = Utils.hasExistingInput(meta, Constants.INPUT.SAGE_VCF_NORMAL) - runnable: has_tumor_normal && !has_existing + runnable: has_tumor_normal && !has_existing && enable_germline skip: true return meta } diff --git a/workflows/panel_resource_creation.nf b/workflows/panel_resource_creation.nf new file mode 100644 index 00000000..b336c68c --- /dev/null +++ b/workflows/panel_resource_creation.nf @@ -0,0 +1,300 @@ +import Constants +import Processes +import Utils + + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE INPUTS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ +// Parse input samplesheet +// NOTE(SW): this is done early and outside of gpars so that we can access synchronously and prior to pipeline execution +inputs = Utils.parseInput(params.input, workflow.stubRun, log) + +// Get run config +run_config = WorkflowMain.getRunConfig(params, inputs, log) + +// Validate inputs +Utils.validateInput(inputs, run_config, params, log) + +// Check input path parameters to see if they exist +def checkPathParamList = [ + params.isofox_counts, + params.isofox_gc_ratios, + params.isofox_gene_ids, + params.isofox_tpm_norm, + params.driver_gene_panel, + params.target_regions_bed, +] + +if (run_config.stages.lilac) { + if (params.genome_version.toString() == '38' && params.genome_type == 'alt' && params.containsKey('ref_data_hla_slice_bed')) { + checkPathParamList.add(params.ref_data_hla_slice_bed) + } +} + +// TODO(SW): consider whether we should check for null entries here for errors to be more informative +for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } + +// Check mandatory parameters +if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' 
} + +// Used in Isofox subworkflow only +isofox_read_length = params.isofox_read_length !== null ? params.isofox_read_length : Constants.DEFAULT_ISOFOX_READ_LENGTH_TARGETED + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' + +include { AMBER_PROFILING } from '../subworkflows/local/amber_profiling' +include { COBALT_NORMALISATION } from '../subworkflows/local/cobalt_normalisation' +include { COBALT_PROFILING } from '../subworkflows/local/cobalt_profiling' +include { ISOFOX_NORMALISATION } from '../subworkflows/local/isofox_normalisation' +include { ISOFOX_QUANTIFICATION } from '../subworkflows/local/isofox_quantification' +include { PAVE_PON_CREATION } from '../subworkflows/local/pave_pon_creation' +include { PREPARE_REFERENCE } from '../subworkflows/local/prepare_reference' +include { READ_ALIGNMENT_DNA } from '../subworkflows/local/read_alignment_dna' +include { READ_ALIGNMENT_RNA } from '../subworkflows/local/read_alignment_rna' +include { REDUX_PROCESSING } from '../subworkflows/local/redux_processing' +include { SAGE_CALLING } from '../subworkflows/local/sage_calling' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// Get absolute file paths +samplesheet = Utils.getFileObject(params.input) + +workflow PANEL_RESOURCE_CREATION { + // Create channel for versions + // channel: [ versions.yml ] + ch_versions = Channel.empty() + + // Create input channel from parsed CSV + // channel: [ meta ] + ch_inputs = Channel.fromList(inputs) + + // Set up reference data, assign more human readable variables + PREPARE_REFERENCE( + run_config, + ) + ref_data = PREPARE_REFERENCE.out + hmf_data = PREPARE_REFERENCE.out.hmf_data + + ch_versions = ch_versions.mix(PREPARE_REFERENCE.out.versions) + + // + // SUBWORKFLOW: Run read alignment to generate BAMs + // + READ_ALIGNMENT_DNA( + ch_inputs, + ref_data.genome_fasta, + ref_data.genome_bwamem2_index, + params.max_fastq_records, + params.fastp_umi_enabled, + params.fastp_umi_location, + params.fastp_umi_length, + params.fastp_umi_skip, + ) + + READ_ALIGNMENT_RNA( + ch_inputs, + ref_data.genome_star_index, + ) + + // channel: [ meta, [bam, ...], [bai, ...] ] + ch_versions = ch_versions.mix( + READ_ALIGNMENT_DNA.out.versions, + READ_ALIGNMENT_RNA.out.versions, + ) + + // channel: [ meta, [bam, ...], [bai, ...] 
]
+    ch_align_dna_tumor_out = READ_ALIGNMENT_DNA.out.dna_tumor
+    ch_align_dna_normal_out = READ_ALIGNMENT_DNA.out.dna_normal
+    ch_align_rna_tumor_out = READ_ALIGNMENT_RNA.out.rna_tumor
+
+    //
+    // SUBWORKFLOW: Run REDUX for DNA BAMs
+    //
+    REDUX_PROCESSING(
+        ch_inputs,
+        ch_align_dna_tumor_out,
+        ch_align_dna_normal_out,
+        ch_inputs.map { meta -> [meta, [], []] }, // ch_dna_donor
+        ref_data.genome_fasta,
+        ref_data.genome_version,
+        ref_data.genome_fai,
+        ref_data.genome_dict,
+        hmf_data.unmap_regions,
+        hmf_data.msi_jitter_sites,
+        params.redux_umi_enabled,
+        params.redux_umi_duplex_delim,
+    )
+
+    ch_versions = ch_versions.mix(REDUX_PROCESSING.out.versions)
+
+    // channel: [ meta, bam, bai ]
+    ch_redux_dna_tumor_out = REDUX_PROCESSING.out.dna_tumor
+    ch_redux_dna_normal_out = REDUX_PROCESSING.out.dna_normal
+
+    // channel: [ meta, dup_freq_tsv, jitter_tsv, ms_tsv, repeat_tsv ]
+    ch_redux_dna_tumor_tsv_out = REDUX_PROCESSING.out.dna_tumor_tsv
+    ch_redux_dna_normal_tsv_out = REDUX_PROCESSING.out.dna_normal_tsv
+
+    //
+    // MODULE: Run Isofox to analyse RNA data
+    //
+    isofox_counts = params.isofox_counts ? file(params.isofox_counts) : hmf_data.isofox_counts
+    isofox_gc_ratios = params.isofox_gc_ratios ? file(params.isofox_gc_ratios) : hmf_data.isofox_gc_ratios
+
+    ISOFOX_QUANTIFICATION(
+        ch_inputs,
+        ch_align_rna_tumor_out,
+        ref_data.genome_fasta,
+        ref_data.genome_version,
+        ref_data.genome_fai,
+        hmf_data.ensembl_data_resources,
+        hmf_data.known_fusion_data,
+        isofox_counts,
+        isofox_gc_ratios,
+        [], // isofox_gene_ids
+        [], // isofox_tpm_norm
+        'TRANSCRIPT_COUNTS',
+        isofox_read_length,
+    )
+
+    ch_versions = ch_versions.mix(ISOFOX_QUANTIFICATION.out.versions)
+
+    // channel: [ meta, isofox_dir ]
+    ch_isofox_out = ISOFOX_QUANTIFICATION.out.isofox_dir
+
+    //
+    // SUBWORKFLOW: Run AMBER to obtain b-allele frequencies
+    //
+    AMBER_PROFILING(
+        ch_inputs,
+        ch_redux_dna_tumor_out,
+        ch_redux_dna_normal_out,
+        ch_inputs.map { meta -> [meta, [], []] }, // ch_donor_bam
+        ref_data.genome_version,
+        hmf_data.heterozygous_sites,
+        params.target_regions_bed,
+    )
+
+    ch_versions = ch_versions.mix(AMBER_PROFILING.out.versions)
+
+    // channel: [ meta, amber_dir ]
+    ch_amber_out = AMBER_PROFILING.out.amber_dir
+
+    //
+    // SUBWORKFLOW: Run COBALT to obtain read ratios
+    //
+    COBALT_PROFILING(
+        ch_inputs,
+        ch_redux_dna_tumor_out,
+        ch_redux_dna_normal_out,
+        hmf_data.gc_profile,
+        hmf_data.diploid_bed,
+        [], // panel_target_region_normalisation
+    )
+
+    ch_versions = ch_versions.mix(COBALT_PROFILING.out.versions)
+
+    // channel: [ meta, cobalt_dir ]
+    ch_cobalt_out = COBALT_PROFILING.out.cobalt_dir
+
+    //
+    // SUBWORKFLOW: Call SNVs, MNVs, and small INDELs with SAGE
+    //
+    SAGE_CALLING(
+        ch_inputs,
+        ch_redux_dna_tumor_out,
+        ch_redux_dna_normal_out,
+        ch_inputs.map { meta -> [meta, [], []] }, // ch_donor_bam
+        ch_redux_dna_tumor_tsv_out,
+        ch_redux_dna_normal_tsv_out,
+        ch_inputs.map { meta -> [meta, [], [], []] }, // ch_donor_tsv
+        ref_data.genome_fasta,
+        ref_data.genome_version,
+        ref_data.genome_fai,
+        ref_data.genome_dict,
+        hmf_data.sage_pon,
+        hmf_data.sage_known_hotspots_somatic,
+        hmf_data.sage_known_hotspots_germline,
+        hmf_data.sage_highconf_regions,
+        hmf_data.segment_mappability,
+        params.driver_gene_panel,
+        hmf_data.ensembl_data_resources,
+        hmf_data.gnomad_resource,
+        true, // enable_germline
+    )
+
+    ch_versions = ch_versions.mix(SAGE_CALLING.out.versions)
+
+    // channel: [ meta, sage_vcf, sage_tbi ]
+    ch_sage_somatic_vcf_out = SAGE_CALLING.out.somatic_vcf
+
+    //
+    
// SUBWORKFLOW: Run COBALT normalisation + // + target_regions_bed = params.target_regions_bed ? file(params.target_regions_bed) : [] + + COBALT_NORMALISATION( + ch_amber_out, + ch_cobalt_out, + ref_data.genome_version, + hmf_data.gc_profile, + target_regions_bed, + ) + + ch_versions = ch_versions.mix(COBALT_NORMALISATION.out.versions) + + // + // SUBWORKFLOW: Run PAVE panel of normals creation + // + PAVE_PON_CREATION( + ch_sage_somatic_vcf_out, + ref_data.genome_version, + ) + + ch_versions = ch_versions.mix(PAVE_PON_CREATION.out.versions) + + // + // SUBWORKFLOW: Run Isofox TPM normalisation + // + isofox_gene_ids = params.isofox_gene_ids ? file(params.isofox_gene_ids) : [] + + ISOFOX_NORMALISATION( + ch_isofox_out, + ref_data.genome_version, + isofox_gene_ids, + hmf_data.gene_exp_distribution, + ) + + ch_versions = ch_versions.mix(ISOFOX_NORMALISATION.out.versions) + + // + // TASK: Aggregate software versions + // + softwareVersionsToYAML(ch_versions) + .collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'software_versions.yml', + sort: true, + newLine: true, + ) +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff --git a/workflows/targeted.nf b/workflows/targeted.nf index 34dfff2f..e9f4a6a1 100644 --- a/workflows/targeted.nf +++ b/workflows/targeted.nf @@ -355,6 +355,7 @@ workflow TARGETED { hmf_data.segment_mappability, panel_data.driver_gene_panel, hmf_data.ensembl_data_resources, + true, // enable_germline ) ch_versions = ch_versions.mix(SAGE_CALLING.out.versions) @@ -469,7 +470,7 @@ workflow TARGETED { ref_data.genome_version, ref_data.genome_fai, ref_data.genome_dict, - true, // run_germline + true, // enable_germline ) ch_versions = ch_versions.mix(SAGE_APPEND.out.versions) diff --git a/workflows/wgts.nf b/workflows/wgts.nf index 4d4beaf4..72bc80a0 100644 --- a/workflows/wgts.nf +++ b/workflows/wgts.nf @@ -357,6 +357,7 @@ workflow WGTS { hmf_data.segment_mappability, hmf_data.driver_gene_panel, hmf_data.ensembl_data_resources, + true, // enable_germline ) ch_versions = ch_versions.mix(SAGE_CALLING.out.versions) @@ -469,7 +470,7 @@ workflow WGTS { ref_data.genome_version, ref_data.genome_fai, ref_data.genome_dict, - run_config.stages.orange, // run_germline [run for ORANGE but not Neo] + run_config.stages.orange, // enable_germline [run for ORANGE but not Neo] ) ch_versions = ch_versions.mix(SAGE_APPEND.out.versions)
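Example invocation of the new panel resource creation run mode (a minimal sketch only: --mode panel_resource_creation, --driver_gene_panel, --target_regions_bed and --isofox_gene_ids are the options added or required by this changeset, while -profile, --genome, --panel, --input and --outdir follow the pipeline's existing CLI; the genome/panel values and all file paths below are placeholders, and --isofox_gene_ids is only needed when the samplesheet contains RNA inputs):

    nextflow run oncoanalyser/main.nf \
        -profile docker \
        --mode panel_resource_creation \
        --genome GRCh38_hmf \
        --panel my_custom_panel \
        --driver_gene_panel /path/to/DriverGenePanel.38.tsv \
        --target_regions_bed /path/to/target_regions.38.bed \
        --isofox_gene_ids /path/to/isofox_gene_ids.csv \
        --input samplesheet.csv \
        --outdir output/

With this configuration, the panel-specific resources produced by COBALT_PANEL_NORMALISATION, PAVE_PON_PANEL_CREATION and ISOFOX_PANEL_NORMALISATION are published under the outdir's panel_resources/ directory, as set up in conf/modules.config above.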