Skip to content
Snippets Groups Projects
Commit 9cd7b92a authored by Jules Sabban's avatar Jules Sabban
Browse files

Merge branch 'dev_Jules' into 'master'

V1.24.0 > V1.25.0

See merge request !25
parents 67168dc8 a954a495
No related branches found
Tags V1.25.0
1 merge request!25V1.24.0 > V1.25.0
......@@ -6,7 +6,7 @@
==================================================
----------------------------------------------------------------------------------
NextFlow Run Name : $wfRunName
NextFlow Run Name : $runName
Demultiplexing is over, the analysis started at $dateStart.
......
----------------------------------------------------------------------------------
==================================================
------------------------------- get-nf workflow ----------------------------
S H O R T R E A D S - N F P I P E L I N E
V$version
==================================================
----------------------------------------------------------------------------------
NextFlow Run Name : $runName
Project : $project
An error occurred during the analysis pipeline run.
The pipeline did not complete! Some analyses or files may not be on NGL-Bi.
The command used to launch the workflow was as follows :
$commandLine
The pipeline was launched from :
$launchDir
The error message is :
$errorMessage
$errorReport
---
$name
$homePage
......@@ -341,7 +341,7 @@ process {
module = toolsModuleHash['SEQTK']
}
withName: ADD_MULTIQC {
withName: ADD_REPORT {
errorStrategy = 'ignore'
}
......
......@@ -29,5 +29,5 @@ manifest {
description = "Workflow for Illumina data quality control"
mainScript = 'main.nf'
nextflowVersion = '>=0.32.0'
version = '1.24.0'
version = '1.25.0'
}
\ No newline at end of file
......@@ -88,9 +88,10 @@ _Default_ : null
### Optional parameters
Some other parameters are only for traceability and have no effect on the analysis. They are:
- **`--outdir_prefix`** [str]
This value will be a part of the name of the output directory. The real output directory is constructed as follow : `${inputdir}/nextflow/${outdir_prefix}_${nf_uniqueness}`. Where `${nf_uniqueness}` is the current date.
_Default_ : `${project}_${run_name}`
- **`--outdir`** [str]
Path to the output directory. The real output directory is constructed as follows: `${params.inputdir}/nextflow/${params.project}/${params.run_name}_${nf_uniqueness}` if all these values are set, or `${launchDir}/results_${nf_uniqueness}` otherwise, where `${nf_uniqueness}` is the current date.
This parameter is not intended to be set manually.
_Default_ : `${launchDir}/results_${nf_uniqueness}`
- **`--machine_id`** [str]
The machine identifier, such as `A00318` or `AV232702`.
......@@ -120,10 +121,6 @@ _Default_ : null
The nG6 like description of the analysis.
_Default_ : null
- **`--merge_lanes`** [bool]
Merge fastq over the two lanes in CORE pipeline.
_Default_ : false
### Skipping parameters
Some flags are available to skip parts of the pipeline.
- **`--no_subset`** [bool]
......
/*
* SHORT READS PIPELINE FUNCTIONS
*/
// ----------------------------------
// Includes
// ----------------------------------
import java.text.SimpleDateFormat
include {
helpMessage;
printOptions;
paramsValidation;
customMailSend;
sendFinalMail;
get_workflow_info;
createSummary;
} from "${params.shared_modules}/lib/utils.groovy"
// ----------------------------------
// Variables Definition
// ----------------------------------
// Timestamp layout used for the dates written into the start-of-run e-mail fields
SimpleDateFormat format = new SimpleDateFormat("dd/MM/yyyy HH:mm:ss")

// Pipeline identity, taken from the '/'-separated parts of the manifest name;
// the techno is the second '-'-separated part of the project part
pipeline_info    = workflow.manifest.name.split('/')
pipeline_group   = pipeline_info[0]
pipeline_project = pipeline_info[1]
pipeline_techno  = pipeline_project.split('-')[1]

// Main recipient: --email when it is set, the bioinfo team address when it is null
email_main = (params.email == null) ? params.email_bioinfo : params.email

// Every address the pipeline may write to, keyed by role
emails_map = [
    main   : email_main,
    bioinfo: params.email_bioinfo,
    labo   : params.email_labo,
    failure: params.email_on_fail,
    dev    : params.email_dev
]
// Catalogue of the pipeline parameters.
// Each key is a parameter name; each value is a map with three entries:
//   default  : the default value recorded for the parameter
//   optional : false when the parameter is flagged as mandatory, true otherwise
//   help     : the description text of the parameter
// This map is handed to helpMessage(), paramsValidation() and printOptions() in the
// "Auto Execution" section of this file.
// NOTE(review): the 'default' values are literals kept in this map (e.g. the samplesheet default
// is a single-quoted string, so it is not interpolated here) — presumably they are only
// displayed/checked by the helpers and are not the values actually applied to params; confirm
// against utils.groovy.
pipeline_options_map = [
inputdir: [default: '', optional: false, help: 'Path to the input directory [demultiplexing output directory]'],
samplesheet: [default: '$params.inputdir/SampleSheet.csv', optional: true, help: 'Path to IEM SampleSheet. Only for Illumina analysis'],
outdir: [default: launchDir + '/results' , optional: true, help: 'Path where results are stored. Do not modify it, its value is automatically set'],
project: [default: '', optional: false, help: 'Project\'s name'],
project_hash: [default: '', optional: true, help: 'Project\'s hash ID for NGL-Bi'],
select_samples: [default: '', optional: true, help: 'Comma separated list of samples name. Each sample in this list must match exactly the beginning of the fastq file name in the project directory. If this option is empty, the workflow takes as input every fastq files in the director'],
sequencer: [default: '', optional: false, help: 'Name of the sequencer [NovaSeq600, AVITI2, ...]'],
machine_id: [default: '', optional: true, help: 'Serial Number or unique identifier of the sequencer'],
fc_id: [default: '', optional: true, help: '[metadata] Identifier of the Flowcell used'], // possibly unused — to be confirmed
fc_type: [default: '', optional: true, help: '[metadata] Type of the Flowcell'], // possibly unused — to be confirmed
lane: [default: '', optional: false, help: 'Number of the lane'],
data_nature: [default: '', optional: true, help: 'ENA library strategy [AMPLICON, 16S, WGS, ...]. It will be use to select analysis to perform'],
species: [default: '', optional: true, help: '[metadata] Scientific name of the species'],
is_multiplex: [default: false, optional: true, help: 'true if several samples were sequenced on the same lane'],
run_name: [default: '', optional: false, help: 'Human readable identifier of the analysis'],
run_date: [default: '', optional: false, help: 'Use for file renamming. Format : DD/MM/YYYY or YYYYMMDD'],
description: [default: '', optional: true, help: 'NG6 description of the run'], // possibly unused — to be confirmed
fastp_n_reads: [default: 100000000, optional: true, help: 'Number of reads to process using fastp'],
no_subset: [default: false, optional: true, help: 'Is a subset of reads in fastq must be done before QC'],
large_sampling_threshold: [default: 200, optional: true, help: 'Number of samples from which we consider that the sequencing is highly multiplexed'],
miseq_subset_seq: [default: '50000', optional: true, help: 'Number of reads to subsampling on MiSeq run'],
nova_subset_seq: [default: '50000000', optional: true, help: 'Number of reads to subsampling on NovaSeq run'],
large_indexing_nova_subset_seq: [default: '500000', optional: true, help: 'Number of reads to subsampling on highly multiplexed NovaSeq run'],
aviti_subset_seq: [default: '50000000', optional: true, help: 'Number of reads to subsampling on Aviti run'],
//depth: [default: 0, optional: true, help: 'In subsampling, number of X to keep'], // Not used for the moment
reference_genome: [default: '', optional: true, help: 'Path to the genome FASTA file'],
reference_transcriptome: [default: '', optional: true, help: 'Path to the transcriptome FASTA file'],
make_star_index: [default: false, optional: true, help: 'Is the FASTA file must be indexed by star'],
sortmerna_db_path: [default: '/work/project/PlaGe/sortemerna_db', optional: true, help: 'Path to the directory where the sortmerna databases are'],
min_overlap: [default: 20, optional: true, help: 'For join pair step, minimum overlapping value [-m Flash option]'],
max_overlap: [default: 55, optional: true, help: 'For join pair step, maximum overlapping value [-M Flash option]'],
max_mismatch_density: [default: 0.1, optional: true, help: 'For join pair step, maximum mismatch density [-x Flash option]'],
assignation_databank: [default: '/save/ng6/TODO/HiSeqIndexedGenomes/new_struct/ncbi_16S/240319_release/16SMicrobial', optional: true, help: 'Path to 16S Microbial database'],
blast_outfmt: [default: 7, optional: true, help: 'Format of output for BLASTn'],
blast_max_target: [default: 10, optional: true, help: 'Maximum number of hits from BLASTn'],
single_cell: [default: false, optional: true, help: 'true is library was build with 10X kit'],
puc19: [default: '', optional: true, help: 'Path to the puC19 fasta for MethylSeq analysis'],
lambda: [default: '', optional: true, help: 'Path to the lambda fasta for MethylSeq analysis'],
ngl_bi_client: [default: '/home/sbsuser/save/scripts-ngs/shared_modules_Current', optional: true, help: 'Path to NGL-Bi_Client sources'],
insert_to_ngl: [default: true, optional: false, help: 'Should the data be stored on NGL ?'],
bi_run_code: [default: '', optional: true, help: 'NGL-Bi Run Code'],
sq_xp_code: [default: '', optional: true, help: 'NGL-SQ Experiment Code'],
ng6_name: [default: true, optional: false, help: 'Is files must be nammed using ng6 standard'],
shared_modules: [default: '/home/sbsuser/save/scripts-ngs/shared_modules_Current', optional: false, help: 'Path to Shared_modules sources'],
max_memory: [default: '500.GB', optional: false, help: 'Maximum amount of memory that can be used to launch a sbatch job'],
max_time: [default: '90.d', optional: false, help: 'Maximum amount of time that can be used to launch a sbatch job'],
max_cpus: [default: '48', optional: false, help: 'Maximum number of CPUs that can be used to launch a sbatch job'],
default_label: [default: 'Pipeline', optional: false, help: 'Default label for MultiQC'],
read_stats_label: [default: 'ReadStats', optional: false, help: 'Read Stats label for MultiQC'],
duplicats_label: [default: 'Duplicats', optional: false, help: 'Duplicats label for MultiQC'],
contamination_search_label: [default: 'ContaminationSearch', optional: false, help: 'Contamination Search label for MultiQC'],
join_pairs_label: [default: 'JoinPairs', optional: false, help: 'Join Pairs label for MultiQC'],
alignment_stats_label: [default: 'AlignmentStats', optional: false, help: 'Alignment Stats label for MultiQC'],
cluster_options: [default: '', optional: true, help: 'Sbatch options to pass to each job'],
is_dev_mode: [default: false, optional: false, help: 'Preset of some options'],
DTM_mode: [default: false, optional: false, help: 'Enable some process for DTM analysis'],
email: [default: '', optional: false, help: 'Main email adress for execution pipeline notifications'],
email_on_fail: [default: 'jules.sabban@inrae.fr', optional: false, help: 'Email adress to notify execution pipeline errors'],
email_bioinfo: [default: 'get-plage.bioinfo@genotoul.fr', optional: true, help: 'Bioinformatics team email adress for execution pipeline notifications'],
email_labo: [default: '', optional: true, help: 'Biologists team email adress for execution pipeline notifications'],
host: [default: 'genobioinfo', optional: false, help: 'Name of the HPC where the pipeline is executed. Must have special config file in conf folder'],
skip_core_illumina: [default: false, optional: false, help: 'To skip Illumina subworkflow'],
skip_core_element: [default: false, optional: false, help: 'To skip Elembio subworkflow'],
help: [default: false, optional: true, help: 'To print help message']
]
// Values handed to get_workflow_info() to build the fields of the start-of-run e-mail.
// The subject suffix is the last '/'-separated component of the input directory.
def begin_fields_input = [
    subject_prefix: "[${params.sequencer}]",
    subject_sufix : params.inputdir.split('/')[-1],
    // version: workflow.manifest.version,
    // wfRunName: workflow.runName,
    run_name      : params.run_name,
    // NGL codes fall back to an empty string when they are not provided
    runNGLBi      : params.bi_run_code ?: '',
    xpNGLSq       : params.sq_xp_code ?: '',
    project       : params.project,
    sequencer     : params.sequencer,
    flowcell      : params.fc_id,
    lane          : params.lane,
    data_nature   : params.data_nature,
    directory     : params.inputdir,
    // commandLine: workflow.commandLine,
    dateStart     : format.format(new Date()),
]
begin_email_fields = get_workflow_info(begin_fields_input)
// ----------------------------------
// Functions Definition
// ----------------------------------
/**
 * Builds the fields of the end-of-run e-mail.
 *
 * @param formatted_date completion date, already formatted as text
 * @param summary        run summary; an empty map is used when it is null/empty
 * @return the result of get_workflow_info() called with the fields assembled here
 */
def create_final_email_fields(formatted_date, summary) {
    def fields = [:]
    fields.subject_prefix = "[${params.sequencer}]"
    // Last '/'-separated component of the input directory
    fields.subject_sufix  = params.inputdir.split('/')[-1]
    // Unset values are replaced by an empty string
    fields.project        = params.project ?: ''
    fields.run            = params.run_name ?: ''
    fields.runNGLBi       = params.bi_run_code ?: ''
    fields.xpNGLSq        = params.sq_xp_code ?: ''
    fields.dateComplete   = formatted_date
    fields.summary        = summary ?: [:]
    return get_workflow_info(fields)
}
/**
 * Builds the fields of the error e-mail.
 *
 * @param formatted_date accepted for symmetry with create_final_email_fields();
 *                       it is not used in the body
 * @return the result of get_workflow_info() called with the fields assembled here
 */
def create_error_email_fields(formatted_date) {
    // Last '/'-separated component of the input directory, flagged as an error
    def error_subject_sufix = params.inputdir.split('/')[-1] + " : ERROR"
    def fields = [
        subject_prefix: "[${params.sequencer}]",
        subject_sufix : error_subject_sufix,
        project       : params.project,
    ]
    return get_workflow_info(fields)
}
/**
 * Registers the workflow.onComplete and workflow.onError handlers: final / error e-mail,
 * work directory clean-up and end-of-run log messages.
 *
 * Recipients are resolved inside the handlers, not when this function is called:
 * this function runs while the script is being evaluated, i.e. before the run outcome
 * (workflow.success) is known, so deciding there would always pick the failure address.
 *
 * @param summary run summary forwarded to create_final_email_fields()
 */
def endOfPipelineEvents(summary) {
    SimpleDateFormat format = new SimpleDateFormat("dd/MM/yyyy HH:mm:ss")
    // Shared by both handlers and passed to / returned by sendFinalMail()
    // (presumably to avoid sending the end-of-run mail twice — confirm in utils.groovy)
    end_mail_sent = false

    // Picks the recipient and CC according to the outcome of the run
    def resolveRecipients = { boolean failed ->
        def address = emails_map.main
        def cc = emails_map.bioinfo
        if (emails_map.main && emails_map.failure && failed) {
            // Failures go to the dedicated address only
            address = emails_map.failure
            cc = ''
        }
        if (params.is_dev_mode) {
            // Dev mode: only the developer is notified, nobody in copy
            address = emails_map.dev
            cc = ''
        }
        return [address: address, cc: cc]
    }

    workflow.onComplete {
        log.info "Sending final e-mail"
        def recipients = resolveRecipients(!workflow.success)
        def template_final = "$baseDir/assets/final_email_template.txt"
        def final_email_fields = create_final_email_fields(format.format(new Date()), summary)
        end_mail_sent = sendFinalMail(template_final, final_email_fields, recipients.address, recipients.cc, end_mail_sent)

        if (workflow.success) {
            // Remove the work directory if the pipeline is successful (kept for 'dev' profiles)
            if (!workflow.profile.contains('dev')) {
                println "Pipeline terminé avec succès => suppression du workdir : $workflow.workDir"
                workflow.workDir.deleteDir()
            }
            if (workflow.stats.ignoredCount > 0) {
                log.warn "Warning, pipeline completed, but with errored process(es) "
                log.warn "Number of ignored errored process(es) : ${workflow.stats.ignoredCount} "
                log.warn "Number of successfully ran process(es) : ${workflow.stats.succeedCount} "
            }
            log.info "[$workflow.manifest.name] Pipeline completed successfully at $workflow.complete"
        } else {
            log.error "[$workflow.manifest.name] Pipeline completed with errors at $workflow.complete"
        }
    }

    workflow.onError {
        // This handler only runs on failure, so the failure recipients always apply
        def recipients = resolveRecipients(true)
        def error_email_fields = create_error_email_fields(format.format(new Date()))
        def template_error = "$baseDir/assets/error_email_template.txt"
        log.info "Sending error e-mail"
        end_mail_sent = sendFinalMail(template_error, error_email_fields, recipients.address, recipients.cc, end_mail_sent)
    }
}
/**
 * Builds the run summary stamped with the current date and time.
 *
 * @return the result of createSummary() called with "now" formatted as dd/MM/yyyy HH:mm:ss
 */
def getSummary() {
    def timestamp = new SimpleDateFormat("dd/MM/yyyy HH:mm:ss").format(new Date())
    return createSummary(timestamp)
}
// ----------------------------------
// Auto Execution
// ----------------------------------
// Show help message and stop; asking for help is a normal termination (status 0)
if (params.help) {
    helpMessage(pipeline_options_map, 'S H O R T R E A D S - N F P I P E L I N E')
    exit 0
}

// Parameter validation: paramsValidation() is true when at least one mandatory
// parameter is missing
if (paramsValidation(pipeline_options_map)) {
    log.error "\t-> Il manque au moins un paramètre obligatoire."
    // Abort with a non-zero status so that callers can detect the failed launch
    exit 1
} else {
    log.info "\t-> OK"
    printOptions(pipeline_options_map)
}

// Email on start: sent to the main address, with the lab and bioinfo addresses as second list
// NOTE(review): the last two arguments (!workflow.resume, false) are flags interpreted by
// customMailSend() — presumably "send only when the run is not resumed"; confirm in utils.groovy
customMailSend(
    "$baseDir/assets/begin_template.txt",
    begin_email_fields,
    emails_map.main,
    "${emails_map.labo},${emails_map.bioinfo}",
    !workflow.resume,
    false
)
......@@ -20,11 +20,6 @@ This script is based on :
- the Curie institute template https://github.com/bioinfo-pf-curie/geniac-template/
*/
import java.text.SimpleDateFormat
SimpleDateFormat format = new SimpleDateFormat("dd/MM/yyyy HH:mm:ss")
include {createSummary} from "$baseDir/conf/functions.config"
params.summary = createSummary(format.format(new Date()))
params.summary.collect{k,v -> println "$k : $v"}
/*
......
......@@ -4,15 +4,14 @@
params {
// ----- GLOBAL PARAMETERS -----
inputdir = ""
outdir_prefix = ""
project = ""
project_hash = ""
select_samples = ""
sequencer = ""
machine_id = ""
fc_id = ""
fc_type = ""
lane = ""
merge_lanes = false
data_nature = "" // AMPLICON, 16S, WGS, TRANSCRIPTOMIC, ... //
species = ""
......@@ -34,8 +33,6 @@ params {
aviti_subset_seq = "50000000" // in reads
large_indexing_nova_subset_seq = "500000" // in reads
coverage = 0 // coverage in X
len_r1 = 150
len_r2 = 150
// DNA / RNA params
reference_genome = ""
......@@ -105,7 +102,8 @@ params {
//=========================================
import java.nio.file.Files
import java.nio.file.Paths
def n_read_files = Files.walk(Paths.get(params.inputdir.toString()), 3)
def samples_max_depth = params.sequencer == 'AVITI' ? 4 : 3
def n_read_files = Files.walk(Paths.get(params.inputdir.toString()), samples_max_depth)
.filter(Files::isRegularFile)
.filter(p -> p.getFileName().toString().matches(".*_L00${params.lane}_R[12](_.*)?\\.fastq\\.gz"))
.count()
......@@ -116,16 +114,20 @@ System.out.println "\nNombre d'échantillons trouvés sur cette lane : $n_sample
def factor = java.lang.Math.round(0.1 * n_samples)
params.resource_factor = factor > 1 ? factor : 1
// Dynamics params, depend on others
import java.text.SimpleDateFormat
SimpleDateFormat uniqueness_format = new SimpleDateFormat("yyyyMMddHHmmss")
nf_uniqueness = uniqueness_format.format(new Date())
if (params.inputdir != '' && params.project != '' && params.run_name != '') {
outdir_prefix = "${params.inputdir}/nextflow/${params.project}/${params.run_name}"
} else {
outdir_prefix = "${launchDir}/results"
}
params.outdir = "${outdir_prefix}_${nf_uniqueness}"
params {
// Dynamics params, depend on others
samplesheet = inputdir.toString() + "/SampleSheet.csv"
nf_uniqueness = uniqueness_format.format(new Date())
outdir_prefix = outdir_prefix ?: project + "_" + run_name
outdir = inputdir + "/nextflow/" + run_name + "/" + outdir_prefix + "_" + nf_uniqueness
params.samplesheet = params.inputdir ? params.inputdir+ "/SampleSheet.csv" : ''
params {
subset_seq = miseq_subset_seq
if ( sequencer =~ /NovaSeq.*/ || sequencer =~ /AVITI.*/ ) {
if ( n_samples >= large_sampling_threshold ) {
......@@ -133,7 +135,7 @@ params {
}
subset_seq = nova_subset_seq
}
if ( DTM_mode ) {
if ( params.DTM_mode == true) {
subset_seq = "1000000000"
}
}
......
......@@ -2,29 +2,9 @@
nextflow.enable.dsl = 2
// Import custom functions
include { helpMessage;
createSummary;
sendBeginMail;
sendFinalMail;
softwareVersionsToYAML
} from "$baseDir/conf/functions.config"
// Show help message
if (params.help) {
helpMessage()
exit 0
}
// Print every non-void parameters
System.out.println "\nAffichage de tous les paramètres non vides :"
params.each{entry ->
if (entry.value != "") {
println "$entry.key:\t $entry.value"
}
}
System.out.println "\n"
include { getSummary;
endOfPipelineEvents } from "${baseDir}/lib/pipeline.groovy"
include { softwareVersionsToYAML } from "${params.shared_modules}/lib/utils.groovy"
// -------------------------------------------------
// CHANNELS
// -------------------------------------------------
......@@ -86,7 +66,8 @@ mismatchNumber = params.sequencer == 'MiSeq'? 0 : 1
//banksForConta = params.addBankForConta ? params.genomesRefForConta << params.addBankForConta : params.genomesRefForConta
createDir = file(params.outdir).mkdir()
params.summary = getSummary()
params.summary.collect{k,v -> println "$k : $v"}
// -------------------------------------------------
// INCLUDES
// -------------------------------------------------
......@@ -107,14 +88,6 @@ include { MULTIQC } from "${params.shared_modules}/multiqc.nf"
include { GCBIAS as GC_BIAS } from "${params.shared_modules}/gcbias.nf"
include { workflow_summary as WORKFLOW_SUMMARY } from "${params.shared_modules}/workflow_summary.nf"
// -------------------------------------------------
// EMAIL ON START
// -------------------------------------------------
import java.text.SimpleDateFormat
SimpleDateFormat format = new SimpleDateFormat("dd/MM/yyyy HH:mm:ss")
sendBeginMail(format.format(new Date()))
// -------------------------------------------------
// WORKFLOW
// -------------------------------------------------
......@@ -231,19 +204,4 @@ workflow SHORT_READS_QC {
}
}
// -------------------------------------------------
// EMAIL ON COMPLETE
// -------------------------------------------------
def end_mail_sent = false
workflow.onComplete {
end_mail_sent = sendFinalMail(format.format(new Date()), params.summary)
// remove work directory if pipeline is successful
if (workflow.success && !( params.is_dev_mode || params.DTM_mode)) {
println "Pipeline terminé avec succès => suppression du workdir : $workflow.workDir"
exec:
workflow.workDir.deleteDir()
}
}
workflow.onError { }
\ No newline at end of file
endOfPipelineEvents(params.summary)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment