From 42885df2252970ae147eb88be536490b1d890e21 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Mon, 27 May 2024 13:41:56 +0200
Subject: [PATCH 01/19] Added RagTag param config

---
 Snakefile                   | 4 +++-
 config.yaml                 | 3 +++
 scripts/ragtagChromInfer.sh | 4 +++-
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/Snakefile b/Snakefile
index 81cf643..18daaa9 100644
--- a/Snakefile
+++ b/Snakefile
@@ -101,7 +101,8 @@ rule ragtag_scaffolding:
     threads: 8
     retries: 1
     params:
-        apppath=config["app.path"]
+        apppath=config["app.path"],
+        rtcmd=config["rt.cmd"]
     shell:
         """
         bash scripts/ragtagChromInfer.sh \
@@ -110,6 +111,7 @@ rule ragtag_scaffolding:
             -t {threads} \
             -r {input.ref} \
             -q {input.fa} \
+            -c {params.rtcmd} \
             -o {output}
 
         if [ ! -s {output} ]; then
diff --git a/config.yaml b/config.yaml
index 7780f37..917fc2c 100644
--- a/config.yaml
+++ b/config.yaml
@@ -7,6 +7,9 @@ reference: 'CHM13.hap1.fa.gz'
 app.path: '<path>'
 
 # Core parameters (WIP)
+# RagTag parameters
+rt.cmd: '-s 1'
+
 # Wfmash alignement parameters : leave to None if unknown
 wfmash.segment_length: 5000
 wfmash.mapping_id: 90
diff --git a/scripts/ragtagChromInfer.sh b/scripts/ragtagChromInfer.sh
index c68f0d1..fe8383f 100755
--- a/scripts/ragtagChromInfer.sh
+++ b/scripts/ragtagChromInfer.sh
@@ -10,13 +10,14 @@ inputquery=""   # Query fasta
 output=""       # Output fasta
 
 ## Getting arguments
-while getopts "d:a:t:r:q:o:" option; do
+while getopts "d:a:t:r:q:c:o:" option; do
     case "$option" in
         d) tmpdir="$OPTARG";;
         a) appdir="$OPTARG";;
         t) threads="$OPTARG";;
         r) inputref="$OPTARG";;
         q) inputquery="$OPTARG";;
+        c) rtcommand="$OPTARG";;
         o) output="$OPTARG";;
         \?) echo "Usage: $0 [-d tmpdir] [-a apptainer dir] [-t threads] [-r inputref] [-q inputquery] [-o output fasta] [-n pangenome name]" >&2
             exit 1;;
@@ -33,6 +34,7 @@ mkdir -p $tmpdir
 
 # Running ragtag scaffold
 apptainer run $appdir/pan1c-env.sif ragtag.py scaffold \
+    $rtcommand \
     -t $threads -o $tmpdir $inputref $inputquery
 
 # Renaming sequence according to naming scheme
-- 
GitLab


From 46c330885ba2f08094668eb503ef25e1a0fe84a8 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Mon, 27 May 2024 14:18:02 +0200
Subject: [PATCH 02/19] Update ragtagChromInfer.sh: quote $rtcommand in the ragtag call

---
 scripts/ragtagChromInfer.sh | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/scripts/ragtagChromInfer.sh b/scripts/ragtagChromInfer.sh
index fe8383f..c52476d 100755
--- a/scripts/ragtagChromInfer.sh
+++ b/scripts/ragtagChromInfer.sh
@@ -34,8 +34,7 @@ mkdir -p $tmpdir
 
 # Running ragtag scaffold
 apptainer run $appdir/pan1c-env.sif ragtag.py scaffold \
-    $rtcommand \
-    -t $threads -o $tmpdir $inputref $inputquery
+    "$rtcommand" -t $threads -o $tmpdir $inputref $inputquery
 
 # Renaming sequence according to naming scheme
 grep ">${ref}#chr*" -A1 $tmpdir/ragtag.scaffold.fasta | \
-- 
GitLab


From 4522bd9b43b38b0abc3895b94b2de573cf853fc9 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Mon, 27 May 2024 14:22:23 +0200
Subject: [PATCH 03/19] Update Snakefile: quote rt.cmd passed to ragtagChromInfer.sh

---
 Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 18daaa9..14ce9eb 100644
--- a/Snakefile
+++ b/Snakefile
@@ -111,7 +111,7 @@ rule ragtag_scaffolding:
             -t {threads} \
             -r {input.ref} \
             -q {input.fa} \
-            -c {params.rtcmd} \
+            -c "{params.rtcmd}" \
             -o {output}
 
         if [ ! -s {output} ]; then
-- 
GitLab


From aaefc32389ea50c9febba0bf57f13b92b6829ef9 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Tue, 4 Jun 2024 10:20:44 +0200
Subject: [PATCH 04/19] Update config.yaml

---
 config.yaml | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/config.yaml b/config.yaml
index 917fc2c..4ddca16 100644
--- a/config.yaml
+++ b/config.yaml
@@ -6,11 +6,8 @@ reference: 'CHM13.hap1.fa.gz'
 # Directory of apptainer images (downloaded with getApps.sh)
 app.path: '<path>'
 
-# Core parameters (WIP)
-# RagTag parameters
-rt.cmd: '-s 1'
-
-# Wfmash alignement parameters : leave to None if unknown
+# Core parameters
+# Wfmash alignment parameters:
 wfmash.segment_length: 5000
 wfmash.mapping_id: 90
 wfmash.secondary: '-k 19 -H 0.001 -X'
@@ -31,6 +28,6 @@ get_PAV: 'False'
 get_allASM_SyRI: 'False' # All vs all
 get_ASMs_SyRI: 'False' # Haplotype vs Reference
 
-# Debug options
+# Debug options (currently have no effect)
 debug: 'False'
 
-- 
GitLab


From 0f755fc75d0dd6732b83b115b53e0bb2cc208320 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Tue, 4 Jun 2024 10:21:03 +0200
Subject: [PATCH 05/19] Moved app path to variable for simplicity

---
 runSnakemake.sh | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/runSnakemake.sh b/runSnakemake.sh
index 4206dc0..37b8d03 100755
--- a/runSnakemake.sh
+++ b/runSnakemake.sh
@@ -9,8 +9,10 @@
 module purge
 module load containers/Apptainer/1.2.5
 
+apppath=<path_to_pan1c-box>
+
 # Creating DAG
-apptainer run <path_to_pan1c-box>/pan1c-box.sif snakemake -c $(nproc) --dag | dot -Tsvg > workflow.svg
+apptainer run $apppath/pan1c-box.sif snakemake -c $(nproc) --dag | dot -Tsvg > workflow.svg
 # Running the workflow
-/usr/bin/time -v -o whole.run.time.log apptainer run <path_to_pan1c-box>/pan1c-box.sif snakemake -c $(nproc)
+/usr/bin/time -v -o whole.run.time.log apptainer run $apppath/pan1c-box.sif snakemake -c $(nproc)
 
-- 
GitLab


From a445f3678951ffb27f3561b3b579f593aef1a7ef Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Tue, 4 Jun 2024 10:22:26 +0200
Subject: [PATCH 06/19] Changed some output names

Added pangenome name to pggb inputs, chromosome graphs, ...
---
 Snakefile | 64 +++++++++++++++++++++++++++----------------------------
 1 file changed, 31 insertions(+), 33 deletions(-)

diff --git a/Snakefile b/Snakefile
index 14ce9eb..bed2046 100644
--- a/Snakefile
+++ b/Snakefile
@@ -36,10 +36,10 @@ with gzip.open("data/haplotypes/"+config['reference'], "r") as handle:
 def which_analysis():
     # Creating a list with default analysis steps (to prevent the function from returning an empty list)
     analysis_inputs = [     
-        "output/stats/pan1c.pggb."+config['name']+".core.stats.tsv", # core stats
+        "output/stats/pan1c."+config['name']+".core.stats.tsv", # core stats
         expand("output/panacus.reports/{chromosome}.histgrowth.html", chromosome=CHRLIST), # panacus histgrowth 
         expand("output/chrGraphs.figs/{chromosome}.1Dviz.png", chromosome=CHRLIST), # visualizations from odgi on chromosome graphs
-        "output/stats/pan1c.pggb."+config['name']+".chrGraph.general.stats.tsv" # chromosomes graph statistics
+        "output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv" # chromosomes graph statistics
     ]
     
     # Optionals analysis steps
@@ -61,8 +61,8 @@ Rules
 # Main target rule
 rule all:
     input:
-        "output/pan1c.pggb."+config['name']+".gfa", # Final graph (main output)
-        "output/pan1c.pggb."+config['name']+".gfa.metadata", # Metadata for the final (also in top of gfa files as # line)
+        "output/pan1c."+config['name']+".gfa", # Final graph (main output)
+        "output/pan1c."+config['name']+".gfa.metadata", # Metadata for the final (also in top of gfa files as # line)
         which_analysis()
 
 """
@@ -101,8 +101,7 @@ rule ragtag_scaffolding:
     threads: 8
     retries: 1
     params:
-        apppath=config["app.path"],
-        rtcmd=config["rt.cmd"]
+        apppath=config["app.path"]
     shell:
         """
         bash scripts/ragtagChromInfer.sh \
@@ -111,7 +110,6 @@ rule ragtag_scaffolding:
             -t {threads} \
             -r {input.ref} \
             -q {input.fa} \
-            -c "{params.rtcmd}" \
             -o {output}
 
         if [ ! -s {output} ]; then
@@ -125,7 +123,7 @@ rule clustering:
     input:
         expand('data/hap.ragtagged/{haplotype}.ragtagged.fa.gz', haplotype=SAMPLES)
     output:
-        expand('data/chrInputs/{chromosome}.fa.gz', chromosome=CHRLIST)
+        expand('data/chrInputs/'+config['name']+'.{chromosome}.fa.gz', chromosome=CHRLIST)
     threads: workflow.cores
     params:
         apppath=config["app.path"]
@@ -196,7 +194,7 @@ Core section : Running PGGB
 """
 rule create_pggb_input_syri_fig:
     input:
-        fasta='data/chrInputs/{chromosome}.fa.gz'
+        fasta='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz'
     output:
         fig="output/chrInput.syri.figs/{chromosome}."+config['name']+".asm.syri.png",
         wrkdir=directory('data/chrInput/syri/{chromosome}')
@@ -235,10 +233,10 @@ rule create_pggb_input_syri_fig:
 # rule pggb_on_chr:
 #     # Run pggb on a specific chromosome
 #     input:
-#         fa="data/chrInputs/{chromosome}.fa.gz",
+#         fa='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz',
 #         gzi="data/chrInputs/{chromosome}.fa.gz.gzi"
 #     output:
-#         gfa="data/chrGraphs/{chromosome}.gfa"
+#         gfa='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
 #     threads: 16
 #     params: 
 #         pggb=config['pggb.params'],
@@ -260,8 +258,8 @@ rule create_pggb_input_syri_fig:
 rule wfmash_on_chr:
     # Run wfmash on a specific chromosome input
     input:
-        fa="data/chrInputs/{chromosome}.fa.gz",
-        fai="data/chrInputs/{chromosome}.fa.gz.fai"
+        fa='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz',
+        fai='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz.fai'
     output:
         mapping="data/chrGraphs/{chromosome}/{chromosome}.wfmash.mapping.paf",
         aln="data/chrGraphs/{chromosome}/{chromosome}.wfmash.aln.paf"
@@ -303,7 +301,7 @@ rule wfmash_on_chr:
 rule seqwish:
     # Run seqwish on alignement produced by wfmash
     input:
-        fa="data/chrInputs/{chromosome}.fa.gz",
+        fa='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz',
         aln=rules.wfmash_on_chr.output.aln
     output:
         "data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfa"
@@ -349,7 +347,7 @@ rule odgi_postprocessing:
     input:
         rules.gfaffix_on_chr.output.gfa
     output:
-        gfa="data/chrGraphs/{chromosome}.gfa"
+        gfa='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
     threads: 8
     params:
         apppath=config['app.path']
@@ -398,7 +396,7 @@ rule odgi_postprocessing:
 rule generate_graph_list:
     # Generate a text file containing all created graphs
     input:
-        expand("data/chrGraphs/{chromosome}.gfa", chromosome=CHRLIST)
+        expand('data/chrGraphs/'+config['name']+'.{chromosome}.gfa', chromosome=CHRLIST)
     output:
         "data/chrGraphs/graphsList.txt"
     threads: 1
@@ -412,7 +410,7 @@ rule graph_squeeze:
     input:
         "data/chrGraphs/graphsList.txt"
     output:
-        "output/pan1c.pggb."+config['name']+".gfa"
+        "output/pan1c."+config['name']+".gfa"
     threads: 16
     params:
         apppath=config['app.path']
@@ -428,10 +426,10 @@ rule graph_squeeze:
 rule graph_stats:
     # Using GFAstats to produce stats on every chromosome graphs
     input:
-        graph="data/chrGraphs/{chromosome}.gfa"
+        graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
     output:
-        genstats="output/stats/chrGraphs/{chromosome}.general.stats.tsv",
-        pathstats="output/stats/chrGraphs/{chromosome}.path.stats.tsv"
+        genstats="output/stats/chrGraphs/"+config['name']+".{chromosome}.general.stats.tsv",
+        pathstats="output/stats/chrGraphs/"+config['name']+".{chromosome}.path.stats.tsv"
     threads: 4
     params:
         apppath=config['app.path']
@@ -444,7 +442,7 @@ rule graph_stats:
 rule graph_figs:
     # Creating figures using odgi viz 
     input:
-        graph="data/chrGraphs/{chromosome}.gfa"
+        graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
     output:
         oneDviz="output/chrGraphs.figs/{chromosome}.1Dviz.png",
         pcov="output/chrGraphs.figs/{chromosome}.pcov.png"
@@ -467,10 +465,10 @@ rule graph_figs:
 rule aggregate_graphs_stats:
     # Reading and merging all stats files from chromosome graphs into a .tsv.
     input:
-        genstats=expand("output/stats/chrGraphs/{chromosome}.general.stats.tsv", chromosome=CHRLIST)
+        genstats=expand("output/stats/chrGraphs/"+config['name']+".{chromosome}.general.stats.tsv", chromosome=CHRLIST)
     output:
-        genstats="output/stats/pan1c.pggb."+config['name']+".chrGraph.general.stats.tsv",
-        pathstats="output/stats/pan1c.pggb."+config['name']+".chrGraph.path.stats.tsv"
+        genstats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv",
+        pathstats="output/stats/pan1c."+config['name']+".chrGraph.path.stats.tsv"
     params:
         apppath=config['app.path'],
         panname=config['name']
@@ -484,9 +482,9 @@ rule aggregate_graphs_stats:
 rule final_graph_tagging:
     # Add metadata to the final GFA
     input:
-        graph="output/pan1c.pggb."+config['name']+".gfa",
+        graph="output/pan1c."+config['name']+".gfa",
     output:
-        "output/pan1c.pggb."+config['name']+".gfa.metadata"
+        "output/pan1c."+config['name']+".gfa.metadata"
     threads: 1
     params:
         apppath=config['app.path'],
@@ -501,9 +499,9 @@ rule final_graph_tagging:
 rule pggb_input_stats:
     # Produces statistics on pggb input sequences
     input:
-        flag="output/stats/pan1c.pggb."+config['name']+".chrGraph.general.stats.tsv"
+        flag="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv"
     output:
-        "output/stats/pan1c.pggb."+config['name']+".chrInput.stats.tsv"
+        "output/stats/pan1c."+config['name']+".chrInput.stats.tsv"
     params:
         apppath=config['app.path'],
         panname=config['name']
@@ -516,10 +514,10 @@ rule pggb_input_stats:
 rule core_statistics:
     # Aggregate chrInput, chrGraph and pggb statistics into a single tsv 
     input:
-        chrInputStats="output/stats/pan1c.pggb."+config['name']+".chrInput.stats.tsv",
-        chrGraphStats="output/stats/pan1c.pggb."+config['name']+".chrGraph.general.stats.tsv"
+        chrInputStats="output/stats/pan1c."+config['name']+".chrInput.stats.tsv",
+        chrGraphStats="output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv"
     output:
-        tsv="output/stats/pan1c.pggb."+config['name']+".core.stats.tsv",
+        tsv="output/stats/pan1c."+config['name']+".core.stats.tsv",
         dir=directory("output/pggb.usage.figs")
     params:
         apppath=config['app.path'],
@@ -561,9 +559,9 @@ rule get_pav:
 rule panacus_stats:
     # Produces panacus reports for a chromosome graph
     input:
-        graph="data/chrGraphs/{chromosome}.gfa"
+        graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
     output:
-        html="output/panacus.reports/{chromosome}.histgrowth.html"
+        html='output/panacus.reports/'+config['name']+'.{chromosome}.histgrowth.html'
     params:
         apppath=config['app.path'],
         panname=config['name'],
-- 
GitLab


From fe9fe10ea5fe3c189b63ba1aedbbe9ec81b7b0a5 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Tue, 4 Jun 2024 10:27:07 +0200
Subject: [PATCH 07/19] Update Snakefile

---
 Snakefile | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Snakefile b/Snakefile
index bed2046..9b2d349 100644
--- a/Snakefile
+++ b/Snakefile
@@ -37,8 +37,8 @@ def which_analysis():
     # Creating a list with default analysis steps (to prevent the function from returning an empty list)
     analysis_inputs = [     
         "output/stats/pan1c."+config['name']+".core.stats.tsv", # core stats
-        expand("output/panacus.reports/{chromosome}.histgrowth.html", chromosome=CHRLIST), # panacus histgrowth 
-        expand("output/chrGraphs.figs/{chromosome}.1Dviz.png", chromosome=CHRLIST), # visualizations from odgi on chromosome graphs
+        expand("output/panacus.reports/"+config['name']+".{chromosome}.histgrowth.html", chromosome=CHRLIST), # panacus histgrowth 
+        expand("output/chrGraphs.figs/"+config['name']+".{chromosome}.1Dviz.png", chromosome=CHRLIST), # visualizations from odgi on chromosome graphs
         "output/stats/pan1c."+config['name']+".chrGraph.general.stats.tsv" # chromosomes graph statistics
     ]
     
@@ -196,7 +196,7 @@ rule create_pggb_input_syri_fig:
     input:
         fasta='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz'
     output:
-        fig="output/chrInput.syri.figs/{chromosome}."+config['name']+".asm.syri.png",
+        fig="output/chrInput.syri.figs/"+config['name']+".{chromosome}.asm.syri.png",
         wrkdir=directory('data/chrInput/syri/{chromosome}')
     threads: 8
     params:
@@ -444,8 +444,8 @@ rule graph_figs:
     input:
         graph='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
     output:
-        oneDviz="output/chrGraphs.figs/{chromosome}.1Dviz.png",
-        pcov="output/chrGraphs.figs/{chromosome}.pcov.png"
+        oneDviz="output/chrGraphs.figs/"+config['name']+".{chromosome}.1Dviz.png",
+        pcov="output/chrGraphs.figs/"+config['name']+".{chromosome}.pcov.png"
     threads: 4
     params:
         apppath=config['app.path'],
-- 
GitLab


From 706e8d6e9433bc4c71ccf8c19d6e4851659d96ef Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Tue, 4 Jun 2024 18:07:41 +0200
Subject: [PATCH 08/19] Update Snakefile

Compressing temporary files in order to save space
---
 Snakefile | 34 ++++++++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 6 deletions(-)

diff --git a/Snakefile b/Snakefile
index 9b2d349..9c61c97 100644
--- a/Snakefile
+++ b/Snakefile
@@ -261,8 +261,8 @@ rule wfmash_on_chr:
         fa='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz',
         fai='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz.fai'
     output:
-        mapping="data/chrGraphs/{chromosome}/{chromosome}.wfmash.mapping.paf",
-        aln="data/chrGraphs/{chromosome}/{chromosome}.wfmash.aln.paf"
+        mapping=temp("data/chrGraphs/{chromosome}/{chromosome}.wfmash.mapping.paf"),
+        aln=temp("data/chrGraphs/{chromosome}/{chromosome}.wfmash.aln.paf")
     threads: 16
     params:
         apppath=config['app.path'],
@@ -296,15 +296,22 @@ rule wfmash_on_chr:
             --invert-filtering \
             1> {output.aln} \
             2> >(tee {log.cmd_aln} >&2)
+
+        # Compressing
+        apptainer run --app bgzip {params.apppath}/PanGeTools.sif \
+            -@ {threads} -k {output.mapping}
+
+        apptainer run --app bgzip {params.apppath}/PanGeTools.sif \
+            -@ {threads} -k {output.aln}
         """
-    
+
 rule seqwish:
     # Run seqwish on alignement produced by wfmash
     input:
         fa='data/chrInputs/'+config['name']+'.{chromosome}.fa.gz',
         aln=rules.wfmash_on_chr.output.aln
     output:
-        "data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfa"
+        temp("data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfa")
     threads: 8
     params:
         apppath=config['app.path'],
@@ -320,6 +327,10 @@ rule seqwish:
             {params.seqwish} -t {threads} \
             --temp-dir $(dirname {output}) -P 2>&1 | \
             tee {log.cmd}
+
+        # Compressing
+        apptainer run --app bgzip {params.apppath}/PanGeTools.sif \
+            -@ {threads} -k {output}
         """
 
 rule gfaffix_on_chr:
@@ -327,7 +338,7 @@ rule gfaffix_on_chr:
     input:
         rules.seqwish.output
     output:
-        gfa="data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfaffixD.gfa",
+        gfa=temp("data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfaffixD.gfa"),
         transform="data/chrGraphs/{chromosome}/{chromosome}.seqwish.gfaffixD.transform.txt"
     threads: 1
     params:
@@ -340,6 +351,10 @@ rule gfaffix_on_chr:
             apptainer exec {params.apppath}/pggb.sif gfaffix \
             {input} -o {output.gfa} -t {output.transform} \
             > /dev/null
+
+        # Compressing
+        apptainer run --app bgzip {params.apppath}/PanGeTools.sif \
+            -@ {threads} -k {output.gfa}
         """
 
 rule odgi_postprocessing:
@@ -347,7 +362,7 @@ rule odgi_postprocessing:
     input:
         rules.gfaffix_on_chr.output.gfa
     output:
-        gfa='data/chrGraphs/'+config['name']+'.{chromosome}.gfa'
+        gfa=temp('data/chrGraphs/'+config['name']+'.{chromosome}.gfa')
     threads: 8
     params:
         apppath=config['app.path']
@@ -389,6 +404,13 @@ rule odgi_postprocessing:
             view -i $OGfile.unchoped.sorted.og -g \
             1> {output.gfa} \
             2> >(tee {log.cmd_view} >&2) 
+
+        ## Removing .og files for space savings
+        rm $(dirname {input})/*.og
+
+        # Compressing
+        apptainer run --app bgzip {params.apppath}/PanGeTools.sif \
+            -@ {threads} -k {output.gfa}
         """
     
 ## ----------------------------------------
-- 
GitLab


From 07b86f0786299cfbf0b5953229d5ddd800e31272 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Wed, 5 Jun 2024 09:35:20 +0200
Subject: [PATCH 09/19] Update ragtagChromInfer.sh

Compressing RagTag temporary folder
---
 scripts/ragtagChromInfer.sh | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/scripts/ragtagChromInfer.sh b/scripts/ragtagChromInfer.sh
index c52476d..05e0d57 100755
--- a/scripts/ragtagChromInfer.sh
+++ b/scripts/ragtagChromInfer.sh
@@ -46,3 +46,8 @@ apptainer run --app bgzip $appdir/PanGeTools.sif \
 
 # Moving fa.gz to output dir
 mv $tmpdir/${sample}.ragtagged.fa.gz $output
+
+# Compressing temporary files
+tar --remove-files -cf $tmpdir.tar $tmpdir
+apptainer run --app bgzip $appdir/PanGeTools.sif \
+    -@ $threads $tmpdir.tar
-- 
GitLab


From b8ea708b3cdcbdbe5145774a0360560ea4144b74 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Wed, 5 Jun 2024 09:35:58 +0200
Subject: [PATCH 10/19] Update Snakefile

Adding metadata to chromosome graphs
---
 Snakefile | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Snakefile b/Snakefile
index 9c61c97..ea108d3 100644
--- a/Snakefile
+++ b/Snakefile
@@ -408,6 +408,12 @@ rule odgi_postprocessing:
         ## Removing .og files for space savings
         rm $(dirname {input})/*.og
 
+        ## Adding metadata
+        python scripts/getTags.py \
+            --appdir {params.apppath} --config-file config.yaml \
+            > "$(dirname {input})/metadata.txt"
+        sed -i "/^H/r $(dirname {input})/metadata.txt" {output.gfa}
+
         # Compressing
         apptainer run --app bgzip {params.apppath}/PanGeTools.sif \
             -@ {threads} -k {output.gfa}
-- 
GitLab


From b2f712f58ec1cab4073d1de1ab0994cb1e488808 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Wed, 5 Jun 2024 10:07:47 +0200
Subject: [PATCH 11/19] Update ragtagChromInfer.sh: drop $rtcommand from the ragtag call

---
 scripts/ragtagChromInfer.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/ragtagChromInfer.sh b/scripts/ragtagChromInfer.sh
index 05e0d57..9afa538 100755
--- a/scripts/ragtagChromInfer.sh
+++ b/scripts/ragtagChromInfer.sh
@@ -34,7 +34,7 @@ mkdir -p $tmpdir
 
 # Running ragtag scaffold
 apptainer run $appdir/pan1c-env.sif ragtag.py scaffold \
-    "$rtcommand" -t $threads -o $tmpdir $inputref $inputquery
+    -t $threads -o $tmpdir $inputref $inputquery
 
 # Renaming sequence according to naming scheme
 grep ">${ref}#chr*" -A1 $tmpdir/ragtag.scaffold.fasta | \
-- 
GitLab


From cbe527a20fd40401fab36bcc483c44f08646a481 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Wed, 5 Jun 2024 10:13:36 +0200
Subject: [PATCH 12/19] Updated inputClustering

Added pangenome name to the output FASTA file names
---
 Snakefile                  | 5 +++--
 scripts/inputClustering.py | 8 +++++++-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/Snakefile b/Snakefile
index ea108d3..fe9b83c 100644
--- a/Snakefile
+++ b/Snakefile
@@ -126,12 +126,13 @@ rule clustering:
         expand('data/chrInputs/'+config['name']+'.{chromosome}.fa.gz', chromosome=CHRLIST)
     threads: workflow.cores
     params:
-        apppath=config["app.path"]
+        apppath=config["app.path"],
+        panname=config["name"]
     shell:
         """
         mkdir -p $(dirname {output[0]})
         apptainer run {params.apppath}/pan1c-env.sif python scripts/inputClustering.py \
-            --fasta {input} --output $(dirname {output[0]})
+            --fasta {input} --output $(dirname {output[0]}) --panname {params.panname}
         for file in $(dirname {output[0]})/*.fa; do
             apptainer run --app bgzip {params.apppath}/PanGeTools.sif -@ {threads} $file
         done
diff --git a/scripts/inputClustering.py b/scripts/inputClustering.py
index 6284eb4..49bdb13 100644
--- a/scripts/inputClustering.py
+++ b/scripts/inputClustering.py
@@ -33,6 +33,12 @@ arg_parser.add_argument(
     required = True,
     help = "Output directory"
     )
+arg_parser.add_argument(
+    "--panname",
+    dest = "panname",
+    required = True,
+    help = "Pangenome name"
+    )
 arg_parser.add_argument(
     "--debug",
     "-d",
@@ -88,6 +94,6 @@ if args.debug: print(chrSeq.keys())
 
 # Writing chromosome specific fasta file
 for chrName in chrSeq.keys():
-    with open(os.path.join(args.outdir, f"{chrName}.fa"), "w") as output_handle:
+    with open(os.path.join(args.outdir, f"{args.panname}.{chrName}.fa"), "w") as output_handle:
         fasta_out = FastaIO.FastaWriter(output_handle, wrap=None)
         fasta_out.write_file(chrSeq[chrName])  
\ No newline at end of file
-- 
GitLab


From 93eb55ed5677e5c9b5abc80b84a1a35c716c04fa Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Wed, 5 Jun 2024 10:17:12 +0200
Subject: [PATCH 13/19] Update Snakefile: add pangenome name to the gfastats output prefix

---
 Snakefile | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/Snakefile b/Snakefile
index fe9b83c..c809065 100644
--- a/Snakefile
+++ b/Snakefile
@@ -461,11 +461,14 @@ rule graph_stats:
         pathstats="output/stats/chrGraphs/"+config['name']+".{chromosome}.path.stats.tsv"
     threads: 4
     params:
-        apppath=config['app.path']
+        apppath=config['app.path'],
+        panname=config['name']
     shell:
         """
         apptainer run --app gfastats {params.apppath}/PanGeTools.sif \
-            -g {input.graph} -P -o $(dirname {output.genstats})/{wildcards.chromosome} -t {threads}
+            -g {input.graph} -P \
+            -o $(dirname {output.genstats})/{params.panname}.{wildcards.chromosome} \
+            -t {threads}
         """
 
 rule graph_figs:
-- 
GitLab


From 3f3c2a9f02afba3586f79f185854643f987ffe94 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Wed, 5 Jun 2024 10:34:58 +0200
Subject: [PATCH 14/19] Updated Panacus part

---
 Snakefile               | 2 +-
 scripts/getPanacusHG.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Snakefile b/Snakefile
index c809065..c22aa76 100644
--- a/Snakefile
+++ b/Snakefile
@@ -604,7 +604,7 @@ rule panacus_stats:
         bash scripts/getPanacusHG.sh \
             -g {input.graph} \
             -r $(basename {params.refname} .fa.gz) \
-            -d data/chrGraphs/$(basename {input.graph} .gfa) \
+            -d data/chrGraphs/{wildcards.chromosome} \
             -o {output.html} \
             -a {params.apppath} \
             -t {threads}
diff --git a/scripts/getPanacusHG.sh b/scripts/getPanacusHG.sh
index 41a2222..7f3ddb9 100755
--- a/scripts/getPanacusHG.sh
+++ b/scripts/getPanacusHG.sh
@@ -25,7 +25,7 @@ while getopts "g:r:a:t:d:o:" option; do
 done
 
 # Getting chromosome name
-chrname=$(basename ${gfa} .gfa)
+chrname=$(basename ${gfa} .gfa | cut -d'.' -f2)
 ref=$(echo $refname | sed 's/.hap/#/')
 
 # Getting paths in chromosome graph
-- 
GitLab


From 6e5e5af38f8c14ef4a938a04b506da76b9bae34b Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Wed, 5 Jun 2024 10:50:26 +0200
Subject: [PATCH 15/19] Update Snakefile: add chromosome graphs as graph_squeeze inputs

---
 Snakefile | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Snakefile b/Snakefile
index c22aa76..e0484d3 100644
--- a/Snakefile
+++ b/Snakefile
@@ -437,7 +437,8 @@ rule generate_graph_list:
 rule graph_squeeze:
     # Using odgi to merge every subgraphs into a final one
     input:
-        "data/chrGraphs/graphsList.txt"
+        glist="data/chrGraphs/graphsList.txt",
+        graphs=expand('data/chrGraphs/'+config['name']+'.{chromosome}.gfa', chromosome=CHRLIST)
     output:
         "output/pan1c."+config['name']+".gfa"
     threads: 16
@@ -446,7 +447,7 @@ rule graph_squeeze:
     shell:
         """
         apptainer run --app odgi {params.apppath}/PanGeTools.sif \
-            squeeze -t {threads} -O -P -f {input} -o {output}.og
+            squeeze -t {threads} -O -P -f {input.glist} -o {output}.og
         apptainer run --app odgi {params.apppath}/PanGeTools.sif \
             view -t {threads} -P -i {output}.og -g > {output}
         rm {output}.og
-- 
GitLab


From 525c0047d6dc1b7fa340d9c3079323bd99a7d23d Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Wed, 5 Jun 2024 10:59:13 +0200
Subject: [PATCH 16/19] Update chrInputStats.py: take chromosome name from last dot-separated field

---
 scripts/chrInputStats.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/chrInputStats.py b/scripts/chrInputStats.py
index e232f9f..8b3015b 100644
--- a/scripts/chrInputStats.py
+++ b/scripts/chrInputStats.py
@@ -59,7 +59,7 @@ seqDict = {}
 for filename in args.fastafiles:
     
     # Getting chromosome name from fasta filename
-    chrName = os.path.basename(filename).split(".fa.gz")[0]
+    chrName = os.path.basename(filename).split(".fa.gz")[0].split('.')[-1]
 
     # Reading bgzip fasta file and adding records to seqDict
     with gzip.open(filename, "rt") as handle:
-- 
GitLab


From 0e3f1f1d99718e759d35ea334971ce9b9898e7b3 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <amergez@miat-pret-5.toulouse.inrae.fr>
Date: Wed, 5 Jun 2024 11:12:15 +0200
Subject: [PATCH 17/19] Update chrStatsAggregation.py

---
 scripts/chrStatsAggregation.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/scripts/chrStatsAggregation.py b/scripts/chrStatsAggregation.py
index af9d122..be2cf95 100644
--- a/scripts/chrStatsAggregation.py
+++ b/scripts/chrStatsAggregation.py
@@ -54,6 +54,7 @@ for file in fileList:
 
     # Getting filename (i.e. chromosome)
     filename, extension = os.path.splitext(os.path.splitext(os.path.basename(file))[0])
+    chrname = filename.split('.')[-1]
 
     statType = filename.split('.')[-1] # Getting general or path stats
 
@@ -65,9 +66,16 @@ for file in fileList:
     if len(stats[statType]) != 0:
         filelines = filelines[1:]
 
+        # Removing chromosome name from each line
+        filelines = [
+            '\t'.join(line.split('\t')[1:])
+            for line in filelines
+        ]
+
         # Adding pangenome name as first column
         filelines = [
-            f"{args.panname}\t{line}" for line in filelines
+            f"{args.panname}\t{chrname}\t{line}" 
+            for line in filelines
         ]
     # Else, rework header to add pangenome name
     else : 
-- 
GitLab


From 06521322d8c5365763ea7e5228912e7f38602c9d Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Fri, 7 Jun 2024 14:36:57 +0200
Subject: [PATCH 18/19] Update chrStatsAggregation.py

---
 scripts/chrStatsAggregation.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/scripts/chrStatsAggregation.py b/scripts/chrStatsAggregation.py
index be2cf95..3ac2b10 100644
--- a/scripts/chrStatsAggregation.py
+++ b/scripts/chrStatsAggregation.py
@@ -54,7 +54,7 @@ for file in fileList:
 
     # Getting filename (i.e. chromosome)
     filename, extension = os.path.splitext(os.path.splitext(os.path.basename(file))[0])
-    chrname = filename.split('.')[-1]
+    chrname = filename.split('.')[-2]
 
     statType = filename.split('.')[-1] # Getting general or path stats
 
@@ -85,7 +85,12 @@ for file in fileList:
 
         # Adding pangenome name as first column
         filelines[1:] = [
-            f"{args.panname}\t{line}" for line in filelines[1:]
+            '\t'.join(line.split('\t')[1:])
+            for line in filelines[1:]
+        ]
+
+        filelines[1:] = [
+            f"{args.panname}\t{chrname}\t{line}" for line in filelines[1:]
         ]
 
     # Redirecting to the correct list
-- 
GitLab


From df61e64a991d0614b4e1951d07438918b36ef5d8 Mon Sep 17 00:00:00 2001
From: Alexis Mergez <alexis.mergez@inrae.fr>
Date: Tue, 11 Jun 2024 18:36:26 +0200
Subject: [PATCH 19/19] Small fixes

---
 Snakefile  | 2 +-
 getApps.sh | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/Snakefile b/Snakefile
index e0484d3..2968040 100644
--- a/Snakefile
+++ b/Snakefile
@@ -15,7 +15,7 @@ Main variables used in the workflow
 SAMPLES = np.unique([
     os.path.basename(f).split('.fa')[0] 
     for f in os.listdir("data/haplotypes/")
-    if re.search(r".fa", f)
+    if re.search(r"\.fa", f)
     ])
 
 # Retrieving the list of haplotypes excluding the reference
diff --git a/getApps.sh b/getApps.sh
index 76fed73..1ba6cc9 100755
--- a/getApps.sh
+++ b/getApps.sh
@@ -15,7 +15,7 @@ while getopts "a:" option; do
 done
 
 # Script
-apptainer build $appdir/PanGeTools.sif oras://registry.forgemia.inra.fr/alexis.mergez/pangetools/pangetools:latest  
-apptainer build $appdir/pan1c-env.sif oras://registry.forgemia.inra.fr/alexis.mergez/pan1capps/pan1cenv:latest  
-apptainer build $appdir/pggb.sif oras://registry.forgemia.inra.fr/alexis.mergez/pangratools/pggb:latest  
-apptainer build $appdir/pan1c-box.sif oras://registry.forgemia.inra.fr/alexis.mergez/pan1capps/pan1cbox:latest 
+apptainer pull $appdir/PanGeTools.sif oras://registry.forgemia.inra.fr/alexis.mergez/pangetools/pangetools:latest  
+apptainer pull $appdir/pan1c-env.sif oras://registry.forgemia.inra.fr/alexis.mergez/pan1capps/pan1cenv:latest  
+apptainer pull $appdir/pggb.sif oras://registry.forgemia.inra.fr/alexis.mergez/pangratools/pggb:latest  
+apptainer pull $appdir/pan1c-box.sif oras://registry.forgemia.inra.fr/alexis.mergez/pan1capps/pan1cbox:latest 
-- 
GitLab