# Run this Snakefile with the --use-conda flag to ensure the correct applications are present.

CONDITIONS = config["conditions"]
KMERS      = ["19", "21", "25", "27"]

READS_DIR = os.path.abspath(config["reads_dir"])
workdir:
    config.get("results", "results")

rule all_max:
    input:
        expand("assem/{condition}_k{kmer}_max_contig.txt", condition = CONDITIONS,
                                                           kmer = KMERS )

rule cutadapt:
    output:
        read1 = "cutadapt/{sample}_1.fq",
        read2 = "cutadapt/{sample}_2.fq"
    input:
        read1 = READS_DIR + "/{sample}_1.fq",
        read2 = READS_DIR + "/{sample}_2.fq",
    log: "logs/cutadapt_{sample}.log"
    params:
        adapter = "AGATCGGAAGAGC"
    conda: "envs/assembly_conda_env.yaml"
    shell:
        """exec >{log} 2>&1
           cutadapt -a {params.adapter} -A {params.adapter} \
            -o {output.read1} -p {output.read2} \
               {input.read1}     {input.read2}
        """

rule concatenate:
    output:
        read1 = "concatenate/{condition}_1.fq",
        read2 = "concatenate/{condition}_2.fq"
    input:
        read1s = expand("cutadapt/{{condition}}_{rep}_1.fq", rep=config["replicates"]),
        read2s = expand("cutadapt/{{condition}}_{rep}_2.fq", rep=config["replicates"]),
    shell:
        """cat {input.read1s} > {output.read1}
           cat {input.read2s} > {output.read2}
        """

rule assemble:
    output: "assem/{sample}_k{kmer}_contigs.fa"
    input:
        read1 = "concatenate/{sample}_1.fq",
        read2 = "concatenate/{sample}_2.fq"
    log: "logs/assemble_{sample}_k{kmer}.log"
    params:
        tmpdir = "velvet_tmp_{sample}_{kmer}"
    conda: "envs/assembly_conda_env.yaml"
    shell:
        """exec >{log} 2>&1
            velveth {params.tmpdir} {wildcards.kmer} -shortPaired -fastq -separate {input.read1} {input.read2}
            velvetg {params.tmpdir}
            mv {params.tmpdir}/contigs.fa {output}
        """

rule max_contig:
    output: "assem/{assem}_max_contig.txt"
    input:  "assem/{assem}_contigs.fa"
    log:    "logs/{assem}_max_contig.log"
    conda: "envs/assembly_conda_env.yaml"
    shell:
        """exec >{log} 2>&1
           stats.sh {input} | grep 'Max contig length:' > {output}
        """

