无法从输出文件确定输入文件中的Snakemake通配符

import pandas as pd

#define configfile with paths etc.
configfile: "config.yaml"

#read-in dataframe and define Sample and Path
SAMPLES = pd.read_table(config["sample_file"])
BAMFILE = SAMPLES["Sample"]
PATH = SAMPLES["Path"]

rule all:
    input:
        expand("{path}{sample}.summary.txt", zip, path=PATH, sample=BAMFILE)

#this works like a charm as long as I give the zip-function in the rules 'all' and 'summary':
rule indexBam:
    input:
        "{path}{sample}.bam"
    output:
        "{path}{sample}.bam.bai"
    shell:
        "samtools index {input}"

#this following command works as long as I give the specific folder for a sample instead of {path}.
# NOTE(review): every output below contains only {sample}, while the bam input
# also uses {path}; a wildcard that is absent from the output cannot be resolved
# for the input, which is the error named in the title.
rule bamdiagnostics:
    input:
        bam="{path}{sample}.bam",
        bai=expand("{path}{sample}.bam.bai", zip, path=PATH, sample=BAMFILE)
    params:
        prefix="analysis/BAMDiagnostics/{sample}"
    output:
        "analysis/BAMDiagnostics/{sample}_approximateDepth.txt",
        "analysis/BAMDiagnostics/{sample}_fragmentStats.txt",
        "analysis/BAMDiagnostics/{sample}_MQ.txt",
        "analysis/BAMDiagnostics/{sample}_readLength.txt",
        "analysis/BAMDiagnostics/{sample}_BamDiagnostics.log"
    message:
        "running BamDiagnostics...{wildcards.sample}"
    shell:
        "{config[atlas]} task=BAMDiagnostics bam={input.bam} out={params.prefix} logFile={params.prefix}_BamDiagnostics.log verbose"

rule summary:
    input:
        index=expand("{path}{sample}.bam.bai", zip, path=PATH, sample=BAMFILE),
        bamd=expand("analysis/BAMDiagnostics/{sample}_approximateDepth.txt", sample=BAMFILE)
    output:
        "{path}{sample}.summary.txt"
    shell:
        # NOTE(review): kept as posted; the single quote is never closed and
        # nothing is redirected into the declared output.
        "echo -e '{input.index} {input.bamd}"

1条回答

网友

1楼 · 发布于 2024-06-02 08:30:55

根据atlas文档，您似乎需要为每个样本分别运行每个规则；这里的复杂之处在于每个样本都位于不同的路径中。

我修改了您的脚本以适用于上述情况（请参见DAG）。脚本开头的变量做了修改，使其含义更清晰。为了便于演示，删除了config，并且使用了pathlib库（而不是{}）。pathlib不是必需的，但它能帮我保持头脑清醒。shell命令也做了修改，以避免依赖config。

import pandas as pd
from pathlib import Path

# Sample sheet: tab-separated, indexed by its "Sample" column; the "Path"
# column is looked up per sample by the rules below.
df = pd.read_table('sample.tsv', index_col='Sample')
SAMPLES = df.index
BAM_PATH = df["Path"]
# Usage: BAM_PATH['sample1'] is the "Path" entry of the row whose Sample is sample1.

rule all:
    # Final targets: one "<sample>.summary.txt" per row of the sample sheet.
    input:
        # Joined with pathlib, the same way rule summary builds its output, so
        # the targets match "{path}/{sample}.summary.txt" whether or not the
        # "Path" entries end with a slash.
        [str(Path(BAM_PATH[sample]) / f"{sample}.summary.txt") for sample in SAMPLES]


rule indexBam:
    # Index a BAM file with samtools; {path} and {sample} are taken from the
    # requested "<path>/<sample>.bam.bai" file.
    input:
        str(Path("{path}", "{sample}.bam"))
    output:
        str(Path("{path}", "{sample}.bam.bai"))
    shell:
        "samtools index {input}"

def _in_sample_dir(suffix):
    """Return an input function giving "<sample dir>/<sample><suffix>" for a job."""
    def resolve(wildcards):
        # The directory comes from the sample sheet, so the rule only needs
        # the {sample} wildcard that its outputs define.
        return str(Path(BAM_PATH[wildcards.sample], f"{wildcards.sample}{suffix}"))
    return resolve


# Run the BAMDiagnostics task once per sample; results go to analysis/BAMDiagnostics/.
rule bamdiagnostics:
    input:
        bam = _in_sample_dir(".bam"),
        bai = _in_sample_dir(".bam.bai"),
    params:
        # Common prefix of every output file of this job.
        prefix="analysis/BAMDiagnostics/{sample}"
    output:
        "analysis/BAMDiagnostics/{sample}_approximateDepth.txt",
        "analysis/BAMDiagnostics/{sample}_fragmentStats.txt",
        "analysis/BAMDiagnostics/{sample}_MQ.txt",
        "analysis/BAMDiagnostics/{sample}_readLength.txt",
        "analysis/BAMDiagnostics/{sample}_BamDiagnostics.log"
    message:
        "running BamDiagnostics...{wildcards.sample}"
    shell:
        # NOTE(review): ".atlas" looks like a placeholder for the atlas
        # executable (the question used {config[atlas]}) - confirm before running.
        ".atlas task=BAMDiagnostics bam={input.bam} out={params.prefix} logFile={params.prefix}_BamDiagnostics.log verbose"

rule summary:
    # Write a one-line summary per sample naming its BAM index and its
    # approximate-depth file.
    input:
        bamd = "analysis/BAMDiagnostics/{sample}_approximateDepth.txt",
        # The index sits in the sample's own directory, looked up in the sample sheet.
        index = lambda wildcards: str( Path(BAM_PATH[wildcards.sample]) / f"{wildcards.sample}.bam.bai"),
    output:
        str( Path("{path}") / "{sample}.summary.txt")
    shell:
        # The quote is closed and the text redirected into {output}; without
        # this the command has an unterminated quote and the declared output
        # file is never created.
        "echo -e '{input.index} {input.bamd}' > {output}"

相关问题更多 >

编程相关推荐

热门问题

热门文章