Skip to content

Commit 497180e

Browse files
committed
feat(rule): add GATK's HaplotypeCaller and filtering
1 parent c3bd9bf commit 497180e

File tree

5 files changed

+63
-6
lines changed

5 files changed

+63
-6
lines changed

Diff for: .test/config/config.yml

+4-1
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,7 @@ reference:
77

88
known_sites:
99
folder: ".test/data"
10-
filename: "placeholder.vcf"
10+
filename: "placeholder.vcf"
11+
12+
filtering:
13+
params: ""

Diff for: config/config.yml

+4-1
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,7 @@ reference:
77

88
known_sites:
99
folder: "/home/federico/Desktop/RNA_SNPs_calling/data/reference"
10-
filename: "resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf"
10+
filename: "resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf"
11+
12+
filtering:
13+
params: "-i 'QUAL > 30 && INFO/DP > 10'"

Diff for: workflow/Snakefile

+11-4
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,14 @@ import os
55
configfile: "config/config.yml"
66

77

8-
# Access the data folder from the config
8+
# Define data and reference files
99
data_folder = config["data"]["folder"]
1010
reference_folder = config["reference"]["folder"]
1111
genome_name = config["reference"]["genome"]
1212
reference = os.path.join(reference_folder, genome_name)
1313
reference_idx = f"{reference}.fai"
1414
reference_dict = f"{reference}.dict"
15+
# Used only in FreeBayes
1516
known_sites_folder = config["known_sites"]["folder"]
1617
known_filename = config["known_sites"]["filename"]
1718
known_sites = os.path.join(known_sites_folder, known_filename)
@@ -20,19 +21,23 @@ known_sites_idx = f"{known_sites}.idx"
2021
sample_files = glob.glob(os.path.join(data_folder, "*.bam"))
2122
samples = [os.path.basename(f).replace(".bam", "") for f in sample_files]
2223

23-
recalibrated_files = [f"results/recal/{sample}.bam" for sample in samples]
24-
2524
first_summaries = [
2625
f"results/stats/{sample}_before_recal.summary.txt" for sample in samples
2726
]
2827
second_summaries = [
2928
f"results/stats/{sample}_after_recal.summary.txt" for sample in samples
3029
]
3130

31+
calls = [f"results/calls/{sample}.vcf" for sample in samples]
32+
alns = [f"results/recal/{sample}.bam" for sample in samples]
33+
idxs = [f"results/recal/{sample}.bai" for sample in samples]
34+
vcfs = [f"results/calls_gatk/{sample}.vcf" for sample in samples]
35+
haplo_calls = "results/calls/calls_gatk.vcf"
36+
3237

3338
rule all:
3439
input:
35-
recalibrated_files,
40+
"results/calls/filtered_calls.vcf",
3641
first_summaries,
3742
second_summaries,
3843

@@ -43,3 +48,5 @@ include: "rules/index_genome.smk"
4348
include: "rules/split_n_cigar_reads.smk"
4449
include: "rules/recalibration.smk"
4550
include: "rules/alignment_summary.smk"
51+
include: "rules/gatk_haplocaller.smk"
52+
include: "rules/filter.smk"

Diff for: workflow/rules/filter.smk

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
rule bcf_filter_o_vcf:
2+
input:
3+
haplo_calls,
4+
output:
5+
"results/calls/filtered_calls.vcf",
6+
log:
7+
"logs/filter/filter.vcf.log",
8+
params:
9+
filter=config["filtering"]["params"],
10+
extra="",
11+
wrapper:
12+
"v3.12.1/bio/bcftools/filter"

Diff for: workflow/rules/gatk_haplocaller.smk

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
rule haplotype_caller:
2+
input:
3+
# a single BAM file or a list of BAM files
4+
bam="results/recal/{sample}.bam",
5+
ref=reference,
6+
output:
7+
vcf="results/calls_gatk/{sample}.vcf",
8+
log:
9+
"logs/gatk/haplotypecaller/{sample}.log",
10+
params:
11+
extra="", # optional
12+
java_opts="", # optional
13+
threads: 4
14+
resources:
15+
mem_mb=1024,
16+
wrapper:
17+
"v3.12.1/bio/gatk/haplotypecaller"
18+
19+
20+
rule merge_vcfs:
21+
input:
22+
vcfs=vcfs,
23+
output:
24+
"results/calls/calls_gatk.vcf",
25+
log:
26+
"logs/picard/mergevcfs.log",
27+
params:
28+
extra="",
29+
resources:
30+
mem_mb=1024,
31+
wrapper:
32+
"v3.12.1/bio/picard/mergevcfs"

0 commit comments

Comments
 (0)