Skip to content

Commit c49124c

Browse files
authored
Merge pull request #9 from fcaretti/Add-rules
feat(rule): GATK's HaplotypeCaller
2 parents 50cf5b7 + 61e1aa5 commit c49124c

File tree

4 files changed

+58
-8
lines changed

4 files changed

+58
-8
lines changed

Diff for: config/config.yml

+13-6
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,20 @@
11
data:
2-
folder: "/home/federico/Desktop/RNA_SNPs_calling/data"
2+
folder: "data_folder"
33

44
reference:
5-
folder: "/home/federico/Desktop/RNA_SNPs_calling/data/reference"
6-
genome: "GRCh38.primary_assembly.genome.fa"
5+
folder: "reference_folder"
6+
genome: "genome.fa"
77

88
known_sites:
9-
folder: "/home/federico/Desktop/RNA_SNPs_calling/data/reference"
10-
filename: "resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf"
9+
folder: "known_sites_folder"
10+
filename: "resources_broad_hg38_v0_Homo_sapiens_assembly38.dbsnp138.vcf"  # example value; replace with your known-sites VCF file name
1111

1212
filtering:
13-
params: "-i 'QUAL > 30 && INFO/DP > 10'"
13+
params: "-i 'QUAL > 30 && INFO/DP > 10'"  # example filter expression; adjust as needed
14+
15+
vep:
16+
cache_dir: "cache_folder"
17+
plugins_dir: "plugins_folder"
18+
species: ""
19+
build: ""
20+
release: ""

Diff for: workflow/Snakefile

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ second_summaries = [
3131
calls = [f"results/calls/{sample}.vcf" for sample in samples]
3232
alns = [f"results/recal/{sample}.bam" for sample in samples]
3333
idxs = [f"results/recal/{sample}.bai" for sample in samples]
34+
vcfs = [f"results/calls_gatk/{sample}.vcf" for sample in samples]
3435
vcf_zips = [f"results/calls_gatk/{sample}.vcf.gz" for sample in samples]
3536
vcf_idxs = [f"results/calls_gatk/{sample}.vcf.csi" for sample in samples]
3637
haplo_calls = "results/calls/calls_gatk.vcf"

Diff for: workflow/rules/gatk_haplocaller.smk

+1-2
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,13 @@ rule haplotype_caller:
1717
"v3.12.1/bio/gatk/haplotypecaller"
1818

1919

20-
2120
rule bgzip:
2221
input:
2322
"results/calls_gatk/{sample}.vcf",
2423
output:
2524
temp("results/calls_gatk/{sample}.vcf.gz"),
2625
params:
27-
extra="", # optional
26+
extra="", # optional
2827
threads: 1
2928
log:
3029
"logs/bgzip/{sample}.log",

Diff for: workflow/rules/vep.smk

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
rule annotate_variants:
    # Runs the "bio/vep/annotate" wrapper on results/calls/calls_gatk.vcf and
    # writes the annotated calls together with a statistics file.
    input:
        # Calls to annotate; accepted formats: .vcf, .vcf.gz or .bcf
        calls="results/calls/calls_gatk.vcf",
        # Cache directory taken from the config; may be left out when
        # fasta and gff are specified instead.
        cache=config["vep"]["cache_dir"],
        # Plugins directory taken from the config.
        plugins=config["vep"]["plugins_dir"],
        # Reference sequence and its index (both names are defined outside this rule).
        fasta=reference,
        fai=reference_idx,
    output:
        # Annotated calls; accepted formats: .vcf, .vcf.gz or .bcf
        calls="results/calls/annotated_calls.vcf",
        stats="results/calls/variants.html",
    threads: 4
    log:
        "logs/vep/annotate.log",
    params:
        # Plugins to enable, given as a list; available plugins are listed at
        # https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html
        # Arguments for a plugin can be appended to its entry,
        # e.g. "MyPlugin,1,FOO" (see the docs).
        plugins=["LoFtool"],
        # Optional extra arguments.
        extra="--everything",
    wrapper:
        "v3.12.1/bio/vep/annotate"
21+
22+
23+
rule get_vep_cache:
    # Runs the "bio/vep/cache" wrapper; its output is the directory configured
    # as vep.cache_dir.
    output:
        directory(config["vep"]["cache_dir"]),
    log:
        "logs/vep/cache.log",
    params:
        # Species, build and release are all read from the "vep" config section.
        species=config["vep"]["species"],
        build=config["vep"]["build"],
        release=config["vep"]["release"],
    # Between-workflow caching to save space and time (see docs).
    cache: "omit-software"
    wrapper:
        "v3.12.1/bio/vep/cache"
35+
36+
37+
rule download_vep_plugins:
    # Runs the "bio/vep/plugins" wrapper; its output is the directory configured
    # as vep.plugins_dir, declared as a temporary output.
    params:
        # Release is read from the "vep" config section.
        release=config["vep"]["release"],
    output:
        temp(directory(config["vep"]["plugins_dir"])),
    wrapper:
        "v3.12.1/bio/vep/plugins"

0 commit comments

Comments
 (0)