Skip to content

Commit 757940d

Browse files
authored
Merge pull request #10 from fcaretti/Add-rules
feat(rule): add VEP annotation (no wrapper)
2 parents c49124c + 2c72e6d commit 757940d

File tree

6 files changed

+81
-40
lines changed

6 files changed

+81
-40
lines changed

Diff for: .test/config/config.yml

+10-1
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,13 @@ known_sites:
1010
filename: "placeholder.vcf"
1111

1212
filtering:
13-
params: ""
13+
params: ""
14+
15+
vep:  # settings read by workflow/rules/vep.smk through config["vep"]
16+
cache_dir: ""  # directory for the downloaded cache archive; NOTE(review): an empty value makes "{dir}/{zip_name}" in vep.smk resolve to a path directly under "/" - confirm this is intended for tests
17+
zip_name: "homo_sapiens_vep_112_GRCh38.tar.gz"  # file name of the cache archive stored inside cache_dir
18+
url: "https://ftp.ensembl.org/pub/release-112/variation/indexed_vep_cache/homo_sapiens_vep_112_GRCh38.tar.gz"  # address the archive is downloaded from with curl
19+
image: "docker://ensemblorg/ensembl-vep:release_112.0"  # container image used by rule vep_annotation
20+
filters: "--filter "  # NOTE(review): not referenced by the rules in workflow/rules/vep.smk - confirm where this is consumed
21+
impact_levels: [ "MODERATE", "HIGH"]  # NOTE(review): not referenced by the rules in workflow/rules/vep.smk - confirm where this is consumed
22+
species: "homo_sapiens"  # joined with cache_dir in vep.smk to form the extracted cache directory path

Diff for: config/config.yml

+7-5
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@ filtering:
1313
params: "ex: -i 'QUAL > 30 && INFO/DP > 10'"
1414

1515
vep:  # settings read by workflow/rules/vep.smk through config["vep"]
16-
cache_dir: "cache_folder"
17-
plugins_dir: "plugins_folder"
18-
species: ""
19-
build: ""
20-
release: ""
16+
cache_dir: ""  # directory for the downloaded cache archive; NOTE(review): an empty value makes "{dir}/{zip_name}" in vep.smk resolve to a path directly under "/" - set before running
17+
zip_name: "ex: homo_sapiens_vep_112_GRCh38.tar.gz"  # file name of the cache archive stored inside cache_dir (replace the whole value, including the "ex: " prefix)
18+
url: "ex: https://ftp.ensembl.org/pub/release-112/variation/indexed_vep_cache/homo_sapiens_vep_112_GRCh38.tar.gz"  # address the archive is downloaded from with curl (replace the whole value)
19+
image: "ex: docker://ensemblorg/ensembl-vep:release_112.0"  # container image used by rule vep_annotation (replace the whole value)
20+
filters: "--filter "  # NOTE(review): not referenced by the rules in workflow/rules/vep.smk - confirm where this is consumed
21+
impact_levels: [ "MODERATE", "HIGH"]  # NOTE(review): not referenced by the rules in workflow/rules/vep.smk - confirm where this is consumed
22+
species: "ex: homo_sapiens"  # joined with cache_dir in vep.smk to form the extracted cache directory path (replace the whole value)

Diff for: workflow/Snakefile

+2-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ haplo_calls = "results/calls/calls_gatk.vcf"
3939

4040
rule all:
4141
input:
42-
"results/calls/filtered_calls.vcf",
42+
"results/calls/annotated_calls.vcf",  # produced by rule vep_annotation in rules/vep.smk
4343
first_summaries,
4444
second_summaries,
4545

@@ -52,3 +52,4 @@ include: "rules/recalibration.smk"
5252
include: "rules/alignment_summary.smk"
5353
include: "rules/gatk_haplocaller.smk"
5454
include: "rules/filter.smk"
55+
include: "rules/vep.smk"

Diff for: workflow/envs/curl.yml

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
name: curl  # environment for rule download_vep_cache; named after the tool it actually installs
2+
channels:
3+
- conda-forge
4+
dependencies:
5+
- curl=8.8.0

Diff for: workflow/envs/unzip.yml

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
name: unzip  # environment referenced by rule unzip_vep_cache in workflow/rules/vep.smk
2+
channels:
3+
- bioconda
4+
- conda-forge
5+
- defaults
6+
dependencies:
7+
- htslib=1.19.1  # NOTE(review): the rule using this env only calls `tar`; confirm htslib is the intended dependency

Diff for: workflow/rules/vep.smk

+50-33
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,60 @@
1-
rule annotate_variants:
2-
input:
3-
calls="results/calls/calls_gatk.vcf", # .vcf, .vcf.gz or .bcf
4-
cache=config["vep"]["cache_dir"], # can be omitted if fasta and gff are specified
5-
plugins=config["vep"]["plugins_dir"],
6-
fasta=reference,
7-
fai=reference_idx,
1+
rule download_vep_cache:  # downloads the VEP cache archive from config["vep"]["url"] into config["vep"]["cache_dir"]
82
output:
9-
calls="results/calls/annotated_calls.vcf", # .vcf, .vcf.gz or .bcf
10-
stats="results/calls/variants.html",
11-
params:
12-
# Pass a list of plugins to use, see https://www.ensembl.org/info/docs/tools/vep/script/vep_plugins.html
13-
# Plugin args can be added as well, e.g. via an entry "MyPlugin,1,FOO", see docs.
14-
plugins=["LoFtool"],
15-
extra="--everything", # optional: extra arguments
3+
expand(
4+
"{dir}/{zip_name}",
5+
dir=config["vep"]["cache_dir"],
6+
zip_name=config["vep"]["zip_name"],
7+
),
168
log:
17-
"logs/vep/annotate.log",
18-
threads: 4
19-
wrapper:
20-
"v3.12.1/bio/vep/annotate"
9+
log_file="logs/vep/download_vep_cache.log",
10+
params:
11+
cache_url=lambda wc: config["vep"]["url"],
12+
directory=config["vep"]["cache_dir"],
13+
conda:
14+
"../envs/curl.yml" # Updated to use a conda environment with curl
15+
shell:  # --fail makes curl exit non-zero on an HTTP error instead of saving the error page as the archive
16+
"""
17+
mkdir -p {params.directory}
18+
curl --fail -L -o {output} {params.cache_url} >> {log.log_file} 2>&1
19+
"""
2120

2221

23-
rule get_vep_cache:
22+
rule unzip_vep_cache:  # unpacks the downloaded cache archive so that "{cache_dir}/{species}" exists
23+
input:
24+
tar_file=expand(
25+
"{dir}/{zip_name}",
26+
dir=config["vep"]["cache_dir"],
27+
zip_name=config["vep"]["zip_name"],
28+
),
2429
output:
25-
directory(config["vep"]["cache_dir"]),
26-
params:
27-
species=config["vep"]["species"],
28-
build=config["vep"]["build"],
29-
release=config["vep"]["release"],
30+
species_dir=directory("{cache_dir}/{species}".format(**config["vep"])),
3031
log:
31-
"logs/vep/cache.log",
32-
cache: "omit-software" # save space and time with between workflow caching (see docs)
33-
wrapper:
34-
"v3.12.1/bio/vep/cache"
32+
log_file="logs/vep/unzip_vep_cache.log",
33+
conda:
34+
"../envs/unzip.yml"
35+
shell:  # -C extracts into the parent of the declared output instead of the working directory; assumes the archive's top-level folder is the species name - TODO confirm
36+
"""
37+
tar -xzvf {input.tar_file} -C "$(dirname {output.species_dir})" >> {log.log_file} 2>&1
38+
"""
3539

3640

37-
rule download_vep_plugins:
41+
rule vep_annotation:  # annotates the filtered calls with VEP in offline mode using the unpacked cache
42+
input:
43+
vcf="results/calls/filtered_calls.vcf",
44+
dir="{cache_dir}/{species}".format(**config["vep"]),
3845
output:
39-
temp(directory(config["vep"]["plugins_dir"])),
46+
annotated_vcf="results/calls/annotated_calls.vcf",
4047
params:
41-
release=config["vep"]["release"],
42-
wrapper:
43-
"v3.12.1/bio/vep/plugins"
48+
cache_dir=lambda wc: config["vep"]["cache_dir"],
49+
species=lambda wc: config["vep"]["species"],  # passed to --species so the command follows the configured species
50+
container:
51+
config["vep"]["image"]
52+
resources:
53+
cores=4,  # value handed to vep --fork
54+
log:
55+
log_file="logs/vep/vep_annotation.log",
56+
shell:
57+
"""
58+
vep --input_file {input.vcf} --output_file {output.annotated_vcf} --offline --vcf --species {params.species} \
59+
--cache --dir_cache {params.cache_dir} --force_overwrite --fork {resources.cores} > {log.log_file} 2>&1
60+
"""

0 commit comments

Comments
 (0)