3
3
import htsjdk .beta .io .bundle .*;
4
4
import htsjdk .beta .plugin .registry .HaploidReferenceResolver ;
5
5
import htsjdk .beta .plugin .variants .VariantsBundle ;
6
- import htsjdk .io .HtsPath ;
7
6
import htsjdk .samtools .util .FileExtensions ;
8
7
import org .apache .logging .log4j .LogManager ;
9
8
import org .apache .logging .log4j .Logger ;
23
22
/**
24
23
* Create a bundle (JSON) file for use with a GATK tool.
25
24
*
26
- * Since most bundles will contain a primary resource plus at least one secondary resource (typically an index),
25
+ * Since most bundles need to contain a primary resource plus at least one secondary resource (typically an index),
27
26
* the tool will attempt to infer standard secondary resource(s) for a given primary resource if no secondary resource
28
27
* is explicitly provided on the command line. Inferred secondary resources are automatically added to the resulting
29
28
* bundle. Secondary resource inference can be suppressed by using the --suppress-resource-resolution argument.
30
29
*
31
30
* Each resource in a bundle must have an associated content type tag. Content types for each resource are either
32
31
* specified on the command line via argument tags, or inferred by the tool. For the primary and secondary resources,
33
32
* when no content type argument tag is provided, the tool will attempt to infer the content type from the file
34
- * extension. However, the content type for "other" resources (resources that are nether primary nor secondary resources)
35
- * are NEVER inferred, and must always include a content type argument tag.
33
+ * extension. However, the content type for "other" resources (resources that are neither primary nor secondary
34
+ * resources) is NEVER inferred; such resources must always include a content type argument tag.
36
35
*
37
36
* Bundle output file names must end with the suffix ".json".
38
37
*
39
- * Common examples:
38
+ * In general, content types can be any string, but there are well known content types that must be used when creating
39
+ * bundles for tools that expect well known resource types, such as a VCF, a VCF index, a .fasta file, or a reference
40
+ * dictionary file. The common well known content types are:
41
+ *
42
+ * - "CT_VARIANT_CONTEXTS": a VCF file
43
+ * - "CT_VARIANTS_INDEX": VCF index file
44
+ *
45
+ * - "CT_HAPLOID_REFERENCE": fasta reference file
46
+ * - "CT_HAPLOID_REFERENCE_INDEX": fasta index file
47
+ * - "CT_HAPLOID_REFERENCE_DICTIONARY": fasta dictionary file
48
+ *
49
+ * Common bundle creation examples:
40
50
*
41
51
* VCF Bundles:
42
52
*
43
- * 1) Create a resource bundle for a VCF. Let the tool determine the content types, and resolve the secondary resources
44
- * (which for vcfs is the companion index) automatically by finding a sibling index file. If the sibling file cannot
45
- * be found, an exception wil lbe thrown:
53
+ * 1) Create a resource bundle for a VCF from just the VCF, letting the tool resolve the secondary (index) resource by
54
+ * automatically finding the sibling index file, and letting the tool determine the content types. If the sibling index
55
+ * file cannot be found, an exception will be thrown. Resulting bundle contains the VCF and associated index.
56
+ *
57
+ * CreateBundle \
58
+ * --primary path/to/my.vcf \
59
+ * --output mybundle.json
60
+ *
61
+ * The exact same bundle could be created manually by specifying both the resources and the content types explicitly:
62
+ *
63
+ * CreateBundle \
64
+ * --primary:CT_VARIANT_CONTEXTS path/to/my.vcf \
65
+ * --secondary:CT_VARIANTS_INDEX path/to/my.vcf.idx \
66
+ * --output mybundle.json
67
+ *
68
+ * 2) Create a resource bundle for a VCF from just the VCF, but suppress automatic resolution of the secondary
69
+ * resources. Let the tool determine the content types. The resulting bundle will contain only the vcf resource:
70
+ *
71
+ * CreateBundle \
72
+ * --primary path/to/my.vcf \
73
+ * --suppress-resource-resolution \
74
+ * --output mybundle.json
75
+ *
76
+ * 3) Create a resource bundle for a VCF, but specify the VCF AND the secondary index resource explicitly (which
77
+ * suppresses automatic secondary resolution). This is useful when the VCF and index are not in the same directory.
78
+ * Let the tool determine the content types. The resulting bundle will contain the VCF and index resources:
79
+ *
80
+ * CreateBundle \
81
+ * --primary path/to/my.vcf \
82
+ * --secondary some/other/path/to/vcf.idx \
83
+ * --output mybundle.json
84
+ *
85
+ * 4) Create a resource bundle for a VCF, but specify the VCF AND the secondary index resource explicitly (this
86
+ * is useful when the VCF and index are not in the same directory), and specify the content types explicitly via
87
+ * command line argument tags. The resulting bundle will contain the VCF and index resources.
88
+ *
89
+ * CreateBundle \
90
+ * --primary:CT_VARIANT_CONTEXTS path/to/my.vcf \
91
+ * --secondary:CT_VARIANTS_INDEX some/other/path/to/vcf.idx \
92
+ * --output mybundle.json
93
+ *
94
+ * Reference bundles:
95
+ *
96
+ * 1) Create a resource bundle for a reference from just the .fasta, letting the tool resolve the secondary
97
+ * (index and dictionary) resources by automatically finding the sibling files, and determining the content types.
98
+ * If the sibling index file cannot be found, an exception will be thrown. The resulting bundle will contain the
99
+ * reference, index, and dictionary.
46
100
*
47
- * CreateBundle --primary path/to/my.vcf --output mybundle.json
101
+ * CreateBundle \
102
+ * --primary path/to/my.fasta \
103
+ * --output mybundle.json
48
104
*
49
- * 2) Create a resource bundle for a VCF. Let the tool determine the content types, but suppress resolution of the secondary
50
- * resources (which for vcfs is the companion index). The resulting bundle will contain only the vcf resource:
105
+ * 2) Create a resource bundle for a reference from just the .fasta, but suppress resolution of the secondary index and
106
+ * dictionary resources. Let the tool determine the content type. The resulting bundle will contain only the .fasta
107
+ * resource:
51
108
*
52
- * CreateBundle --primary path/to/my.vcf --output mybundle.json
109
+ * CreateBundle \
110
+ * --primary path/to/my.fasta \
111
+ * --suppress-resource-resolution \
112
+ * --output mybundle.json
53
113
*
54
- * 3) Create a resource bundle for a VCF. Let the tool determine the content type, but specify the secondary
55
- * index resource explicitly (which suppresses secondary resolution). The resulting bundle will contain the vcf
56
- * and index resources:
114
+ * 3) Create a resource bundle for a fasta, but specify the fasta AND the secondary index and dictionary resources
115
+ * explicitly (which suppresses automatic secondary resolution). Let the tool determine the content types. The
116
+ * resulting bundle will contain the fasta, index and dictionary resources:
57
117
*
58
- * CreateBundle --primary path/to/my.vcf --secondary some/other/path/to/vcd.idx --output mybundle.json
118
+ * CreateBundle \
119
+ * --primary path/to/my.fasta \
120
+ * --secondary some/other/path/to/my.fai \
121
+ * --secondary some/other/path/to/my.dict \
122
+ * --output mybundle.json
59
123
*
60
- * Reference bundles: create a bundle using explicitly provided values and content types for the primary and
61
- * secondary resources:
124
+ * 4) Create a resource bundle for a fasta, but specify the fasta, index and dictionary resources and the content
125
+ * types explicitly. The resulting bundle will contain the fasta, index and dictionary resources:
62
126
*
63
- * CreateBundle --primary: path/to/my.fa
127
+ * CreateBundle \
128
+ * --primary:CT_HAPLOID_REFERENCE path/to/my.fasta \
129
+ * --secondary:CT_HAPLOID_REFERENCE_INDEX some/other/path/to/my.fai \
130
+ * --secondary:CT_HAPLOID_REFERENCE_DICTIONARY some/other/path/to/my.dict \
131
+ * --output mybundle.json
64
132
*/
65
133
@ DocumentedFeature
66
134
@ CommandLineProgramProperties (
@@ -110,7 +178,7 @@ public class CreateBundle extends CommandLineProgram {
110
178
private enum BundleType {
111
179
VCF ,
112
180
REFERENCE ,
113
- OTHER
181
+ CUSTOM
114
182
}
115
183
private BundleType outputBundleType ;
116
184
@@ -129,7 +197,7 @@ protected Object doWork() {
129
197
final Bundle bundle = switch (outputBundleType ) {
130
198
case VCF -> createVCFBundle ();
131
199
case REFERENCE -> createHaploidReferenceBundle ();
132
- case OTHER -> createOtherBundle ();
200
+ case CUSTOM -> createOtherBundle ();
133
201
};
134
202
writer .write (BundleJSON .toJSON (bundle ));
135
203
} catch (final IOException e ) {
@@ -153,7 +221,7 @@ private BundleType determinePrimaryContentType() {
153
221
logger .info (String .format ("Primary input content type %s for %s not recognized. A bundle will be created using content types from the provided argument tags." ,
154
222
primaryContentTag ,
155
223
primaryResource ));
156
- bundleType = BundleType .OTHER ;
224
+ bundleType = BundleType .CUSTOM ;
157
225
}
158
226
} else {
159
227
logger .info (String .format ("A content type for the primary input was not provided. Attempting to infer the content type from the %s extension." , primaryResource ));
0 commit comments