Skip to content

Commit 0184bb5

Browse files
committed
Update tests and javadoc.
1 parent ad48eed commit 0184bb5

File tree

2 files changed

+137
-56
lines changed

2 files changed

+137
-56
lines changed

src/main/java/org/broadinstitute/hellbender/tools/CreateBundle.java

+90-22
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import htsjdk.beta.io.bundle.*;
44
import htsjdk.beta.plugin.registry.HaploidReferenceResolver;
55
import htsjdk.beta.plugin.variants.VariantsBundle;
6-
import htsjdk.io.HtsPath;
76
import htsjdk.samtools.util.FileExtensions;
87
import org.apache.logging.log4j.LogManager;
98
import org.apache.logging.log4j.Logger;
@@ -23,44 +22,113 @@
2322
/**
2423
* Create a bundle (JSON) file for use with a GATK tool.
2524
*
26-
* Since most bundles will contain a primary resource plus at least one secondary resource (typically an index),
25+
* Since most bundles need to contain a primary resource plus at least one secondary resource (typically an index),
2726
* the tool will attempt to infer standard secondary resource(s) for a given primary resource if no secondary resource
2827
* is explicitly provided on the command line. Inferred secondary resources are automatically added to the resulting
2928
* bundle. Secondary resource inference can be suppressed by using the --suppress-resource-resolution argument.
3029
*
3130
* Each resource in a bundle must have an associated content type tag. Content types for each resource are either
3231
* specified on the command line via argument tags, or inferred by the tool. For the primary and secondary resources,
3332
* when no content type argument tag is provided, the tool will attempt to infer the content type from the file
34-
* extension. However, the content type for "other" resources (resources that are nether primary nor secondary resources)
35-
* are NEVER inferred, and must always include a content type argument tag.
33+
* extension. However, the content type for "other" resources (resources that are neither primary nor secondary
34+
* resources) is NEVER inferred; such resources must always include a content type argument tag.
3635
*
3736
* Bundle output file names must end with the suffix ".json".
3837
*
39-
* Common examples:
38+
* In general, content types can be any string, but there are well known content types that must be used when creating
39+
* bundles for tools that expect well known resource types, such as a VCF, a VCF index, a .fasta file, or a reference
40+
* dictionary file. The common well known content types are:
41+
*
42+
* - "CT_VARIANT_CONTEXTS": a VCF file
43+
* - "CT_VARIANTS_INDEX": a VCF index file
44+
*
45+
* - "CT_HAPLOID_REFERENCE": fasta reference file
46+
* - "CT_HAPLOID_REFERENCE_INDEX": fasta index file
47+
* - "CT_HAPLOID_REFERENCE_DICTIONARY": fasta dictionary file
48+
*
49+
* Common bundle creation examples:
4050
*
4151
* VCF Bundles:
4252
*
43-
* 1) Create a resource bundle for a VCF. Let the tool determine the content types, and resolve the secondary resources
44-
* (which for vcfs is the companion index) automatically by finding a sibling index file. If the sibling file cannot
45-
* be found, an exception wil lbe thrown:
53+
* 1) Create a resource bundle for a VCF from just the VCF, letting the tool resolve the secondary (index) resource by
54+
* automatically finding the sibling index file, and letting the tool determine the content types. If the sibling index
55+
* file cannot be found, an exception will be thrown. Resulting bundle contains the VCF and associated index.
56+
*
57+
* CreateBundle \
58+
* --primary path/to/my.vcf \
59+
* --output mybundle.json
60+
*
61+
* The exact same bundle could be created manually by specifying both the resources and the content types explicitly:
62+
*
63+
* CreateBundle \
64+
* --primary:CT_VARIANT_CONTEXTS path/to/my.vcf \
65+
* --secondary:CT_VARIANTS_INDEX path/to/my.vcf.idx \
66+
* --output mybundle.json
67+
*
68+
* 2) Create a resource bundle for a VCF from just the VCF, but suppress automatic resolution of the secondary
69+
* resources. Let the tool determine the content types. The resulting bundle will contain only the vcf resource:
70+
*
71+
* CreateBundle \
72+
* --primary path/to/my.vcf \
73+
* --suppress-resource-resolution \
74+
* --output mybundle.json
75+
*
76+
* 3) Create a resource bundle for a VCF, but specify the VCF AND the secondary index resource explicitly (which
77+
* suppresses automatic secondary resolution). This is useful when the VCF and index are not in the same directory.
78+
* Let the tool determine the content types. The resulting bundle will contain the VCF and index resources:
79+
*
80+
* CreateBundle \
81+
* --primary path/to/my.vcf \
82+
* --secondary some/other/path/to/my.vcf.idx \
83+
* --output mybundle.json
84+
*
85+
* 4) Create a resource bundle for a VCF, but specify the VCF AND the secondary index resource explicitly (this
86+
* is useful when the VCF and index are not in the same directory), and specify the content types explicitly via
87+
* command line argument tags. The resulting bundle will contain the VCF and index resources.
88+
*
89+
* CreateBundle \
90+
* --primary:CT_VARIANT_CONTEXTS path/to/my.vcf \
91+
* --secondary:CT_VARIANTS_INDEX some/other/path/to/my.vcf.idx \
92+
* --output mybundle.json
93+
*
94+
* Reference bundles:
95+
*
96+
* 1) Create a resource bundle for a reference from just the .fasta, letting the tool resolve the secondary
97+
* (index and dictionary) resources by automatically finding the sibling files, and determining the content types.
98+
* If the sibling index file cannot be found, an exception will be thrown. The resulting bundle will contain the
99+
* reference, index, and dictionary.
46100
*
47-
* CreateBundle --primary path/to/my.vcf --output mybundle.json
101+
* CreateBundle \
102+
* --primary path/to/my.fasta \
103+
* --output mybundle.json
48104
*
49-
* 2) Create a resource bundle for a VCF. Let the tool determine the content types, but suppress resolution of the secondary
50-
* resources (which for vcfs is the companion index). The resulting bundle will contain only the vcf resource:
105+
* 2) Create a resource bundle for a reference from just the .fasta, but suppress resolution of the secondary index and
106+
* dictionary resources. Let the tool determine the content type. The resulting bundle will contain only the .fasta
107+
* resource:
51108
*
52-
* CreateBundle --primary path/to/my.vcf --output mybundle.json
109+
* CreateBundle \
110+
* --primary path/to/my.fasta \
111+
* --suppress-resource-resolution \
112+
* --output mybundle.json
53113
*
54-
* 3) Create a resource bundle for a VCF. Let the tool determine the content type, but specify the secondary
55-
* index resource explicitly (which suppresses secondary resolution). The resulting bundle will contain the vcf
56-
* and index resources:
114+
* 3) Create a resource bundle for a fasta, but specify the fasta AND the secondary index and dictionary resources
115+
* explicitly (which suppresses automatic secondary resolution). Let the tool determine the content types. The
116+
* resulting bundle will contain the fasta, index and dictionary resources:
57117
*
58-
* CreateBundle --primary path/to/my.vcf --secondary some/other/path/to/vcd.idx --output mybundle.json
118+
* CreateBundle \
119+
* --primary path/to/my.fasta \
120+
* --secondary some/other/path/to/my.fai \
121+
* --secondary some/other/path/to/my.dict \
122+
* --output mybundle.json
59123
*
60-
* Reference bundles: create a bundle using explicitly provided values and content types for the primary and
61-
* secondary resources:
124+
* 4) Create a resource bundle for a fasta, but specify the fasta, index and dictionary resources and the content
125+
* types explicitly. The resulting bundle will contain the fasta, index and dictionary resources:
62126
*
63-
* CreateBundle --primary: path/to/my.fa
127+
* CreateBundle \
128+
* --primary:CT_HAPLOID_REFERENCE path/to/my.fasta \
129+
* --secondary:CT_HAPLOID_REFERENCE_INDEX some/other/path/to/my.fai \
130+
* --secondary:CT_HAPLOID_REFERENCE_DICTIONARY some/other/path/to/my.dict \
131+
* --output mybundle.json
64132
*/
65133
@DocumentedFeature
66134
@CommandLineProgramProperties(
@@ -110,7 +178,7 @@ public class CreateBundle extends CommandLineProgram {
110178
private enum BundleType {
111179
VCF,
112180
REFERENCE,
113-
OTHER
181+
CUSTOM
114182
}
115183
private BundleType outputBundleType;
116184

@@ -129,7 +197,7 @@ protected Object doWork() {
129197
final Bundle bundle = switch (outputBundleType) {
130198
case VCF -> createVCFBundle();
131199
case REFERENCE -> createHaploidReferenceBundle();
132-
case OTHER -> createOtherBundle();
200+
case CUSTOM -> createOtherBundle();
133201
};
134202
writer.write(BundleJSON.toJSON(bundle));
135203
} catch (final IOException e) {
@@ -153,7 +221,7 @@ private BundleType determinePrimaryContentType() {
153221
logger.info(String.format("Primary input content type %s for %s not recognized. A bundle will be created using content types from the provided argument tags.",
154222
primaryContentTag,
155223
primaryResource));
156-
bundleType = BundleType.OTHER;
224+
bundleType = BundleType.CUSTOM;
157225
}
158226
} else {
159227
logger.info(String.format("A content type for the primary input was not provided. Attempting to infer the content type from the %s extension.", primaryResource));

0 commit comments

Comments
 (0)