Skip to content

Commit 5a10c0c

Browse files
committed
More tests and javadoc.
1 parent 0184bb5 commit 5a10c0c

File tree

7 files changed

+287
-96
lines changed

7 files changed

+287
-96
lines changed

src/main/java/org/broadinstitute/hellbender/cmdline/CommandLineProgram.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ public Object instanceMainPostParseArgs() {
156156
tmpDir = new GATKPath(System.getProperty("java.io.tmpdir"));
157157
}
158158

159-
// Build the default headers
159+
// Build the defauNlt headers
160160
final ZonedDateTime startDateTime = ZonedDateTime.now();
161161
this.defaultHeaders.add(new StringHeader(commandLine));
162162
this.defaultHeaders.add(new StringHeader("Started on: " + Utils.getDateTimeForDisplay(startDateTime)));

src/main/java/org/broadinstitute/hellbender/tools/CreateBundle.java

+129-69
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
package org.broadinstitute.hellbender.engine;
2+
3+
import htsjdk.beta.io.IOPathUtils;
4+
import htsjdk.beta.io.bundle.Bundle;
5+
import htsjdk.beta.io.bundle.BundleJSON;
6+
import htsjdk.io.IOPath;
7+
import org.broadinstitute.hellbender.GATKBaseTest;
8+
import org.testng.Assert;
9+
import org.testng.annotations.Test;
10+
11+
import java.io.IOException;
12+
13+
public class BundleSupportIntegrationTest extends GATKBaseTest {
14+
15+
// this test uses a serialized bundle file to ensure that we don't unintentionally pick up any
16+
// code (like, from htsjdk) that introduces backward compatibility issues
17+
@Test
18+
public void testReadWriteSerializedReferenceBundle() throws IOException {
19+
// This test file contains absolute paths to files on a local dev machine, so it shouldn't really be used
20+
// for anything other than this test, since the absolute paths are unlikely to work on any other machine.
21+
// But here we just want to make sure we can consume and roundtrip it without error
22+
final IOPath testBundleFilePath = new GATKPath("src/test/resources/org/broadinstitute/hellbender/engine/print_reads_bundle_do_not_use.json");
23+
24+
// get our test bundle from the file (ensure we canparse it), then write it out to a temp file, read it back
25+
// in, and compare
26+
final Bundle testBundle = BundleJSON.toBundle(IOPathUtils.getStringFromPath(testBundleFilePath));
27+
final IOPath roundTrippedBundleFilePath = new GATKPath(
28+
createTempPath("testReadWriteSerializedReferenceBundle", ".json").toString());
29+
IOPathUtils.writeStringToPath(roundTrippedBundleFilePath, BundleJSON.toJSON(testBundle));
30+
final Bundle roundTrippedBundle = BundleJSON.toBundle(IOPathUtils.getStringFromPath(testBundleFilePath));
31+
Assert.assertTrue(Bundle.equalsIgnoreOrder(roundTrippedBundle, testBundle));
32+
}
33+
34+
}

src/test/java/org/broadinstitute/hellbender/engine/MultiVariantWalkerIntegrationTest.java

+1-2
Original file line numberDiff line numberDiff line change
@@ -167,8 +167,7 @@ private static IOPath createRemoteBundleForFile(
167167
final File index1,
168168
final File vcf2,
169169
final File index2) throws IOException {
170-
//TODO: replace this path with getGCPTestStaging()
171-
final String remotePath = BucketUtils.randomRemotePath("gs://hellbender/test/staging/remoteBundles", "remote_bundle_test", "dir");
170+
final String remotePath = BucketUtils.randomRemotePath(getGCPTestStaging() + "remoteBundles", "remote_bundle_test", "dir");
172171
final Path remoteDirPath = IOUtils.getPath(remotePath + "/");
173172

174173
Files.createDirectory(remoteDirPath);

src/test/java/org/broadinstitute/hellbender/tools/CreateBundleIntegrationTest.java

+57-22
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import htsjdk.beta.io.bundle.*;
44
import htsjdk.beta.plugin.IOUtils;
55
import htsjdk.beta.plugin.variants.VariantsBundle;
6+
import org.broadinstitute.barclay.argparser.CommandLineException;
67
import org.broadinstitute.hellbender.CommandLineProgramTest;
78
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
89
import org.broadinstitute.hellbender.engine.GATKPath;
@@ -13,24 +14,28 @@
1314
import java.util.ArrayList;
1415
import java.util.Arrays;
1516
import java.util.List;
16-
import java.util.stream.Collectors;
1717

1818
public class CreateBundleIntegrationTest extends CommandLineProgramTest {
1919

2020
// force our local paths to use absolute path names to make BundleResource and IOPath equality checks easier,
2121
// since once a bundle is round-tripped/serialized to JSON, the resources will always contain absolute path names
2222
// for local files
23+
24+
//NOTE: These variables are Strings, but they are initialized to Strings obtained by first creating a GATKPath,
25+
// and then calling getURIString on the resulting object. This is just shortcut to normalize them so they
26+
// match the strings that will be embedded in the bundles created by the CreateBundle tool (i.e., to have
27+
// full/absolute paths and protocol schemes).
2328
private final static String LOCAL_VCF = new GATKPath(getTestDataDir() + "/count_variants_withSequenceDict.vcf").getURIString();
2429
private final static String LOCAL_VCF_IDX = new GATKPath(getTestDataDir() + "/count_variants_withSequenceDict.vcf.idx").getURIString();
2530
private final static String LOCAL_VCF_GZIP = new GATKPath("src/test/resources/large/NA24385.vcf.gz").getURIString();
2631
private final static String LOCAL_VCF_TBI = new GATKPath("src/test/resources/large/NA24385.vcf.gz.tbi").getURIString();
2732
private final static String LOCAL_VCF_WITH_NO_INDEX = new GATKPath("src/test/resources/org/broadinstitute/hellbender/tools/count_variants_withSequenceDict_noIndex.vcf").getURIString();
28-
private final static String CLOUD_VCF = "gs://hellbender/test/resources/large/1000G.phase3.broad.withGenotypes.chr20.10100000.vcf";
29-
private final static String CLOUD_VCF_IDX = "gs://hellbender/test/resources/large/1000G.phase3.broad.withGenotypes.chr20.10100000.vcf.idx";
33+
private final static String CLOUD_VCF = GCS_GATK_TEST_RESOURCES + "large/1000G.phase3.broad.withGenotypes.chr20.10100000.vcf";
34+
private final static String CLOUD_VCF_IDX = GCS_GATK_TEST_RESOURCES + "large/1000G.phase3.broad.withGenotypes.chr20.10100000.vcf.idx";
3035

31-
private final static String LOCAL_FASTA = "src/test/resources/large/Homo_sapiens_assembly38.20.21.fasta";
32-
private final static String LOCAL_FASTA_INDEX = "src/test/resources/large/Homo_sapiens_assembly38.20.21.fasta.fai";
33-
private final static String LOCAL_FASTA_DICT = "src/test/resources/large/Homo_sapiens_assembly38.20.21.fasta.dict";
36+
private final static String LOCAL_FASTA = new GATKPath("src/test/resources/large/Homo_sapiens_assembly38.20.21.fasta").getURIString();
37+
private final static String LOCAL_FASTA_INDEX = new GATKPath("src/test/resources/large/Homo_sapiens_assembly38.20.21.fasta.fai").getURIString();
38+
private final static String LOCAL_FASTA_DICT = new GATKPath("src/test/resources/large/Homo_sapiens_assembly38.20.21.dict").getURIString();
3439

3540
private final static String CUSTOM_PRIMARY_CT = "primary_ct";
3641
private final static String CUSTOM_SECONDARY_CT = "secondary_ct";
@@ -41,22 +46,21 @@ public Object[][] bundleCases() {
4146
return new Object[][] {
4247
// primary, primary tag, secondary, secondary tag, other(s), other tag(s), suppressResourceResolution, expectedBundle
4348

44-
// VCF bundle cases, with AUTOMATIC secondary resolution, and INFERRED content types
49+
// VCF bundle cases, with AUTOMATIC secondary resolution, and INFERRED primary content types
4550
{LOCAL_VCF, null, null, null, null, null, false, new VariantsBundle(new GATKPath(LOCAL_VCF), new GATKPath(LOCAL_VCF_IDX))},
46-
{LOCAL_VCF, null, LOCAL_VCF_IDX, null, null, null, false, new VariantsBundle(new GATKPath(LOCAL_VCF), new GATKPath(LOCAL_VCF_IDX))},
51+
{LOCAL_VCF, null, LOCAL_VCF_IDX, BundleResourceType.CT_VARIANTS_INDEX, null, null, false, new VariantsBundle(new GATKPath(LOCAL_VCF), new GATKPath(LOCAL_VCF_IDX))},
4752
{LOCAL_VCF_GZIP, null, null, null, null, null, false, new VariantsBundle(new GATKPath(LOCAL_VCF_GZIP), new GATKPath(LOCAL_VCF_TBI))},
53+
{LOCAL_VCF_GZIP, null, LOCAL_VCF_TBI, BundleResourceType.CT_VARIANTS_INDEX, null, null, true, new VariantsBundle(new GATKPath(LOCAL_VCF_GZIP), new GATKPath(LOCAL_VCF_TBI))},
4854
{CLOUD_VCF, null, null, null, null, null, false, new VariantsBundle(new GATKPath(CLOUD_VCF), new GATKPath(CLOUD_VCF_IDX))},
49-
{CLOUD_VCF, null, CLOUD_VCF_IDX, null, null, null, false, new VariantsBundle(new GATKPath(CLOUD_VCF), new GATKPath(CLOUD_VCF_IDX))},
55+
{CLOUD_VCF, null, CLOUD_VCF_IDX, BundleResourceType.CT_VARIANTS_INDEX, null, null, false, new VariantsBundle(new GATKPath(CLOUD_VCF), new GATKPath(CLOUD_VCF_IDX))},
5056

51-
// VCF bundle cases, with SUPPRESSED secondary resolution, and INFERRED content types
57+
// VCF bundle cases, with SUPPRESSED secondary resolution, and INFERRED primary content types
5258
{LOCAL_VCF, null, null, null, null, null, true, new VariantsBundle(new GATKPath(LOCAL_VCF))},
53-
{LOCAL_VCF, null, LOCAL_VCF_IDX, null, null, null, true, new VariantsBundle(new GATKPath(LOCAL_VCF), new GATKPath(LOCAL_VCF_IDX))},
59+
// local vcf that has no index, but since suppressSecondaryResourceResolution is true, we don't throw since we don't try to infer the index
5460
{LOCAL_VCF_WITH_NO_INDEX, null, null, null, null, null, true, new VariantsBundle(new GATKPath(LOCAL_VCF_WITH_NO_INDEX))},
55-
{LOCAL_VCF_GZIP, null, LOCAL_VCF_TBI, null, null, null, true, new VariantsBundle(new GATKPath(LOCAL_VCF_GZIP), new GATKPath(LOCAL_VCF_TBI))},
5661
{CLOUD_VCF, null, null, null, null, null, true, new VariantsBundle(new GATKPath(CLOUD_VCF))},
57-
{CLOUD_VCF, null, CLOUD_VCF_IDX, null, null, null, true, new VariantsBundle(new GATKPath(CLOUD_VCF), new GATKPath(CLOUD_VCF_IDX))},
5862

59-
// VCF bundle cases, with AUTOMATIC secondary resolution, and EXPLICIT content types
63+
// VCF bundle cases, with AUTOMATIC secondary resolution, and EXPLICIT primary content types
6064
{LOCAL_VCF, BundleResourceType.CT_VARIANT_CONTEXTS, null, null, null, null, false, new VariantsBundle(new GATKPath(LOCAL_VCF), new GATKPath(LOCAL_VCF_IDX))},
6165
{LOCAL_VCF, BundleResourceType.CT_VARIANT_CONTEXTS, LOCAL_VCF_IDX, BundleResourceType.CT_VARIANTS_INDEX, null, null, false, new VariantsBundle(new GATKPath(LOCAL_VCF), new GATKPath(LOCAL_VCF_IDX))},
6266

@@ -70,9 +74,24 @@ public Object[][] bundleCases() {
7074
.addPrimary(new IOPathResource(new GATKPath(LOCAL_VCF), BundleResourceType.CT_VARIANT_CONTEXTS))
7175
.addSecondary(new IOPathResource(new GATKPath(LOCAL_VCF_IDX), BundleResourceType.CT_VARIANTS_INDEX))
7276
.addSecondary(new IOPathResource(new GATKPath(new GATKPath("someVariantsCompanion.txt").getURIString()), "someVariantsCT"))
73-
.build()},
77+
.build()
78+
},
7479

7580
// reference bundles
81+
{ LOCAL_FASTA, null, null, null, null, null, false,
82+
new BundleBuilder()
83+
.addPrimary(new IOPathResource(new GATKPath(LOCAL_FASTA), BundleResourceType.CT_HAPLOID_REFERENCE))
84+
.addSecondary(new IOPathResource(new GATKPath(LOCAL_FASTA_INDEX), BundleResourceType.CT_REFERENCE_INDEX))
85+
.addSecondary(new IOPathResource(new GATKPath(LOCAL_FASTA_DICT), BundleResourceType.CT_REFERENCE_DICTIONARY))
86+
.build()
87+
},
88+
{ LOCAL_FASTA, BundleResourceType.CT_HAPLOID_REFERENCE, LOCAL_FASTA_INDEX, BundleResourceType.CT_REFERENCE_INDEX, Arrays.asList(LOCAL_FASTA_DICT), Arrays.asList(BundleResourceType.CT_REFERENCE_DICTIONARY), false,
89+
new BundleBuilder()
90+
.addPrimary(new IOPathResource(new GATKPath(LOCAL_FASTA), BundleResourceType.CT_HAPLOID_REFERENCE))
91+
.addSecondary(new IOPathResource(new GATKPath(LOCAL_FASTA_INDEX), BundleResourceType.CT_REFERENCE_INDEX))
92+
.addSecondary(new IOPathResource(new GATKPath(LOCAL_FASTA_DICT), BundleResourceType.CT_REFERENCE_DICTIONARY))
93+
.build()
94+
},
7695

7796
// "custom" bundles
7897
{
@@ -105,17 +124,26 @@ public Object[][] negativeBundleCases() {
105124
return new Object[][] {
106125
// primary, primary tag, secondary, secondary tag, other(s), other tag(s), suppressIndexResolution, expectedBundle
107126

108-
// no index file can be inferred
127+
// no vcf index file can be inferred
109128
{LOCAL_VCF_WITH_NO_INDEX, null, null, null, null, null, false, new VariantsBundle(new GATKPath(LOCAL_VCF_WITH_NO_INDEX))},
129+
// vcf bundle with secondary/other content type not explicitly provided
130+
{LOCAL_VCF, BundleResourceType.CT_VARIANT_CONTEXTS, null, null, Arrays.asList("other.txt"), null, false, null},
131+
{LOCAL_VCF, null, LOCAL_VCF_IDX, null, null, null, false, new VariantsBundle(new GATKPath(LOCAL_VCF), new GATKPath(LOCAL_VCF_IDX))},
132+
{LOCAL_VCF, null, LOCAL_VCF_IDX, null, null, null, true, new VariantsBundle(new GATKPath(LOCAL_VCF), new GATKPath(LOCAL_VCF_IDX))},
133+
{LOCAL_VCF_GZIP, null, LOCAL_VCF_TBI, null, null, null, true, new VariantsBundle(new GATKPath(LOCAL_VCF_GZIP), new GATKPath(LOCAL_VCF_TBI))},
134+
{CLOUD_VCF, null, CLOUD_VCF_IDX, null, null, null, false, new VariantsBundle(new GATKPath(CLOUD_VCF), new GATKPath(CLOUD_VCF_IDX))},
135+
{CLOUD_VCF, null, CLOUD_VCF_IDX, null, null, null, true, new VariantsBundle(new GATKPath(CLOUD_VCF), new GATKPath(CLOUD_VCF_IDX))},
110136
// primary content type not provided, and cannot be inferred from the extension
111137
{"primaryFile.ext", null, null, null, null, null, false, null},
112138
// secondary content type not provided
113139
{"primaryFile.ext", CUSTOM_PRIMARY_CT, "secondaryFile.ext", null, null, null, false, null},
114140

141+
// reference input with unknown content type specified
142+
{ LOCAL_FASTA, null, LOCAL_FASTA_INDEX, "unknown", null, null, false, null},
143+
115144
// other bundle with other content type not provided
116145
{"primaryFile.ext", CUSTOM_PRIMARY_CT, "secondaryFile.ext", CUSTOM_SECONDARY_CT, Arrays.asList("other.txt"), null, false, null},
117-
// vcf bundle with other content type not provided
118-
{LOCAL_VCF, BundleResourceType.CT_VARIANT_CONTEXTS, null, null, Arrays.asList("other.txt"), null, false, null},
146+
119147
};
120148
}
121149

@@ -145,6 +173,17 @@ public void testNegativeBundleCases(
145173
doCreateBundleTest (primaryInput, primaryInputTag, secondaryInput, secondaryInputTag, otherInputs, otherInputTags, suppressResourceResolution, expectedBundle);
146174
}
147175

176+
@Test(expectedExceptions={CommandLineException.class})
177+
public void testRequireBundleExtension() {
178+
final GATKPath outputPath = new GATKPath(createTempFile("test", ".bundle.BOGUS").getAbsolutePath().toString());
179+
final List<String> args = new ArrayList<>();
180+
args.add("--" + StandardArgumentDefinitions.PRIMARY_RESOURCE_LONG_NAME);
181+
args.add(LOCAL_FASTA);
182+
args.add("--" + StandardArgumentDefinitions.OUTPUT_LONG_NAME);
183+
args.add(outputPath.toString());
184+
runCommandLine(args);
185+
}
186+
148187
private void doCreateBundleTest(
149188
final String primaryInput,
150189
final String primaryInputTag,
@@ -176,10 +215,6 @@ private void doCreateBundleTest(
176215
args.add("--" + StandardArgumentDefinitions.OUTPUT_LONG_NAME);
177216
args.add(outputPath.toString());
178217

179-
System.out.println();
180-
System.out.println(args.stream().collect(Collectors.joining("\n ")));
181-
System.out.println();
182-
183218
runCommandLine(args);
184219

185220
final Bundle actualBundle = BundleJSON.toBundle(IOUtils.getStringFromPath(outputPath), GATKPath::new);

src/test/java/org/broadinstitute/hellbender/tools/PrintReadsIntegrationTest.java

+55-2
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,29 @@
11
package org.broadinstitute.hellbender.tools;
22

3-
import htsjdk.samtools.SamReaderFactory;
4-
import htsjdk.samtools.ValidationStringency;
3+
import htsjdk.beta.io.IOPathUtils;
4+
import htsjdk.beta.io.bundle.*;
5+
import htsjdk.io.IOPath;
6+
import htsjdk.samtools.*;
7+
import htsjdk.samtools.cram.ref.ReferenceSource;
58
import org.broadinstitute.hellbender.GATKBaseTest;
69
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
10+
import org.broadinstitute.hellbender.engine.GATKPath;
711
import org.broadinstitute.hellbender.engine.ReadsDataSource;
812
import org.broadinstitute.hellbender.engine.ReadsPathDataSource;
913
import org.broadinstitute.hellbender.testutils.ArgumentsBuilder;
1014
import org.broadinstitute.hellbender.testutils.SamAssertionUtils;
1115
import org.broadinstitute.hellbender.utils.SimpleInterval;
1216
import org.broadinstitute.hellbender.utils.Utils;
1317
import org.broadinstitute.hellbender.utils.gcs.BucketUtils;
18+
import org.broadinstitute.hellbender.utils.io.IOUtils;
1419
import org.testng.Assert;
1520
import org.testng.annotations.DataProvider;
1621
import org.testng.annotations.Test;
1722

1823
import java.io.File;
1924
import java.io.IOException;
25+
import java.nio.file.Files;
26+
import java.nio.file.Path;
2027
import java.util.Arrays;
2128
import java.util.List;
2229

@@ -104,4 +111,50 @@ public void testHttpPaths(String reads, String index, String nonHttpReads, Strin
104111
SamAssertionUtils.assertEqualBamFiles(out, out2, false, ValidationStringency.DEFAULT_STRINGENCY);
105112
}
106113

114+
@Test(groups = {"cloud"})
115+
public void testPrintReadsWithReferenceBundle() throws IOException {
116+
// test that both reading and writing a cram work when the reference is specified via a bundle where the
117+
// reference, reference index, and reference dictionary are all in different buckets
118+
final IOPath testFastaFile = new GATKPath(getTestDataDir() + "/print_reads.fasta");
119+
final IOPath testIndexFile = new GATKPath(getTestDataDir() + "/print_reads.fasta.fai");
120+
final IOPath testDictFile = new GATKPath(getTestDataDir() + "/print_reads.dict");
121+
122+
final String targetBucketName = BucketUtils.randomRemotePath(getGCPTestStaging(), "testPrintReadsWithReferenceBundle", "") + "/";
123+
final IOPath targetBucket = new GATKPath(targetBucketName);
124+
IOUtils.deleteOnExit(targetBucket.toPath());
125+
126+
final Path remoteFasta = Files.copy(testFastaFile.toPath(), new GATKPath(targetBucketName + "print_reads.fasta").toPath());
127+
final IOPath targetIndex = new GATKPath(targetBucketName + "refindex/print_reads.fasta.fai");
128+
final Path remoteFastaIndex = Files.copy(testIndexFile.toPath(), targetIndex.toPath());
129+
final IOPath targetDict = new GATKPath(targetBucketName + "refdict/print_reads.dict");
130+
final Path remoteFastaDict = Files.copy(testDictFile.toPath(), targetDict.toPath());
131+
132+
// create a bundle with the remote reference, index, and dict files
133+
final Bundle refBundle = new BundleBuilder()
134+
.addPrimary(new IOPathResource(new GATKPath(remoteFasta.toUri().toString()), BundleResourceType.CT_HAPLOID_REFERENCE))
135+
.addSecondary(new IOPathResource(new GATKPath(remoteFastaIndex.toUri().toString()), BundleResourceType.CT_REFERENCE_INDEX))
136+
.addSecondary(new IOPathResource(new GATKPath(remoteFastaDict.toUri().toString()), BundleResourceType.CT_REFERENCE_DICTIONARY))
137+
.build();
138+
final IOPath bundleFilePath = new GATKPath(targetBucketName + "refBundle.json");
139+
IOPathUtils.writeStringToPath(bundleFilePath, BundleJSON.toJSON(refBundle));
140+
141+
final IOPath targetOutCRAM = new GATKPath(IOUtils.createTempFile("testReferenceSequenceForNioBundle", ".cram").getAbsolutePath());
142+
final ArgumentsBuilder args = new ArgumentsBuilder()
143+
.addInput(getTestDataDir() + "/print_reads.cram")
144+
.addReference(bundleFilePath.toString())
145+
.addOutput(targetOutCRAM.toString());
146+
runCommandLine(args);
147+
148+
int count = 0;
149+
try (final SamReader in = SamReaderFactory.makeDefault()
150+
.validationStringency(ValidationStringency.SILENT)
151+
.referenceSource(new ReferenceSource(bundleFilePath.toPath()))
152+
.open(targetOutCRAM.toPath())) {
153+
for (@SuppressWarnings("unused") final SAMRecord rec : in) {
154+
count++;
155+
}
156+
}
157+
Assert.assertEquals(count, 8);
158+
}
159+
107160
}

0 commit comments

Comments
 (0)