Skip to content

Commit 6dcdeff

Browse files
committed
Add bundle support for references.
1 parent c2b6beb commit 6dcdeff

11 files changed

+546
-141
lines changed

src/main/java/org/broadinstitute/hellbender/cmdline/StandardArgumentDefinitions.java

+4-4
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,10 @@ private StandardArgumentDefinitions(){}
5656
public static final String INTERVALS_SHORT_NAME = "L";
5757
public static final String COMPARISON_SHORT_NAME = "comp";
5858
public static final String READ_INDEX_SHORT_NAME = READ_INDEX_LONG_NAME;
59-
public static final String PRIMARY_INPUT_LONG_NAME = "primary";
60-
public static final String PRIMARY_INPUT_SHORT_NAME = "PI";
61-
public static final String SECONDARY_INPUT_LONG_NAME = "secondaryI";
62-
public static final String SECONDARY_INPUT_SHORT_NAME = "SI";
59+
public static final String PRIMARY_RESOURCE_LONG_NAME = "primary-resource";
60+
public static final String PRIMARY_RESOURCE_SHORT_NAME = "PR";
61+
public static final String SECONDARY_RESOURCE_LONG_NAME = "secondary-resource";
62+
public static final String SECONDARY_RESOURCE_SHORT_NAME = "SR";
6363
public static final String LENIENT_SHORT_NAME = "LE";
6464
public static final String READ_VALIDATION_STRINGENCY_SHORT_NAME = "VS";
6565
public static final String SAMPLE_ALIAS_SHORT_NAME = "ALIAS";

src/main/java/org/broadinstitute/hellbender/engine/FeatureInput.java

+3-2
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,8 @@ public final class FeatureInput<T extends Feature> extends GATKPath implements S
5454
private transient Class<FeatureCodec<T, ?>> featureCodecClass;
5555

5656
/**
57-
* retain any containing bundle in case we need to extract other resources from it
57+
* retain the parent (enclosing) bundle from which this feature input is derived, in case we need to extract
58+
* other resources from it
5859
*/
5960
private Bundle parentBundle;
6061

@@ -148,7 +149,7 @@ public FeatureInput(
148149
final Bundle featureBundle,
149150
final String name) {
150151
super(primaryResourcePath);
151-
// retain the containing bundle for later so we can interrogate it for other resources, like the index
152+
// retain the enclosing bundle for later, so we can interrogate it for other resources such as the index
152153
this.parentBundle = featureBundle;
153154
if (name != null) {
154155
if (primaryResourcePath.getTag() != null) {

src/main/java/org/broadinstitute/hellbender/engine/MultiVariantWalker.java

+8-6
Original file line numberDiff line numberDiff line change
@@ -86,15 +86,17 @@ protected void initializeDrivingVariants() {
8686
final List<Bundle> bundles = BundleJSON.toBundleList(IOUtils.getStringFromPath(gatkPath), GATKPath::new);
8787
for (final Bundle bundle : bundles) {
8888
if (bundle.getPrimaryContentType().equals(BundleResourceType.CT_VARIANT_CONTEXTS)) {
89-
// use the bundle primary resource as the FeatureInput URI, and tear off and attach the
90-
// individual bundle the bundle to the FI as the parent bundle so downstream code can
91-
// extract other resources from it on demand
92-
// note that if the original value from the user has a tag, we can't use it unless there
93-
// is only one input, since FIs have to be unique
89+
// use the bundle primary resource as the FeatureInput URI, and tear off and attach
90+
// the enclosing bundle as the parent bundle for the FI so downstream code can extract
91+
// other resources from it on demand. note that if the original value from the user has
92+
// a tag, we can't propagate it unless there is only one input, since FIs have to be
93+
// unique
9494
final FeatureInput<VariantContext> bundleFI = new FeatureInput<>(
9595
new GATKPath(bundle.getPrimaryResource().getIOPath().get().getURIString()),
9696
bundle,
97-
bundles.size() > 1 ? gatkPath.getTag() : "drivingVariants"
97+
bundles.size() > 1 ?
98+
gatkPath.getTag() :
99+
"drivingVariants"
98100
);
99101
if (drivingVariantsFeatureInputs.contains(bundleFI)) {
100102
throw new UserException.BadInput("Feature inputs must be unique: " + gatkPath);

src/main/java/org/broadinstitute/hellbender/tools/CreateBundle.java

+266-64
Large diffs are not rendered by default.

src/main/java/org/broadinstitute/hellbender/utils/fasta/CachingIndexedFastaSequenceFile.java

+18-7
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
package org.broadinstitute.hellbender.utils.fasta;
22

3+
import htsjdk.beta.io.bundle.Bundle;
4+
import htsjdk.beta.io.bundle.BundleJSON;
5+
import htsjdk.beta.plugin.IOUtils;
6+
import htsjdk.io.IOPath;
37
import htsjdk.samtools.SAMException;
48
import htsjdk.samtools.SAMSequenceDictionary;
59
import htsjdk.samtools.SAMSequenceRecord;
@@ -143,14 +147,24 @@ public CachingIndexedFastaSequenceFile(final Path fasta, boolean preserveAmbigui
143147
* @param preserveIUPAC If true, we will keep the IUPAC bases in the FASTA, otherwise they are converted to Ns
144148
*/
145149
public CachingIndexedFastaSequenceFile(final Path fasta, final long cacheSize, final boolean preserveCase, final boolean preserveIUPAC) {
146-
// Check the FASTA path:
147-
checkFastaPath(fasta);
148150
Utils.validate(cacheSize > 0, () -> "Cache size must be > 0 but was " + cacheSize);
149151

150152
// Read reference data by creating an IndexedFastaSequenceFile.
151153
try {
152-
final ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(fasta, true, true);
153-
sequenceFile = requireIndex(fasta, referenceSequenceFile);
154+
final IOPath fastaPath = new GATKPath(fasta.toUri().toString());
155+
if (fastaPath.hasExtension(BundleJSON.BUNDLE_EXTENSION)) {
156+
final Bundle referenceBundle = BundleJSON.toBundle(IOUtils.getStringFromPath(fastaPath), GATKPath::new);
157+
final ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFileFromBundle(
158+
referenceBundle,
159+
true,
160+
true);
161+
sequenceFile = requireIndex(fasta, referenceSequenceFile);
162+
} else {
163+
// Check the FASTA path:
164+
checkFastaPath(fasta);
165+
final ReferenceSequenceFile referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(fasta, GATKPath::new, true, true);
166+
sequenceFile = requireIndex(fasta, referenceSequenceFile);
167+
}
154168
this.cacheSize = cacheSize;
155169
this.cacheMissBackup = Math.max(cacheSize / 1000, 1);
156170
this.preserveCase = preserveCase;
@@ -159,9 +173,6 @@ public CachingIndexedFastaSequenceFile(final Path fasta, final long cacheSize, f
159173
catch (final IllegalArgumentException e) {
160174
throw new UserException.CouldNotReadInputFile(fasta, "Could not read reference sequence. The FASTA must have either a .fasta or .fa extension", e);
161175
}
162-
catch (final Exception e) {
163-
throw new UserException.CouldNotReadInputFile(fasta, e);
164-
}
165176
}
166177

167178
/**
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
package org.broadinstitute.hellbender.engine;
2+
3+
import htsjdk.beta.io.IOPathUtils;
4+
import htsjdk.beta.io.bundle.Bundle;
5+
import htsjdk.beta.io.bundle.BundleJSON;
6+
import htsjdk.io.IOPath;
7+
import org.broadinstitute.hellbender.GATKBaseTest;
8+
import org.testng.Assert;
9+
import org.testng.annotations.Test;
10+
11+
import java.io.IOException;
12+
13+
public class BundleSupportIntegrationTest extends GATKBaseTest {
14+
15+
// this test uses a serialized bundle file to ensure that we don't unintentionally pick up any
16+
// code (like, from htsjdk) that introduces backward compatibility issues
17+
@Test
18+
public void testReadWriteSerializedReferenceBundle() throws IOException {
19+
// This test file contains absolute paths to files on a local dev machine, so it shouldn't really be used
20+
// for anything other than this test, since the absolute paths are unlikely to work on any other machine.
21+
// But here we just want to make sure we can consume and roundtrip it without error
22+
final IOPath testBundleFilePath = new GATKPath("src/test/resources/org/broadinstitute/hellbender/engine/print_reads_bundle_do_not_use.json");
23+
24+
// get our test bundle from the file (ensure we canparse it), then write it out to a temp file, read it back
25+
// in, and compare
26+
final Bundle testBundle = BundleJSON.toBundle(IOPathUtils.getStringFromPath(testBundleFilePath));
27+
final IOPath roundTrippedBundleFilePath = new GATKPath(
28+
createTempPath("testReadWriteSerializedReferenceBundle", ".json").toString());
29+
IOPathUtils.writeStringToPath(roundTrippedBundleFilePath, BundleJSON.toJSON(testBundle));
30+
final Bundle roundTrippedBundle = BundleJSON.toBundle(IOPathUtils.getStringFromPath(testBundleFilePath));
31+
Assert.assertTrue(Bundle.equalsIgnoreOrder(roundTrippedBundle, testBundle));
32+
}
33+
34+
}

src/test/java/org/broadinstitute/hellbender/engine/MultiVariantWalkerIntegrationTest.java

+1-2
Original file line numberDiff line numberDiff line change
@@ -167,8 +167,7 @@ private static IOPath createRemoteBundleForFile(
167167
final File index1,
168168
final File vcf2,
169169
final File index2) throws IOException {
170-
//TODO: replace this path with getGCPTestStaging()
171-
final String remotePath = BucketUtils.randomRemotePath("gs://hellbender/test/staging/remoteBundles", "remote_bundle_test", "dir");
170+
final String remotePath = BucketUtils.randomRemotePath(getGCPTestStaging() + "remoteBundles", "remote_bundle_test", "dir");
172171
final Path remoteDirPath = IOUtils.getPath(remotePath + "/");
173172

174173
Files.createDirectory(remoteDirPath);

src/test/java/org/broadinstitute/hellbender/tools/AbstractPrintReadsIntegrationTest.java

+50-6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
package org.broadinstitute.hellbender.tools;
22

3+
import htsjdk.beta.io.IOPathUtils;
4+
import htsjdk.beta.io.bundle.Bundle;
5+
import htsjdk.beta.io.bundle.BundleJSON;
6+
import htsjdk.beta.plugin.registry.HaploidReferenceResolver;
37
import htsjdk.samtools.SAMFileHeader;
48
import htsjdk.samtools.SAMRecord;
59
import htsjdk.samtools.SamReader;
@@ -9,6 +13,7 @@
913
import org.broadinstitute.hellbender.GATKBaseTest;
1014
import org.broadinstitute.hellbender.cmdline.ReadFilterArgumentDefinitions;
1115
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
16+
import org.broadinstitute.hellbender.engine.GATKPath;
1217
import org.broadinstitute.hellbender.engine.ReadsDataSource;
1318
import org.broadinstitute.hellbender.engine.ReadsPathDataSource;
1419
import org.broadinstitute.hellbender.engine.filters.ReadLengthReadFilter;
@@ -19,6 +24,7 @@
1924
import org.broadinstitute.hellbender.testutils.SamAssertionUtils;
2025
import org.broadinstitute.hellbender.utils.Utils;
2126
import org.broadinstitute.hellbender.utils.gcs.BucketUtils;
27+
import org.broadinstitute.hellbender.utils.io.IOUtils;
2228
import org.broadinstitute.hellbender.utils.read.GATKRead;
2329
import org.testng.Assert;
2430
import org.testng.annotations.DataProvider;
@@ -37,14 +43,20 @@ public void doFileToFile(String fileIn, String extOut, String reference, boolean
3743
String samFile = fileIn;
3844
final File outFile = GATKBaseTest.createTempFile(samFile + ".", extOut);
3945
final File ORIG_BAM = new File(TEST_DATA_DIR, samFile);
40-
final File refFile;
46+
final GATKPath refFile;
4147

4248
final ArrayList<String> args = new ArrayList<>();
4349
args.add("--input"); args.add(ORIG_BAM.getAbsolutePath());
4450
args.add("--output"); args.add(outFile.getAbsolutePath());
4551
if (reference != null) {
46-
refFile = new File(TEST_DATA_DIR, reference);
47-
args.add("-R"); args.add(refFile.getAbsolutePath());
52+
if (reference.endsWith(BundleJSON.BUNDLE_EXTENSION)) {
53+
// the test json files are temporary files, not files in TEST_DATA_DIR
54+
refFile = new GATKPath(reference);
55+
args.add("-R"); args.add(reference);
56+
} else {
57+
refFile = new GATKPath(new File(TEST_DATA_DIR, reference).getAbsolutePath());
58+
args.add("-R"); args.add(refFile.toString());
59+
}
4860
}
4961
else {
5062
refFile = null;
@@ -55,13 +67,33 @@ public void doFileToFile(String fileIn, String extOut, String reference, boolean
5567
}
5668
runCommandLine(args);
5769

58-
SamAssertionUtils.assertSamsEqual(outFile, ORIG_BAM, refFile);
70+
SamAssertionUtils.assertSamsEqual(outFile, ORIG_BAM, refFile == null ? null : refFile.toPath().toFile());
5971

6072
if (testMD5) {
6173
checkMD5asExpected(outFile);
6274
}
6375
}
6476

77+
public void doFileToFileUsingReferenceBundle(String fileIn, String extOut, String reference, boolean testMD5) throws Exception {
78+
final String referenceToUse;
79+
if (reference != null) {
80+
// create the bundle, using inference to find the sibling files, then write the bundle out to a temp file
81+
final Bundle referenceBundle = HaploidReferenceResolver.referenceBundleFromFastaPath(
82+
new GATKPath(new File(TEST_DATA_DIR, reference).toPath().toString()),
83+
GATKPath::new);
84+
final GATKPath tempBundlePath = new GATKPath(
85+
IOUtils.createTempFile("printReadsRefBundle", ".json").getAbsolutePath()
86+
);
87+
IOPathUtils.writeStringToPath(tempBundlePath, BundleJSON.toJSON(referenceBundle));
88+
referenceToUse = tempBundlePath.toString();
89+
} else {
90+
referenceToUse = reference;
91+
}
92+
93+
// no run the regular test, but using the reference bundle
94+
doFileToFile(fileIn, extOut, referenceToUse, testMD5);
95+
}
96+
6597
private void checkMD5asExpected(final File outFile) throws IOException {
6698
final File md5File = new File(outFile.getAbsolutePath() + ".md5");
6799
if (md5File.exists()) {
@@ -74,8 +106,8 @@ private void checkMD5asExpected(final File outFile) throws IOException {
74106
}
75107

76108
@Test(dataProvider="testingData")
77-
public void testFileToFile(String fileIn, String extOut, String reference) throws Exception {
78-
doFileToFile(fileIn, extOut, reference, false);
109+
public void testFileToFileWithReferenceBundle(String fileIn, String extOut, String reference) throws Exception {
110+
doFileToFileUsingReferenceBundle(fileIn, extOut, reference, false);
79111
}
80112

81113
@DataProvider(name="testingData")
@@ -120,6 +152,18 @@ public Object[][] testingData() {
120152
};
121153
}
122154

155+
@Test(dataProvider="testingData")
156+
public void testFileToFileUsingReferenceBundle(String fileIn, String extOut, String reference) throws Exception {
157+
if (reference != null) {
158+
doFileToFileUsingReferenceBundle(fileIn, extOut, reference, false);
159+
}
160+
}
161+
162+
@Test(dataProvider="testingData")
163+
public void testFileToFile(String fileIn, String extOut, String reference) throws Exception {
164+
doFileToFile(fileIn, extOut, reference, false);
165+
}
166+
123167
@Test
124168
public void testReadThatConsumesNoReferenceBases() throws IOException {
125169
final File zeroRefBasesReadBam = new File(TEST_DATA_DIR, "read_consumes_zero_ref_bases.bam");

0 commit comments

Comments
 (0)