Skip to content

Commit 204e9c2

Browse files
committed
save_metadata now uses rusqlite to save the data in an SQL database
1 parent 51380be commit 204e9c2

File tree

3 files changed

+40
-12
lines changed

3 files changed

+40
-12
lines changed

Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ ciborium = "0.2"
4242
snap = "1.1"
4343
memmap2 = "0.9"
4444
arrayref = "0.3"
45-
mysql = "26.0.0"
45+
rusqlite = "0.33.0"
4646
# ffi
4747
pyo3 = { version = "0.21", features = ["auto-initialize"], optional = true }
4848
# logging

src/multisketch.rs

+34-9
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//! The class to support .skm/.skd reading and writing, containing multiple [`Sketch`] objects
2+
use anyhow::anyhow;
23
use anyhow::bail;
34
use anyhow::Error;
4-
use anyhow::{Result, anyhow};
55
// use thiserror::Error;
66
use core::panic;
77
use std::fmt;
@@ -17,6 +17,9 @@ use crate::sketch::{Sketch, BBITS};
1717
use crate::sketch_datafile::SketchArrayFile;
1818

1919
use std::collections::HashSet;
20+
21+
use rusqlite::{params, Connection, Result};
22+
2023
#[derive(Serialize, Deserialize)]
2124
pub struct MultiSketch {
2225
pub sketch_size: u64,
@@ -66,14 +69,35 @@ impl MultiSketch {
6669

6770
/// Saves the metadata
6871
pub fn save_metadata(&self, file_prefix: &str) -> Result<(), Error> {
69-
let filename = format!("{}.skm", file_prefix);
72+
let filename = format!("{}.db", file_prefix);
7073
log::info!("Saving sketch metadata to {filename}");
71-
let serial_file = BufWriter::new(File::create(filename)?);
72-
let mut compress_writer = snap::write::FrameEncoder::new(serial_file);
73-
ciborium::ser::into_writer(self, &mut compress_writer)?;
74+
// Creates database
75+
let conn = Connection::open(filename)?;
76+
// Initialise table in database
77+
conn.execute(
78+
"CREATE TABLE sketch_metadata (
79+
id INTEGER PRIMARY KEY,
80+
name TEXT NOT NULL,
81+
length INTEGER
82+
)",
83+
(),
84+
)?;
85+
// Iterate over metadata and add to database
86+
for (index, metadata) in self.sketch_metadata.iter().enumerate() {
87+
conn.execute(
88+
"INSERT INTO sketch_metadata (id, name, length) VALUES (?1, ?2, ?3)",
89+
(index, metadata.name(), metadata.seq_length()),
90+
)?;
91+
}
7492
Ok(())
7593
}
7694

95+
/// Unfinished stub for querying the `sketch_metadata` table of the SQLite
/// database at `path`.
///
/// Currently it only opens the database, issues a placeholder statement and
/// then reaches `todo!()`, so it never returns `Ok`. The `args` parameter is
/// not used yet.
///
/// NOTE(review): the declared return type suggests the result is meant to be
/// the matching `id` values — confirm the intended contract before
/// implementing.
pub fn query_metadata<T>(path: String, args: T) -> Result<Option<Vec<usize>>, Error> {
96+
let conn = Connection::open(path)?;
97+
// NOTE(review): the SQL text stops after `WHERE` with no condition, so this
// is presumably rejected as a syntax error and the `?` returns `Err` before
// `todo!()` is reached — confirm. Also, `execute` appears to be meant for
// statements that return no rows; a SELECT likely needs `prepare` plus
// `query_map` — TODO confirm against the rusqlite docs.
conn.execute("SELECT id FROM sketch_metadata WHERE", ())?;
98+
// Placeholder: panics if execution ever gets this far.
todo!()
99+
}
100+
77101
pub fn load(file_prefix: &str) -> Result<Self, Error> {
78102
let filename = format!("{}.skm", file_prefix);
79103
log::info!("Loading sketch metadata from {filename}");
@@ -206,10 +230,8 @@ impl MultiSketch {
206230
let mut removed_samples = Vec::new();
207231

208232
for sketch in &self.sketch_metadata {
209-
210233
if !genome_ids_to_remove.contains(&(*sketch.name()).to_string()) {
211234
new_sketch_metadata.push(sketch.clone());
212-
213235
} else {
214236
removed_samples.push(sketch.name());
215237
}
@@ -219,7 +241,10 @@ impl MultiSketch {
219241
let set2: HashSet<&str> = genome_ids_to_remove.iter().map(AsRef::as_ref).collect();
220242
let missing: Vec<&&str> = set2.difference(&set1).collect();
221243
if !missing.is_empty() {
222-
bail!("The following samples have not been found in the database: {:?}", missing);
244+
bail!(
245+
"The following samples have not been found in the database: {:?}",
246+
missing
247+
);
223248
}
224249

225250
self.sketch_metadata = new_sketch_metadata;
@@ -232,7 +257,7 @@ impl MultiSketch {
232257
input_prefix: &str,
233258
output_file: &str,
234259
genome_ids_to_remove: &[String],
235-
) -> anyhow::Result<()> {
260+
) -> anyhow::Result<()> {
236261
let mut positions_to_remove = Vec::new();
237262
let mut missing_ids = Vec::new();
238263

src/sketch.rs

+5-2
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,10 @@ impl Sketch {
113113
&self.name
114114
}
115115

116+
/// Returns the value stored in this sketch's `seq_length` field.
pub fn seq_length(&self) -> usize {
117+
self.seq_length
118+
}
119+
116120
pub fn set_index(&mut self, index: usize) {
117121
self.index = Some(index);
118122
}
@@ -221,8 +225,7 @@ pub fn sketch_files(
221225
output_prefix: &str,
222226
input_files: &[InputFastx],
223227
concat_fasta: bool,
224-
#[cfg(feature = "3di")]
225-
convert_pdb: bool,
228+
#[cfg(feature = "3di")] convert_pdb: bool,
226229
k: &[usize],
227230
sketch_size: u64,
228231
seq_type: &HashType,

0 commit comments

Comments
 (0)