Skip to content

Commit 098eea8

Browse files
authored
Reducing the number of call to fsync on the directory. (#1228)
This work by introducing a new API method in the Directory trait. The user needs to explicitely call this method. (In particular, once before a commmit) Closes #1225
1 parent 466dc82 commit 098eea8

6 files changed

+86
-51
lines changed

CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ Tantivy 0.17
22
================================
33
- Change to non-strict schema. Ignore fields in data which are not defined in schema. Previously this returned an error. #1211
44
- Facets are necessarily indexed. Existing index with indexed facets should work out of the box. Index without facets that are marked with index: false should be broken (but they were already broken in a sense). (@fulmicoton) #1195 .
5+
- Bugfix that could in theory impact durability in theory on some filesystems [#1224](https://github.com/quickwit-inc/tantivy/issues/1224)
6+
- Reduce the number of fsync calls [#1225](https://github.com/quickwit-inc/tantivy/issues/1225)
57

68
Tantivy 0.16.2
79
================================

src/directory/directory.rs

+14-2
Original file line numberDiff line numberDiff line change
@@ -142,10 +142,16 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
142142
/// Opens a writer for the *virtual file* associated with
143143
/// a Path.
144144
///
145-
/// Right after this call, the file should be created
146-
/// and any subsequent call to `open_read` for the
145+
/// Right after this call, for the span of the execution of the program
146+
/// the file should be created and any subsequent call to `open_read` for the
147147
/// same path should return a `FileSlice`.
148148
///
149+
/// However, depending on the directory implementation,
150+
/// it might be required to call `sync_directory` to ensure
151+
/// that the file is durably created.
152+
/// (The semantics here are the same when dealing with
153+
/// a posix filesystem.)
154+
///
149155
/// Write operations may be aggressively buffered.
150156
/// The client of this trait is responsible for calling flush
151157
/// to ensure that subsequent `read` operations
@@ -176,6 +182,12 @@ pub trait Directory: DirectoryClone + fmt::Debug + Send + Sync + 'static {
176182
/// The file may or may not previously exist.
177183
fn atomic_write(&self, path: &Path, data: &[u8]) -> io::Result<()>;
178184

185+
/// Sync the directory.
186+
///
187+
/// This call is required to ensure that newly created files are
188+
/// effectively stored durably.
189+
fn sync_directory(&self) -> io::Result<()>;
190+
179191
/// Acquire a lock in the given directory.
180192
///
181193
/// The method is blocking or not depending on the `Lock` object.

src/directory/managed_directory.rs

+21-2
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ impl ManagedDirectory {
192192
for delete_file in &deleted_files {
193193
managed_paths_write.remove(delete_file);
194194
}
195+
self.directory.sync_directory()?;
195196
save_managed_paths(self.directory.as_mut(), &meta_informations_wlock)?;
196197
}
197198

@@ -222,9 +223,22 @@ impl ManagedDirectory {
222223
.write()
223224
.expect("Managed file lock poisoned");
224225
let has_changed = meta_wlock.managed_paths.insert(filepath.to_owned());
225-
if has_changed {
226-
save_managed_paths(self.directory.as_ref(), &meta_wlock)?;
226+
if !has_changed {
227+
return Ok(());
228+
}
229+
save_managed_paths(self.directory.as_ref(), &meta_wlock)?;
230+
// This is not the first file we add.
231+
// Therefore, we are sure that `.managed.json` has been already
232+
// properly created and we do not need to sync its parent directory.
233+
//
234+
// (It might seem like a nicer solution to create the managed_json on the
235+
// creation of the ManagedDirectory instance but it would actually
236+
// prevent the use of read-only directories..)
237+
let managed_file_definitely_already_exists = meta_wlock.managed_paths.len() > 1;
238+
if managed_file_definitely_already_exists {
239+
return Ok(());
227240
}
241+
self.directory.sync_directory()?;
228242
Ok(())
229243
}
230244

@@ -310,6 +324,11 @@ impl Directory for ManagedDirectory {
310324
fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle> {
311325
self.directory.watch(watch_callback)
312326
}
327+
328+
fn sync_directory(&self) -> io::Result<()> {
329+
self.directory.sync_directory()?;
330+
Ok(())
331+
}
313332
}
314333

315334
impl Clone for ManagedDirectory {

src/directory/mmap_directory.rs

+41-46
Original file line numberDiff line numberDiff line change
@@ -211,33 +211,6 @@ impl MmapDirectory {
211211
self.inner.root_path.join(relative_path)
212212
}
213213

214-
/// Sync the root directory.
215-
/// In certain FS, this is required to persistently create
216-
/// a file.
217-
fn sync_directory(&self) -> Result<(), io::Error> {
218-
let mut open_opts = OpenOptions::new();
219-
220-
// Linux needs read to be set, otherwise returns EINVAL
221-
// write must not be set, or it fails with EISDIR
222-
open_opts.read(true);
223-
224-
// On Windows, opening a directory requires FILE_FLAG_BACKUP_SEMANTICS
225-
// and calling sync_all() only works if write access is requested.
226-
#[cfg(windows)]
227-
{
228-
use std::os::windows::fs::OpenOptionsExt;
229-
use winapi::um::winbase;
230-
231-
open_opts
232-
.write(true)
233-
.custom_flags(winbase::FILE_FLAG_BACKUP_SEMANTICS);
234-
}
235-
236-
let fd = open_opts.open(&self.inner.root_path)?;
237-
fd.sync_data()?;
238-
Ok(())
239-
}
240-
241214
/// Returns some statistical information
242215
/// about the Mmap cache.
243216
///
@@ -367,22 +340,17 @@ impl Directory for MmapDirectory {
367340
/// removed before the file is deleted.
368341
fn delete(&self, path: &Path) -> result::Result<(), DeleteError> {
369342
let full_path = self.resolve_path(path);
370-
match fs::remove_file(&full_path) {
371-
Ok(_) => self.sync_directory().map_err(|e| DeleteError::IoError {
372-
io_error: e,
373-
filepath: path.to_path_buf(),
374-
}),
375-
Err(e) => {
376-
if e.kind() == io::ErrorKind::NotFound {
377-
Err(DeleteError::FileDoesNotExist(path.to_owned()))
378-
} else {
379-
Err(DeleteError::IoError {
380-
io_error: e,
381-
filepath: path.to_path_buf(),
382-
})
343+
fs::remove_file(&full_path).map_err(|e| {
344+
if e.kind() == io::ErrorKind::NotFound {
345+
DeleteError::FileDoesNotExist(path.to_owned())
346+
} else {
347+
DeleteError::IoError {
348+
io_error: e,
349+
filepath: path.to_path_buf(),
383350
}
384351
}
385-
}
352+
})?;
353+
Ok(())
386354
}
387355

388356
fn exists(&self, path: &Path) -> Result<bool, OpenReadError> {
@@ -411,10 +379,13 @@ impl Directory for MmapDirectory {
411379
file.flush()
412380
.map_err(|io_error| OpenWriteError::wrap_io_error(io_error, path.to_path_buf()))?;
413381

414-
// Apparetntly, on some filesystem syncing the parent
415-
// directory is required.
416-
self.sync_directory()
417-
.map_err(|io_err| OpenWriteError::wrap_io_error(io_err, path.to_path_buf()))?;
382+
// Note we actually do not sync the parent directory here.
383+
//
384+
// A newly created file, may, in some case, be created and even flushed to disk.
385+
// and then lost...
386+
//
387+
// The file will only be durably written after we terminate AND
388+
// sync_directory() is called.
418389

419390
let writer = SafeFileWriter::new(file);
420391
Ok(BufWriter::new(Box::new(writer)))
@@ -444,7 +415,7 @@ impl Directory for MmapDirectory {
444415
debug!("Atomic Write {:?}", path);
445416
let full_path = self.resolve_path(path);
446417
atomic_write(&full_path, content)?;
447-
self.sync_directory()
418+
Ok(())
448419
}
449420

450421
fn acquire_lock(&self, lock: &Lock) -> Result<DirectoryLock, LockError> {
@@ -470,6 +441,30 @@ impl Directory for MmapDirectory {
470441
fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle> {
471442
Ok(self.inner.watch(watch_callback))
472443
}
444+
445+
fn sync_directory(&self) -> Result<(), io::Error> {
446+
let mut open_opts = OpenOptions::new();
447+
448+
// Linux needs read to be set, otherwise returns EINVAL
449+
// write must not be set, or it fails with EISDIR
450+
open_opts.read(true);
451+
452+
// On Windows, opening a directory requires FILE_FLAG_BACKUP_SEMANTICS
453+
// and calling sync_all() only works if write access is requested.
454+
#[cfg(windows)]
455+
{
456+
use std::os::windows::fs::OpenOptionsExt;
457+
use winapi::um::winbase;
458+
459+
open_opts
460+
.write(true)
461+
.custom_flags(winbase::FILE_FLAG_BACKUP_SEMANTICS);
462+
}
463+
464+
let fd = open_opts.open(&self.inner.root_path)?;
465+
fd.sync_data()?;
466+
Ok(())
467+
}
473468
}
474469

475470
#[cfg(test)]

src/directory/ram_directory.rs

+4
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,10 @@ impl Directory for RamDirectory {
225225
fn watch(&self, watch_callback: WatchCallback) -> crate::Result<WatchHandle> {
226226
Ok(self.fs.write().unwrap().watch(watch_callback))
227227
}
228+
229+
fn sync_directory(&self) -> io::Result<()> {
230+
Ok(())
231+
}
228232
}
229233

230234
#[cfg(test)]

src/indexer/segment_updater.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,9 @@ pub fn save_new_metas(
6161
payload: None,
6262
},
6363
directory,
64-
)
64+
)?;
65+
directory.sync_directory()?;
66+
Ok(())
6567
}
6668

6769
/// Save the index meta file.
@@ -82,6 +84,7 @@ fn save_metas(metas: &IndexMeta, directory: &dyn Directory) -> crate::Result<()>
8284
io::ErrorKind::Other,
8385
msg.unwrap_or_else(|| "Undefined".to_string())
8486
))));
87+
directory.sync_directory()?;
8588
directory.atomic_write(&META_FILEPATH, &buffer[..])?;
8689
debug!("Saved metas {:?}", serde_json::to_string_pretty(&metas));
8790
Ok(())

0 commit comments

Comments
 (0)