Skip to content

Commit 4478993

Browse files
authored
Merge pull request #10516 from shreyas-s-rao/wal-verify-func
wal: add Verify function to perform corruption check on wal contents
2 parents dc50416 + bb3eb8f commit 4478993

File tree

2 files changed

+175
-27
lines changed

2 files changed

+175
-27
lines changed

Diff for: wal/wal.go

+123-27
Original file line numberDiff line numberDiff line change
@@ -299,17 +299,55 @@ func OpenForRead(lg *zap.Logger, dirpath string, snap walpb.Snapshot) (*WAL, err
299299
}
300300

301301
func openAtIndex(lg *zap.Logger, dirpath string, snap walpb.Snapshot, write bool) (*WAL, error) {
302-
names, err := readWALNames(lg, dirpath)
302+
names, nameIndex, err := selectWALFiles(lg, dirpath, snap)
303303
if err != nil {
304304
return nil, err
305305
}
306306

307+
rs, ls, closer, err := openWALFiles(lg, dirpath, names, nameIndex, write)
308+
if err != nil {
309+
return nil, err
310+
}
311+
312+
// create a WAL ready for reading
313+
w := &WAL{
314+
dir: dirpath,
315+
start: snap,
316+
decoder: newDecoder(rs...),
317+
readClose: closer,
318+
locks: ls,
319+
}
320+
321+
if write {
322+
// write reuses the file descriptors from read; don't close so
323+
// WAL can append without dropping the file lock
324+
w.readClose = nil
325+
if _, _, err := parseWALName(filepath.Base(w.tail().Name())); err != nil {
326+
closer()
327+
return nil, err
328+
}
329+
w.fp = newFilePipeline(lg, w.dir, SegmentSizeBytes)
330+
}
331+
332+
return w, nil
333+
}
334+
335+
func selectWALFiles(lg *zap.Logger, dirpath string, snap walpb.Snapshot) ([]string, int, error) {
336+
names, err := readWALNames(lg, dirpath)
337+
if err != nil {
338+
return nil, -1, err
339+
}
340+
307341
nameIndex, ok := searchIndex(lg, names, snap.Index)
308342
if !ok || !isValidSeq(lg, names[nameIndex:]) {
309-
return nil, ErrFileNotFound
343+
err = ErrFileNotFound
344+
return nil, -1, err
310345
}
311346

312-
// open the wal files
347+
return names, nameIndex, nil
348+
}
349+
350+
func openWALFiles(lg *zap.Logger, dirpath string, names []string, nameIndex int, write bool) ([]io.Reader, []*fileutil.LockedFile, func() error, error) {
313351
rcs := make([]io.ReadCloser, 0)
314352
rs := make([]io.Reader, 0)
315353
ls := make([]*fileutil.LockedFile, 0)
@@ -319,15 +357,15 @@ func openAtIndex(lg *zap.Logger, dirpath string, snap walpb.Snapshot, write bool
319357
l, err := fileutil.TryLockFile(p, os.O_RDWR, fileutil.PrivateFileMode)
320358
if err != nil {
321359
closeAll(rcs...)
322-
return nil, err
360+
return nil, nil, nil, err
323361
}
324362
ls = append(ls, l)
325363
rcs = append(rcs, l)
326364
} else {
327365
rf, err := os.OpenFile(p, os.O_RDONLY, fileutil.PrivateFileMode)
328366
if err != nil {
329367
closeAll(rcs...)
330-
return nil, err
368+
return nil, nil, nil, err
331369
}
332370
ls = append(ls, nil)
333371
rcs = append(rcs, rf)
@@ -337,28 +375,7 @@ func openAtIndex(lg *zap.Logger, dirpath string, snap walpb.Snapshot, write bool
337375

338376
closer := func() error { return closeAll(rcs...) }
339377

340-
// create a WAL ready for reading
341-
w := &WAL{
342-
lg: lg,
343-
dir: dirpath,
344-
start: snap,
345-
decoder: newDecoder(rs...),
346-
readClose: closer,
347-
locks: ls,
348-
}
349-
350-
if write {
351-
// write reuses the file descriptors from read; don't close so
352-
// WAL can append without dropping the file lock
353-
w.readClose = nil
354-
if _, _, err := parseWALName(filepath.Base(w.tail().Name())); err != nil {
355-
closer()
356-
return nil, err
357-
}
358-
w.fp = newFilePipeline(w.lg, w.dir, SegmentSizeBytes)
359-
}
360-
361-
return w, nil
378+
return rs, ls, closer, nil
362379
}
363380

364381
// ReadAll reads out records of the current WAL.
@@ -480,6 +497,85 @@ func (w *WAL) ReadAll() (metadata []byte, state raftpb.HardState, ents []raftpb.
480497
return metadata, state, ents, err
481498
}
482499

500+
// Verify reads through the given WAL and verifies that it is not corrupted.
501+
// It creates a new decoder to read through the records of the given WAL.
502+
// It does not conflict with any open WAL, but it is recommended not to
503+
// call this function after opening the WAL for writing.
504+
// If it cannot read out the expected snap, it will return ErrSnapshotNotFound.
505+
// If the loaded snap doesn't match with the expected one, it will
506+
// return error ErrSnapshotMismatch.
507+
func Verify(lg *zap.Logger, walDir string, snap walpb.Snapshot) error {
508+
var metadata []byte
509+
var err error
510+
var match bool
511+
512+
rec := &walpb.Record{}
513+
514+
names, nameIndex, err := selectWALFiles(lg, walDir, snap)
515+
if err != nil {
516+
return err
517+
}
518+
519+
// open wal files in read mode, so that there is no conflict
520+
// when the same WAL is opened elsewhere in write mode
521+
rs, _, closer, err := openWALFiles(lg, walDir, names, nameIndex, false)
522+
if err != nil {
523+
return err
524+
}
525+
526+
// create a new decoder from the readers on the WAL files
527+
decoder := newDecoder(rs...)
528+
529+
for err = decoder.decode(rec); err == nil; err = decoder.decode(rec) {
530+
switch rec.Type {
531+
case metadataType:
532+
if metadata != nil && !bytes.Equal(metadata, rec.Data) {
533+
return ErrMetadataConflict
534+
}
535+
metadata = rec.Data
536+
case crcType:
537+
crc := decoder.crc.Sum32()
538+
// Current crc of decoder must match the crc of the record.
539+
// We need not match 0 crc, since the decoder is a new one at this point.
540+
if crc != 0 && rec.Validate(crc) != nil {
541+
return ErrCRCMismatch
542+
}
543+
decoder.updateCRC(rec.Crc)
544+
case snapshotType:
545+
var loadedSnap walpb.Snapshot
546+
pbutil.MustUnmarshal(&loadedSnap, rec.Data)
547+
if loadedSnap.Index == snap.Index {
548+
if loadedSnap.Term != snap.Term {
549+
return ErrSnapshotMismatch
550+
}
551+
match = true
552+
}
553+
// We ignore all entry and state type records as these
554+
// are not necessary for validating the WAL contents
555+
case entryType:
556+
case stateType:
557+
default:
558+
return fmt.Errorf("unexpected block type %d", rec.Type)
559+
}
560+
}
561+
562+
if closer != nil {
563+
closer()
564+
}
565+
566+
// We do not have to read out all the WAL entries
567+
// as the decoder is opened in read mode.
568+
if err != io.EOF && err != io.ErrUnexpectedEOF {
569+
return err
570+
}
571+
572+
if !match {
573+
return ErrSnapshotNotFound
574+
}
575+
576+
return nil
577+
}
578+
483579
// cut closes current file written and creates a new one ready to append.
484580
// cut first creates a temp wal file and writes necessary headers into it.
485581
// Then cut atomically rename temp wal file to a wal file.

Diff for: wal/wal_test.go

+52
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"io/ioutil"
2121
"math"
2222
"os"
23+
"path"
2324
"path/filepath"
2425
"reflect"
2526
"testing"
@@ -186,6 +187,57 @@ func TestOpenAtIndex(t *testing.T) {
186187
}
187188
}
188189

190+
// TestVerify tests that Verify throws a non-nil error when the WAL is corrupted.
191+
// The test creates a WAL directory and cuts out multiple WAL files. Then
192+
// it corrupts one of the files by completely truncating it.
193+
func TestVerify(t *testing.T) {
194+
walDir, err := ioutil.TempDir(os.TempDir(), "waltest")
195+
if err != nil {
196+
t.Fatal(err)
197+
}
198+
defer os.RemoveAll(walDir)
199+
200+
// create WAL
201+
w, err := Create(zap.NewExample(), walDir, nil)
202+
if err != nil {
203+
t.Fatal(err)
204+
}
205+
defer w.Close()
206+
207+
// make 5 separate files
208+
for i := 0; i < 5; i++ {
209+
es := []raftpb.Entry{{Index: uint64(i), Data: []byte("waldata" + string(i+1))}}
210+
if err = w.Save(raftpb.HardState{}, es); err != nil {
211+
t.Fatal(err)
212+
}
213+
if err = w.cut(); err != nil {
214+
t.Fatal(err)
215+
}
216+
}
217+
218+
// to verify the WAL is not corrupted at this point
219+
err = Verify(zap.NewExample(), walDir, walpb.Snapshot{})
220+
if err != nil {
221+
t.Errorf("expected a nil error, got %v", err)
222+
}
223+
224+
walFiles, err := ioutil.ReadDir(walDir)
225+
if err != nil {
226+
t.Fatal(err)
227+
}
228+
229+
// corrupt the WAL by truncating one of the WAL files completely
230+
err = os.Truncate(path.Join(walDir, walFiles[2].Name()), 0)
231+
if err != nil {
232+
t.Fatal(err)
233+
}
234+
235+
err = Verify(zap.NewExample(), walDir, walpb.Snapshot{})
236+
if err == nil {
237+
t.Error("expected a non-nil error, got nil")
238+
}
239+
}
240+
189241
// TODO: split it into smaller tests for better readability
190242
func TestCut(t *testing.T) {
191243
p, err := ioutil.TempDir(os.TempDir(), "waltest")

0 commit comments

Comments
 (0)