Skip to content

Commit 9a99293

Browse files
darkowlzz and stealthybox
authored and committed
Add lockfile at snapshot activation to avoid race condition
This creates an ignite lock file at /tmp/ignite-snapshot.lock when an overlay snapshot is created. The locking is handled via a PID file using the github.com/nightlyone/lockfile package. This helps avoid the race condition that occurs when multiple ignite processes try to create a loop device and use the device mapper for an overlay snapshot at the same time. When one process holds the lock, other processes keep retrying until they obtain it. Once the snapshot is activated, the lock is released.
1 parent edfdea7 commit 9a99293

File tree

1 file changed

+57
-0
lines changed

1 file changed

+57
-0
lines changed

pkg/dmlegacy/snapshot.go

+57
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ import (
44
"fmt"
55
"os"
66
"path"
7+
"path/filepath"
8+
9+
"github.com/nightlyone/lockfile"
710

811
api "github.com/weaveworks/ignite/pkg/apis/ignite"
912
"github.com/weaveworks/ignite/pkg/constants"
@@ -12,6 +15,8 @@ import (
1215
"github.com/weaveworks/ignite/pkg/util"
1316
)
1417

18+
const snapshotLockFileName = "ignite-snapshot.lock"
19+
1520
// ActivateSnapshot sets up the snapshot with devicemapper so that it is active and can be used
1621
func ActivateSnapshot(vm *api.VM) error {
1722
device := util.NewPrefixer().Prefix(vm.GetUID())
@@ -28,6 +33,28 @@ func ActivateSnapshot(vm *api.VM) error {
2833
return err
2934
}
3035

36+
// NOTE: Multiple ignite processes trying to create loop devices at the
37+
// same time results in race condition. When multiple processes request for
38+
// a free loop device at the same time, they may get the same device ID and
39+
// try to create the same device multiple times.
40+
// Serialize this operation by creating a global lock file when creating a
41+
// loop device and release the lock after setting up device mapper using the
42+
// loop device.
43+
44+
// Global lock path.
45+
glpath := filepath.Join(os.TempDir(), snapshotLockFileName)
46+
47+
// Create a lockfile and obtain a lock.
48+
lock, err := lockfile.New(glpath)
49+
if err != nil {
50+
return fmt.Errorf("failed to create lock: %v", err)
51+
}
52+
if err := obtainLock(lock); err != nil {
53+
return err
54+
}
55+
// Release the lock at the end.
56+
defer lock.Unlock()
57+
3158
// Setup loop device for the image
3259
imageLoop, err := newLoopDev(path.Join(constants.IMAGE_DIR, imageUID.String(), constants.IMAGE_FS), true)
3360
if err != nil {
@@ -101,3 +128,33 @@ func ActivateSnapshot(vm *api.VM) error {
101128

102129
return overlayLoop.Detach()
103130
}
131+
132+
// obtainLock tries to obtain a lock and retries if the lock is owned by
133+
// another process, until a lock is obtained.
134+
func obtainLock(lock lockfile.Lockfile) error {
135+
// Check if the lock has any owner.
136+
process, err := lock.GetOwner()
137+
if err == nil {
138+
// A lock already exists. Check if the lock owner is the current process
139+
// itself.
140+
if process.Pid == os.Getpid() {
141+
return fmt.Errorf("lockfile %q already locked by this process", lock)
142+
}
143+
144+
// A lock already exists, but it's owned by some other process. Continue
145+
// to obtain lock, in case the lock owner no longer exists.
146+
}
147+
148+
// Obtain a lock. Retry if the lock can't be obtained.
149+
err = lock.TryLock()
150+
for err != nil {
151+
// Check if it's a lock temporary error that can be mitigated with a
152+
// retry. Fail if any other error.
153+
if _, ok := err.(interface{ Temporary() bool }); !ok {
154+
return fmt.Errorf("unable to lock %q: %v", lock, err)
155+
}
156+
err = lock.TryLock()
157+
}
158+
159+
return nil
160+
}

0 commit comments

Comments
 (0)