Skip to content

Commit a855fbe

Browse files
fdmananakdave
authored andcommitted
btrfs: fix lockdep splat when enabling and disabling qgroups
When running test case btrfs/017 from fstests, lockdep reported the following splat: [ 1297.067385] ====================================================== [ 1297.067708] WARNING: possible circular locking dependency detected [ 1297.068022] 5.10.0-rc4-btrfs-next-73 #1 Not tainted [ 1297.068322] ------------------------------------------------------ [ 1297.068629] btrfs/189080 is trying to acquire lock: [ 1297.068929] ffff9f2725731690 (sb_internal#2){.+.+}-{0:0}, at: btrfs_quota_enable+0xaf/0xa70 [btrfs] [ 1297.069274] but task is already holding lock: [ 1297.069868] ffff9f2702b61a08 (&fs_info->qgroup_ioctl_lock){+.+.}-{3:3}, at: btrfs_quota_enable+0x3b/0xa70 [btrfs] [ 1297.070219] which lock already depends on the new lock. [ 1297.071131] the existing dependency chain (in reverse order) is: [ 1297.071721] -> #1 (&fs_info->qgroup_ioctl_lock){+.+.}-{3:3}: [ 1297.072375] lock_acquire+0xd8/0x490 [ 1297.072710] __mutex_lock+0xa3/0xb30 [ 1297.073061] btrfs_qgroup_inherit+0x59/0x6a0 [btrfs] [ 1297.073421] create_subvol+0x194/0x990 [btrfs] [ 1297.073780] btrfs_mksubvol+0x3fb/0x4a0 [btrfs] [ 1297.074133] __btrfs_ioctl_snap_create+0x119/0x1a0 [btrfs] [ 1297.074498] btrfs_ioctl_snap_create+0x58/0x80 [btrfs] [ 1297.074872] btrfs_ioctl+0x1a90/0x36f0 [btrfs] [ 1297.075245] __x64_sys_ioctl+0x83/0xb0 [ 1297.075617] do_syscall_64+0x33/0x80 [ 1297.075993] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 1297.076380] -> #0 (sb_internal#2){.+.+}-{0:0}: [ 1297.077166] check_prev_add+0x91/0xc60 [ 1297.077572] __lock_acquire+0x1740/0x3110 [ 1297.077984] lock_acquire+0xd8/0x490 [ 1297.078411] start_transaction+0x3c5/0x760 [btrfs] [ 1297.078853] btrfs_quota_enable+0xaf/0xa70 [btrfs] [ 1297.079323] btrfs_ioctl+0x2c60/0x36f0 [btrfs] [ 1297.079789] __x64_sys_ioctl+0x83/0xb0 [ 1297.080232] do_syscall_64+0x33/0x80 [ 1297.080680] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 1297.081139] other info that might help us debug this: [ 1297.082536] Possible unsafe locking scenario: [ 1297.083510] CPU0 CPU1 [ 1297.084005] ---- ---- [ 1297.084500] lock(&fs_info->qgroup_ioctl_lock); [ 1297.084994] lock(sb_internal#2); [ 1297.085485] lock(&fs_info->qgroup_ioctl_lock); [ 1297.085974] lock(sb_internal#2); [ 1297.086454] *** DEADLOCK *** [ 1297.087880] 3 locks held by btrfs/189080: [ 1297.088324] #0: ffff9f2725731470 (sb_writers#14){.+.+}-{0:0}, at: btrfs_ioctl+0xa73/0x36f0 [btrfs] [ 1297.088799] #1: ffff9f2702b60cc0 (&fs_info->subvol_sem){++++}-{3:3}, at: btrfs_ioctl+0x1f4d/0x36f0 [btrfs] [ 1297.089284] #2: ffff9f2702b61a08 (&fs_info->qgroup_ioctl_lock){+.+.}-{3:3}, at: btrfs_quota_enable+0x3b/0xa70 [btrfs] [ 1297.089771] stack backtrace: [ 1297.090662] CPU: 5 PID: 189080 Comm: btrfs Not tainted 5.10.0-rc4-btrfs-next-73 #1 [ 1297.091132] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 1297.092123] Call Trace: [ 1297.092629] dump_stack+0x8d/0xb5 [ 1297.093115] check_noncircular+0xff/0x110 [ 1297.093596] check_prev_add+0x91/0xc60 [ 1297.094076] ? kvm_clock_read+0x14/0x30 [ 1297.094553] ? kvm_sched_clock_read+0x5/0x10 [ 1297.095029] __lock_acquire+0x1740/0x3110 [ 1297.095510] lock_acquire+0xd8/0x490 [ 1297.095993] ? btrfs_quota_enable+0xaf/0xa70 [btrfs] [ 1297.096476] start_transaction+0x3c5/0x760 [btrfs] [ 1297.096962] ? btrfs_quota_enable+0xaf/0xa70 [btrfs] [ 1297.097451] btrfs_quota_enable+0xaf/0xa70 [btrfs] [ 1297.097941] ? btrfs_ioctl+0x1f4d/0x36f0 [btrfs] [ 1297.098429] btrfs_ioctl+0x2c60/0x36f0 [btrfs] [ 1297.098904] ? do_user_addr_fault+0x20c/0x430 [ 1297.099382] ? kvm_clock_read+0x14/0x30 [ 1297.099854] ? kvm_sched_clock_read+0x5/0x10 [ 1297.100328] ? sched_clock+0x5/0x10 [ 1297.100801] ? sched_clock_cpu+0x12/0x180 [ 1297.101272] ? __x64_sys_ioctl+0x83/0xb0 [ 1297.101739] __x64_sys_ioctl+0x83/0xb0 [ 1297.102207] do_syscall_64+0x33/0x80 [ 1297.102673] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 1297.103148] RIP: 0033:0x7f773ff65d87 This is because during the quota enable ioctl we lock first the mutex qgroup_ioctl_lock and then start a transaction, and starting a transaction acquires a fs freeze semaphore (at the VFS level). However, every other code path, except for the quota disable ioctl path, we do the opposite: we start a transaction and then lock the mutex. So fix this by making the quota enable and disable paths to start the transaction without having the mutex locked, and then, after starting the transaction, lock the mutex and check if some other task already enabled or disabled the quotas, bailing with success if that was the case. Signed-off-by: Filipe Manana <[email protected]> Reviewed-by: David Sterba <[email protected]> Signed-off-by: David Sterba <[email protected]>
1 parent 7aa6d35 commit a855fbe

File tree

2 files changed

+53
-9
lines changed

2 files changed

+53
-9
lines changed

fs/btrfs/ctree.h

+4-1
Original file line numberDiff line numberDiff line change
@@ -878,7 +878,10 @@ struct btrfs_fs_info {
878878
*/
879879
struct ulist *qgroup_ulist;
880880

881-
/* protect user change for quota operations */
881+
/*
882+
* Protect user change for quota operations. If a transaction is needed,
883+
* it must be started before locking this lock.
884+
*/
882885
struct mutex qgroup_ioctl_lock;
883886

884887
/* list of dirty qgroups to be written at next commit */

fs/btrfs/qgroup.c

+49-8
Original file line numberDiff line numberDiff line change
@@ -937,22 +937,39 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
937937
struct btrfs_key found_key;
938938
struct btrfs_qgroup *qgroup = NULL;
939939
struct btrfs_trans_handle *trans = NULL;
940+
struct ulist *ulist = NULL;
940941
int ret = 0;
941942
int slot;
942943

943944
mutex_lock(&fs_info->qgroup_ioctl_lock);
944945
if (fs_info->quota_root)
945946
goto out;
946947

947-
fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
948-
if (!fs_info->qgroup_ulist) {
948+
ulist = ulist_alloc(GFP_KERNEL);
949+
if (!ulist) {
949950
ret = -ENOMEM;
950951
goto out;
951952
}
952953

953954
ret = btrfs_sysfs_add_qgroups(fs_info);
954955
if (ret < 0)
955956
goto out;
957+
958+
/*
959+
* Unlock qgroup_ioctl_lock before starting the transaction. This is to
960+
* avoid lock acquisition inversion problems (reported by lockdep) between
961+
* qgroup_ioctl_lock and the vfs freeze semaphores, acquired when we
962+
* start a transaction.
963+
* After we started the transaction lock qgroup_ioctl_lock again and
964+
* check if someone else created the quota root in the meanwhile. If so,
965+
* just return success and release the transaction handle.
966+
*
967+
* Also we don't need to worry about someone else calling
968+
* btrfs_sysfs_add_qgroups() after we unlock and getting an error because
969+
* that function returns 0 (success) when the sysfs entries already exist.
970+
*/
971+
mutex_unlock(&fs_info->qgroup_ioctl_lock);
972+
956973
/*
957974
* 1 for quota root item
958975
* 1 for BTRFS_QGROUP_STATUS item
@@ -962,12 +979,20 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
962979
* would be a lot of overkill.
963980
*/
964981
trans = btrfs_start_transaction(tree_root, 2);
982+
983+
mutex_lock(&fs_info->qgroup_ioctl_lock);
965984
if (IS_ERR(trans)) {
966985
ret = PTR_ERR(trans);
967986
trans = NULL;
968987
goto out;
969988
}
970989

990+
if (fs_info->quota_root)
991+
goto out;
992+
993+
fs_info->qgroup_ulist = ulist;
994+
ulist = NULL;
995+
971996
/*
972997
* initially create the quota tree
973998
*/
@@ -1125,11 +1150,14 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
11251150
if (ret) {
11261151
ulist_free(fs_info->qgroup_ulist);
11271152
fs_info->qgroup_ulist = NULL;
1128-
if (trans)
1129-
btrfs_end_transaction(trans);
11301153
btrfs_sysfs_del_qgroups(fs_info);
11311154
}
11321155
mutex_unlock(&fs_info->qgroup_ioctl_lock);
1156+
if (ret && trans)
1157+
btrfs_end_transaction(trans);
1158+
else if (trans)
1159+
ret = btrfs_end_transaction(trans);
1160+
ulist_free(ulist);
11331161
return ret;
11341162
}
11351163

@@ -1142,19 +1170,29 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
11421170
mutex_lock(&fs_info->qgroup_ioctl_lock);
11431171
if (!fs_info->quota_root)
11441172
goto out;
1173+
mutex_unlock(&fs_info->qgroup_ioctl_lock);
11451174

11461175
/*
11471176
* 1 For the root item
11481177
*
11491178
* We should also reserve enough items for the quota tree deletion in
11501179
* btrfs_clean_quota_tree but this is not done.
1180+
*
1181+
* Also, we must always start a transaction without holding the mutex
1182+
* qgroup_ioctl_lock, see btrfs_quota_enable().
11511183
*/
11521184
trans = btrfs_start_transaction(fs_info->tree_root, 1);
1185+
1186+
mutex_lock(&fs_info->qgroup_ioctl_lock);
11531187
if (IS_ERR(trans)) {
11541188
ret = PTR_ERR(trans);
1189+
trans = NULL;
11551190
goto out;
11561191
}
11571192

1193+
if (!fs_info->quota_root)
1194+
goto out;
1195+
11581196
clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
11591197
btrfs_qgroup_wait_for_completion(fs_info, false);
11601198
spin_lock(&fs_info->qgroup_lock);
@@ -1168,13 +1206,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
11681206
ret = btrfs_clean_quota_tree(trans, quota_root);
11691207
if (ret) {
11701208
btrfs_abort_transaction(trans, ret);
1171-
goto end_trans;
1209+
goto out;
11721210
}
11731211

11741212
ret = btrfs_del_root(trans, &quota_root->root_key);
11751213
if (ret) {
11761214
btrfs_abort_transaction(trans, ret);
1177-
goto end_trans;
1215+
goto out;
11781216
}
11791217

11801218
list_del(&quota_root->dirty_list);
@@ -1186,10 +1224,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
11861224

11871225
btrfs_put_root(quota_root);
11881226

1189-
end_trans:
1190-
ret = btrfs_end_transaction(trans);
11911227
out:
11921228
mutex_unlock(&fs_info->qgroup_ioctl_lock);
1229+
if (ret && trans)
1230+
btrfs_end_transaction(trans);
1231+
else if (trans)
1232+
ret = btrfs_end_transaction(trans);
1233+
11931234
return ret;
11941235
}
11951236

0 commit comments

Comments
 (0)