Skip to content

Commit 1750612

Browse files
committed
Avoid SIGBUS on Linux when a DSM memory request overruns tmpfs.
On Linux, shared memory segments created with shm_open() are backed by swap files created in tmpfs. If the swap file needs to be extended, but there's no tmpfs space left, you get a very unfriendly SIGBUS trap. To avoid this, force allocation of the full request size when we create the segment. This adds a few cycles, but none that we wouldn't expend later anyway, assuming the request isn't hugely bigger than the actual need. Make this code #ifdef __linux__, because (a) there's not currently a reason to think the same problem exists on other platforms, and (b) applying posix_fallocate() to an FD created by shm_open() isn't very portable anyway. Back-patch to 9.4 where the DSM code came in. Thomas Munro, per a bug report from Amul Sul Discussion: https://postgr.es/m/[email protected]
1 parent 10aafbd commit 1750612

File tree

5 files changed

+60
-4
lines changed

5 files changed

+60
-4
lines changed

configure

+1-1
Original file line numberDiff line numberDiff line change
@@ -12517,7 +12517,7 @@ fi
1251712517
LIBS_including_readline="$LIBS"
1251812518
LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
1251912519

12520-
for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat pthread_is_threaded_np readlink setproctitle setsid shm_open symlink sync_file_range towlower utime utimes wcstombs wcstombs_l
12520+
for ac_func in cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate pstat pthread_is_threaded_np readlink setproctitle setsid shm_open symlink sync_file_range towlower utime utimes wcstombs wcstombs_l
1252112521
do :
1252212522
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
1252312523
ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"

configure.in

+1-1
Original file line numberDiff line numberDiff line change
@@ -1457,7 +1457,7 @@ PGAC_FUNC_WCSTOMBS_L
14571457
LIBS_including_readline="$LIBS"
14581458
LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
14591459

1460-
AC_CHECK_FUNCS([cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll pstat pthread_is_threaded_np readlink setproctitle setsid shm_open symlink sync_file_range towlower utime utimes wcstombs wcstombs_l])
1460+
AC_CHECK_FUNCS([cbrt dlopen fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate pstat pthread_is_threaded_np readlink setproctitle setsid shm_open symlink sync_file_range towlower utime utimes wcstombs wcstombs_l])
14611461

14621462
AC_REPLACE_FUNCS(fseeko)
14631463
case $host_os in

src/backend/storage/ipc/dsm_impl.c

+52-2
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
static bool dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
7474
void **impl_private, void **mapped_address,
7575
Size *mapped_size, int elevel);
76+
static int dsm_impl_posix_resize(int fd, off_t size);
7677
#endif
7778
#ifdef USE_DSM_SYSV
7879
static bool dsm_impl_sysv(dsm_op op, dsm_handle handle, Size request_size,
@@ -319,7 +320,8 @@ dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
319320
}
320321
request_size = st.st_size;
321322
}
322-
else if (*mapped_size != request_size && ftruncate(fd, request_size))
323+
else if (*mapped_size != request_size &&
324+
dsm_impl_posix_resize(fd, request_size) != 0)
323325
{
324326
int save_errno;
325327

@@ -392,7 +394,55 @@ dsm_impl_posix(dsm_op op, dsm_handle handle, Size request_size,
392394

393395
return true;
394396
}
395-
#endif
397+
398+
/*
 * Set the size of a virtual memory region associated with a file descriptor.
 * If necessary, also ensure that virtual memory is actually allocated by the
 * operating system, to avoid nasty surprises later.
 *
 * fd is the descriptor of the backing file; size is the new length in bytes.
 *
 * Returns 0 on success.  Returns non-zero if either truncation or allocation
 * fails, with errno set to describe the failure.
 */
static int
dsm_impl_posix_resize(int fd, off_t size)
{
	int			rc;

	/* Truncate (or extend) the file to the requested size. */
	rc = ftruncate(fd, size);

	/*
	 * On Linux, a shm_open fd is backed by a tmpfs file.  After resizing with
	 * ftruncate, the file may contain a hole.  Accessing memory backed by a
	 * hole causes tmpfs to allocate pages, which fails with SIGBUS if there
	 * is no more tmpfs space available.  So we ask tmpfs to allocate pages
	 * here, so we can fail gracefully with ENOSPC now rather than risking
	 * SIGBUS later.
	 *
	 * A zero-length request has nothing to allocate, and posix_fallocate()
	 * rejects a length of zero with EINVAL, so skip it in that case.
	 */
#if defined(HAVE_POSIX_FALLOCATE) && defined(__linux__)
	if (rc == 0 && size > 0)
	{
		/*
		 * Unlike most system interfaces, posix_fallocate() does not return -1
		 * and set errno on failure; it returns the error number directly.  So
		 * all tests here must look at rc, not errno.
		 *
		 * We may get interrupted, if so just retry.
		 */
		do
		{
			rc = posix_fallocate(fd, 0, size);
		} while (rc == EINTR);

		if (rc == ENOSYS)
		{
			/*
			 * Kernel too old (< 2.6.23).  Rather than fail, just trust that
			 * we won't hit the problem (it typically doesn't show up without
			 * many-GB-sized requests, anyway).
			 */
			rc = 0;
		}
		else if (rc != 0)
		{
			/*
			 * Our caller expects errno to describe the failure, so copy the
			 * returned error number there; rc stays non-zero to signal it.
			 */
			errno = rc;
		}
	}
#endif							/* HAVE_POSIX_FALLOCATE && __linux__ */

	return rc;
}
444+
445+
#endif /* USE_DSM_POSIX */
396446

397447
#ifdef USE_DSM_SYSV
398448
/*

src/include/pg_config.h.in

+3
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,9 @@
390390
/* Define to 1 if you have the `posix_fadvise' function. */
391391
#undef HAVE_POSIX_FADVISE
392392

393+
/* Define to 1 if you have the `posix_fallocate' function. */
394+
#undef HAVE_POSIX_FALLOCATE
395+
393396
/* Define to 1 if the assembler supports PPC's LWARX mutex hint bit. */
394397
#undef HAVE_PPC_LWARX_MUTEX_HINT
395398

src/include/pg_config.h.win32

+3
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,9 @@
258258
/* Define to 1 if you have the <poll.h> header file. */
259259
/* #undef HAVE_POLL_H */
260260

261+
/* Define to 1 if you have the `posix_fallocate' function. */
262+
/* #undef HAVE_POSIX_FALLOCATE */
263+
261264
/* Define to 1 if you have the `pstat' function. */
262265
/* #undef HAVE_PSTAT */
263266

0 commit comments

Comments
 (0)