Skip to content

Commit c12abb5

Browse files
joyeecheungRafaelGSS
authored andcommitted
src: make NearHeapLimitCallback() more robust
Instead of removing the callback before generating heap snapshot and then adding it back after the heap snapshot is generated, just remove it once the heap snapshot limit is reached. Otherwise if the worker callback kicks in and sets the heap limit to higher value during the heap snapshot generation, the current_heap_limit in the heap snapshot callback becomes invalid, and we might return a heap limit lower than the current one, resulting in OOM. In addition add more logs and checks in Worker::NearHeapLimit() to help us catch problems. PR-URL: #44581 Refs: nodejs/reliability#372 Reviewed-By: theanarkh <[email protected]> Reviewed-By: Rich Trott <[email protected]>
1 parent 5dd86c3 commit c12abb5

File tree

4 files changed

+38
-19
lines changed

4 files changed

+38
-19
lines changed

src/env-inl.h

+4
Original file line numberDiff line numberDiff line change
@@ -903,6 +903,10 @@ inline void Environment::set_heap_snapshot_near_heap_limit(uint32_t limit) {
903903
heap_snapshot_near_heap_limit_ = limit;
904904
}
905905

906+
inline bool Environment::is_in_heapsnapshot_heap_limit_callback() const {
907+
return is_in_heapsnapshot_heap_limit_callback_;
908+
}
909+
906910
inline void Environment::AddHeapSnapshotNearHeapLimitCallback() {
907911
DCHECK(!heapsnapshot_near_heap_limit_callback_added_);
908912
heapsnapshot_near_heap_limit_callback_added_ = true;

src/env.cc

+20-16
Original file line numberDiff line numberDiff line change
@@ -1838,7 +1838,7 @@ size_t Environment::NearHeapLimitCallback(void* data,
18381838
"Invoked NearHeapLimitCallback, processing=%d, "
18391839
"current_limit=%" PRIu64 ", "
18401840
"initial_limit=%" PRIu64 "\n",
1841-
env->is_processing_heap_limit_callback_,
1841+
env->is_in_heapsnapshot_heap_limit_callback_,
18421842
static_cast<uint64_t>(current_heap_limit),
18431843
static_cast<uint64_t>(initial_heap_limit));
18441844

@@ -1890,8 +1890,8 @@ size_t Environment::NearHeapLimitCallback(void* data,
18901890
// new limit, so in a heap with unbounded growth the isolate
18911891
// may eventually crash with this new limit - effectively raising
18921892
// the heap limit to the new one.
1893-
if (env->is_processing_heap_limit_callback_) {
1894-
size_t new_limit = current_heap_limit + max_young_gen_size;
1893+
size_t new_limit = current_heap_limit + max_young_gen_size;
1894+
if (env->is_in_heapsnapshot_heap_limit_callback_) {
18951895
Debug(env,
18961896
DebugCategory::DIAGNOSTICS,
18971897
"Not generating snapshots in nested callback. "
@@ -1907,14 +1907,14 @@ size_t Environment::NearHeapLimitCallback(void* data,
19071907
Debug(env,
19081908
DebugCategory::DIAGNOSTICS,
19091909
"Not generating snapshots because it's too risky.\n");
1910-
env->RemoveHeapSnapshotNearHeapLimitCallback(initial_heap_limit);
1910+
env->RemoveHeapSnapshotNearHeapLimitCallback(0);
19111911
// The new limit must be higher than current_heap_limit or V8 might
19121912
// crash.
1913-
return current_heap_limit + 1;
1913+
return new_limit;
19141914
}
19151915

19161916
// Take the snapshot synchronously.
1917-
env->is_processing_heap_limit_callback_ = true;
1917+
env->is_in_heapsnapshot_heap_limit_callback_ = true;
19181918

19191919
std::string dir = env->options()->diagnostic_dir;
19201920
if (dir.empty()) {
@@ -1925,29 +1925,33 @@ size_t Environment::NearHeapLimitCallback(void* data,
19251925

19261926
Debug(env, DebugCategory::DIAGNOSTICS, "Start generating %s...\n", *name);
19271927

1928-
// Remove the callback first in case it's triggered when generating
1929-
// the snapshot.
1930-
env->RemoveHeapSnapshotNearHeapLimitCallback(initial_heap_limit);
1931-
19321928
heap::WriteSnapshot(env, filename.c_str());
19331929
env->heap_limit_snapshot_taken_ += 1;
19341930

1935-
// Don't take more snapshots than the number specified by
1936-
// --heapsnapshot-near-heap-limit.
1937-
if (env->heap_limit_snapshot_taken_ < env->heap_snapshot_near_heap_limit_) {
1938-
env->AddHeapSnapshotNearHeapLimitCallback();
1931+
Debug(env,
1932+
DebugCategory::DIAGNOSTICS,
1933+
"%" PRIu32 "/%" PRIu32 " snapshots taken.\n",
1934+
env->heap_limit_snapshot_taken_,
1935+
env->heap_snapshot_near_heap_limit_);
1936+
1937+
// Don't take more snapshots than the limit specified.
1938+
if (env->heap_limit_snapshot_taken_ == env->heap_snapshot_near_heap_limit_) {
1939+
Debug(env,
1940+
DebugCategory::DIAGNOSTICS,
1941+
"Removing the near heap limit callback");
1942+
env->RemoveHeapSnapshotNearHeapLimitCallback(0);
19391943
}
19401944

19411945
FPrintF(stderr, "Wrote snapshot to %s\n", filename.c_str());
19421946
// Tell V8 to reset the heap limit once the heap usage falls down to
19431947
// 95% of the initial limit.
19441948
env->isolate()->AutomaticallyRestoreInitialHeapLimit(0.95);
19451949

1946-
env->is_processing_heap_limit_callback_ = false;
1950+
env->is_in_heapsnapshot_heap_limit_callback_ = false;
19471951

19481952
// The new limit must be higher than current_heap_limit or V8 might
19491953
// crash.
1950-
return current_heap_limit + 1;
1954+
return new_limit;
19511955
}
19521956

19531957
inline size_t Environment::SelfSize() const {

src/env.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -1433,6 +1433,7 @@ class Environment : public MemoryRetainer {
14331433
void ForEachBindingData(T&& iterator);
14341434

14351435
inline void set_heap_snapshot_near_heap_limit(uint32_t limit);
1436+
inline bool is_in_heapsnapshot_heap_limit_callback() const;
14361437

14371438
inline void AddHeapSnapshotNearHeapLimitCallback();
14381439

@@ -1494,7 +1495,7 @@ class Environment : public MemoryRetainer {
14941495
std::vector<std::string> argv_;
14951496
std::string exec_path_;
14961497

1497-
bool is_processing_heap_limit_callback_ = false;
1498+
bool is_in_heapsnapshot_heap_limit_callback_ = false;
14981499
uint32_t heap_limit_snapshot_taken_ = 0;
14991500
uint32_t heap_snapshot_near_heap_limit_ = 0;
15001501
bool heapsnapshot_near_heap_limit_callback_added_ = false;

src/node_worker.cc

+12-2
Original file line numberDiff line numberDiff line change
@@ -244,11 +244,21 @@ class WorkerThreadData {
244244
size_t Worker::NearHeapLimit(void* data, size_t current_heap_limit,
245245
size_t initial_heap_limit) {
246246
Worker* worker = static_cast<Worker*>(data);
247-
worker->Exit(1, "ERR_WORKER_OUT_OF_MEMORY", "JS heap out of memory");
248247
// Give the current GC some extra leeway to let it finish rather than
249248
// crash hard. We are not going to perform further allocations anyway.
250249
constexpr size_t kExtraHeapAllowance = 16 * 1024 * 1024;
251-
return current_heap_limit + kExtraHeapAllowance;
250+
size_t new_limit = current_heap_limit + kExtraHeapAllowance;
251+
Environment* env = worker->env();
252+
if (env != nullptr) {
253+
DCHECK(!env->is_in_heapsnapshot_heap_limit_callback());
254+
Debug(env,
255+
DebugCategory::DIAGNOSTICS,
256+
"Throwing ERR_WORKER_OUT_OF_MEMORY, "
257+
"new_limit=%" PRIu64 "\n",
258+
static_cast<uint64_t>(new_limit));
259+
}
260+
worker->Exit(1, "ERR_WORKER_OUT_OF_MEMORY", "JS heap out of memory");
261+
return new_limit;
252262
}
253263

254264
void Worker::Run() {

0 commit comments

Comments
 (0)