Skip to content

Commit 309ca45

Browse files
authored
src: make NearHeapLimitCallback() more robust
Instead of removing the callback before generating heap snapshot and then adding it back after the heap snapshot is generated, just remove it once the heap snapshot limit is reached. Otherwise if the worker callback kicks in and sets the heap limit to higher value during the heap snapshot generation, the current_heap_limit in the heap snapshot callback becomes invalid, and we might return a heap limit lower than the current one, resulting in OOM. In addition add more logs and checks in Worker::NearHeapLimit() to help us catch problems. PR-URL: #44581 Refs: nodejs/reliability#372 Reviewed-By: theanarkh <[email protected]> Reviewed-By: Rich Trott <[email protected]>
1 parent 2568325 commit 309ca45

File tree

4 files changed

+38
-19
lines changed

4 files changed

+38
-19
lines changed

src/env-inl.h

+4
Original file line numberDiff line numberDiff line change
@@ -896,6 +896,10 @@ inline void Environment::set_heap_snapshot_near_heap_limit(uint32_t limit) {
896896
heap_snapshot_near_heap_limit_ = limit;
897897
}
898898

899+
inline bool Environment::is_in_heapsnapshot_heap_limit_callback() const {
900+
return is_in_heapsnapshot_heap_limit_callback_;
901+
}
902+
899903
inline void Environment::AddHeapSnapshotNearHeapLimitCallback() {
900904
DCHECK(!heapsnapshot_near_heap_limit_callback_added_);
901905
heapsnapshot_near_heap_limit_callback_added_ = true;

src/env.cc

+20-16
Original file line numberDiff line numberDiff line change
@@ -1721,7 +1721,7 @@ size_t Environment::NearHeapLimitCallback(void* data,
17211721
"Invoked NearHeapLimitCallback, processing=%d, "
17221722
"current_limit=%" PRIu64 ", "
17231723
"initial_limit=%" PRIu64 "\n",
1724-
env->is_processing_heap_limit_callback_,
1724+
env->is_in_heapsnapshot_heap_limit_callback_,
17251725
static_cast<uint64_t>(current_heap_limit),
17261726
static_cast<uint64_t>(initial_heap_limit));
17271727

@@ -1773,8 +1773,8 @@ size_t Environment::NearHeapLimitCallback(void* data,
17731773
// new limit, so in a heap with unbounded growth the isolate
17741774
// may eventually crash with this new limit - effectively raising
17751775
// the heap limit to the new one.
1776-
if (env->is_processing_heap_limit_callback_) {
1777-
size_t new_limit = current_heap_limit + max_young_gen_size;
1776+
size_t new_limit = current_heap_limit + max_young_gen_size;
1777+
if (env->is_in_heapsnapshot_heap_limit_callback_) {
17781778
Debug(env,
17791779
DebugCategory::DIAGNOSTICS,
17801780
"Not generating snapshots in nested callback. "
@@ -1790,14 +1790,14 @@ size_t Environment::NearHeapLimitCallback(void* data,
17901790
Debug(env,
17911791
DebugCategory::DIAGNOSTICS,
17921792
"Not generating snapshots because it's too risky.\n");
1793-
env->RemoveHeapSnapshotNearHeapLimitCallback(initial_heap_limit);
1793+
env->RemoveHeapSnapshotNearHeapLimitCallback(0);
17941794
// The new limit must be higher than current_heap_limit or V8 might
17951795
// crash.
1796-
return current_heap_limit + 1;
1796+
return new_limit;
17971797
}
17981798

17991799
// Take the snapshot synchronously.
1800-
env->is_processing_heap_limit_callback_ = true;
1800+
env->is_in_heapsnapshot_heap_limit_callback_ = true;
18011801

18021802
std::string dir = env->options()->diagnostic_dir;
18031803
if (dir.empty()) {
@@ -1808,29 +1808,33 @@ size_t Environment::NearHeapLimitCallback(void* data,
18081808

18091809
Debug(env, DebugCategory::DIAGNOSTICS, "Start generating %s...\n", *name);
18101810

1811-
// Remove the callback first in case it's triggered when generating
1812-
// the snapshot.
1813-
env->RemoveHeapSnapshotNearHeapLimitCallback(initial_heap_limit);
1814-
18151811
heap::WriteSnapshot(env, filename.c_str());
18161812
env->heap_limit_snapshot_taken_ += 1;
18171813

1818-
// Don't take more snapshots than the number specified by
1819-
// --heapsnapshot-near-heap-limit.
1820-
if (env->heap_limit_snapshot_taken_ < env->heap_snapshot_near_heap_limit_) {
1821-
env->AddHeapSnapshotNearHeapLimitCallback();
1814+
Debug(env,
1815+
DebugCategory::DIAGNOSTICS,
1816+
"%" PRIu32 "/%" PRIu32 " snapshots taken.\n",
1817+
env->heap_limit_snapshot_taken_,
1818+
env->heap_snapshot_near_heap_limit_);
1819+
1820+
// Don't take more snapshots than the limit specified.
1821+
if (env->heap_limit_snapshot_taken_ == env->heap_snapshot_near_heap_limit_) {
1822+
Debug(env,
1823+
DebugCategory::DIAGNOSTICS,
1824+
"Removing the near heap limit callback");
1825+
env->RemoveHeapSnapshotNearHeapLimitCallback(0);
18221826
}
18231827

18241828
FPrintF(stderr, "Wrote snapshot to %s\n", filename.c_str());
18251829
// Tell V8 to reset the heap limit once the heap usage falls down to
18261830
// 95% of the initial limit.
18271831
env->isolate()->AutomaticallyRestoreInitialHeapLimit(0.95);
18281832

1829-
env->is_processing_heap_limit_callback_ = false;
1833+
env->is_in_heapsnapshot_heap_limit_callback_ = false;
18301834

18311835
// The new limit must be higher than current_heap_limit or V8 might
18321836
// crash.
1833-
return current_heap_limit + 1;
1837+
return new_limit;
18341838
}
18351839

18361840
inline size_t Environment::SelfSize() const {

src/env.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -1041,6 +1041,7 @@ class Environment : public MemoryRetainer {
10411041
void ForEachBaseObject(T&& iterator);
10421042

10431043
inline void set_heap_snapshot_near_heap_limit(uint32_t limit);
1044+
inline bool is_in_heapsnapshot_heap_limit_callback() const;
10441045

10451046
inline void AddHeapSnapshotNearHeapLimitCallback();
10461047

@@ -1102,7 +1103,7 @@ class Environment : public MemoryRetainer {
11021103
std::vector<std::string> argv_;
11031104
std::string exec_path_;
11041105

1105-
bool is_processing_heap_limit_callback_ = false;
1106+
bool is_in_heapsnapshot_heap_limit_callback_ = false;
11061107
uint32_t heap_limit_snapshot_taken_ = 0;
11071108
uint32_t heap_snapshot_near_heap_limit_ = 0;
11081109
bool heapsnapshot_near_heap_limit_callback_added_ = false;

src/node_worker.cc

+12-2
Original file line numberDiff line numberDiff line change
@@ -244,11 +244,21 @@ class WorkerThreadData {
244244
size_t Worker::NearHeapLimit(void* data, size_t current_heap_limit,
245245
size_t initial_heap_limit) {
246246
Worker* worker = static_cast<Worker*>(data);
247-
worker->Exit(1, "ERR_WORKER_OUT_OF_MEMORY", "JS heap out of memory");
248247
// Give the current GC some extra leeway to let it finish rather than
249248
// crash hard. We are not going to perform further allocations anyway.
250249
constexpr size_t kExtraHeapAllowance = 16 * 1024 * 1024;
251-
return current_heap_limit + kExtraHeapAllowance;
250+
size_t new_limit = current_heap_limit + kExtraHeapAllowance;
251+
Environment* env = worker->env();
252+
if (env != nullptr) {
253+
DCHECK(!env->is_in_heapsnapshot_heap_limit_callback());
254+
Debug(env,
255+
DebugCategory::DIAGNOSTICS,
256+
"Throwing ERR_WORKER_OUT_OF_MEMORY, "
257+
"new_limit=%" PRIu64 "\n",
258+
static_cast<uint64_t>(new_limit));
259+
}
260+
worker->Exit(1, "ERR_WORKER_OUT_OF_MEMORY", "JS heap out of memory");
261+
return new_limit;
252262
}
253263

254264
void Worker::Run() {

0 commit comments

Comments
 (0)