20
20
//
21
21
// SPDX-License-Identifier: MIT
22
22
23
+ // The functions in this file map the .text section of Node.js into 2MB pages.
24
+ // They perform the following steps:
25
+ //
26
+ // 1: Find the Node.js binary's `.text` section in memory. This is done below in
27
+ // `FindNodeTextRegion`. It is accomplished in a platform-specific way. On
28
+ // Linux and FreeBSD, `dl_iterate_phdr(3)` is used. When the region is found,
29
+ // it is "trimmed" as follows:
30
+ // * Modify the start to point to the very beginning of the Node.js `.text`
31
+ // section (from symbol `__node_text_start` declared in node_text_start.S).
32
+ // * Possibly modify the end to account for the `lpstub` section which
33
+ // contains `MoveTextRegionToLargePages`, the function we do not wish to
34
+ // move (see below).
35
+ // * Align the address of the start to its nearest higher large page
36
+ // boundary.
37
+ // * Align the address of the end to its nearest lower large page boundary.
38
+ //
39
+ // 2: Move the text region to large pages. This is done below in
40
+ // `MoveTextRegionToLargePages`. We need to be very careful:
41
+ // a) `MoveTextRegionToLargePages` itself should not be moved.
42
+ // We use gcc attributes
43
+ // (__section__) to put it outside the `.text` section,
44
+ // (__aligned__) to align it at the 2M boundary, and
45
+ // (__noinline__) to not inline this function.
46
+ // b) `MoveTextRegionToLargePages` should not call any function(s) that might
47
+ // be moved.
48
+ // To move the .text section, perform the following steps:
49
+ // * Map a new, temporary area and copy the original code there.
50
+ // * Use mmap using the start address with MAP_FIXED so we get exactly the
51
+ // same virtual address (except on OSX). On platforms other than Linux,
52
+ // use mmap flags to request hugepages.
53
+ // * On Linux use madvise with MADV_HUGEPAGE to use anonymous 2MB pages.
54
+ // * If successful copy the code to the newly mapped area and protect it to
55
+ // be readable and executable.
56
+ // * Unmap the temporary area.
57
+
23
58
#include " node_large_page.h"
24
59
25
60
#include < cerrno> // NOLINT(build/include)
26
61
27
62
// Besides returning ENOTSUP at runtime we do nothing if this define is missing.
28
63
#if defined(NODE_ENABLE_LARGE_CODE_PAGES) && NODE_ENABLE_LARGE_CODE_PAGES
29
64
#include " debug_utils-inl.h"
30
- #include " util.h"
31
- #include " uv.h"
32
65
33
66
#if defined(__linux__) || defined(__FreeBSD__)
34
- #include < string.h>
35
67
#if defined(__linux__)
36
68
#ifndef _GNU_SOURCE
37
69
#define _GNU_SOURCE
38
70
#endif // ifndef _GNU_SOURCE
71
+ #elif defined(__FreeBSD__)
72
+ #include " uv.h" // uv_exepath
39
73
#endif // defined(__linux__)
40
74
#include < link.h>
41
75
#endif // defined(__linux__) || defined(__FreeBSD__)
44
78
#include < sys/mman.h>
45
79
#if defined(__FreeBSD__)
46
80
#include < sys/sysctl.h>
47
- #include < sys/user.h>
48
81
#elif defined(__APPLE__)
49
82
#include < mach/vm_map.h>
50
83
#endif
51
- #include < unistd.h> // getpid
52
84
53
85
#include < climits> // PATH_MAX
54
- #include < clocale>
55
- #include < csignal>
56
86
#include < cstdlib>
57
87
#include < cstdint>
58
88
#include < cstring>
59
89
#include < string>
60
90
#include < fstream>
61
- #include < iostream>
62
- #include < vector>
63
-
64
- // The functions in this file map the text segment of node into 2M pages.
65
- // The algorithm is simple
66
- // Find the text region of node binary in memory
67
- // 1: Examine the /proc/self/maps to determine the currently mapped text
68
- // region and obtain the start and end
69
- // Modify the start to point to the very beginning of node text segment
70
- // (from variable nodetext setup in ld.script)
71
- // Align the address of start and end to Large Page Boundaries
72
- //
73
- // 2: Move the text region to large pages
74
- // Map a new area and copy the original code there
75
- // Use mmap using the start address with MAP_FIXED so we get exactly the
76
- // same virtual address
77
- // Use madvise with MADV_HUGEPAGE to use Anonymous 2M Pages
78
- // If successful copy the code there and unmap the original region.
79
91
80
92
#if defined(__linux__) || defined(__FreeBSD__)
81
93
extern " C" {
@@ -282,20 +294,44 @@ bool IsSuperPagesEnabled() {
282
294
}
283
295
#endif
284
296
297
+ // Functions in this class must always be inlined because they must end up in
298
+ // the `lpstub` section rather than the `.text` section. The class holds the result of an mmap() call together with its size; the destructor munmap()s it unless the pointer is nullptr or MAP_FAILED. It is neither copyable nor movable.
299
+ class MemoryMapPointer {
300
+ public:
301
+ FORCE_INLINE explicit MemoryMapPointer () {}
302
+ FORCE_INLINE bool operator ==(void * rhs) const { return mem_ == rhs; }
303
+ FORCE_INLINE void * mem () const { return mem_; }
304
+ MemoryMapPointer (const MemoryMapPointer&) = delete ;
305
+ MemoryMapPointer (MemoryMapPointer&&) = delete ;
306
+ void operator = (const MemoryMapPointer&) = delete ;
307
+ void operator = (const MemoryMapPointer&&) = delete ;
308
+ FORCE_INLINE void Reset (void * start,
309
+ size_t size,
310
+ int prot,
311
+ int flags,
312
+ int fd = -1 ,
313
+ size_t offset = 0 ) {
314
+ mem_ = mmap (start, size, prot, flags, fd, offset);  // Stores the raw mmap() result, which may be MAP_FAILED; a previously held mapping is not unmapped here.
315
+ size_ = size;  // Remembered so the destructor can pass it to munmap().
316
+ }
317
+ FORCE_INLINE void Reset () {
318
+ mem_ = nullptr ;  // Forget the mapping without calling munmap(); the destructor then does nothing.
319
+ size_ = 0 ;
320
+ }
321
+ FORCE_INLINE ~MemoryMapPointer () {
322
+ if (mem_ == nullptr ) return ;  // Nothing was mapped, or Reset() released it.
323
+ if (mem_ == MAP_FAILED) return ;  // The mmap() call failed, so there is nothing to unmap.
324
+ if (munmap (mem_, size_) == 0 ) return ;
325
+ PrintSystemError (errno);  // Reached only when munmap() did not return 0.
326
+ }
327
+
328
+ private:
329
+ size_t size_ = 0 ;
330
+ void * mem_ = nullptr ;
331
+ };
332
+
285
333
} // End of anonymous namespace
286
334
287
- // Moving the text region to large pages. We need to be very careful.
288
- // 1: This function itself should not be moved.
289
- // We use a gcc attributes
290
- // (__section__) to put it outside the ".text" section
291
- // (__aligned__) to align it at 2M boundary
292
- // (__noline__) to not inline this function
293
- // 2: This function should not call any function(s) that might be moved.
294
- // a. map a new area and copy the original code there
295
- // b. mmap using the start address with MAP_FIXED so we get exactly
296
- // the same virtual address (except on macOS).
297
- // c. madvise with MADV_HUGEPAGE
298
- // d. If successful copy the code there and unmap the original region
299
335
int
300
336
#if !defined(__APPLE__)
301
337
__attribute__ ((__section__(" lpstub" )))
@@ -305,62 +341,56 @@ __attribute__((__section__("__TEXT,__lpstub")))
305
341
__attribute__ ((__aligned__(hps)))
306
342
__attribute__((__noinline__))
307
343
MoveTextRegionToLargePages(const text_region& r) {
308
- void * nmem = nullptr ;
309
- void * tmem = nullptr ;
344
+ MemoryMapPointer nmem;
345
+ MemoryMapPointer tmem;
310
346
void * start = r.from ;
311
347
size_t size = r.to - r.from ;
312
348
313
- auto free_mems = OnScopeLeave ([&nmem, &tmem, size]() {
314
- if (nmem != nullptr && nmem != MAP_FAILED && munmap (nmem, size) == -1 )
315
- PrintSystemError (errno);
316
- if (tmem != nullptr && tmem != MAP_FAILED && munmap (tmem, size) == -1 )
317
- PrintSystemError (errno);
318
- });
319
-
320
- // Allocate temporary region and back up the code we will re-map.
321
- nmem = mmap (nullptr , size,
322
- PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1 , 0 );
323
- if (nmem == MAP_FAILED) goto fail;
324
- memcpy (nmem, r.from , size);
349
+ // Allocate a temporary region and back up the code we will re-map.
350
+ nmem.Reset (nullptr , size,
351
+ PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS);
352
+ if (nmem.mem () == MAP_FAILED) goto fail;
353
+ memcpy (nmem.mem (), r.from , size);
325
354
326
355
#if defined(__linux__)
327
356
// We already know the original page is r-xp
328
357
// (PROT_READ, PROT_EXEC, MAP_PRIVATE)
329
358
// We want PROT_WRITE because we are writing into it.
330
359
// We want it at the fixed address and we use MAP_FIXED.
331
- tmem = mmap (start, size,
332
- PROT_READ | PROT_WRITE | PROT_EXEC,
333
- MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - 1 , 0 );
334
- if (tmem == MAP_FAILED) goto fail;
335
- if (madvise (tmem, size, 14 /* MADV_HUGEPAGE */ ) == -1 ) goto fail;
336
- memcpy (start, nmem, size);
360
+ tmem. Reset (start, size,
361
+ PROT_READ | PROT_WRITE | PROT_EXEC,
362
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED);
363
+ if (tmem. mem () == MAP_FAILED) goto fail;
364
+ if (madvise (tmem. mem () , size, 14 /* MADV_HUGEPAGE */ ) == -1 ) goto fail;
365
+ memcpy (start, nmem. mem () , size);
337
366
#elif defined(__FreeBSD__)
338
- tmem = mmap (start, size,
339
- PROT_READ | PROT_WRITE | PROT_EXEC,
340
- MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED |
341
- MAP_ALIGNED_SUPER, - 1 , 0 );
342
- if (tmem == MAP_FAILED) goto fail;
343
- memcpy (start, nmem, size);
367
+ tmem. Reset (start, size,
368
+ PROT_READ | PROT_WRITE | PROT_EXEC,
369
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED |
370
+ MAP_ALIGNED_SUPER);
371
+ if (tmem. mem () == MAP_FAILED) goto fail;
372
+ memcpy (start, nmem. mem () , size);
344
373
#elif defined(__APPLE__)
345
374
// There is not enough room to reserve the mapping close
346
375
// to the region address, so we content ourselves with giving a hint
347
376
// without forcing the new address to be close to it.
348
377
// We explicitly give all permissions since we plan
349
378
// to write into it.
350
- tmem = mmap (start, size,
351
- PROT_READ | PROT_WRITE | PROT_EXEC,
352
- MAP_PRIVATE | MAP_ANONYMOUS,
353
- VM_FLAGS_SUPERPAGE_SIZE_2MB, 0 );
354
- if (tmem == MAP_FAILED) goto fail;
355
- memcpy (tmem, nmem, size);
379
+ tmem. Reset (start, size,
380
+ PROT_READ | PROT_WRITE | PROT_EXEC,
381
+ MAP_PRIVATE | MAP_ANONYMOUS,
382
+ VM_FLAGS_SUPERPAGE_SIZE_2MB);
383
+ if (tmem. mem () == MAP_FAILED) goto fail;
384
+ memcpy (tmem. mem () , nmem. mem () , size);
356
385
if (mprotect (start, size, PROT_READ | PROT_WRITE | PROT_EXEC) == -1 )
357
386
goto fail;
358
- memcpy (start, tmem, size);
387
+ memcpy (start, tmem. mem () , size);
359
388
#endif
360
389
361
390
if (mprotect (start, size, PROT_READ | PROT_EXEC) == -1 ) goto fail;
362
- // We need not `munmap(tmem, size)` in the above `OnScopeLeave` on success.
363
- tmem = nullptr ;
391
+
392
+ // We need not `munmap(tmem, size)` on success.
393
+ tmem.Reset ();
364
394
return 0 ;
365
395
fail:
366
396
PrintSystemError (errno);
0 commit comments