Commit cd101ea

readyset-server: Add HTTP API endpoints for jemalloc profiling
Profiling can currently only be enabled, disabled, and manually dumped; other options, such as the sample rate and dumping to a file at an interval, can still only be configured via the `MALLOC_CONF` environment variable (or `_RJEM_MALLOC_CONF` on macOS).

The endpoints can be used like so:

```sh
$ curl -X POST localhost:6033/jemalloc/profiling/activate
Memory profiling activated
$ curl localhost:6033/jemalloc/profiling/dump > jeprof.heap
$ jeprof --svg target/release/readyset jeprof.heap > jeprof.heap.svg
$ curl -X POST localhost:6033/jemalloc/profiling/deactivate
Memory profiling deactivated
```

Change-Id: Ifc8efa3ccd67d01123b42e0588a45f2c26c947cd
Reviewed-on: https://gerrit.readyset.name/c/readyset/+/8503
Reviewed-by: Jason Brown <[email protected]>
Tested-by: Buildkite CI
1 parent 166e3ab commit cd101ea
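
For the options the commit message notes are still environment-variable only, here is a minimal sketch of setting them at startup. It assumes jemalloc's standard `prof`, `lg_prof_sample`, `lg_prof_interval`, and `prof_prefix` options; the dump prefix path and binary path are illustrative, not from this commit:

```sh
# Sketch only: enable profiling with a ~512 KiB average sample interval and an
# automatic dump roughly every 1 GiB of cumulative allocation, writing dump
# files with the given prefix. On macOS, use _RJEM_MALLOC_CONF instead.
MALLOC_CONF="prof:true,lg_prof_sample:19,lg_prof_interval:30,prof_prefix:/tmp/readyset.jeprof" \
    target/release/readyset
```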

File tree

3 files changed: +105 -42 lines changed

- readyset-alloc/Cargo.toml
- readyset-alloc/src/jemalloc.rs
- readyset-server/src/http_router.rs


readyset-alloc/Cargo.toml (+1)

```diff
@@ -19,6 +19,7 @@ futures = { workspace = true, features = ["executor", "thread-pool"] }
 fxhash = { workspace = true }
 lazy_static = { workspace = true }
 libc = { workspace = true }
+tempfile = { workspace = true }
 tikv-jemalloc-ctl = { workspace = true }
 tikv-jemalloc-sys = { workspace = true, features = ["stats"] }
 tikv-jemallocator = { workspace = true, features = [
```

readyset-alloc/src/jemalloc.rs (+22 -15)

```diff
@@ -131,7 +131,7 @@ pub fn remove_thread_memory_accessor() {
 
 use std::thread::ThreadId;
 
-pub use self::profiling::{activate_prof, deactivate_prof, dump_prof};
+pub use self::profiling::{activate_prof, deactivate_prof, dump_prof, dump_prof_to_string};
 
 /// Returns a very verbose output of jemalloc stats as well as per-thread stats
 pub fn dump_stats() -> Result<String, Error> {
@@ -325,7 +325,7 @@ mod tests {
 
 #[cfg(feature = "mem-profiling")]
 mod profiling {
-    use std::ffi::CString;
+    use std::{ffi::CString, os::unix::ffi::OsStrExt, path::Path};
 
     use libc::c_char;
 
@@ -360,26 +360,32 @@ mod profiling {
     }
 
     /// Dump the profile to the `path`.
-    pub fn dump_prof(path: &str) -> ProfResult<()> {
-        let mut bytes = CString::new(path)?.into_bytes_with_nul();
+    pub fn dump_prof(path: impl AsRef<Path>) -> ProfResult<()> {
+        let mut bytes = CString::new(path.as_ref().as_os_str().as_bytes())?.into_bytes_with_nul();
         let ptr = bytes.as_mut_ptr() as *mut c_char;
         unsafe {
             if let Err(e) = tikv_jemalloc_ctl::raw::write(PROF_DUMP, ptr) {
                 return Err(ProfError::JemallocError(format!(
                     "failed to dump the profile to {:?}: {}",
-                    path, e
+                    path.as_ref(),
+                    e
                 )));
             }
         }
         Ok(())
     }
 
+    pub async fn dump_prof_to_string() -> ProfResult<String> {
+        let tempdir = tempfile::Builder::new().prefix("jeprof").tempdir()?;
+        let path = tempdir.path().join("jeprof.out");
+        dump_prof(&path)?;
+        Ok(tokio::fs::read_to_string(path).await?)
+    }
+
     #[cfg(test)]
     mod tests {
         use std::fs;
 
-        use tempfile::Builder;
-
         const OPT_PROF: &[u8] = b"opt.prof\0";
 
         fn is_profiling_on() -> bool {
@@ -407,17 +413,15 @@ mod profiling {
             // Make sure somebody has turned on profiling
             assert!(is_profiling_on(), "set MALLOC_CONF=prof:true");
 
-            let dir = Builder::new()
+            let dir = tempfile::Builder::new()
                 .prefix("test_profiling_memory")
                 .tempdir()
                 .unwrap();
 
-            let os_path = dir.path().to_path_buf().join("test1.dump").into_os_string();
-            let path = os_path.into_string().unwrap();
+            let path = dir.path().join("test1.dump");
             super::dump_prof(&path).unwrap();
 
-            let os_path = dir.path().to_path_buf().join("test2.dump").into_os_string();
-            let path = os_path.into_string().unwrap();
+            let path = dir.path().join("test2.dump");
             super::dump_prof(&path).unwrap();
 
             let files = fs::read_dir(dir.path()).unwrap().count();
@@ -445,13 +449,16 @@ mod profiling {
 mod profiling {
     use super::{ProfError, ProfResult};
 
-    pub fn dump_prof(_path: &str) -> ProfResult<()> {
-        Err(ProfError::MemProfilingNotEnabled)
-    }
     pub fn activate_prof() -> ProfResult<()> {
         Err(ProfError::MemProfilingNotEnabled)
     }
     pub fn deactivate_prof() -> ProfResult<()> {
         Err(ProfError::MemProfilingNotEnabled)
     }
+    pub fn dump_prof(path: impl AsRef<Path>) -> ProfResult<()> {
+        Err(ProfError::MemProfilingNotEnabled)
+    }
+    pub async fn dump_prof_to_string() -> ProfResult<String> {
+        Err(ProfError::MemProfilingNotEnabled)
+    }
 }
```

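As a side note (not part of this commit), the profiling test above is gated on the `mem-profiling` feature and asserts that profiling was turned on at process startup, so one plausible way to run it locally would be:

```sh
# Sketch only: assumes the package is named readyset-alloc and exposes the
# mem-profiling feature seen in the diff; profiling must be enabled via the
# environment before the test process starts (on macOS: _RJEM_MALLOC_CONF).
MALLOC_CONF=prof:true cargo test -p readyset-alloc --features mem-profiling
```
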
readyset-server/src/http_router.rs (+82 -27)

```diff
@@ -11,7 +11,10 @@ use health_reporter::{HealthReporter, State};
 use hyper::header::CONTENT_TYPE;
 use hyper::service::make_service_fn;
 use hyper::{self, Body, Method, Request, Response, StatusCode};
-use readyset_alloc::{dump_stats, print_memory_and_per_thread_stats};
+use readyset_alloc::{
+    activate_prof, deactivate_prof, dump_prof_to_string, dump_stats,
+    print_memory_and_per_thread_stats,
+};
 use readyset_client::metrics::recorded;
 use readyset_errors::ReadySetError;
 use readyset_util::shutdown::ShutdownReceiver;
@@ -112,7 +115,7 @@ impl Service<Request<Body>> for NoriaServerHttpRouter {
                     let contents = match bincode::deserialize(&body) {
                         Err(_) => {
                             return Ok(res
-                                .status(400)
+                                .status(StatusCode::BAD_REQUEST)
                                 .header(CONTENT_TYPE, "text/plain")
                                 .body(hyper::Body::from(
                                     "body cannot be deserialized into failpoint name and action",
@@ -123,7 +126,7 @@ impl Service<Request<Body>> for NoriaServerHttpRouter {
                     };
                     let (name, action): (String, String) = contents;
                     let resp = res
-                        .status(200)
+                        .status(StatusCode::OK)
                         .header(CONTENT_TYPE, "text/plain")
                         .body(hyper::Body::from(
                             ::bincode::serialize(&fail::cfg(name, &action)).unwrap(),
@@ -147,7 +150,7 @@ impl Service<Request<Body>> for NoriaServerHttpRouter {
                 let res = match render {
                     Some(metrics) => res.body(hyper::Body::from(metrics)),
                     None => res
-                        .status(404)
+                        .status(StatusCode::NOT_FOUND)
                         .body(hyper::Body::from("Prometheus metrics were not enabled. To fix this, run Noria with --prometheus-metrics".to_string())),
                 };
                 Box::pin(async move { Ok(res.unwrap()) })
@@ -158,11 +161,11 @@ impl Service<Request<Body>> for NoriaServerHttpRouter {
                 let body = format!("Server is in {} state", &state).into();
                 let res = match state {
                     State::Healthy | State::ShuttingDown => res
-                        .status(200)
+                        .status(StatusCode::OK)
                         .header(CONTENT_TYPE, "text/plain")
                         .body(body),
                     _ => res
-                        .status(500)
+                        .status(StatusCode::INTERNAL_SERVER_ERROR)
                         .header(CONTENT_TYPE, "text/plain")
                         .body(body),
                 };
@@ -177,7 +180,7 @@ impl Service<Request<Body>> for NoriaServerHttpRouter {
                         .header(CONTENT_TYPE, "application/json")
                         .body(hyper::Body::from(metrics)),
                     None => res
-                        .status(404)
+                        .status(StatusCode::NOT_FOUND)
                         .header(CONTENT_TYPE, "text/plain")
                         .body(hyper::Body::from("Noria metrics were not enabled. To fix this, run Noria with --noria-metrics".to_string())),
                 };
@@ -242,35 +245,87 @@ impl Service<Request<Body>> for NoriaServerHttpRouter {
             }
             // Returns a summary of memory usage for the entire process and per-thread memory usage
             (&Method::POST, "/memory_stats") => {
-                let res =
-                    match print_memory_and_per_thread_stats() {
-                        Ok(stats) => res
-                            .status(200)
-                            .header(CONTENT_TYPE, "text/plain")
-                            .body(hyper::Body::from(stats)),
-                        Err(e) => res.status(500).header(CONTENT_TYPE, "text/plain").body(
-                            hyper::Body::from(format!("Error fetching memory stats: {e}")),
-                        ),
-                    };
+                let res = match print_memory_and_per_thread_stats() {
+                    Ok(stats) => res
+                        .status(StatusCode::OK)
+                        .header(CONTENT_TYPE, "text/plain")
+                        .body(hyper::Body::from(stats)),
+                    Err(e) => res
+                        .status(StatusCode::INTERNAL_SERVER_ERROR)
+                        .header(CONTENT_TYPE, "text/plain")
+                        .body(hyper::Body::from(format!(
+                            "Error fetching memory stats: {e}"
+                        ))),
+                };
 
                 Box::pin(async move { Ok(res.unwrap()) })
             }
             // Returns a large dump of jemalloc debugging information along with per-thread
             // memory stats
             (&Method::POST, "/memory_stats_verbose") => {
-                let res =
-                    match dump_stats() {
-                        Ok(stats) => res
-                            .status(200)
-                            .header(CONTENT_TYPE, "text/plain")
-                            .body(hyper::Body::from(stats)),
-                        Err(e) => res.status(500).header(CONTENT_TYPE, "text/plain").body(
-                            hyper::Body::from(format!("Error fetching memory stats: {e}")),
-                        ),
-                    };
+                let res = match dump_stats() {
+                    Ok(stats) => res
+                        .status(StatusCode::OK)
+                        .header(CONTENT_TYPE, "text/plain")
+                        .body(hyper::Body::from(stats)),
+                    Err(e) => res
+                        .status(StatusCode::INTERNAL_SERVER_ERROR)
+                        .header(CONTENT_TYPE, "text/plain")
+                        .body(hyper::Body::from(format!(
+                            "Error fetching memory stats: {e}"
+                        ))),
+                };
 
                 Box::pin(async move { Ok(res.unwrap()) })
             }
+            // Turns on jemalloc's profiler
+            (&Method::POST, "/jemalloc/profiling/activate") => {
+                let res = match activate_prof() {
+                    Ok(_) => res
+                        .status(StatusCode::OK)
+                        .header(CONTENT_TYPE, "text/plain")
+                        .body(hyper::Body::from("Memory profiling activated")),
+                    Err(e) => res
+                        .status(StatusCode::INTERNAL_SERVER_ERROR)
+                        .header(CONTENT_TYPE, "text/plain")
+                        .body(hyper::Body::from(format!(
+                            "Error activating memory profiling: {e}"
+                        ))),
+                };
+                Box::pin(async move { Ok(res.unwrap()) })
+            }
+            // Disables jemalloc's profiler
+            (&Method::POST, "/jemalloc/profiling/deactivate") => {
+                let res = match deactivate_prof() {
+                    Ok(_) => res
+                        .status(StatusCode::OK)
+                        .header(CONTENT_TYPE, "text/plain")
+                        .body(hyper::Body::from("Memory profiling deactivated")),
+                    Err(e) => res
+                        .status(StatusCode::INTERNAL_SERVER_ERROR)
+                        .header(CONTENT_TYPE, "text/plain")
+                        .body(hyper::Body::from(format!(
+                            "Error deactivating memory profiling: {e}"
+                        ))),
+                };
+                Box::pin(async move { Ok(res.unwrap()) })
+            }
+            // Returns the current jemalloc profiler output
+            (&Method::GET, "/jemalloc/profiling/dump") => Box::pin(async move {
+                let res = match dump_prof_to_string().await {
+                    Ok(dump) => res
+                        .status(StatusCode::OK)
+                        .header(CONTENT_TYPE, "text/plain")
+                        .body(hyper::Body::from(dump)),
+                    Err(e) => res
+                        .status(StatusCode::INTERNAL_SERVER_ERROR)
+                        .header(CONTENT_TYPE, "text/plain")
+                        .body(hyper::Body::from(format!(
+                            "Error dumping profiling output: {e}"
+                        ))),
+                };
+                Ok(res.unwrap())
+            }),
             _ => {
                 metrics::counter!(recorded::SERVER_CONTROLLER_REQUESTS).increment(1);
```

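The GET dump endpoint also lends itself to differential profiling: capture a dump before and after a workload and let jeprof subtract the baseline. A minimal sketch, not part of the commit, reusing the port and binary path from the commit message example (`--base` is standard jeprof behavior):

```sh
# Sketch only: diff two heap profiles taken through the new HTTP endpoint.
curl localhost:6033/jemalloc/profiling/dump > before.heap
# ... run the workload of interest ...
curl localhost:6033/jemalloc/profiling/dump > after.heap
jeprof --svg --base=before.heap target/release/readyset after.heap > diff.svg
```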