Skip to content

Commit 3006911

Browse files
committed
feat: convenience command to capture logs from external scripts
1 parent ba5e0c5 commit 3006911

File tree

5 files changed

+179
-11
lines changed

5 files changed

+179
-11
lines changed

maxray/__init__.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,7 @@ def maxray(
260260
*,
261261
mutable=True,
262262
pass_scope=False,
263+
initial_scope={},
263264
):
264265
"""
265266
A transform that recursively hooks into all further calls made within the function, so that `writer` will (in theory) observe every single expression evaluated by the Python interpreter occurring as part of the decorated function call.
@@ -284,7 +285,7 @@ def maxray(
284285
# del frame
285286

286287
# TODO: allow configuring injection of variables into exec scope
287-
caller_locals = {}
288+
caller_locals = initial_scope
288289

289290
def recursive_transform(fn):
290291
_MAXRAY_REGISTERED_HOOKS.append(
@@ -304,7 +305,7 @@ def recursive_transform(fn):
304305
match recompile_fn_with_transform(
305306
fn,
306307
_maxray_walker_handler,
307-
initial_scope=caller_locals,
308+
override_scope=caller_locals,
308309
pass_scope=pass_scope,
309310
is_maxray_root=True,
310311
):

maxray/capture/logs.py

+44-6
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,27 @@
99
from contextvars import ContextVar
1010
from functools import wraps
1111
import time
12+
from pathlib import Path
1213

1314
from loguru import logger
1415

1516

1617
class CaptureLogs:
1718
instance = ContextVar("CaptureLogs")
1819

20+
LSP_NO_SHOW_TYPES = {
21+
"type",
22+
"function",
23+
"builtin_function_or_method",
24+
"staticmethod",
25+
"method",
26+
"module",
27+
"NoneType",
28+
"int", # mostly uninteresting and makes array indexing expressions cluttered
29+
"int32",
30+
"int64",
31+
}
32+
1933
@staticmethod
2034
def extractor(x, ctx: NodeContext):
2135
if isinstance(instance := CaptureLogs.instance.get(None), CaptureLogs):
@@ -36,13 +50,24 @@ def extractor(x, ctx: NodeContext):
3650
instance.builder("source_file").append(ctx.fn_context.source_file)
3751
instance.builder("source").append(ctx.source)
3852

39-
assert isinstance(ctx.fn_context.name, str)
4053
instance.builder("fn").append(ctx.fn_context.name)
4154
instance.builder("fn_call_count").append(
4255
ctx.fn_context.call_count.get()
4356
)
44-
instance.builder("struct_repr").append(structured_repr(x))
45-
instance.builder("value_type").append(repr(type(x)))
57+
58+
struct_repr = structured_repr(x)
59+
instance.builder("struct_repr").append(struct_repr)
60+
61+
value_type = repr(type(x))
62+
instance.builder("value_type").append(value_type)
63+
64+
type_name = type(x).__name__
65+
if type_name in CaptureLogs.LSP_NO_SHOW_TYPES:
66+
instance.builder("lsp_repr").append(None)
67+
else:
68+
instance.builder("lsp_repr").append(
69+
f" {struct_repr}"
70+
) # space inserted for formatting
4671

4772
instance.builder("timestamp").append(time.perf_counter())
4873

@@ -52,7 +77,9 @@ def extractor(x, ctx: NodeContext):
5277

5378
return x
5479

55-
def __init__(self):
80+
def __init__(self, from_script_path=None):
81+
# TODO: support in-memory mode
82+
5683
# Maps function UUIDs (_MAXRAY_TRANSFORM_ID) to FnContext instances
5784
self.fn_sources = {}
5885

@@ -74,11 +101,16 @@ def __init__(self):
74101
"source": pa.string(),
75102
# Extracted data
76103
"struct_repr": pa.string(),
104+
"lsp_repr": pa.string(),
77105
"value_type": pa.string(),
78106
"timestamp": pa.float64(),
79107
}
80108

81-
self.save_to = "/tmp/maxray-logs.arrow"
109+
log_file_name = ".maxray-logs.arrow"
110+
if from_script_path is not None:
111+
self.save_to = Path(from_script_path).resolve(True).parent / log_file_name
112+
else:
113+
self.save_to = Path("/tmp") / log_file_name
82114

83115
self.flush_every_records = 10_000
84116

@@ -94,6 +126,10 @@ def builder(self, name: str):
94126
return self.builders[name]
95127

96128
def flush(self):
129+
if not self.builders:
130+
logger.warning("Nothing to flush")
131+
return
132+
97133
arrays, names = [], []
98134
for col_name, col_type in self.type_schema.items():
99135
builder = self.builders[col_name]
@@ -111,14 +147,16 @@ def __enter__(self):
111147

112148
# TODO: forbid re-entry
113149

114-
self.sink = self.write_context.enter_context(pa.OSFile(self.save_to, "wb"))
150+
self.sink = self.write_context.enter_context(pa.OSFile(str(self.save_to), "wb"))
115151
self.writer = self.write_context.enter_context(
116152
pa.ipc.new_file(self.sink, self.schema())
117153
)
118154
return self
119155

120156
def __exit__(self, exc_type, exc_val, exc_tb):
121157
try:
158+
self.flush()
159+
122160
if exc_type is not None:
123161
return
124162

maxray/capture/logs_cli.py

+124
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
from dataclasses import dataclass, field
2+
from pathlib import Path
3+
from maxray import xray, _set_logging, NodeContext
4+
from maxray.capture.logs import CaptureLogs
5+
6+
import ast
7+
from textwrap import indent
8+
import sys
9+
import tempfile
10+
from typing import Any
11+
from copy import copy
12+
from types import ModuleType
13+
14+
import numpy as np
15+
16+
17+
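# Register an existing module (numpy) under the placeholder name "override_mod", chosen so it doesn't conflict with a real module; `WrapScriptIntoMain.build()` assigns this name to the wrapped function's `__module__`.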
18+
sys.modules["override_mod"] = np
19+
20+
21+
@dataclass
class WrapScriptIntoMain:
    """
    Convenience utility: Sometimes you just want to profile/annotate a script once without modifying any source code.
    This makes a copy of the script file, wrapping its contents in a callable "main" function to which `maxray` transforms can then be applied.

    Attributes:
        script_path: Path of the script whose contents get wrapped.
        temp_sourcefile: Object whose `.name` is the path the wrapped copy is
            written to. Defaults to a fresh named temporary file that is not
            deleted automatically.
    """

    script_path: str
    temp_sourcefile: Any = field(
        default_factory=lambda: tempfile.NamedTemporaryFile("w", delete=False)
    )

    # Prefix added to every script line when nesting it under `def main():`.
    # `rewrite_node` subtracts exactly this width from reported columns, so the
    # two must stay in sync (not annotated, hence not a dataclass field).
    _INDENT = " " * 4

    def build(self):
        """
        Write the wrapped copy of the script to `temp_sourcefile` and return
        the resulting `main` function.

        Returns:
            The compiled `main` function, with `__module__` set to
            "override_mod".

        Raises:
            OSError: if the script cannot be read or the copy cannot be written.
            SyntaxError: if the wrapped source does not compile.
        """
        with open(self.script_path, "r", encoding="utf-8") as file:
            source = file.read()

        # NOTE: `indent` also prefixes lines inside multi-line string literals.
        # The trailing `pass` keeps the function body non-empty when the script
        # is empty or contains only comments.
        new_source = (
            f"def main():\n{indent(source, self._INDENT)}\n{self._INDENT}pass\n"
        )

        # Only the unique *name* of the temporary file is needed. Release the
        # handle made by the default factory before reopening by name (an open
        # NamedTemporaryFile cannot be reopened on every platform).
        close_handle = getattr(self.temp_sourcefile, "close", None)
        if callable(close_handle):
            close_handle()

        # inspect.getsource relies on the file actually existing
        with open(self.temp_sourcefile.name, "w", encoding="utf-8") as f:
            f.write(new_source)

        tree = ast.parse(new_source, filename=self.temp_sourcefile.name)

        # BUG: will error if `import *` is used (doesn't work inside a fn `def`)
        compiled_code = compile(tree, filename=self.temp_sourcefile.name, mode="exec")

        namespace = {}
        exec(compiled_code, namespace)
        # TODO: use non-conflicting name other than "main"
        main = namespace["main"]
        main.__module__ = "override_mod"
        return main

    def run(self):
        """
        Build the wrapped script, apply `xray` with `rewrite_node` as the
        callback, and execute it inside a `CaptureLogs` context created from
        `script_path`.
        """
        fn = xray(self.rewrite_node, initial_scope={"__name__": "__main__"})(
            self.build()
        )
        with CaptureLogs(self.script_path):
            fn()

    def rewrite_node(self, x, ctx: "NodeContext"):
        """
        Callback passed to `xray`: forwards `x` to `CaptureLogs.extractor`
        after mapping the context of nodes that come from the wrapped copy
        back onto the original script.

        Args:
            x: The observed value; returned unchanged.
            ctx: Context of the observed node. A shallow copy is modified, the
                object passed in is left untouched.

        Returns:
            `x`, unchanged.
        """
        # functions and contextvars can't be deepcopied
        ctx = copy(ctx)
        ctx.fn_context = copy(ctx.fn_context)

        # source_file should never be self.script_path because we've copied
        if ctx.fn_context.source_file == self.temp_sourcefile.name:
            ctx.fn_context.source_file = self.script_path

            # subtract the "def" line and new indentation; only nodes from the
            # wrapped copy were shifted, so only those are shifted back
            start_line, end_line, start_col, end_col = ctx.location
            shift = len(self._INDENT)
            ctx.location = (
                start_line - 1,
                end_line - 1,
                start_col - shift,
                end_col - shift,
            )

        CaptureLogs.extractor(x, ctx)
        return x

    @staticmethod
    def empty_module(module_name: str):
        """
        Create a new module object named `module_name` by executing empty
        source (attributed to "<module_name>.py") in its namespace.

        Returns:
            The new `ModuleType` instance.
        """
        module_code = ""

        module_ast = ast.parse(module_code, filename=f"{module_name}.py")

        module_code_object = compile(
            module_ast, filename=f"{module_name}.py", mode="exec"
        )

        module = ModuleType(module_name)

        exec(module_code_object, module.__dict__)

        return module
101+
102+
103+
def run_script():
104+
_set_logging(True)
105+
106+
match sys.argv:
107+
case (
108+
_,
109+
script_path,
110+
) if (
111+
path := Path(script_path).resolve(True)
112+
).exists() and path.suffix == ".py":
113+
WrapScriptIntoMain(str(path)).run()
114+
115+
case (_0, script_path, "--", *args):
116+
sys.argv = [_0, *args]
117+
path = Path(script_path).resolve(True)
118+
if path.exists() and path.suffix == ".py":
119+
WrapScriptIntoMain(str(path)).run()
120+
121+
case _:
122+
raise RuntimeError(
123+
f"Incorrect argument usage - expected `capture-logs <script_path> -- script_args...` (got {sys.argv[1:]})"
124+
)

maxray/transforms.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ class NodeContext:
5858
"""
5959

6060
location: tuple[int, int, int, int]
61+
"""
62+
(start_line, end_line, start_col, end_col)
63+
"""
6164

6265
local_scope: Any = None
6366

@@ -458,7 +461,7 @@ def recompile_fn_with_transform(
458461
transform_fn,
459462
ast_pre_callback=None,
460463
ast_post_callback=None,
461-
initial_scope={},
464+
override_scope={},
462465
pass_scope=False,
463466
special_use_instance_type=None,
464467
is_maxray_root=False,
@@ -555,7 +558,7 @@ def recompile_fn_with_transform(
555558
fn_call_counter = ContextVar("maxray_call_counter", default=0)
556559
fn_context = FnContext(
557560
source_fn,
558-
source_fn.__name__,
561+
source_fn.__qualname__,
559562
module.__name__,
560563
source,
561564
sourcefile,
@@ -595,7 +598,7 @@ def patch_mro(super_type: super):
595598
"_MAXRAY_BUILTINS_LOCALS": locals,
596599
"_MAXRAY_PATCH_MRO": patch_mro,
597600
},
598-
"override": initial_scope,
601+
"override": override_scope,
599602
"class_local": {},
600603
"module": {},
601604
"closure": {},

pyproject.toml

+2
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ python = "^3.11"
1515
result = "^0.16.1"
1616
loguru = "^0.7.2"
1717

18+
[tool.poetry.scripts]
19+
capture-logs = "maxray.capture.logs_cli:run_script"
1820

1921
[tool.poetry.group.dev.dependencies]
2022
pytest = "^8.1.1"

0 commit comments

Comments
 (0)