-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathwal_per_query.py
executable file
·235 lines (187 loc) · 6.02 KB
/
wal_per_query.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
#!/usr/bin/python
#
# wal_per_query.py Summarize WAL writes by postgres backend.
# For Linux, uses BCC, eBPF.
# To trace only inside a particular container, you can use the
# --container option (which assumes Docker); you need to
# provide a namespace identifier. For a Docker container,
# you can obtain one by first looking up the container's PID:
#
# docker inspect postgres_test --format='{{.State.Pid}}'
#
# Then use lsns to get a namespace id
#
# lsns -p $PID -t pid
#
# usage: wal_per_query $PG_BIN/postgres [-d] [-p PID] [-c CONTAINER] [-n NAMESPACE] [-i INTERVAL]
from __future__ import print_function
import argparse
import signal
from time import sleep
from bcc import BPF
import utils
# BPF program (C source compiled by BCC at startup).
#
# Probes defined here:
#   * probe_exec_simple_query        - remembers the query text of the
#                                      current backend in the `queries` map.
#   * probe_exec_simple_query_finish - forgets that entry once the query
#                                      function returns.
#   * probe_wal_insert_record        - adds xl_tot_len of the inserted WAL
#                                      record to the `wal_records` counter
#                                      keyed by (cpu, pid, namespace, query).
#
# SAVE_NAMESPACE / CHECK_NAMESPACE are placeholders; pre_process() passes the
# text through utils.replace_namespace() before compilation.
#
# NOTE: HASH_SIZE must be a plain integer. In C `2^14` is a bitwise XOR
# (== 12), not a power, which would silently cap the `queries` map at
# 12 entries, so the intended value 16384 (2 to the power of 14) is
# spelled out literally.
bpf_text = """
#include <linux/ptrace.h>
#include <uapi/linux/bpf_perf_event.h>
#define HASH_SIZE 16384
#define QUERY_LEN 100
#define STACK_STORAGE_SIZE 1024
typedef unsigned int uint32;
typedef unsigned long int uint64;
typedef unsigned char uint8;
typedef uint32 TransactionId;
typedef uint8 RmgrId;
typedef uint32 pg_crc32c;
typedef uint64 XLogRecPtr;
typedef struct XLogRecData
{
struct XLogRecData *next; /* next struct in chain, or NULL */
char *data; /* start of rmgr data to include */
uint32 len; /* length of rmgr data to include */
} XLogRecData;
typedef struct XLogRecord
{
uint32 xl_tot_len; /* total len of entire record */
TransactionId xl_xid; /* xact id */
XLogRecPtr xl_prev; /* ptr to previous record in log */
uint8 xl_info; /* flag bits, see below */
RmgrId xl_rmid; /* resource manager for this record */
/* 2 bytes of padding here, initialize to zero */
pg_crc32c xl_crc; /* CRC for this record */
/* XLogRecordBlockHeaders and XLogRecordDataHeader follow, no padding */
} XLogRecord;
struct key_t {
int cpu;
int pid;
u64 namespace;
int len;
char query[QUERY_LEN];
};
struct backend {
int pid;
char query[QUERY_LEN];
};
BPF_HASH(wal_records, struct key_t);
BPF_HASH(queries, u32, struct backend, HASH_SIZE);
static inline __attribute__((always_inline)) int get_key(struct key_t* key) {
key->cpu = bpf_get_smp_processor_id();
key->pid = bpf_get_current_pid_tgid();
struct backend *data = queries.lookup(&(key->pid));
if (data != NULL)
{
bpf_probe_read(&(key->query), QUERY_LEN, &(data->query));
return 1;
}
return 0;
}
int probe_wal_insert_record(struct pt_regs *ctx) {
struct key_t key = {};
int result = get_key(&key);
SAVE_NAMESPACE
CHECK_NAMESPACE
if (result == 0)
return 0;
struct XLogRecData rdata = {};
struct XLogRecord rec = {};
bpf_probe_read(&rdata, sizeof(rdata),
((struct XLogRecData *)PT_REGS_PARM1(ctx)));
bpf_probe_read(&rec, sizeof(rec),
((struct XLogRecord *)rdata.data));
u64 zero = 0, *val;
val = wal_records.lookup_or_init(&key, &zero);
(*val) += rec.xl_tot_len;
return 0;
}
void probe_exec_simple_query(struct pt_regs *ctx, const char *query_string)
{
u32 pid = bpf_get_current_pid_tgid();
struct backend data = {};
data.pid = pid;
bpf_probe_read(&data.query, QUERY_LEN, &(*query_string));
queries.update(&pid, &data);
}
void probe_exec_simple_query_finish(struct pt_regs *ctx)
{
u32 pid = bpf_get_current_pid_tgid();
queries.delete(&pid);
}
"""
# SIGINT handler used during shutdown
def signal_ignore(sig, frame):
    """Swallow a signal, emitting only a blank line.

    Installed for SIGINT once shutdown has begun so that further Ctrl-C
    presses do not interrupt cleanup. Both arguments (signal number and
    current stack frame) are accepted to satisfy the handler signature
    and are otherwise unused.
    """
    print("")
def attach(bpf, args):
    """Attach all probes of the BPF program to the PostgreSQL binary.

    bpf  -- object exposing attach_uprobe / attach_uretprobe.
    args -- parsed command line; `args.path` is the binary to instrument
            and `args.pid` the PID passed through to every attach call.

    Probes are attached in this order: entry and return of
    exec_simple_query, then entry of XLogInsertRecord.
    """
    target = args.path
    only_pid = args.pid

    # (attach method name, traced symbol, BPF handler function)
    probe_table = (
        ("attach_uprobe", "exec_simple_query",
         "probe_exec_simple_query"),
        ("attach_uretprobe", "exec_simple_query",
         "probe_exec_simple_query_finish"),
        ("attach_uprobe", "XLogInsertRecord",
         "probe_wal_insert_record"),
    )

    for method_name, symbol, handler in probe_table:
        hook = getattr(bpf, method_name)
        hook(name=target, sym=symbol, fn_name=handler, pid=only_pid)
def pre_process(text, args):
    """Return the BPF source `text` after namespace substitution.

    The work is delegated to utils.replace_namespace(text, args);
    presumably it expands the SAVE_NAMESPACE / CHECK_NAMESPACE
    placeholders according to the command line -- verify against utils.
    """
    return utils.replace_namespace(text, args)
def output(bpf, fmt="plain"):
    """Print the accumulated WAL counters from the `wal_records` table.

    bpf -- object exposing get_table(name).
    fmt -- output format; only "plain" produces output, any other value
           makes the function return without printing.

    In plain mode a blank line is printed first, then one line per table
    entry in the form "[pid:namespace] query: size", where size is
    utils.size() applied to the counter value.
    """
    if fmt != "plain":
        return

    print()
    line_template = "[{}:{}] {}: {}"
    for key, counter in bpf.get_table('wal_records').items():
        # The query is a fixed-size C char array; drop non-ASCII bytes.
        query_text = key.query.decode("ascii", "ignore")
        print(line_template.format(
            key.pid, key.namespace, query_text, utils.size(counter.value)))
def run(args):
    """Compile the BPF program, attach the probes and report periodically.

    args -- parsed command line (see parse_args); uses `debug` and
            `interval` directly and hands the whole object to
            pre_process() and attach().

    Every `args.interval` seconds the `wal_records` table is printed and
    then cleared. On Ctrl-C the loop ends, a "Detaching..." banner is
    printed and the table is printed one last time.
    """
    print("Attaching...")
    if args.debug:
        debug_flags = 4
    else:
        debug_flags = 0
    program = BPF(text=pre_process(bpf_text, args), debug=debug_flags)
    attach(program, args)

    print("Listening...")
    interrupted = False
    while not interrupted:
        try:
            sleep(args.interval)
            output(program)
            program.get_table('wal_records').clear()
            if args.debug:
                program.perf_buffer_poll()
        except KeyboardInterrupt:
            interrupted = True
            # Cleanup can take many seconds, so trap any further Ctrl-C.
            signal.signal(signal.SIGINT, signal_ignore)

    print()
    print("Detaching...")
    print()

    # Final report of whatever was collected since the last clear.
    output(program)
def parse_args():
    """Parse the command line (sys.argv) and return the argparse namespace.

    Positional:
      path             path to the PostgreSQL binary
    Options:
      -p/--pid         int, default -1
      -c/--container   str, default None
      -n/--namespace   int, default None
      -i/--interval    int seconds between reports, default 5
      -d/--debug       flag, default False
    """
    parser = argparse.ArgumentParser(
        description="Summarize WAL written by postgres backend",
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("path", type=str, help="path to PostgreSQL binary")

    # (flags, add_argument keyword settings), in help-output order
    option_specs = (
        (("-p", "--pid"),
         dict(type=int, default=-1, help="trace this PID only")),
        (("-c", "--container"),
         dict(type=str, help="trace this container only")),
        (("-n", "--namespace"),
         dict(type=int, help="trace this namespace only")),
        (("-i", "--interval"),
         dict(type=int, default=5,
              help="after how many seconds output the result")),
        (("-d", "--debug"),
         dict(action='store_true', default=False, help="debug mode")),
    )
    for flags, settings in option_specs:
        parser.add_argument(*flags, **settings)

    return parser.parse_args()
# Script entry point: parse the command line, then trace until interrupted.
if __name__ == "__main__":
    cli_args = parse_args()
    run(cli_args)