@@ -27,6 +27,7 @@ pprint.defaults = {
27
27
level_width = 80 , -- max width per indent level
28
28
wrap_string = true , -- wrap string when it's longer than level_width
29
29
wrap_array = false , -- wrap every array elements
30
+ string_is_utf8 = true , -- treat string as utf8, and count utf8 char when wrapping, if possible
30
31
sort_keys = true , -- sort table keys
31
32
}
32
33
@@ -42,16 +43,62 @@ local ESCAPE_MAP = {
42
43
}
43
44
44
45
-- generic utilities
45
- local function escape (s )
46
- s = s :gsub (' ([%c\\ ])' , ESCAPE_MAP )
47
- local dq = s :find (' "' )
48
- local sq = s :find (" '" )
49
- if dq and sq then
50
- return s :gsub (' "' , ' \\ "' ), ' "'
51
- elseif sq then
52
- return s , ' "'
53
- else
54
- return s , " '"
46
--- Split a string into one display token per byte.
-- Each token is a table `{ char = <text to emit>, len = <width of that text> }`:
--   * a byte with an ESCAPE_MAP entry is replaced by that entry,
--   * printable ASCII (0x20..0x7e) is emitted unchanged,
--   * every other byte (control characters, DEL, bytes >= 0x80) becomes `\xNN`.
-- The returned array additionally carries `has_double_quote` and
-- `has_single_quote`, which are set to true when the respective quote
-- character occurs in `s` and left nil otherwise.
-- @tparam string s raw string to tokenize
-- @treturn table array of tokens plus the two quote flags
local tokenize_string = function(s)
    local tokens = {}
    for i = 1, #s do
        local c = s:sub(i, i)
        local escaped = ESCAPE_MAP[c]
        if escaped then
            tokens[i] = { char = escaped, len = #escaped }
        else
            local b = c:byte()
            -- 0x7f (DEL) is a control character, so it is hex-escaped as well
            if b >= 0x20 and b < 0x7f then
                tokens[i] = { char = c, len = 1 }
            else
                tokens[i] = { char = string.format('\\x%02x', b), len = 4 }
            end
        end
        if c == '"' then
            tokens.has_double_quote = true
        elseif c == "'" then
            tokens.has_single_quote = true
        end
    end
    return tokens
end
66
-- UTF-8 aware tokenizer. Defaults to the byte-wise tokenizer and is replaced
-- below only when the optional LPeg library can be loaded.
local tokenize_utf8_string = tokenize_string

local has_lpeg, lpeg = pcall(require, 'lpeg')

if has_lpeg then
    -- A well-formed multi-byte sequence is emitted verbatim and counts as
    -- one character when measuring line width.
    local function utf8_valid_char(c)
        return { char = c, len = 1 }
    end

    -- Any single byte that is not part of a multi-byte sequence follows the
    -- same rules as the byte-wise tokenizer: ESCAPE_MAP entry first, then
    -- printable ASCII (0x20..0x7e) unchanged, otherwise `\xNN`.
    local function single_byte_token(c)
        local escaped = ESCAPE_MAP[c]
        if escaped then
            return { char = escaped, len = #escaped }
        end
        local b = c:byte()
        if b >= 0x20 and b < 0x7f then
            return { char = c, len = 1 }
        end
        return { char = string.format('\\x%02x', b), len = 4 }
    end

    local cont = lpeg.R('\x80\xbf')
    -- Only multi-byte sequences are matched here; ASCII is left to
    -- single_byte_token so that characters with an ESCAPE_MAP entry (which
    -- would otherwise match a plain printable range first) are still escaped.
    local utf8_multibyte =
        lpeg.R('\xc0\xdf') * cont +
        lpeg.R('\xe0\xef') * cont * cont +
        lpeg.R('\xf0\xf7') * cont * cont * cont

    -- lpeg.Ct always yields a table, even when nothing is captured. Without
    -- it, matching an empty string returns the end position (a number)
    -- instead of a token list.
    local utf8_capture = lpeg.Ct(
        ((utf8_multibyte / utf8_valid_char) + (lpeg.P(1) / single_byte_token)) ^ 0
    ) * -1

    --- Split a string into display tokens, one per UTF-8 character.
    -- @tparam string s raw string to tokenize
    -- @treturn table array of `{ char, len }` tokens with boolean
    --   `has_double_quote` / `has_single_quote` fields and the token count in `n`
    tokenize_utf8_string = function(s)
        local t = utf8_capture:match(s) or {}
        t.n = #t -- kept for parity with the table.pack-based result
        t.has_double_quote = s:find('"', 1, true) ~= nil
        t.has_single_quote = s:find("'", 1, true) ~= nil
        return t
    end
end
57
104
@@ -201,9 +248,11 @@ function pprint.pformat(obj, option, printer)
201
248
local status = {
202
249
indent = ' ' , -- current indent
203
250
len = 0 , -- current line length
251
+ printed_something = false , -- used to remove leading new lines
204
252
}
205
253
206
254
-- One-call-delayed printer: records that output has started, sends the
-- previously buffered chunk (`last`) to `printer`, then buffers `s` as the
-- chunk to be sent on the next call.
local function wrapped_printer(s)
    status.printed_something = true
    printer(last)
    last = s
end
@@ -213,6 +262,7 @@ function pprint.pformat(obj, option, printer)
213
262
end
214
263
215
264
local function _n (d )
265
+ if not status .printed_something then return end
216
266
wrapped_printer (' \n ' )
217
267
wrapped_printer (status .indent )
218
268
if d then
@@ -269,26 +319,85 @@ function pprint.pformat(obj, option, printer)
269
319
end
270
320
271
321
--- Format a string value as a Lua string literal.
-- Short strings are returned quoted ('...' by default, "..." when the string
-- contains a single quote; embedded double quotes are backslash-escaped when
-- both kinds occur). When `force_long_quote` is set, or when the string has
-- to be wrapped across lines, a long-bracket literal `[=*[ ... ]=*]` is used
-- with enough `=` signs that the content cannot close it early.
-- May emit a line break and leading wrapped lines through `_n` / `_p`.
-- @tparam string s the string to format
-- @param force_long_quote truthy to always use long brackets
-- @treturn string the (remaining) text of the literal
local function string_formatter(s, force_long_quote)
    local tokens = option.string_is_utf8 and tokenize_utf8_string(s) or tokenize_string(s)
    local escape_quotes = tokens.has_double_quote and tokens.has_single_quote

    -- displayed width of the content, counting an escaped `"` as two columns
    local string_len = 0
    for _, token in ipairs(tokens) do
        if escape_quotes and token.char == '"' then
            string_len = string_len + 2
        else
            string_len = string_len + token.len
        end
    end

    local quote_len = 2
    local long_quote_dashes = 0

    -- Raise the bracket level until the closing delimiter cannot occur early.
    -- The search runs over `s` plus the first part of the closer, so content
    -- ending in `]` or `]=...` (which would otherwise merge with the closing
    -- bracket and end the literal one character too soon) is detected too.
    local function compute_long_quote_dashes()
        while true do
            local dashes = string.rep('=', long_quote_dashes)
            if not (s .. ']' .. dashes):find(']' .. dashes .. ']', 1, true) then
                return
            end
            long_quote_dashes = long_quote_dashes + 1
        end
    end

    if force_long_quote then
        compute_long_quote_dashes()
        -- opening `[=*[` plus closing `]=*]`
        quote_len = 4 + 2 * long_quote_dashes
    end

    if quote_len + string_len + status.len > option.level_width then
        _n()
        -- only wrap string when is longer than level_width
        if option.wrap_string and string_len + quote_len > option.level_width then
            if not force_long_quote then
                compute_long_quote_dashes()
            end
            -- keep the quotes together
            local dashes = string.rep('=', long_quote_dashes)
            _p('[' .. dashes .. '[', true)
            local status_len = status.len
            local line_len = 0
            local line = ''
            for _, token in ipairs(tokens) do
                if line_len + token.len + status_len > option.level_width then
                    -- current line is full: flush it and start a new one
                    _n()
                    _p(line, true)
                    line_len = token.len
                    line = token.char
                else
                    line_len = line_len + token.len
                    line = line .. token.char
                end
            end

            return line .. ']' .. dashes .. ']'
        end
    end

    if escape_quotes and not force_long_quote then
        -- both quote kinds present: the literal is double-quoted below, so
        -- embedded double quotes need a backslash
        for _, token in ipairs(tokens) do
            if token.char == '"' then
                token.char = '\\"'
            end
        end
    end

    local pieces = {}
    for i, token in ipairs(tokens) do
        pieces[i] = token.char
    end
    local concat = table.concat(pieces)

    if force_long_quote then
        local dashes = string.rep('=', long_quote_dashes)
        return '[' .. dashes .. '[' .. concat .. ']' .. dashes .. ']'
    elseif tokens.has_single_quote then
        -- use double quote
        return '"' .. concat .. '"'
    else
        -- use single quote
        return "'" .. concat .. "'"
    end
end
293
402
294
403
local function table_formatter (t )
0 commit comments