Skip to content

Commit 865ea16

Browse files
committed Feb 16, 2013
strip leading whitespace from triple-quoted strings (closes #70)
For example,

    s = """
        a
         b
        """

is now equivalent to "a\n b".
1 parent b8eae0e commit 865ea16

File tree

5 files changed

+97
-21
lines changed

5 files changed

+97
-21
lines changed
 

‎base/exports.jl

+2
Original file line numberDiff line numberDiff line change
@@ -1100,6 +1100,8 @@ export
11001100

11011101
# Macros
11021102
@str,
1103+
@mstr,
1104+
@imstr,
11031105
@I_str,
11041106
@E_str,
11051107
@B_str,

‎base/string.jl

+44
Original file line numberDiff line numberDiff line change
@@ -652,9 +652,53 @@ function interp_parse_bytes(s::String)
652652
interp_parse(s, unescape_string, writer)
653653
end
654654

655+
## multiline strings ##
656+
657+
function multiline_lstrip(s::String)
658+
if length(s) == 0 || !isspace(s[1])
659+
return s
660+
end
661+
lines = split(s, '\n')
662+
663+
# trim leading,trailing whitespace
664+
a,b = 1,length(lines)
665+
if b == 1 return s end
666+
if lstrip(lines[a]) == "" a += 1 end
667+
if lstrip(lines[b]) == "" b -= 1 end
668+
if a > b return s end
669+
670+
# find prefix
671+
first_line = lines[a]
672+
n = 0
673+
for c in first_line
674+
if isspace(c)
675+
n += 1
676+
else
677+
break
678+
end
679+
end
680+
prefix = (n == 0) ? "" : first_line[1:n]
681+
682+
# output string
683+
prefix_len = length(prefix)
684+
buf = memio(length(s) - (b-a+1)*prefix_len, false)
685+
for i = a:b
686+
line = lines[i]
687+
if begins_with(line, prefix)
688+
print(buf, line[prefix_len+1:end])
689+
else
690+
print(buf, line)
691+
end
692+
if i != b print(buf, '\n') end
693+
end
694+
takebuf_string(buf)
695+
end
696+
655697
## core string macros ##
656698

657699
macro str(s); interp_parse(s); end
700+
macro mstr(s); multiline_lstrip(s); end
701+
macro imstr(s); interp_parse(multiline_lstrip(s)); end
658702
macro I_str(s); interp_parse(s, x->unescape_chars(x,"\"")); end
659703
macro E_str(s); check_utf8(unescape_string(s)); end
660704
macro B_str(s); interp_parse_bytes(s); end

‎src/julia-parser.scm

+30-20
Original file line numberDiff line numberDiff line change
@@ -800,16 +800,19 @@
800800
(if (and (symbol? ex) (not (operator? ex))
801801
(not (ts:space? s)))
802802
;; custom prefixed string literals, x"s" => @x_str "s"
803-
(let ((str (begin (take-token s)
804-
(parse-string-literal s)))
805-
(macname (symbol (string #\@ ex '_str))))
806-
(let ((nxt (peek-token s)))
807-
(if (and (symbol? nxt) (not (operator? nxt))
808-
(not (ts:space? s)))
809-
;; string literal suffix, "s"x
810-
(loop `(macrocall ,macname ,(car str)
811-
,(string (take-token s))))
812-
(loop `(macrocall ,macname ,(car str))))))
803+
(let* ((str (begin (take-token s)
804+
(parse-string-literal s)))
805+
(nxt (peek-token s))
806+
(macname (symbol (string #\@ ex '_str)))
807+
(macstr (if (triplequote-string-literal? str)
808+
`(macrocall @mstr ,(car str))
809+
(car str))))
810+
(if (and (symbol? nxt) (not (operator? nxt))
811+
(not (ts:space? s)))
812+
;; string literal suffix, "s"x
813+
(loop `(macrocall ,macname ,macstr
814+
,(string (take-token s))))
815+
(loop `(macrocall ,macname ,macstr))))
813816
ex))
814817
(else ex))))))))
815818

@@ -1312,7 +1315,7 @@
13121315
c))
13131316

13141317
(define (take-char p)
1315-
(begin (read-char p) p))
1318+
(begin (read-char p) p))
13161319

13171320
; reads a raw string literal with no processing.
13181321
; quote can be escaped with \, but the \ is left in place.
@@ -1322,7 +1325,7 @@
13221325
(if (eqv? (peek-char p) #\")
13231326
(if (eqv? (peek-char (take-char p)) #\")
13241327
(parse-string-literal-3 (take-char p))
1325-
(cons "" #f))
1328+
(cons "" (cons #f #f)))
13261329
(parse-string-literal-1 p))))
13271330

13281331
(define (parse-string-literal-1 p)
@@ -1340,7 +1343,7 @@
13401343
(set! interpolate #t))
13411344
(write-char (not-eof-3 c) b)))
13421345
(loop (read-char p)))))
1343-
(cons (io.tostring! b) interpolate)))
1346+
(cons (io.tostring! b) (cons interpolate #f))))
13441347

13451348
(define (parse-string-literal-3 p)
13461349
(let ((b (open-output-string))
@@ -1366,7 +1369,10 @@
13661369
(set! interpolate #t))
13671370
(write-char (not-eof-3 c) b)))
13681371
(loop (read-char p)))))
1369-
(cons (io.tostring! b) interpolate)))
1372+
(cons (io.tostring! b) (cons interpolate #t))))
1373+
1374+
(define (interpolate-string-literal? s) (cadr s))
1375+
(define (triplequote-string-literal? s) (cddr s))
13701376

13711377
(define (not-eof-1 c)
13721378
(if (eof-object? c)
@@ -1542,12 +1548,16 @@
15421548
((eqv? t #\")
15431549
(take-token s)
15441550
(let ((ps (parse-string-literal s)))
1545-
(if (cdr ps)
1546-
`(macrocall @str ,(car ps))
1547-
(let ((str (unescape-string (car ps))))
1548-
(if (not (string.isutf8 str))
1549-
(error "invalid UTF-8 sequence"))
1550-
str))))
1551+
(if (interpolate-string-literal? ps)
1552+
(if (triplequote-string-literal? ps)
1553+
`(macrocall @imstr ,(car ps))
1554+
`(macrocall @str ,(car ps)))
1555+
(let ((str (unescape-string (car ps))))
1556+
(if (not (string.isutf8 str))
1557+
(error "invalid UTF-8 sequence"))
1558+
(if (triplequote-string-literal? ps)
1559+
`(macrocall @mstr ,str)
1560+
str)))))
15511561

15521562
;; macro call
15531563
((eqv? t #\@)

‎src/julia-syntax.scm

+2-1
Original file line numberDiff line numberDiff line change
@@ -2121,7 +2121,8 @@ So far only the second case can actually occur.
21212121

21222122
(define (julia-expand-strs e)
21232123
(cond ((not (pair? e)) e)
2124-
((and (eq? (car e) 'macrocall) (eq? (cadr e) '@str))
2124+
((and (eq? (car e) 'macrocall) (or (eq? (cadr e) '@str)
2125+
(eq? (cadr e) '@mstr)))
21252126
;; expand macro
21262127
(let ((form
21272128
(apply invoke-julia-macro (cadr e) (cddr e))))

‎test/strings.jl

+19
Original file line numberDiff line numberDiff line change
@@ -508,3 +508,22 @@ str = "s\u2200"
508508
@test """ab""c""" == "ab\"\"c"
509509
@test """ab"\"c""" == "ab\"\"c"
510510
@test """abc\"""" == "abc\""
511+
n = 3
512+
@test """$n""" == "$n"
513+
@test E"""$n""" == E"$n"
514+
@test """
515+
a
516+
b
517+
518+
c
519+
""" == "a\nb\n\nc"
520+
@test """x
521+
a
522+
""" == "x\n a\n "
523+
@test """
524+
$n
525+
""" == "3"
526+
@test E"""
527+
$n
528+
""" == E"$n"
529+

6 commit comments

Comments
 (6)

StefanKarpinski commented on Feb 16, 2013

@StefanKarpinski
Member

Cool. This isn't quite the behavior I was thinking of, but it's close. The trailing newline should be kept, so that

julia> str = """
             Hello,
             world.
             """
"Hello,\nworld.\n"

Note the newline at the end. Also, with this it is impossible to express intentionally indented text. For example, I would want this:

julia> str = """
               Hello,
               world.
             """
"  Hello,\n  world.\n"

This can be done by only stripping the whitespace leading up to the closing triple quote. You can merge now though and I can tweak it later. Or we can debate a bit if people strongly disagree.

JeffBezanson commented on Feb 16, 2013

@JeffBezanson
Member

What should it do if the string doesn't end in a newline, e.g.

        world."""

StefanKarpinski commented on Feb 17, 2013

@StefanKarpinski
Member

Probably use the indent of that line.

nolta commented on Feb 17, 2013

@nolta
Member · Author

Ok, i'll switch to using the last line to strip whitespace, instead of the first.

But i think we should still discard the trailing newline.

StefanKarpinski commented on Feb 17, 2013

@StefanKarpinski
Member

But i think we should still discard the trailing newline.

Justification? I've found that you almost always want the trailing newline when generating text.

nolta commented on Feb 17, 2013

@nolta
Member · Author
  • Personal preference: i almost always don't want the trailing newline.
  • Symmetry: leading and trailing whitespace are treated the same.
  • As @kmsquire pointed out, it would be impossible to custom indent a string w/o the trailing newline.
Please sign in to comment.