Skip to content

Commit 16f5e9e

Browse files
committed
Add support for non-decimal floating point literals.
Issue rust-lang#1433. The chosen syntax requires the radix prefix (0b, 0x, or 0o) to be repeated after the dot. If the literal is hexadecimal, an exponent is required.
1 parent 0ba6d48 commit 16f5e9e

15 files changed

+123
-197
lines changed

src/doc/rust.md

+19-14
Original file line numberDiff line numberDiff line change
@@ -319,21 +319,23 @@ r##"foo #"# bar"##; // foo #"# bar
319319
#### Number literals
320320

321321
~~~~ {.ebnf .gram}
322-
num_lit : nonzero_dec [ dec_digit | '_' ] * num_suffix ?
323-
| '0' [ [ dec_digit | '_' ] * num_suffix ?
324-
| 'b' [ '1' | '0' | '_' ] + int_suffix ?
325-
| 'o' [ oct_digit | '_' ] + int_suffix ?
326-
| 'x' [ hex_digit | '_' ] + int_suffix ? ] ;
322+
num_lit : radix_lit num_suffix ;
323+
324+
radix_lit : nonzero_dec [ dec_digit | '_' ] *
325+
| '0' [ [ dec_digit | '_' ] *
326+
| 'b' [ '1' | '0' | '_' ] +
327+
| 'o' [ oct_digit | '_' ] +
328+
| 'x' [ hex_digit | '_' ] + ] ;
327329
328330
num_suffix : int_suffix | float_suffix ;
329331
330332
int_suffix : 'u' int_suffix_size ?
331333
| 'i' int_suffix_size ? ;
332334
int_suffix_size : [ '8' | '1' '6' | '3' '2' | '6' '4' ] ;
333335
334-
float_suffix : [ exponent | '.' dec_lit exponent ? ] ? float_suffix_ty ? ;
336+
float_suffix : [ exponent | '.' radix_lit exponent ? ] ? float_suffix_ty ? ;
335337
float_suffix_ty : 'f' [ '3' '2' | '6' '4' ] ;
336-
exponent : ['E' | 'e'] ['-' | '+' ] ? dec_lit ;
338+
exponent : ['E' | 'e' | 'p' | 'P'] ['-' | '+' ] ? dec_lit ;
337339
dec_lit : [ dec_digit | '_' ] + ;
338340
~~~~
339341

@@ -343,7 +345,7 @@ as they are differentiated by suffixes.
343345

344346
##### Integer literals
345347

346-
An _integer literal_ has one of four forms:
348+
A _radix literal_ has one of four forms:
347349

348350
* A _decimal literal_ starts with a *decimal digit* and continues with any
349351
mixture of *decimal digits* and _underscores_.
@@ -354,9 +356,9 @@ An _integer literal_ has one of four forms:
354356
* A _binary literal_ starts with the character sequence `U+0030` `U+0062`
355357
(`0b`) and continues as any mixture of binary digits and underscores.
356358

357-
An integer literal may be followed (immediately, without any spaces) by an
358-
_integer suffix_, which changes the type of the literal. There are two kinds
359-
of integer literal suffix:
359+
An integer literal consists of a radix literal and may be followed
360+
(immediately, without any spaces) by an _integer suffix_, which changes the
361+
type of the literal. There are two kinds of integer literal suffix:
360362

361363
* The `i` and `u` suffixes give the literal type `int` or `uint`,
362364
respectively.
@@ -389,10 +391,11 @@ Examples of integer literals of various forms:
389391

390392
A _floating-point literal_ has one of two forms:
391393

392-
* Two _decimal literals_ separated by a period
394+
* Two _radix literals_ separated by a period
393395
character `U+002E` (`.`), with an optional _exponent_ trailing after the
394-
second decimal literal.
395-
* A single _decimal literal_ followed by an _exponent_.
396+
second radix literal. Both radix literals must have the same base.
397+
* A single _radix literal_ followed by an _exponent_.
398+
* If the float literal is hexadecimal, an _exponent_ must be supplied.
396399

397400
By default, a floating-point literal has a generic type, but will fall back to
398401
`f64`. A floating-point literal may be followed (immediately, without any
@@ -406,6 +409,8 @@ Examples of floating-point literals of various forms:
406409
123.0; // type f64
407410
0.1; // type f64
408411
0.1f32; // type f32
412+
0x4.0x432p-4_f32; // type f32
413+
0b1.0b10111011011000; // type f64
409414
12E+99_f64; // type f64
410415
~~~~
411416

src/libsyntax/parse/lexer.rs

+92-35
Original file line numberDiff line numberDiff line change
@@ -443,8 +443,7 @@ fn scan_exponent(rdr: &StringReader, start_bpos: BytePos) -> Option<~str> {
443443
// \x00 hits the `return None` case immediately, so this is fine.
444444
let mut c = rdr.curr.get().unwrap_or('\x00');
445445
let mut rslt = ~"";
446-
if c == 'e' || c == 'E' {
447-
rslt.push_char(c);
446+
if c == 'e' || c == 'E' || c == 'p' || c == 'P' {
448447
bump(rdr);
449448
c = rdr.curr.get().unwrap_or('\x00');
450449
if c == '-' || c == '+' {
@@ -476,40 +475,32 @@ fn scan_digits(rdr: &StringReader, radix: uint) -> ~str {
476475
};
477476
}
478477

479-
fn check_float_base(rdr: &StringReader, start_bpos: BytePos, last_bpos: BytePos,
480-
base: uint) {
481-
match base {
482-
16u => fatal_span(rdr, start_bpos, last_bpos,
483-
~"hexadecimal float literal is not supported"),
484-
8u => fatal_span(rdr, start_bpos, last_bpos,
485-
~"octal float literal is not supported"),
486-
2u => fatal_span(rdr, start_bpos, last_bpos,
487-
~"binary float literal is not supported"),
488-
_ => ()
489-
}
490-
}
491-
492-
fn scan_number(c: char, rdr: &StringReader) -> token::Token {
493-
let mut num_str;
494-
let mut base = 10u;
495-
let mut c = c;
496-
let mut n = nextch(rdr).unwrap_or('\x00');
497-
let start_bpos = rdr.last_pos.get();
478+
fn scan_radix(rdr: &StringReader) -> uint {
479+
let c = rdr.curr.get().unwrap_or('\x00');
480+
let n = nextch(rdr).unwrap_or('\x00');
498481
if c == '0' && n == 'x' {
499482
bump(rdr);
500483
bump(rdr);
501-
base = 16u;
484+
return 16u;
502485
} else if c == '0' && n == 'o' {
503486
bump(rdr);
504487
bump(rdr);
505-
base = 8u;
488+
return 8u;
506489
} else if c == '0' && n == 'b' {
507490
bump(rdr);
508491
bump(rdr);
509-
base = 2u;
492+
return 2u;
510493
}
494+
return 10u;
495+
}
496+
497+
fn scan_number(rdr: &StringReader) -> token::Token {
498+
let mut num_str;
499+
let start_bpos = rdr.last_pos.get();
500+
let mut base = scan_radix(rdr);
511501
num_str = scan_digits(rdr, base);
512-
c = rdr.curr.get().unwrap_or('\x00');
502+
let mut c = rdr.curr.get().unwrap_or('\x00');
503+
let mut n:char;
513504
nextch(rdr);
514505
if c == 'u' || c == 'i' {
515506
enum Result { Signed(ast::IntTy), Unsigned(ast::UintTy) }
@@ -558,19 +549,71 @@ fn scan_number(c: char, rdr: &StringReader) -> token::Token {
558549
}
559550
}
560551
let mut is_float = false;
552+
let mut dec_part = ~"";
561553
if rdr.curr_is('.') && !(ident_start(nextch(rdr)) || nextch_is(rdr, '.')) {
562554
is_float = true;
563555
bump(rdr);
564-
let dec_part = scan_digits(rdr, 10u);
565-
num_str.push_char('.');
566-
num_str.push_str(dec_part);
556+
let mantissa_base = scan_radix(rdr);
557+
if mantissa_base != base {
558+
//The ability to switch base, while conceivably useful, is much more
559+
//likely to be triggered by accident.
560+
fatal_span(rdr, start_bpos, rdr.last_pos.get(),
561+
~"float literals must have consistent base before and after decimal point");
562+
}
563+
base = mantissa_base;
564+
dec_part = scan_digits(rdr, mantissa_base);
567565
}
566+
let mut exp_part = ~"";
568567
match scan_exponent(rdr, start_bpos) {
569-
Some(ref s) => {
568+
Some(s) => {
570569
is_float = true;
571-
num_str.push_str(*s);
570+
exp_part = s;
572571
}
573-
None => ()
572+
None => {
573+
if is_float && base > 10 {
574+
//otherwise we have ambiguity: 0x1.0xffff_f32 gets parsed as
575+
//0x1.fffff32, which will create confusing results.
576+
fatal_span(rdr, start_bpos, rdr.last_pos.get(),
577+
~"hexadecimal float literals must contain exponent");
578+
}
579+
}
580+
}
581+
if is_float {
582+
if base == 10 || base == 16 {
583+
num_str.push_char('.');
584+
num_str.push_str( if dec_part.len() > 0 {dec_part} else {~"0"} );
585+
if exp_part.len() != 0 {
586+
num_str.push_char(if base == 10 {'e'} else {'p'});
587+
num_str.push_str(exp_part);
588+
}
589+
} else {
590+
num_str = from_str_radix::<u64>(num_str, base).unwrap().to_str_radix(16);
591+
let mut i = 0;
592+
let len = dec_part.len();
593+
let step = match base { 8 => 2, 2 => 4, _ => fail!("Impossible base for float")};
594+
let mut dec_str = ~"";
595+
while i < len {
596+
let chunk = if i + step > len {
597+
let mut chunk = dec_part.slice_from(i).to_str();
598+
for _ in range(0, i + step - len) {
599+
chunk.push_char('0');
600+
}
601+
chunk
602+
} else {
603+
dec_part.slice(i, i + step).to_str()
604+
};
605+
dec_str.push_str(from_str_radix::<u8>(chunk, base).unwrap_or(0).to_str());
606+
i += step;
607+
}
608+
num_str.push_char('.');
609+
num_str.push_str(dec_str);
610+
num_str.push_char('p');
611+
num_str.push_str(if exp_part.len() > 0 {exp_part} else {~"0"});
612+
}
613+
if base != 10 {
614+
num_str.unshift_char('x');
615+
num_str.unshift_char('0');
616+
}
574617
}
575618

576619
if rdr.curr_is('f') {
@@ -580,12 +623,10 @@ fn scan_number(c: char, rdr: &StringReader) -> token::Token {
580623
if c == '3' && n == '2' {
581624
bump(rdr);
582625
bump(rdr);
583-
check_float_base(rdr, start_bpos, rdr.last_pos.get(), base);
584626
return token::LIT_FLOAT(str_to_ident(num_str), ast::TyF32);
585627
} else if c == '6' && n == '4' {
586628
bump(rdr);
587629
bump(rdr);
588-
check_float_base(rdr, start_bpos, rdr.last_pos.get(), base);
589630
return token::LIT_FLOAT(str_to_ident(num_str), ast::TyF64);
590631
/* FIXME (#2252): if this is out of range for either a
591632
32-bit or 64-bit float, it won't be noticed till the
@@ -596,7 +637,6 @@ fn scan_number(c: char, rdr: &StringReader) -> token::Token {
596637
}
597638
}
598639
if is_float {
599-
check_float_base(rdr, start_bpos, rdr.last_pos.get(), base);
600640
return token::LIT_FLOAT_UNSUFFIXED(str_to_ident(num_str));
601641
} else {
602642
if num_str.len() == 0u {
@@ -687,7 +727,7 @@ fn next_token_inner(rdr: &StringReader) -> token::Token {
687727
})
688728
}
689729
if is_dec_digit(c) {
690-
return scan_number(c.unwrap(), rdr);
730+
return scan_number(rdr);
691731
}
692732
fn binop(rdr: &StringReader, op: token::BinOp) -> token::Token {
693733
bump(rdr);
@@ -1005,6 +1045,7 @@ mod test {
10051045
use diagnostic;
10061046
use parse::token;
10071047
use parse::token::{str_to_ident};
1048+
use ast;
10081049

10091050
// represents a testing reader (incl. both reader and interner)
10101051
struct Env {
@@ -1139,4 +1180,20 @@ mod test {
11391180
assert_eq!(tok,token::LIT_CHAR('a' as u32));
11401181
}
11411182

1183+
#[test] fn hex_floats() {
1184+
let env = setup(~"0x1.0xffffffp100_f32");
1185+
let TokenAndSpan {tok, sp: _} =
1186+
env.string_reader.next_token();
1187+
let id = token::str_to_ident("0x1.ffffffp100");
1188+
assert_eq!(tok,token::LIT_FLOAT(id, ast::TyF32));
1189+
}
1190+
1191+
#[test] fn bin_floats() {
1192+
let env = setup(~"0b1.0b0000_0001_0010_0011_1p100_f32");
1193+
let TokenAndSpan {tok, sp: _} =
1194+
env.string_reader.next_token();
1195+
let id = token::str_to_ident("0x1.01238p100");
1196+
assert_eq!(tok,token::LIT_FLOAT(id, ast::TyF32));
1197+
}
1198+
11421199
}

src/test/compile-fail/lex-bad-fp-base-1.rs

-13
This file was deleted.

src/test/compile-fail/lex-bad-fp-base-2.rs

-13
This file was deleted.

src/test/compile-fail/lex-bad-fp-base-3.rs

-13
This file was deleted.

src/test/compile-fail/lex-bad-fp-base-4.rs

-13
This file was deleted.

src/test/compile-fail/lex-bad-fp-base-5.rs

-13
This file was deleted.

src/test/compile-fail/lex-bad-fp-base-6.rs

-13
This file was deleted.

src/test/compile-fail/lex-bad-fp-base-7.rs

-13
This file was deleted.

src/test/compile-fail/lex-bad-fp-base-8.rs

-13
This file was deleted.

0 commit comments

Comments
 (0)