diff options
| -rw-r--r-- | doc/langref.html.in | 11 | ||||
| -rw-r--r-- | lib/std/math/big/int.zig | 87 | ||||
| -rw-r--r-- | lib/std/math/big/rational.zig | 38 | ||||
| -rw-r--r-- | lib/std/special/compiler_rt/floatundisf.zig | 38 | ||||
| -rw-r--r-- | lib/std/zig/parser_test.zig | 69 | ||||
| -rw-r--r-- | lib/std/zig/tokenizer.zig | 495 | ||||
| -rw-r--r-- | src-self-hosted/ir.zig | 15 | ||||
| -rw-r--r-- | src/ir.cpp | 2 | ||||
| -rw-r--r-- | src/main.cpp | 14 | ||||
| -rw-r--r-- | src/parse_f128.c | 79 | ||||
| -rw-r--r-- | src/tokenizer.cpp | 145 | ||||
| -rw-r--r-- | test/compile_errors.zig | 154 | ||||
| -rw-r--r-- | test/stage1/behavior/math.zig | 28 | ||||
| -rw-r--r-- | test/stage1/behavior/slice.zig | 14 |
14 files changed, 983 insertions, 206 deletions
diff --git a/doc/langref.html.in b/doc/langref.html.in index 2e13367a00..cf57aa6b13 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -885,6 +885,12 @@ const hex_int = 0xff; const another_hex_int = 0xFF; const octal_int = 0o755; const binary_int = 0b11110000; + +// underscores may be placed between two digits as a visual separator +const one_billion = 1_000_000_000; +const binary_mask = 0b1_1111_1111; +const permissions = 0o7_5_5; +const big_address = 0xFF80_0000_0000_0000; {#code_end#} {#header_close#} {#header_open|Runtime Integer Values#} @@ -947,6 +953,11 @@ const yet_another = 123.0e+77; const hex_floating_point = 0x103.70p-5; const another_hex_float = 0x103.70; const yet_another_hex_float = 0x103.70P-5; + +// underscores may be placed between two digits as a visual separator +const lightspeed = 299_792_458.000_000; +const nanosecond = 0.000_000_001; +const more_hex = 0x1234_5678.9ABC_CDEFp-10; {#code_end#} <p> There is no syntax for NaN, infinity, or negative infinity. For these special values, diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig index 2a3c9d508a..51e97abf61 100644 --- a/lib/std/math/big/int.zig +++ b/lib/std/math/big/int.zig @@ -373,6 +373,7 @@ pub const Int = struct { const d = switch (ch) { '0'...'9' => ch - '0', 'a'...'f' => (ch - 'a') + 0xa, + 'A'...'F' => (ch - 'A') + 0xa, else => return error.InvalidCharForDigit, }; @@ -393,8 +394,9 @@ pub const Int = struct { /// Set self from the string representation `value`. /// - /// value must contain only digits <= `base`. Base prefixes are not allowed (e.g. 0x43 should - /// simply be 43). + /// `value` must contain only digits <= `base` and is case insensitive. Base prefixes are + /// not allowed (e.g. 0x43 should simply be 43). Underscores in the input string are + /// ignored and can be used as digit separators. /// /// Returns an error if memory could not be allocated or `value` has invalid digits for the /// requested base. @@ -415,6 +417,9 @@ pub const Int = struct { try self.set(0); for (value[i..]) |ch| { + if (ch == '_') { + continue; + } const d = try charToDigit(ch, base); const ap_d = Int.initFixed(([_]Limb{d})[0..]); @@ -529,13 +534,14 @@ pub const Int = struct { return out_stream.writeAll(str); } - /// Returns -1, 0, 1 if |a| < |b|, |a| == |b| or |a| > |b| respectively. - pub fn cmpAbs(a: Int, b: Int) i8 { + /// Returns math.Order.lt, math.Order.eq, math.Order.gt if |a| < |b|, |a| == + /// |b| or |a| > |b| respectively. + pub fn cmpAbs(a: Int, b: Int) math.Order { if (a.len() < b.len()) { - return -1; + return .lt; } if (a.len() > b.len()) { - return 1; + return .gt; } var i: usize = a.len() - 1; @@ -546,21 +552,26 @@ pub const Int = struct { } if (a.limbs[i] < b.limbs[i]) { - return -1; + return .lt; } else if (a.limbs[i] > b.limbs[i]) { - return 1; + return .gt; } else { - return 0; + return .eq; } } - /// Returns -1, 0, 1 if a < b, a == b or a > b respectively. - pub fn cmp(a: Int, b: Int) i8 { + /// Returns math.Order.lt, math.Order.eq, math.Order.gt if a < b, a == b or a + /// > b respectively. + pub fn cmp(a: Int, b: Int) math.Order { if (a.isPositive() != b.isPositive()) { - return if (a.isPositive()) @as(i8, 1) else -1; + return if (a.isPositive()) .gt else .lt; } else { const r = cmpAbs(a, b); - return if (a.isPositive()) r else -r; + return if (a.isPositive()) r else switch (r) { + .lt => math.Order.gt, + .eq => math.Order.eq, + .gt => math.Order.lt, + }; } } @@ -571,12 +582,12 @@ pub const Int = struct { /// Returns true if |a| == |b|. pub fn eqAbs(a: Int, b: Int) bool { - return cmpAbs(a, b) == 0; + return cmpAbs(a, b) == .eq; } /// Returns true if a == b. pub fn eq(a: Int, b: Int) bool { - return cmp(a, b) == 0; + return cmp(a, b) == .eq; } // Normalize a possible sequence of leading zeros. @@ -689,7 +700,7 @@ pub const Int = struct { } else { if (a.isPositive()) { // (a) - (b) => a - b - if (a.cmp(b) >= 0) { + if (a.cmp(b) != .lt) { try r.ensureCapacity(a.len() + 1); llsub(r.limbs[0..], a.limbs[0..a.len()], b.limbs[0..b.len()]); r.normalize(a.len()); @@ -702,7 +713,7 @@ pub const Int = struct { } } else { // (-a) - (-b) => -(a - b) - if (a.cmp(b) < 0) { + if (a.cmp(b) == .lt) { try r.ensureCapacity(a.len() + 1); llsub(r.limbs[0..], a.limbs[0..a.len()], b.limbs[0..b.len()]); r.normalize(a.len()); @@ -1005,7 +1016,7 @@ pub const Int = struct { @panic("quo and rem cannot be same variable"); } - if (a.cmpAbs(b) < 0) { + if (a.cmpAbs(b) == .lt) { // quo may alias a so handle rem first try rem.copy(a); rem.setSign(a.isPositive() == b.isPositive()); @@ -1128,7 +1139,7 @@ pub const Int = struct { // 2. try tmp.shiftLeft(y.*, Limb.bit_count * (n - t)); - while (x.cmp(tmp) >= 0) { + while (x.cmp(tmp) != .lt) { q.limbs[n - t] += 1; try x.sub(x.*, tmp); } @@ -1159,7 +1170,7 @@ pub const Int = struct { r.limbs[2] = carry; r.normalize(3); - if (r.cmpAbs(tmp) <= 0) { + if (r.cmpAbs(tmp) != .gt) { break; } @@ -1582,6 +1593,22 @@ test "big.int string negative" { testing.expect((try a.to(i32)) == -1023); } +test "big.int string set number with underscores" { + var a = try Int.init(testing.allocator); + defer a.deinit(); + + try a.setString(10, "__1_2_0_3_1_7_2_4_1_2_0_____9_1__2__4_7_8_1_2_4_1_2_9_0_8_4_7_1_2_4___"); + testing.expect((try a.to(u128)) == 120317241209124781241290847124); +} + +test "big.int string set case insensitive number" { + var a = try Int.init(testing.allocator); + defer a.deinit(); + + try a.setString(16, "aB_cD_eF"); + testing.expect((try a.to(u32)) == 0xabcdef); +} + test "big.int string set bad char error" { var a = try Int.init(testing.allocator); defer a.deinit(); @@ -1698,8 +1725,8 @@ test "big.int compare" { var b = try Int.initSet(testing.allocator, 10); defer b.deinit(); - testing.expect(a.cmpAbs(b) == 1); - testing.expect(a.cmp(b) == -1); + testing.expect(a.cmpAbs(b) == .gt); + testing.expect(a.cmp(b) == .lt); } test "big.int compare similar" { @@ -1708,8 +1735,8 @@ test "big.int compare similar" { var b = try Int.initSet(testing.allocator, 0xffffffffeeeeeeeeffffffffeeeeeeef); defer b.deinit(); - testing.expect(a.cmpAbs(b) == -1); - testing.expect(b.cmpAbs(a) == 1); + testing.expect(a.cmpAbs(b) == .lt); + testing.expect(b.cmpAbs(a) == .gt); } test "big.int compare different limb size" { @@ -1718,8 +1745,8 @@ test "big.int compare different limb size" { var b = try Int.initSet(testing.allocator, 1); defer b.deinit(); - testing.expect(a.cmpAbs(b) == 1); - testing.expect(b.cmpAbs(a) == -1); + testing.expect(a.cmpAbs(b) == .gt); + testing.expect(b.cmpAbs(a) == .lt); } test "big.int compare multi-limb" { @@ -1728,8 +1755,8 @@ test "big.int compare multi-limb" { var b = try Int.initSet(testing.allocator, 0x7777777799999999ffffeeeeffffeeeeffffeeeee); defer b.deinit(); - testing.expect(a.cmpAbs(b) == 1); - testing.expect(a.cmp(b) == -1); + testing.expect(a.cmpAbs(b) == .gt); + testing.expect(a.cmp(b) == .lt); } test "big.int equality" { @@ -2705,9 +2732,9 @@ test "big.int var args" { const c = try Int.initSet(testing.allocator, 11); defer c.deinit(); - testing.expect(a.cmp(c) == 0); + testing.expect(a.cmp(c) == .eq); const d = try Int.initSet(testing.allocator, 14); defer d.deinit(); - testing.expect(a.cmp(d) <= 0); + testing.expect(a.cmp(d) != .gt); } diff --git a/lib/std/math/big/rational.zig b/lib/std/math/big/rational.zig index a57183a623..cf88558576 100644 --- a/lib/std/math/big/rational.zig +++ b/lib/std/math/big/rational.zig @@ -326,18 +326,20 @@ pub const Rational = struct { r.q.swap(&other.q); } - /// Returns -1, 0, 1 if a < b, a == b or a > b respectively. - pub fn cmp(a: Rational, b: Rational) !i8 { + /// Returns math.Order.lt, math.Order.eq, math.Order.gt if a < b, a == b or a + /// > b respectively. + pub fn cmp(a: Rational, b: Rational) !math.Order { return cmpInternal(a, b, true); } - /// Returns -1, 0, 1 if |a| < |b|, |a| == |b| or |a| > |b| respectively. - pub fn cmpAbs(a: Rational, b: Rational) !i8 { + /// Returns math.Order.lt, math.Order.eq, math.Order.gt if |a| < |b|, |a| == + /// |b| or |a| > |b| respectively. + pub fn cmpAbs(a: Rational, b: Rational) !math.Order { return cmpInternal(a, b, false); } // p/q > x/y iff p*y > x*q - fn cmpInternal(a: Rational, b: Rational, is_abs: bool) !i8 { + fn cmpInternal(a: Rational, b: Rational, is_abs: bool) !math.Order { // TODO: Would a div compare algorithm of sorts be viable and quicker? Can we avoid // the memory allocations here? var q = try Int.init(a.p.allocator.?); @@ -450,7 +452,7 @@ pub const Rational = struct { r.p.setSign(sign); const one = Int.initFixed(([_]Limb{1})[0..]); - if (a.cmp(one) != 0) { + if (a.cmp(one) != .eq) { var unused = try Int.init(r.p.allocator.?); defer unused.deinit(); @@ -505,7 +507,7 @@ fn gcdLehmer(r: *Int, xa: Int, ya: Int) !void { y.abs(); defer y.deinit(); - if (x.cmp(y) < 0) { + if (x.cmp(y) == .lt) { x.swap(&y); } @@ -573,7 +575,7 @@ fn gcdLehmer(r: *Int, xa: Int, ya: Int) !void { } // euclidean algorithm - debug.assert(x.cmp(y) >= 0); + debug.assert(x.cmp(y) != .lt); while (!y.eqZero()) { try Int.divTrunc(&T, r, x, y); @@ -874,11 +876,11 @@ test "big.rational cmp" { try a.setRatio(500, 231); try b.setRatio(18903, 8584); - testing.expect((try a.cmp(b)) < 0); + testing.expect((try a.cmp(b)) == .lt); try a.setRatio(890, 10); try b.setRatio(89, 1); - testing.expect((try a.cmp(b)) == 0); + testing.expect((try a.cmp(b)) == .eq); } test "big.rational add single-limb" { @@ -889,11 +891,11 @@ test "big.rational add single-limb" { try a.setRatio(500, 231); try b.setRatio(18903, 8584); - testing.expect((try a.cmp(b)) < 0); + testing.expect((try a.cmp(b)) == .lt); try a.setRatio(890, 10); try b.setRatio(89, 1); - testing.expect((try a.cmp(b)) == 0); + testing.expect((try a.cmp(b)) == .eq); } test "big.rational add" { @@ -909,7 +911,7 @@ test "big.rational add" { try a.add(a, b); try r.setRatio(984786924199, 290395044174); - testing.expect((try a.cmp(r)) == 0); + testing.expect((try a.cmp(r)) == .eq); } test "big.rational sub" { @@ -925,7 +927,7 @@ test "big.rational sub" { try a.sub(a, b); try r.setRatio(979040510045, 290395044174); - testing.expect((try a.cmp(r)) == 0); + testing.expect((try a.cmp(r)) == .eq); } test "big.rational mul" { @@ -941,7 +943,7 @@ test "big.rational mul" { try a.mul(a, b); try r.setRatio(571481443, 17082061422); - testing.expect((try a.cmp(r)) == 0); + testing.expect((try a.cmp(r)) == .eq); } test "big.rational div" { @@ -957,7 +959,7 @@ test "big.rational div" { try a.div(a, b); try r.setRatio(75531824394, 221015929); - testing.expect((try a.cmp(r)) == 0); + testing.expect((try a.cmp(r)) == .eq); } test "big.rational div" { @@ -970,11 +972,11 @@ test "big.rational div" { a.invert(); try r.setRatio(23341, 78923); - testing.expect((try a.cmp(r)) == 0); + testing.expect((try a.cmp(r)) == .eq); try a.setRatio(-78923, 23341); a.invert(); try r.setRatio(-23341, 78923); - testing.expect((try a.cmp(r)) == 0); + testing.expect((try a.cmp(r)) == .eq); } diff --git a/lib/std/special/compiler_rt/floatundisf.zig b/lib/std/special/compiler_rt/floatundisf.zig index 41ff02daee..ff242721d6 100644 --- a/lib/std/special/compiler_rt/floatundisf.zig +++ b/lib/std/special/compiler_rt/floatundisf.zig @@ -69,23 +69,23 @@ test "floatundisf" { test__floatundisf(0, 0.0); test__floatundisf(1, 1.0); test__floatundisf(2, 2.0); - test__floatundisf(0x7FFFFF8000000000, 0x1.FFFFFEp+62F); - test__floatundisf(0x7FFFFF0000000000, 0x1.FFFFFCp+62F); - test__floatundisf(0x8000008000000000, 0x1p+63F); - test__floatundisf(0x8000010000000000, 0x1.000002p+63F); - test__floatundisf(0x8000000000000000, 0x1p+63F); - test__floatundisf(0x8000000000000001, 0x1p+63F); - test__floatundisf(0xFFFFFFFFFFFFFFFE, 0x1p+64F); - test__floatundisf(0xFFFFFFFFFFFFFFFF, 0x1p+64F); - test__floatundisf(0x0007FB72E8000000, 0x1.FEDCBAp+50F); - test__floatundisf(0x0007FB72EA000000, 0x1.FEDCBAp+50F); - test__floatundisf(0x0007FB72EB000000, 0x1.FEDCBAp+50F); - test__floatundisf(0x0007FB72EBFFFFFF, 0x1.FEDCBAp+50F); - test__floatundisf(0x0007FB72EC000000, 0x1.FEDCBCp+50F); - test__floatundisf(0x0007FB72E8000001, 0x1.FEDCBAp+50F); - test__floatundisf(0x0007FB72E6000000, 0x1.FEDCBAp+50F); - test__floatundisf(0x0007FB72E7000000, 0x1.FEDCBAp+50F); - test__floatundisf(0x0007FB72E7FFFFFF, 0x1.FEDCBAp+50F); - test__floatundisf(0x0007FB72E4000001, 0x1.FEDCBAp+50F); - test__floatundisf(0x0007FB72E4000000, 0x1.FEDCB8p+50F); + test__floatundisf(0x7FFFFF8000000000, 0x1.FFFFFEp+62); + test__floatundisf(0x7FFFFF0000000000, 0x1.FFFFFCp+62); + test__floatundisf(0x8000008000000000, 0x1p+63); + test__floatundisf(0x8000010000000000, 0x1.000002p+63); + test__floatundisf(0x8000000000000000, 0x1p+63); + test__floatundisf(0x8000000000000001, 0x1p+63); + test__floatundisf(0xFFFFFFFFFFFFFFFE, 0x1p+64); + test__floatundisf(0xFFFFFFFFFFFFFFFF, 0x1p+64); + test__floatundisf(0x0007FB72E8000000, 0x1.FEDCBAp+50); + test__floatundisf(0x0007FB72EA000000, 0x1.FEDCBAp+50); + test__floatundisf(0x0007FB72EB000000, 0x1.FEDCBAp+50); + test__floatundisf(0x0007FB72EBFFFFFF, 0x1.FEDCBAp+50); + test__floatundisf(0x0007FB72EC000000, 0x1.FEDCBCp+50); + test__floatundisf(0x0007FB72E8000001, 0x1.FEDCBAp+50); + test__floatundisf(0x0007FB72E6000000, 0x1.FEDCBAp+50); + test__floatundisf(0x0007FB72E7000000, 0x1.FEDCBAp+50); + test__floatundisf(0x0007FB72E7FFFFFF, 0x1.FEDCBAp+50); + test__floatundisf(0x0007FB72E4000001, 0x1.FEDCBAp+50); + test__floatundisf(0x0007FB72E4000000, 0x1.FEDCB8p+50); } diff --git a/lib/std/zig/parser_test.zig b/lib/std/zig/parser_test.zig index 894f726fb1..daeb197913 100644 --- a/lib/std/zig/parser_test.zig +++ b/lib/std/zig/parser_test.zig @@ -2815,6 +2815,75 @@ test "zig fmt: extern without container keyword returns error" { ); } +test "zig fmt: integer literals with underscore separators" { + try testTransform( + \\const + \\ x = + \\ 1_234_567 + \\ +(0b0_1-0o7_0+0xff_FF ) + 0_0; + , + \\const x = 1_234_567 + (0b0_1 - 0o7_0 + 0xff_FF) + 0_0; + \\ + ); +} + +test "zig fmt: hex literals with underscore separators" { + try testTransform( + \\pub fn orMask(a: [ 1_000 ]u64, b: [ 1_000] u64) [1_000]u64 { + \\ var c: [1_000]u64 = [1]u64{ 0xFFFF_FFFF_FFFF_FFFF}**1_000; + \\ for (c [ 0_0 .. ]) |_, i| { + \\ c[i] = (a[i] | b[i]) & 0xCCAA_CCAA_CCAA_CCAA; + \\ } + \\ return c; + \\} + \\ + \\ + , + \\pub fn orMask(a: [1_000]u64, b: [1_000]u64) [1_000]u64 { + \\ var c: [1_000]u64 = [1]u64{0xFFFF_FFFF_FFFF_FFFF} ** 1_000; + \\ for (c[0_0..]) |_, i| { + \\ c[i] = (a[i] | b[i]) & 0xCCAA_CCAA_CCAA_CCAA; + \\ } + \\ return c; + \\} + \\ + ); +} + +test "zig fmt: decimal float literals with underscore separators" { + try testTransform( + \\pub fn main() void { + \\ const a:f64=(10.0e-0+(10.e+0))+10_00.00_00e-2+00_00.00_10e+4; + \\ const b:f64=010.0--0_10.+0_1_0.0_0+1e2; + \\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b }); + \\} + , + \\pub fn main() void { + \\ const a: f64 = (10.0e-0 + (10.e+0)) + 10_00.00_00e-2 + 00_00.00_10e+4; + \\ const b: f64 = 010.0 - -0_10. + 0_1_0.0_0 + 1e2; + \\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b }); + \\} + \\ + ); +} + +test "zig fmt: hexadeciaml float literals with underscore separators" { + try testTransform( + \\pub fn main() void { + \\ const a: f64 = (0x10.0p-0+(0x10.p+0))+0x10_00.00_00p-8+0x00_00.00_10p+16; + \\ const b: f64 = 0x0010.0--0x00_10.+0x10.00+0x1p4; + \\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b }); + \\} + , + \\pub fn main() void { + \\ const a: f64 = (0x10.0p-0 + (0x10.p+0)) + 0x10_00.00_00p-8 + 0x00_00.00_10p+16; + \\ const b: f64 = 0x0010.0 - -0x00_10. + 0x10.00 + 0x1p4; + \\ std.debug.warn("a: {}, b: {} -> a+b: {}\n", .{ a, b, a + b }); + \\} + \\ + ); +} + const std = @import("std"); const mem = std.mem; const warn = std.debug.warn; diff --git a/lib/std/zig/tokenizer.zig b/lib/std/zig/tokenizer.zig index f6c71479e7..6cb66595a7 100644 --- a/lib/std/zig/tokenizer.zig +++ b/lib/std/zig/tokenizer.zig @@ -387,17 +387,23 @@ pub const Tokenizer = struct { DocComment, ContainerDocComment, Zero, - IntegerLiteral, - IntegerLiteralWithRadix, - IntegerLiteralWithRadixHex, - NumberDot, + IntegerLiteralDec, + IntegerLiteralDecNoUnderscore, + IntegerLiteralBin, + IntegerLiteralBinNoUnderscore, + IntegerLiteralOct, + IntegerLiteralOctNoUnderscore, + IntegerLiteralHex, + IntegerLiteralHexNoUnderscore, + NumberDotDec, NumberDotHex, - FloatFraction, + FloatFractionDec, + FloatFractionDecNoUnderscore, FloatFractionHex, + FloatFractionHexNoUnderscore, FloatExponentUnsigned, - FloatExponentUnsignedHex, FloatExponentNumber, - FloatExponentNumberHex, + FloatExponentNumberNoUnderscore, Ampersand, Caret, Percent, @@ -412,6 +418,10 @@ pub const Tokenizer = struct { SawAtSign, }; + fn isIdentifierChar(char: u8) bool { + return std.ascii.isAlNum(char) or char == '_'; + } + pub fn next(self: *Tokenizer) Token { if (self.pending_invalid_token) |token| { self.pending_invalid_token = null; @@ -550,7 +560,7 @@ pub const Tokenizer = struct { result.id = Token.Id.IntegerLiteral; }, '1'...'9' => { - state = State.IntegerLiteral; + state = State.IntegerLiteralDec; result.id = Token.Id.IntegerLiteral; }, else => { @@ -1048,55 +1058,145 @@ pub const Tokenizer = struct { else => self.checkLiteralCharacter(), }, State.Zero => switch (c) { - 'b', 'o' => { - state = State.IntegerLiteralWithRadix; + 'b' => { + state = State.IntegerLiteralBinNoUnderscore; + }, + 'o' => { + state = State.IntegerLiteralOctNoUnderscore; }, 'x' => { - state = State.IntegerLiteralWithRadixHex; + state = State.IntegerLiteralHexNoUnderscore; }, - else => { - // reinterpret as a normal number + '0'...'9', '_', '.', 'e', 'E' => { + // reinterpret as a decimal number self.index -= 1; - state = State.IntegerLiteral; + state = State.IntegerLiteralDec; + }, + else => { + if (isIdentifierChar(c)) { + result.id = Token.Id.Invalid; + } + break; + }, + }, + State.IntegerLiteralBinNoUnderscore => switch (c) { + '0'...'1' => { + state = State.IntegerLiteralBin; + }, + else => { + result.id = Token.Id.Invalid; + break; + }, + }, + State.IntegerLiteralBin => switch (c) { + '_' => { + state = State.IntegerLiteralBinNoUnderscore; + }, + '0'...'1' => {}, + else => { + if (isIdentifierChar(c)) { + result.id = Token.Id.Invalid; + } + break; + }, + }, + State.IntegerLiteralOctNoUnderscore => switch (c) { + '0'...'7' => { + state = State.IntegerLiteralOct; + }, + else => { + result.id = Token.Id.Invalid; + break; + }, + }, + State.IntegerLiteralOct => switch (c) { + '_' => { + state = State.IntegerLiteralOctNoUnderscore; + }, + '0'...'7' => {}, + else => { + if (isIdentifierChar(c)) { + result.id = Token.Id.Invalid; + } + break; + }, + }, + State.IntegerLiteralDecNoUnderscore => switch (c) { + '0'...'9' => { + state = State.IntegerLiteralDec; + }, + else => { + result.id = Token.Id.Invalid; + break; }, }, - State.IntegerLiteral => switch (c) { + State.IntegerLiteralDec => switch (c) { + '_' => { + state = State.IntegerLiteralDecNoUnderscore; + }, '.' => { - state = State.NumberDot; + state = State.NumberDotDec; + result.id = Token.Id.FloatLiteral; }, - 'p', 'P', 'e', 'E' => { + 'e', 'E' => { state = State.FloatExponentUnsigned; + result.id = Token.Id.FloatLiteral; }, '0'...'9' => {}, - else => break, + else => { + if (isIdentifierChar(c)) { + result.id = Token.Id.Invalid; + } + break; + }, }, - State.IntegerLiteralWithRadix => switch (c) { - '.' => { - state = State.NumberDot; + State.IntegerLiteralHexNoUnderscore => switch (c) { + '0'...'9', 'a'...'f', 'A'...'F' => { + state = State.IntegerLiteralHex; + }, + else => { + result.id = Token.Id.Invalid; + break; }, - '0'...'9' => {}, - else => break, }, - State.IntegerLiteralWithRadixHex => switch (c) { + State.IntegerLiteralHex => switch (c) { + '_' => { + state = State.IntegerLiteralHexNoUnderscore; + }, '.' => { state = State.NumberDotHex; + result.id = Token.Id.FloatLiteral; }, 'p', 'P' => { - state = State.FloatExponentUnsignedHex; + state = State.FloatExponentUnsigned; + result.id = Token.Id.FloatLiteral; }, '0'...'9', 'a'...'f', 'A'...'F' => {}, - else => break, + else => { + if (isIdentifierChar(c)) { + result.id = Token.Id.Invalid; + } + break; + }, }, - State.NumberDot => switch (c) { + State.NumberDotDec => switch (c) { '.' => { self.index -= 1; state = State.Start; break; }, - else => { - self.index -= 1; + 'e', 'E' => { + state = State.FloatExponentUnsigned; + }, + '0'...'9' => { result.id = Token.Id.FloatLiteral; - state = State.FloatFraction; + state = State.FloatFractionDec; + }, + else => { + if (isIdentifierChar(c)) { + result.id = Token.Id.Invalid; + } + break; }, }, State.NumberDotHex => switch (c) { @@ -1105,65 +1205,112 @@ pub const Tokenizer = struct { state = State.Start; break; }, - else => { - self.index -= 1; + 'p', 'P' => { + state = State.FloatExponentUnsigned; + }, + '0'...'9', 'a'...'f', 'A'...'F' => { result.id = Token.Id.FloatLiteral; state = State.FloatFractionHex; }, + else => { + if (isIdentifierChar(c)) { + result.id = Token.Id.Invalid; + } + break; + }, }, - State.FloatFraction => switch (c) { + State.FloatFractionDecNoUnderscore => switch (c) { + '0'...'9' => { + state = State.FloatFractionDec; + }, + else => { + result.id = Token.Id.Invalid; + break; + }, + }, + State.FloatFractionDec => switch (c) { + '_' => { + state = State.FloatFractionDecNoUnderscore; + }, 'e', 'E' => { state = State.FloatExponentUnsigned; }, '0'...'9' => {}, - else => break, + else => { + if (isIdentifierChar(c)) { + result.id = Token.Id.Invalid; + } + break; + }, + }, + State.FloatFractionHexNoUnderscore => switch (c) { + '0'...'9', 'a'...'f', 'A'...'F' => { + state = State.FloatFractionHex; + }, + else => { + result.id = Token.Id.Invalid; + break; + }, }, State.FloatFractionHex => switch (c) { + '_' => { + state = State.FloatFractionHexNoUnderscore; + }, 'p', 'P' => { - state = State.FloatExponentUnsignedHex; + state = State.FloatExponentUnsigned; }, '0'...'9', 'a'...'f', 'A'...'F' => {}, - else => break, + else => { + if (isIdentifierChar(c)) { + result.id = Token.Id.Invalid; + } + break; + }, }, State.FloatExponentUnsigned => switch (c) { '+', '-' => { - state = State.FloatExponentNumber; + state = State.FloatExponentNumberNoUnderscore; }, else => { // reinterpret as a normal exponent number self.index -= 1; - state = State.FloatExponentNumber; + state = State.FloatExponentNumberNoUnderscore; }, }, - State.FloatExponentUnsignedHex => switch (c) { - '+', '-' => { - state = State.FloatExponentNumberHex; + State.FloatExponentNumberNoUnderscore => switch (c) { + '0'...'9' => { + state = State.FloatExponentNumber; }, else => { - // reinterpret as a normal exponent number - self.index -= 1; - state = State.FloatExponentNumberHex; + result.id = Token.Id.Invalid; + break; }, }, State.FloatExponentNumber => switch (c) { + '_' => { + state = State.FloatExponentNumberNoUnderscore; + }, '0'...'9' => {}, - else => break, - }, - State.FloatExponentNumberHex => switch (c) { - '0'...'9', 'a'...'f', 'A'...'F' => {}, - else => break, + else => { + if (isIdentifierChar(c)) { + result.id = Token.Id.Invalid; + } + break; + }, }, } } else if (self.index == self.buffer.len) { switch (state) { State.Start, - State.IntegerLiteral, - State.IntegerLiteralWithRadix, - State.IntegerLiteralWithRadixHex, - State.FloatFraction, + State.IntegerLiteralDec, + State.IntegerLiteralBin, + State.IntegerLiteralOct, + State.IntegerLiteralHex, + State.NumberDotDec, + State.NumberDotHex, + State.FloatFractionDec, State.FloatFractionHex, State.FloatExponentNumber, - State.FloatExponentNumberHex, State.StringLiteral, // find this error later State.MultilineStringLiteralLine, State.Builtin, @@ -1184,10 +1331,14 @@ pub const Tokenizer = struct { result.id = Token.Id.ContainerDocComment; }, - State.NumberDot, - State.NumberDotHex, + State.IntegerLiteralDecNoUnderscore, + State.IntegerLiteralBinNoUnderscore, + State.IntegerLiteralOctNoUnderscore, + State.IntegerLiteralHexNoUnderscore, + State.FloatFractionDecNoUnderscore, + State.FloatFractionHexNoUnderscore, + State.FloatExponentNumberNoUnderscore, State.FloatExponentUnsigned, - State.FloatExponentUnsignedHex, State.SawAtSign, State.Backslash, State.CharLiteral, @@ -1585,6 +1736,236 @@ test "correctly parse pointer assignment" { }); } +test "tokenizer - number literals decimal" { + testTokenize("0", &[_]Token.Id{.IntegerLiteral}); + testTokenize("1", &[_]Token.Id{.IntegerLiteral}); + testTokenize("2", &[_]Token.Id{.IntegerLiteral}); + testTokenize("3", &[_]Token.Id{.IntegerLiteral}); + testTokenize("4", &[_]Token.Id{.IntegerLiteral}); + testTokenize("5", &[_]Token.Id{.IntegerLiteral}); + testTokenize("6", &[_]Token.Id{.IntegerLiteral}); + testTokenize("7", &[_]Token.Id{.IntegerLiteral}); + testTokenize("8", &[_]Token.Id{.IntegerLiteral}); + testTokenize("9", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0a", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("9b", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("1z", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("1z_1", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("9z3", &[_]Token.Id{ .Invalid, .Identifier }); + + testTokenize("0_0", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0001", &[_]Token.Id{.IntegerLiteral}); + testTokenize("01234567890", &[_]Token.Id{.IntegerLiteral}); + testTokenize("012_345_6789_0", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0_1_2_3_4_5_6_7_8_9_0", &[_]Token.Id{.IntegerLiteral}); + + testTokenize("00_", &[_]Token.Id{.Invalid}); + testTokenize("0_0_", &[_]Token.Id{.Invalid}); + testTokenize("0__0", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0_0f", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0_0_f", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0_0_f_00", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("1_,", &[_]Token.Id{ .Invalid, .Comma }); + + testTokenize("1.", &[_]Token.Id{.FloatLiteral}); + testTokenize("0.0", &[_]Token.Id{.FloatLiteral}); + testTokenize("1.0", &[_]Token.Id{.FloatLiteral}); + testTokenize("10.0", &[_]Token.Id{.FloatLiteral}); + testTokenize("0e0", &[_]Token.Id{.FloatLiteral}); + testTokenize("1e0", &[_]Token.Id{.FloatLiteral}); + testTokenize("1e100", &[_]Token.Id{.FloatLiteral}); + testTokenize("1.e100", &[_]Token.Id{.FloatLiteral}); + testTokenize("1.0e100", &[_]Token.Id{.FloatLiteral}); + testTokenize("1.0e+100", &[_]Token.Id{.FloatLiteral}); + testTokenize("1.0e-100", &[_]Token.Id{.FloatLiteral}); + testTokenize("1_0_0_0.0_0_0_0_0_1e1_0_0_0", &[_]Token.Id{.FloatLiteral}); + testTokenize("1.+", &[_]Token.Id{ .FloatLiteral, .Plus }); + + testTokenize("1e", &[_]Token.Id{.Invalid}); + testTokenize("1.0e1f0", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("1.0p100", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("1.0p-100", &[_]Token.Id{ .Invalid, .Identifier, .Minus, .IntegerLiteral }); + testTokenize("1.0p1f0", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("1.0_,", &[_]Token.Id{ .Invalid, .Comma }); + testTokenize("1_.0", &[_]Token.Id{ .Invalid, .Period, .IntegerLiteral }); + testTokenize("1._", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("1.a", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("1.z", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("1._0", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("1._+", &[_]Token.Id{ .Invalid, .Identifier, .Plus }); + testTokenize("1._e", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("1.0e", &[_]Token.Id{.Invalid}); + testTokenize("1.0e,", &[_]Token.Id{ .Invalid, .Comma }); + testTokenize("1.0e_", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("1.0e+_", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("1.0e-_", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("1.0e0_+", &[_]Token.Id{ .Invalid, .Plus }); +} + +test "tokenizer - number literals binary" { + testTokenize("0b0", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0b1", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0b2", &[_]Token.Id{ .Invalid, .IntegerLiteral }); + testTokenize("0b3", &[_]Token.Id{ .Invalid, .IntegerLiteral }); + testTokenize("0b4", &[_]Token.Id{ .Invalid, .IntegerLiteral }); + testTokenize("0b5", &[_]Token.Id{ .Invalid, .IntegerLiteral }); + testTokenize("0b6", &[_]Token.Id{ .Invalid, .IntegerLiteral }); + testTokenize("0b7", &[_]Token.Id{ .Invalid, .IntegerLiteral }); + testTokenize("0b8", &[_]Token.Id{ .Invalid, .IntegerLiteral }); + testTokenize("0b9", &[_]Token.Id{ .Invalid, .IntegerLiteral }); + testTokenize("0ba", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0bb", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0bc", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0bd", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0be", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0bf", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0bz", &[_]Token.Id{ .Invalid, .Identifier }); + + testTokenize("0b0000_0000", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0b1111_1111", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0b10_10_10_10", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0b0_1_0_1_0_1_0_1", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0b1.", &[_]Token.Id{ .IntegerLiteral, .Period }); + testTokenize("0b1.0", &[_]Token.Id{ .IntegerLiteral, .Period, .IntegerLiteral }); + + testTokenize("0B0", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0b_", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0b_0", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0b1_", &[_]Token.Id{.Invalid}); + testTokenize("0b0__1", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0b0_1_", &[_]Token.Id{.Invalid}); + testTokenize("0b1e", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0b1p", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0b1e0", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0b1p0", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0b1_,", &[_]Token.Id{ .Invalid, .Comma }); +} + +test "tokenizer - number literals octal" { + testTokenize("0o0", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0o1", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0o2", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0o3", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0o4", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0o5", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0o6", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0o7", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0o8", &[_]Token.Id{ .Invalid, .IntegerLiteral }); + testTokenize("0o9", &[_]Token.Id{ .Invalid, .IntegerLiteral }); + testTokenize("0oa", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0ob", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0oc", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0od", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0oe", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0of", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0oz", &[_]Token.Id{ .Invalid, .Identifier }); + + testTokenize("0o01234567", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0o0123_4567", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0o01_23_45_67", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0o0_1_2_3_4_5_6_7", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0o7.", &[_]Token.Id{ .IntegerLiteral, .Period }); + testTokenize("0o7.0", &[_]Token.Id{ .IntegerLiteral, .Period, .IntegerLiteral }); + + testTokenize("0O0", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0o_", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0o_0", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0o1_", &[_]Token.Id{.Invalid}); + testTokenize("0o0__1", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0o0_1_", &[_]Token.Id{.Invalid}); + testTokenize("0o1e", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0o1p", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0o1e0", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0o1p0", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0o_,", &[_]Token.Id{ .Invalid, .Identifier, .Comma }); +} + +test "tokenizer - number literals hexadeciaml" { + testTokenize("0x0", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0x1", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0x2", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0x3", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0x4", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0x5", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0x6", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0x7", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0x8", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0x9", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0xa", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0xb", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0xc", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0xd", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0xe", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0xf", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0xA", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0xB", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0xC", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0xD", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0xE", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0xF", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0x0z", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0xz", &[_]Token.Id{ .Invalid, .Identifier }); + + testTokenize("0x0123456789ABCDEF", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0x0123_4567_89AB_CDEF", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0x01_23_45_67_89AB_CDE_F", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0x0_1_2_3_4_5_6_7_8_9_A_B_C_D_E_F", &[_]Token.Id{.IntegerLiteral}); + + testTokenize("0X0", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0x_", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0x_1", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0x1_", &[_]Token.Id{.Invalid}); + testTokenize("0x0__1", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0x0_1_", &[_]Token.Id{.Invalid}); + testTokenize("0x_,", &[_]Token.Id{ .Invalid, .Identifier, .Comma }); + + testTokenize("0x1.", &[_]Token.Id{.FloatLiteral}); + testTokenize("0x1.0", &[_]Token.Id{.FloatLiteral}); + testTokenize("0xF.", &[_]Token.Id{.FloatLiteral}); + testTokenize("0xF.0", &[_]Token.Id{.FloatLiteral}); + testTokenize("0xF.F", &[_]Token.Id{.FloatLiteral}); + testTokenize("0xF.Fp0", &[_]Token.Id{.FloatLiteral}); + testTokenize("0xF.FP0", &[_]Token.Id{.FloatLiteral}); + testTokenize("0x1p0", &[_]Token.Id{.FloatLiteral}); + testTokenize("0xfp0", &[_]Token.Id{.FloatLiteral}); + testTokenize("0x1.+0xF.", &[_]Token.Id{ .FloatLiteral, .Plus, .FloatLiteral }); + + testTokenize("0x0123456.789ABCDEF", &[_]Token.Id{.FloatLiteral}); + testTokenize("0x0_123_456.789_ABC_DEF", &[_]Token.Id{.FloatLiteral}); + testTokenize("0x0_1_2_3_4_5_6.7_8_9_A_B_C_D_E_F", &[_]Token.Id{.FloatLiteral}); + testTokenize("0x0p0", &[_]Token.Id{.FloatLiteral}); + testTokenize("0x0.0p0", &[_]Token.Id{.FloatLiteral}); + testTokenize("0xff.ffp10", &[_]Token.Id{.FloatLiteral}); + testTokenize("0xff.ffP10", &[_]Token.Id{.FloatLiteral}); + testTokenize("0xff.p10", &[_]Token.Id{.FloatLiteral}); + testTokenize("0xffp10", &[_]Token.Id{.FloatLiteral}); + testTokenize("0xff_ff.ff_ffp1_0_0_0", &[_]Token.Id{.FloatLiteral}); + testTokenize("0xf_f_f_f.f_f_f_fp+1_000", &[_]Token.Id{.FloatLiteral}); + testTokenize("0xf_f_f_f.f_f_f_fp-1_00_0", &[_]Token.Id{.FloatLiteral}); + + testTokenize("0x1e", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0x1e0", &[_]Token.Id{.IntegerLiteral}); + testTokenize("0x1p", &[_]Token.Id{.Invalid}); + testTokenize("0xfp0z1", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0xff.ffpff", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0x0.p", &[_]Token.Id{.Invalid}); + testTokenize("0x0.z", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0x0._", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0x0_.0", &[_]Token.Id{ .Invalid, .Period, .IntegerLiteral }); + testTokenize("0x0_.0.0", &[_]Token.Id{ .Invalid, .Period, .FloatLiteral }); + testTokenize("0x0._0", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0x0.0_", &[_]Token.Id{.Invalid}); + testTokenize("0x0_p0", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0x0_.p0", &[_]Token.Id{ .Invalid, .Period, .Identifier }); + testTokenize("0x0._p0", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0x0.0_p0", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0x0._0p0", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0x0.0p_0", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0x0.0p+_0", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0x0.0p-_0", &[_]Token.Id{ .Invalid, .Identifier }); + testTokenize("0x0.0p0_", &[_]Token.Id{ .Invalid, .Eof }); +} + fn testTokenize(source: []const u8, expected_tokens: []const Token.Id) void { var tokenizer = Tokenizer.init(source); for (expected_tokens) |expected_token_id| { diff --git a/src-self-hosted/ir.zig b/src-self-hosted/ir.zig index 7453f6fbd7..7ad9e0ccc4 100644 --- a/src-self-hosted/ir.zig +++ b/src-self-hosted/ir.zig @@ -1311,13 +1311,16 @@ pub const Builder = struct { var base: u8 = undefined; var rest: []const u8 = undefined; if (int_token.len >= 3 and int_token[0] == '0') { - base = switch (int_token[1]) { - 'b' => 2, - 'o' => 8, - 'x' => 16, - else => unreachable, - }; rest = int_token[2..]; + switch (int_token[1]) { + 'b' => base = 2, + 'o' => base = 8, + 'x' => base = 16, + else => { + base = 10; + rest = int_token; + }, + } } else { base = 10; rest = int_token; diff --git a/src/ir.cpp b/src/ir.cpp index 913cd69cfe..46d936619f 100644 --- a/src/ir.cpp +++ b/src/ir.cpp @@ -12786,7 +12786,7 @@ static IrInstGen *ir_resolve_ptr_of_array_to_slice(IrAnalyze *ira, IrInst* sourc result->value->type = wanted_type; return result; } - } else { + } else if (array_ptr_val->data.x_ptr.special != ConstPtrSpecialHardCodedAddr) { ZigValue *pointee = const_ptr_pointee(ira, ira->codegen, array_ptr_val, source_instr->source_node); if (pointee == nullptr) return ira->codegen->invalid_inst_gen; diff --git a/src/main.cpp b/src/main.cpp index 8aad9fa499..23a933d53b 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -613,10 +613,12 @@ static int main0(int argc, char **argv) { case Stage2ClangArgPositional: { Buf *arg_buf = buf_create_from_str(it.only_arg); if (buf_ends_with_str(arg_buf, ".c") || + buf_ends_with_str(arg_buf, ".C") || buf_ends_with_str(arg_buf, ".cc") || buf_ends_with_str(arg_buf, ".cpp") || buf_ends_with_str(arg_buf, ".cxx") || - buf_ends_with_str(arg_buf, ".s")) + buf_ends_with_str(arg_buf, ".s") || + buf_ends_with_str(arg_buf, ".S")) { CFile *c_file = heap::c_allocator.create<CFile>(); c_file->source_path = it.only_arg; @@ -748,6 +750,16 @@ static int main0(int argc, char **argv) { } Buf *rpath = linker_args.at(i); rpath_list.append(buf_ptr(rpath)); + } else if (buf_eql_str(arg, "-I") || + buf_eql_str(arg, "--dynamic-linker") || + buf_eql_str(arg, "-dynamic-linker")) + { + i += 1; + if (i >= linker_args.length) { + fprintf(stderr, "expected linker arg after '%s'\n", buf_ptr(arg)); + return EXIT_FAILURE; + } + dynamic_linker = buf_ptr(linker_args.at(i)); } else { fprintf(stderr, "warning: unsupported linker arg: %s\n", buf_ptr(arg)); } diff --git a/src/parse_f128.c b/src/parse_f128.c index cffb3796b4..9b5c287a3c 100644 --- a/src/parse_f128.c +++ b/src/parse_f128.c @@ -165,22 +165,36 @@ static long long scanexp(struct MuslFILE *f, int pok) int x; long long y; int neg = 0; - + c = shgetc(f); if (c=='+' || c=='-') { neg = (c=='-'); c = shgetc(f); if (c-'0'>=10U && pok) shunget(f); } - if (c-'0'>=10U) { + if (c-'0'>=10U && c!='_') { shunget(f); return LLONG_MIN; } - for (x=0; c-'0'<10U && x<INT_MAX/10; c = shgetc(f)) - x = 10*x + c-'0'; - for (y=x; c-'0'<10U && y<LLONG_MAX/100; c = shgetc(f)) - y = 10*y + c-'0'; - for (; c-'0'<10U; c = shgetc(f)); + for (x=0; ; c = shgetc(f)) { + if (c=='_') { + continue; + } else if (c-'0'<10U && x<INT_MAX/10) { + x = 10*x + c-'0'; + } else { + break; + } + } + for (y=x; ; c = shgetc(f)) { + if (c=='_') { + continue; + } else if (c-'0'<10U && y<LLONG_MAX/100) { + y = 10*y + c-'0'; + } else { + break; + } + } + for (; c-'0'<10U || c=='_'; c = shgetc(f)); shunget(f); return neg ? -y : y; } @@ -450,16 +464,36 @@ static float128_t decfloat(struct MuslFILE *f, int c, int bits, int emin, int si j=0; k=0; - /* Don't let leading zeros consume buffer space */ - for (; c=='0'; c = shgetc(f)) gotdig=1; + /* Don't let leading zeros/underscores consume buffer space */ + for (; ; c = shgetc(f)) { + if (c=='_') { + continue; + } else if (c=='0') { + gotdig=1; + } else { + break; + } + } + if (c=='.') { gotrad = 1; - for (c = shgetc(f); c=='0'; c = shgetc(f)) gotdig=1, lrp--; + for (c = shgetc(f); ; c = shgetc(f)) { + if (c == '_') { + continue; + } else if (c=='0') { + gotdig=1; + lrp--; + } else { + break; + } + } } x[0] = 0; - for (; c-'0'<10U || c=='.'; c = shgetc(f)) { - if (c == '.') { + for (; c-'0'<10U || c=='.' || c=='_'; c = shgetc(f)) { + if (c == '_') { + continue; + } else if (c == '.') { if (gotrad) break; gotrad = 1; lrp = dc; @@ -773,18 +807,29 @@ static float128_t hexfloat(struct MuslFILE *f, int bits, int emin, int sign, int c = shgetc(f); - /* Skip leading zeros */ - for (; c=='0'; c = shgetc(f)) gotdig = 1; + /* Skip leading zeros/underscores */ + for (; c=='0' || c=='_'; c = shgetc(f)) gotdig = 1; if (c=='.') { gotrad = 1; c = shgetc(f); /* Count zeros after the radix point before significand */ - for (rp=0; c=='0'; c = shgetc(f), rp--) gotdig = 1; + for (rp=0; ; c = shgetc(f)) { + if (c == '_') { + continue; + } else if (c == '0') { + gotdig = 1; + rp--; + } else { + break; + } + } } - for (; c-'0'<10U || (c|32)-'a'<6U || c=='.'; c = shgetc(f)) { - if (c=='.') { + for (; c-'0'<10U || (c|32)-'a'<6U || c=='.' || c=='_'; c = shgetc(f)) { + if (c=='_') { + continue; + } else if (c=='.') { if (gotrad) break; rp = dc; gotrad = 1; diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index d3390aef3c..22d63568bf 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -177,10 +177,13 @@ enum TokenizeState { TokenizeStateSymbol, TokenizeStateZero, // "0", which might lead to "0x" TokenizeStateNumber, // "123", "0x123" + TokenizeStateNumberNoUnderscore, // "12_", "0x12_" next char must be digit TokenizeStateNumberDot, TokenizeStateFloatFraction, // "123.456", "0x123.456" + TokenizeStateFloatFractionNoUnderscore, // "123.45_", "0x123.45_" TokenizeStateFloatExponentUnsigned, // "123.456e", "123e", "0x123p" - TokenizeStateFloatExponentNumber, // "123.456e-", "123.456e5", "123.456e5e-5" + TokenizeStateFloatExponentNumber, // "123.456e7", "123.456e+7", "123.456e-7" + TokenizeStateFloatExponentNumberNoUnderscore, // "123.456e7_", "123.456e+7_", "123.456e-7_" TokenizeStateString, TokenizeStateStringEscape, TokenizeStateStringEscapeUnicodeStart, @@ -233,14 +236,10 @@ struct Tokenize { Token *cur_tok; Tokenization *out; uint32_t radix; - int32_t exp_add_amt; - bool is_exp_negative; + bool is_trailing_underscore; size_t char_code_index; bool unicode; uint32_t char_code; - int exponent_in_bin_or_dec; - BigInt specified_exponent; - BigInt significand; size_t remaining_code_units; }; @@ -426,20 +425,16 @@ void tokenize(Buf *buf, Tokenization *out) { case '0': t.state = TokenizeStateZero; begin_token(&t, TokenIdIntLiteral); + t.is_trailing_underscore = false; t.radix = 10; - t.exp_add_amt = 1; - t.exponent_in_bin_or_dec = 0; bigint_init_unsigned(&t.cur_tok->data.int_lit.bigint, 0); - bigint_init_unsigned(&t.specified_exponent, 0); break; case DIGIT_NON_ZERO: t.state = TokenizeStateNumber; begin_token(&t, TokenIdIntLiteral); + t.is_trailing_underscore = false; t.radix = 10; - t.exp_add_amt = 1; - t.exponent_in_bin_or_dec = 0; bigint_init_unsigned(&t.cur_tok->data.int_lit.bigint, get_digit_value(c)); - bigint_init_unsigned(&t.specified_exponent, 0); break; case '"': begin_token(&t, TokenIdStringLiteral); @@ -1189,17 +1184,15 @@ void tokenize(Buf *buf, Tokenization *out) { switch (c) { case 'b': t.radix = 2; - t.state = TokenizeStateNumber; + t.state = TokenizeStateNumberNoUnderscore; break; case 'o': t.radix = 8; - t.exp_add_amt = 3; - t.state = TokenizeStateNumber; + t.state = TokenizeStateNumberNoUnderscore; break; case 'x': t.radix = 16; - t.exp_add_amt = 4; - t.state = TokenizeStateNumber; + t.state = TokenizeStateNumberNoUnderscore; break; default: // reinterpret as normal number @@ -1208,9 +1201,27 @@ void tokenize(Buf *buf, Tokenization *out) { continue; } break; + case TokenizeStateNumberNoUnderscore: + if (c == '_') { + invalid_char_error(&t, c); + break; + } else if (get_digit_value(c) < t.radix) { + t.is_trailing_underscore = false; + t.state = TokenizeStateNumber; + } + // fall through case TokenizeStateNumber: { + if (c == '_') { + t.is_trailing_underscore = true; + t.state = TokenizeStateNumberNoUnderscore; + break; + } if (c == '.') { + if (t.is_trailing_underscore) { + invalid_char_error(&t, c); + break; + } if (t.radix != 16 && t.radix != 10) { invalid_char_error(&t, c); } @@ -1218,17 +1229,26 @@ void tokenize(Buf *buf, Tokenization *out) { break; } if (is_exponent_signifier(c, t.radix)) { + if (t.is_trailing_underscore) { + invalid_char_error(&t, c); + break; + } if (t.radix != 16 && t.radix != 10) { invalid_char_error(&t, c); } t.state = TokenizeStateFloatExponentUnsigned; + t.radix = 10; // exponent is always base 10 assert(t.cur_tok->id == TokenIdIntLiteral); - bigint_init_bigint(&t.significand, &t.cur_tok->data.int_lit.bigint); set_token_id(&t, t.cur_tok, TokenIdFloatLiteral); break; } uint32_t digit_value = get_digit_value(c); if (digit_value >= t.radix) { + if (t.is_trailing_underscore) { + invalid_char_error(&t, c); + break; + } + if (is_symbol_char(c)) { invalid_char_error(&t, c); } @@ -1259,20 +1279,41 @@ void tokenize(Buf *buf, Tokenization *out) { continue; } t.pos -= 1; - t.state = TokenizeStateFloatFraction; + t.state = TokenizeStateFloatFractionNoUnderscore; assert(t.cur_tok->id == TokenIdIntLiteral); - bigint_init_bigint(&t.significand, &t.cur_tok->data.int_lit.bigint); set_token_id(&t, t.cur_tok, TokenIdFloatLiteral); continue; } + case TokenizeStateFloatFractionNoUnderscore: + if (c == '_') { + invalid_char_error(&t, c); + } else if (get_digit_value(c) < t.radix) { + t.is_trailing_underscore = false; + t.state = TokenizeStateFloatFraction; + } + // fall through case TokenizeStateFloatFraction: { + if (c == '_') { + t.is_trailing_underscore = true; + t.state = TokenizeStateFloatFractionNoUnderscore; + break; + } if (is_exponent_signifier(c, t.radix)) { + if (t.is_trailing_underscore) { + invalid_char_error(&t, c); + break; + } t.state = TokenizeStateFloatExponentUnsigned; + t.radix = 10; // exponent is always base 10 break; } uint32_t digit_value = get_digit_value(c); if (digit_value >= t.radix) { + if (t.is_trailing_underscore) { + invalid_char_error(&t, c); + break; + } if (is_symbol_char(c)) { invalid_char_error(&t, c); } @@ -1282,46 +1323,47 @@ void tokenize(Buf *buf, Tokenization *out) { t.state = TokenizeStateStart; continue; } - t.exponent_in_bin_or_dec -= t.exp_add_amt; - if (t.radix == 10) { - // For now we use strtod to parse decimal floats, so we just have to get to the - // end of the token. - break; - } - BigInt digit_value_bi; - bigint_init_unsigned(&digit_value_bi, digit_value); - - BigInt radix_bi; - bigint_init_unsigned(&radix_bi, t.radix); - - BigInt multiplied; - bigint_mul(&multiplied, &t.significand, &radix_bi); - bigint_add(&t.significand, &multiplied, &digit_value_bi); - break; + // we use parse_f128 to generate the float literal, so just + // need to get to the end of the token } + break; case TokenizeStateFloatExponentUnsigned: switch (c) { case '+': - t.is_exp_negative = false; - t.state = TokenizeStateFloatExponentNumber; + t.state = TokenizeStateFloatExponentNumberNoUnderscore; break; case '-': - t.is_exp_negative = true; - t.state = TokenizeStateFloatExponentNumber; + t.state = TokenizeStateFloatExponentNumberNoUnderscore; break; default: // reinterpret as normal exponent number t.pos -= 1; - t.is_exp_negative = false; - t.state = TokenizeStateFloatExponentNumber; + t.state = TokenizeStateFloatExponentNumberNoUnderscore; continue; } break; + case TokenizeStateFloatExponentNumberNoUnderscore: + if (c == '_') { + invalid_char_error(&t, c); + } else if (get_digit_value(c) < t.radix) { + t.is_trailing_underscore = false; + t.state = TokenizeStateFloatExponentNumber; + } + // fall through case TokenizeStateFloatExponentNumber: { + if (c == '_') { + t.is_trailing_underscore = true; + t.state = TokenizeStateFloatExponentNumberNoUnderscore; + break; + } uint32_t digit_value = get_digit_value(c); if (digit_value >= t.radix) { + if (t.is_trailing_underscore) { + invalid_char_error(&t, c); + break; + } if (is_symbol_char(c)) { invalid_char_error(&t, c); } @@ -1331,21 +1373,9 @@ void tokenize(Buf *buf, Tokenization *out) { t.state = TokenizeStateStart; continue; } - if (t.radix == 10) { - // For now we use strtod to parse decimal floats, so we just have to get to the - // end of the token. - break; - } - BigInt digit_value_bi; - bigint_init_unsigned(&digit_value_bi, digit_value); - - BigInt radix_bi; - bigint_init_unsigned(&radix_bi, 10); - - BigInt multiplied; - bigint_mul(&multiplied, &t.specified_exponent, &radix_bi); - bigint_add(&t.specified_exponent, &multiplied, &digit_value_bi); + // we use parse_f128 to generate the float literal, so just + // need to get to the end of the token } break; case TokenizeStateSawDash: @@ -1399,6 +1429,9 @@ void tokenize(Buf *buf, Tokenization *out) { case TokenizeStateStart: case TokenizeStateError: break; + case TokenizeStateNumberNoUnderscore: + case TokenizeStateFloatFractionNoUnderscore: + case TokenizeStateFloatExponentNumberNoUnderscore: case TokenizeStateNumberDot: tokenize_error(&t, "unterminated number literal"); break; diff --git a/test/compile_errors.zig b/test/compile_errors.zig index f19ff11471..d762f5bdc9 100644 --- a/test/compile_errors.zig +++ b/test/compile_errors.zig @@ -395,11 +395,163 @@ pub fn addCases(cases: *tests.CompileErrorContext) void { \\ var bad_float :f32 = 0.0; \\ bad_float = bad_float + .20; \\ std.debug.assert(bad_float < 1.0); - \\}) + \\} , &[_][]const u8{ "tmp.zig:5:29: error: invalid token: '.'", }); + cases.add("invalid exponent in float literal - 1", + \\fn main() void { + \\ var bad: f128 = 0x1.0p1ab1; + \\} + , &[_][]const u8{ + "tmp.zig:2:28: error: invalid character: 'a'", + }); + + cases.add("invalid exponent in float literal - 2", + \\fn main() void { + \\ var bad: f128 = 0x1.0p50F; + \\} + , &[_][]const u8{ + "tmp.zig:2:29: error: invalid character: 'F'", + }); + + cases.add("invalid underscore placement in float literal - 1", + \\fn main() void { + \\ var bad: f128 = 0._0; + \\} + , &[_][]const u8{ + "tmp.zig:2:23: error: invalid character: '_'", + }); + + cases.add("invalid underscore placement in float literal - 2", + \\fn main() void { + \\ var bad: f128 = 0_.0; + \\} + , &[_][]const u8{ + "tmp.zig:2:23: error: invalid character: '.'", + }); + + cases.add("invalid underscore placement in float literal - 3", + \\fn main() void { + \\ var bad: f128 = 0.0_; + \\} + , &[_][]const u8{ + "tmp.zig:2:25: error: invalid character: ';'", + }); + + cases.add("invalid underscore placement in float literal - 4", + \\fn main() void { + \\ var bad: f128 = 1.0e_1; + \\} + , &[_][]const u8{ + "tmp.zig:2:25: error: invalid character: '_'", + }); + + cases.add("invalid underscore placement in float literal - 5", + \\fn main() void { + \\ var bad: f128 = 1.0e+_1; + \\} + , &[_][]const u8{ + "tmp.zig:2:26: error: invalid character: '_'", + }); + + cases.add("invalid underscore placement in float literal - 6", + \\fn main() void { + \\ var bad: f128 = 1.0e-_1; + \\} + , &[_][]const u8{ + "tmp.zig:2:26: error: invalid character: '_'", + }); + + cases.add("invalid underscore placement in float literal - 7", + \\fn main() void { + \\ var bad: f128 = 1.0e-1_; + \\} + , &[_][]const u8{ + "tmp.zig:2:28: error: invalid character: ';'", + }); + + cases.add("invalid underscore placement in float literal - 9", + \\fn main() void { + \\ var bad: f128 = 1__0.0e-1; + \\} + , &[_][]const u8{ + "tmp.zig:2:23: error: invalid character: '_'", + }); + + cases.add("invalid underscore placement in float literal - 10", + \\fn main() void { + \\ var bad: f128 = 1.0__0e-1; + \\} + , &[_][]const u8{ + "tmp.zig:2:25: error: invalid character: '_'", + }); + + cases.add("invalid underscore placement in float literal - 11", + \\fn main() void { + \\ var bad: f128 = 1.0e-1__0; + \\} + , &[_][]const u8{ + "tmp.zig:2:28: error: invalid character: '_'", + }); + + cases.add("invalid underscore placement in float literal - 12", + \\fn main() void { + \\ var bad: f128 = 0_x0.0; + \\} + , &[_][]const u8{ + "tmp.zig:2:23: error: invalid character: 'x'", + }); + + cases.add("invalid underscore placement in float literal - 13", + \\fn main() void { + \\ var bad: f128 = 0x_0.0; + \\} + , &[_][]const u8{ + "tmp.zig:2:23: error: invalid character: '_'", + }); + + cases.add("invalid underscore placement in float literal - 14", + \\fn main() void { + \\ var bad: f128 = 0x0.0_p1; + \\} + , &[_][]const u8{ + "tmp.zig:2:27: error: invalid character: 'p'", + }); + + cases.add("invalid underscore placement in int literal - 1", + \\fn main() void { + \\ var bad: u128 = 0010_; + \\} + , &[_][]const u8{ + "tmp.zig:2:26: error: invalid character: ';'", + }); + + cases.add("invalid underscore placement in int literal - 2", + \\fn main() void { + \\ var bad: u128 = 0b0010_; + \\} + , &[_][]const u8{ + "tmp.zig:2:28: error: invalid character: ';'", + }); + + cases.add("invalid underscore placement in int literal - 3", + \\fn main() void { + \\ var bad: u128 = 0o0010_; + \\} + , &[_][]const u8{ + "tmp.zig:2:28: error: invalid character: ';'", + }); + + cases.add("invalid underscore placement in int literal - 4", + \\fn main() void { + \\ var bad: u128 = 0x0010_; + \\} + , &[_][]const u8{ + "tmp.zig:2:28: error: invalid character: ';'", + }); + cases.add("var args without c calling conv", \\fn foo(args: ...) void {} \\comptime { diff --git a/test/stage1/behavior/math.zig b/test/stage1/behavior/math.zig index b50e72840a..fd38baa53a 100644 --- a/test/stage1/behavior/math.zig +++ b/test/stage1/behavior/math.zig @@ -407,6 +407,34 @@ test "quad hex float literal parsing accurate" { comptime S.doTheTest(); } +test "underscore separator parsing" { + expect(0_0_0_0 == 0); + expect(1_234_567 == 1234567); + expect(001_234_567 == 1234567); + expect(0_0_1_2_3_4_5_6_7 == 1234567); + + expect(0b0_0_0_0 == 0); + expect(0b1010_1010 == 0b10101010); + expect(0b0000_1010_1010 == 0b10101010); + expect(0b1_0_1_0_1_0_1_0 == 0b10101010); + + expect(0o0_0_0_0 == 0); + expect(0o1010_1010 == 0o10101010); + expect(0o0000_1010_1010 == 0o10101010); + expect(0o1_0_1_0_1_0_1_0 == 0o10101010); + + expect(0x0_0_0_0 == 0); + expect(0x1010_1010 == 0x10101010); + expect(0x0000_1010_1010 == 0x10101010); + expect(0x1_0_1_0_1_0_1_0 == 0x10101010); + + expect(123_456.789_000e1_0 == 123456.789000e10); + expect(0_1_2_3_4_5_6.7_8_9_0_0_0e0_0_1_0 == 123456.789000e10); + + expect(0x1234_5678.9ABC_DEF0p-1_0 == 0x12345678.9ABCDEF0p-10); + expect(0x1_2_3_4_5_6_7_8.9_A_B_C_D_E_F_0p-0_0_0_1_0 == 0x12345678.9ABCDEF0p-10); +} + test "hex float literal within range" { const a = 0x1.0p16383; const b = 0x0.1p16387; diff --git a/test/stage1/behavior/slice.zig b/test/stage1/behavior/slice.zig index f7d6037a1f..e357ad2f0f 100644 --- a/test/stage1/behavior/slice.zig +++ b/test/stage1/behavior/slice.zig @@ -285,3 +285,17 @@ test "slice syntax resulting in pointer-to-array" { S.doTheTest(); comptime S.doTheTest(); } + +test "slice of hardcoded address to pointer" { + const S = struct { + fn doTheTest() void { + const pointer = @intToPtr([*]u8, 0x04)[0..2]; + comptime expect(@TypeOf(pointer) == *[2]u8); + const slice: []const u8 = pointer; + expect(@ptrToInt(slice.ptr) == 4); + expect(slice.len == 2); + } + }; + + S.doTheTest(); +} |
