Merge branch 'master' into llvm6

author: Andrew Kelley <superjoe30@gmail.com> 2018-01-03 00:42:00 -0500
committer: Andrew Kelley <superjoe30@gmail.com> 2018-01-03 00:42:00 -0500
commit: 1d77f8db289e6eefce827fe37d27e72b68362943 (patch)
tree: 62bcadb5abcf45076583c362cb68e254067020f0 /std
parent: 6bfaf262d5a1d18482a813c7022ffb03a18f52a8 (diff)
parent: 0ea50b3157f00ab56b2752dfa8c70edb4bce2af7 (diff)
download: zig-1d77f8db289e6eefce827fe37d27e72b68362943.tar.gz
zig-1d77f8db289e6eefce827fe37d27e72b68362943.zip
5 files changed, 280 insertions, 3 deletions
diff --git a/std/fmt/index.zig b/std/fmt/index.zig
index 550fa1ce1f..432b43bfef 100644
--- a/std/fmt/index.zig
+++ b/std/fmt/index.zig
@@ -14,6 +14,8 @@ const State = enum { // TODO put inside format function and make sure the name a
     CloseBrace,
     Integer,
     IntegerWidth,
+    Float,
+    FloatWidth,
     Character,
     Buf,
     BufWidth,
@@ -37,7 +39,6 @@ pub fn format(context: var, output: fn(@typeOf(context), []const u8)->%void,
         switch (state) {
             State.Start => switch (c) {
                 '{' => {
-                    // TODO if you make this an if statement with `and` then it breaks
                     if (start_index < i) {
                         %return output(context, fmt[start_index..i]);
                     }
@@ -85,6 +86,8 @@ pub fn format(context: var, output: fn(@typeOf(context), []const u8)->%void,
                 },
                 's' => {
                     state = State.Buf;
+                },'.' => {
+                    state = State.Float;
                 },
                 else => @compileError("Unknown format character: " ++ []u8{c}),
             },
@@ -129,6 +132,30 @@ pub fn format(context: var, output: fn(@typeOf(context), []const u8)->%void,
                 '0' ... '9' => {},
                 else => @compileError("Unexpected character in format string: " ++ []u8{c}),
             },
+            State.Float => switch (c) {
+                '}' => {
+                    %return formatFloatDecimal(args[next_arg], 0, context, output);
+                    next_arg += 1;
+                    state = State.Start;
+                    start_index = i + 1;
+                },
+                '0' ... '9' => {
+                    width_start = i;
+                    state = State.FloatWidth;
+                },
+                else => @compileError("Unexpected character in format string: " ++ []u8{c}),
+            },
+            State.FloatWidth => switch (c) {
+                '}' => {
+                    width = comptime %%parseUnsigned(usize, fmt[width_start..i], 10);
+                    %return formatFloatDecimal(args[next_arg], width, context, output);
+                    next_arg += 1;
+                    state = State.Start;
+                    start_index = i + 1;
+                },
+                '0' ... '9' => {},
+                else => @compileError("Unexpected character in format string: " ++ []u8{c}),
+            },
             State.BufWidth => switch (c) {
                 '}' => {
                     width = comptime %%parseUnsigned(usize, fmt[width_start..i], 10);
@@ -267,6 +294,47 @@ pub fn formatFloat(value: var, context: var, output: fn(@typeOf(context), []cons
     }
 }
 
+/// Writes `value` in plain decimal notation (never scientific) through `output`.
+/// `precision` caps how many fractional digits are written; 0 means "all available digits".
+/// Values below 1 get "0." and their leading zeros; missing integer digits are zero-filled.
+pub fn formatFloatDecimal(value: var, precision: usize, context: var, output: fn(@typeOf(context), []const u8)->%void) -> %void {
+    var x = f64(value);
+    // Errol doesn't handle these special cases.
+    if (math.isNan(x)) return output(context, "NaN");
+    if (math.signbit(x)) {
+        %return output(context, "-");
+        x = -x;
+    }
+    if (math.isPositiveInf(x)) return output(context, "Infinity");
+    if (x == 0.0) return output(context, "0.0");
+
+    var buffer: [32]u8 = undefined;
+    const float_decimal = errol3(x, buffer[0..]);
+    const digits = float_decimal.digits;
+    // Assumes value == 0.d1d2d3... * 10^exp, as the fmt tests imply -- TODO confirm for exp <= 0.
+    const num_int_digits = if (float_decimal.exp > 0) usize(float_decimal.exp) else 0;
+    const num_lead_zeros = if (float_decimal.exp < 0) usize(-float_decimal.exp) else 0;
+    const num_left = math.min(num_int_digits, digits.len);
+    // For f32, fractional output stops after 7 significant digits.
+    const num_valid = if (@typeOf(value) == f32) math.min(usize(7), digits.len) else digits.len;
+    const num_frac = if (num_valid > num_left) num_valid - num_left else 0;
+
+    // Integer part: a lone "0" below 1, otherwise the digits plus any zero fill.
+    if (num_int_digits == 0) { %return output(context, "0"); }
+    %return output(context, digits[0 .. num_left]);
+    var fill = num_int_digits - num_left;
+    while (fill > 0) : (fill -= 1) { %return output(context, "0"); }
+    %return output(context, ".");
+
+    // Fraction part: leading zeros first, then digits; never more than the precision cap.
+    if (num_lead_zeros + num_frac == 0) return output(context, "0");
+    const limit = if (precision != 0) precision else num_lead_zeros + num_frac;
+    var zeros = math.min(num_lead_zeros, limit);
+    const num_right = math.min(limit - zeros, num_frac);
+    while (zeros > 0) : (zeros -= 1) { %return output(context, "0"); }
+    return output(context, digits[num_left .. num_left + num_right]);
+}
+
 pub fn formatInt(value: var, base: u8, uppercase: bool, width: usize,
     context: var, output: fn(@typeOf(context), []const u8)->%void) -> %void
 {
@@ -540,6 +608,39 @@ test "fmt.format" {
             const result = %%bufPrint(buf1[0..], "f64: {}\n", -math.inf_f64);
             assert(mem.eql(u8, result, "f64: -Infinity\n"));
         }
+        {
+            var buf1: [32]u8 = undefined;
+            const value: f32 = 1.1234;
+            const result = %%bufPrint(buf1[0..], "f32: {.1}\n", value);
+            assert(mem.eql(u8, result, "f32: 1.1\n"));
+        }
+        {
+            var buf1: [32]u8 = undefined;
+            const value: f32 = 1234.567;
+            const result = %%bufPrint(buf1[0..], "f32: {.2}\n", value);
+            assert(mem.eql(u8, result, "f32: 1234.56\n"));
+        }
+        {
+            var buf1: [32]u8 = undefined;
+            const value: f32 = -11.1234;
+            const result = %%bufPrint(buf1[0..], "f32: {.4}\n", value);
+            // -11.1234 is converted to f64 -11.12339... internally (errol3() function takes f64).
+            // -11.12339... is truncated to -11.1233
+            assert(mem.eql(u8, result, "f32: -11.1233\n"));
+        }
+        {
+            var buf1: [32]u8 = undefined;
+            const value: f32 = 91.12345;
+            const result = %%bufPrint(buf1[0..], "f32: {.}\n", value);
+            assert(mem.eql(u8, result, "f32: 91.12345\n"));
+        }
+        {
+            var buf1: [32]u8 = undefined;
+            const value: f64 = 91.12345678901235;
+            const result = %%bufPrint(buf1[0..], "f64: {.10}\n", value);
+            assert(mem.eql(u8, result, "f64: 91.1234567890\n"));
+        }
+
     }
 }
 
diff --git a/std/index.zig b/std/index.zig
index 07da469b5e..a9a0038e60 100644
--- a/std/index.zig
+++ b/std/index.zig
@@ -25,6 +25,7 @@ pub const net = @import("net.zig");
 pub const os = @import("os/index.zig");
 pub const rand = @import("rand.zig");
 pub const sort = @import("sort.zig");
+pub const unicode = @import("unicode.zig");
 
 test "std" {
     // run tests from these
@@ -53,4 +54,5 @@ test "std" {
     _ = @import("os/index.zig");
     _ = @import("rand.zig");
     _ = @import("sort.zig");
+    _ = @import("unicode.zig");
 }
diff --git a/std/io.zig b/std/io.zig
index cbf2e0c216..44e5634ae0 100644
--- a/std/io.zig
+++ b/std/io.zig
@@ -500,11 +500,16 @@ pub fn writeFile(path: []const u8, data: []const u8, allocator: ?&mem.Allocator)
 
 /// On success, caller owns returned buffer.
 pub fn readFileAlloc(path: []const u8, allocator: &mem.Allocator) -> %[]u8 {
+    return readFileAllocExtra(path, allocator, 0); // extra_len = 0: no extra bytes are added to the file size
+}
+/// On success, caller owns returned buffer.
+/// Allocates extra_len extra bytes at the end of the file buffer, which are uninitialized.
+pub fn readFileAllocExtra(path: []const u8, allocator: &mem.Allocator, extra_len: usize) -> %[]u8 {
     var file = %return File.openRead(path, allocator);
     defer file.close();
 
     const size = %return file.getEndPos();
-    const buf = %return allocator.alloc(u8, size);
+    const buf = %return allocator.alloc(u8, size + extra_len);
     %defer allocator.free(buf);
 
     var adapter = FileInStream.init(&file);
diff --git a/std/math/acos.zig b/std/math/acos.zig
index 478d5a846d..8adce39bff 100644
--- a/std/math/acos.zig
+++ b/std/math/acos.zig
@@ -39,7 +39,7 @@ fn acos32(x: f32) -> f32 {
             if (hx >> 31 != 0) {
                 return 2.0 * pio2_hi + 0x1.0p-120;
             } else {
-                return 0;
+                return 0.0;
             }
         } else {
             return math.nan(f32);
diff --git a/std/unicode.zig b/std/unicode.zig
new file mode 100644
index 0000000000..6c06eeb73a
--- /dev/null
+++ b/std/unicode.zig
@@ -0,0 +1,169 @@
+const std = @import("./index.zig");
+
+error Utf8InvalidStartByte;
+
+/// Classifies `first_byte` as the lead byte of a UTF-8 sequence and reports how many
+/// bytes (1 through 4) the whole encoded codepoint occupies.
+/// A byte that has no valid lead-byte bit pattern yields error.Utf8InvalidStartByte.
+pub fn utf8ByteSequenceLength(first_byte: u8) -> %u3 {
+    if (first_byte >> 7 == 0b0) return u3(1);
+    if (first_byte >> 5 == 0b110) return u3(2);
+    if (first_byte >> 4 == 0b1110) return u3(3);
+    if (first_byte >> 3 == 0b11110) return u3(4);
+    return error.Utf8InvalidStartByte;
+}
+
+error Utf8OverlongEncoding;
+error Utf8ExpectedContinuation;
+error Utf8EncodesSurrogateHalf;
+error Utf8CodepointTooLarge;
+
+/// Decodes the single UTF-8 codepoint stored in `bytes`, dispatching on the slice length.
+/// Precondition: bytes.len == %%utf8ByteSequenceLength(bytes[0]); any length outside
+/// 1 through 4 hits `unreachable`.
+/// A 1-byte sequence is returned as-is; longer ones go through utf8Decode2/3/4.
+/// When the length is already known at comptime, utf8Decode2, utf8Decode3 or utf8Decode4
+/// can be called directly instead of this function.
+pub fn utf8Decode(bytes: []const u8) -> %u32 {
+    if (bytes.len == 1) return u32(bytes[0]);
+    if (bytes.len == 2) return utf8Decode2(bytes);
+    if (bytes.len == 3) return utf8Decode3(bytes);
+    if (bytes.len == 4) return utf8Decode4(bytes);
+    unreachable;
+}
+/// Decodes a 2-byte UTF-8 sequence; asserts bytes.len == 2 and a 110xxxxx lead byte.
+/// Fails with Utf8ExpectedContinuation or Utf8OverlongEncoding on malformed input.
+pub fn utf8Decode2(bytes: []const u8) -> %u32 {
+    std.debug.assert(bytes.len == 2);
+    std.debug.assert(bytes[0] & 0b11100000 == 0b11000000);
+    if (bytes[1] & 0b11000000 != 0b10000000) return error.Utf8ExpectedContinuation;
+    // 5 payload bits from the lead byte, 6 from the continuation byte.
+    const high_bits: u32 = bytes[0] & 0b00011111;
+    const low_bits: u32 = bytes[1] & 0b00111111;
+    const codepoint = (high_bits << 6) | low_bits;
+    if (codepoint < 0x80) return error.Utf8OverlongEncoding;
+    return codepoint;
+}
+/// Decodes a 3-byte UTF-8 sequence; asserts bytes.len == 3 and a 1110xxxx lead byte.
+/// Fails with Utf8ExpectedContinuation, Utf8OverlongEncoding or Utf8EncodesSurrogateHalf.
+pub fn utf8Decode3(bytes: []const u8) -> %u32 {
+    std.debug.assert(bytes.len == 3);
+    std.debug.assert(bytes[0] & 0b11110000 == 0b11100000);
+    // Both trailing bytes must look like 10xxxxxx.
+    if (bytes[1] & 0b11000000 != 0b10000000) return error.Utf8ExpectedContinuation;
+    if (bytes[2] & 0b11000000 != 0b10000000) return error.Utf8ExpectedContinuation;
+
+    // 4 payload bits from the lead byte, then 6 from each continuation byte.
+    const top: u32 = bytes[0] & 0b00001111;
+    const mid: u32 = bytes[1] & 0b00111111;
+    const low: u32 = bytes[2] & 0b00111111;
+    const codepoint = (top << 12) | (mid << 6) | low;
+    if (codepoint < 0x800) return error.Utf8OverlongEncoding;
+    if (codepoint >= 0xd800 and codepoint <= 0xdfff) return error.Utf8EncodesSurrogateHalf;
+    return codepoint;
+}
+/// Decodes a 4-byte UTF-8 sequence; asserts bytes.len == 4 and a 11110xxx lead byte.
+/// Fails with Utf8ExpectedContinuation, Utf8OverlongEncoding or Utf8CodepointTooLarge.
+pub fn utf8Decode4(bytes: []const u8) -> %u32 {
+    std.debug.assert(bytes.len == 4);
+    std.debug.assert(bytes[0] & 0b11111000 == 0b11110000);
+    // All three trailing bytes must look like 10xxxxxx.
+    if (bytes[1] & 0b11000000 != 0b10000000) return error.Utf8ExpectedContinuation;
+    if (bytes[2] & 0b11000000 != 0b10000000) return error.Utf8ExpectedContinuation;
+    if (bytes[3] & 0b11000000 != 0b10000000) return error.Utf8ExpectedContinuation;
+
+    // 3 payload bits from the lead byte, then 6 from each continuation byte.
+    const b0: u32 = bytes[0] & 0b00000111;
+    const b1: u32 = bytes[1] & 0b00111111;
+    const b2: u32 = bytes[2] & 0b00111111;
+    const b3: u32 = bytes[3] & 0b00111111;
+    const codepoint = (b0 << 18) | (b1 << 12) | (b2 << 6) | b3;
+
+    // Values below 0x10000 are overlong here; values above 0x10FFFF are rejected as too large.
+    if (codepoint < 0x10000) return error.Utf8OverlongEncoding;
+    if (codepoint > 0x10FFFF) return error.Utf8CodepointTooLarge;
+    return codepoint;
+}
+
+error UnexpectedEof;
+test "valid utf8" {
+    testValid("\x00", 0x0); // 1-byte sequences
+    testValid("\x20", 0x20);
+    testValid("\x7f", 0x7f); // largest 1-byte codepoint
+    testValid("\xc2\x80", 0x80); // smallest codepoint utf8Decode2 accepts
+    testValid("\xdf\xbf", 0x7ff); // largest 2-byte codepoint (11 payload bits)
+    testValid("\xe0\xa0\x80", 0x800); // smallest codepoint utf8Decode3 accepts
+    testValid("\xe1\x80\x80", 0x1000);
+    testValid("\xef\xbf\xbf", 0xffff); // largest 3-byte codepoint (16 payload bits)
+    testValid("\xf0\x90\x80\x80", 0x10000); // smallest codepoint utf8Decode4 accepts
+    testValid("\xf1\x80\x80\x80", 0x40000);
+    testValid("\xf3\xbf\xbf\xbf", 0xfffff);
+    testValid("\xf4\x8f\xbf\xbf", 0x10ffff); // largest codepoint utf8Decode4 accepts
+}
+
+test "invalid utf8 continuation bytes" {
+    // a continuation byte (10xxxxxx) is not a valid start byte
+    testError("\x80", error.Utf8InvalidStartByte);
+    testError("\xbf", error.Utf8InvalidStartByte);
+    // start bytes with five or more leading 1 bits are rejected
+    testError("\xf8", error.Utf8InvalidStartByte);
+    testError("\xff", error.Utf8InvalidStartByte);
+    // 2-byte sequences: truncated input, then a second byte that is not a continuation
+    testError("\xc2", error.UnexpectedEof);
+    testError("\xc2\x00", error.Utf8ExpectedContinuation);
+    testError("\xc2\xc0", error.Utf8ExpectedContinuation);
+    // 3-byte sequences: testDecode reports short input before any later byte is inspected
+    testError("\xe0", error.UnexpectedEof);
+    testError("\xe0\x00", error.UnexpectedEof);
+    testError("\xe0\xc0", error.UnexpectedEof);
+    testError("\xe0\xa0", error.UnexpectedEof);
+    testError("\xe0\xa0\x00", error.Utf8ExpectedContinuation);
+    testError("\xe0\xa0\xc0", error.Utf8ExpectedContinuation);
+    // 4-byte sequences: same pattern, only a full-length input reaches the continuation checks
+    testError("\xf0", error.UnexpectedEof);
+    testError("\xf0\x00", error.UnexpectedEof);
+    testError("\xf0\xc0", error.UnexpectedEof);
+    testError("\xf0\x90\x00", error.UnexpectedEof);
+    testError("\xf0\x90\xc0", error.UnexpectedEof);
+    testError("\xf0\x90\x80\x00", error.Utf8ExpectedContinuation);
+    testError("\xf0\x90\x80\xc0", error.Utf8ExpectedContinuation);
+}
+
+test "overlong utf8 codepoint" {
+    testError("\xc0\x80", error.Utf8OverlongEncoding); // 2-byte form of 0x00
+    testError("\xc1\xbf", error.Utf8OverlongEncoding); // 2-byte form of 0x7f, just below the 0x80 limit
+    testError("\xe0\x80\x80", error.Utf8OverlongEncoding); // 3-byte form of 0x00
+    testError("\xe0\x9f\xbf", error.Utf8OverlongEncoding); // 3-byte form of 0x7ff, just below the 0x800 limit
+    testError("\xf0\x80\x80\x80", error.Utf8OverlongEncoding); // 4-byte form of 0x00
+    testError("\xf0\x8f\xbf\xbf", error.Utf8OverlongEncoding); // 4-byte form of 0xffff, just below the 0x10000 limit
+}
+
+test "misc invalid utf8" {
+    // codepoints above 0x10FFFF: 0x110000 is the first one rejected, 0x1fffff the largest 4-byte payload
+    testError("\xf4\x90\x80\x80", error.Utf8CodepointTooLarge);
+    testError("\xf7\xbf\xbf\xbf", error.Utf8CodepointTooLarge);
+    // surrogate halves 0xd800-0xdfff are rejected; their neighbours 0xd7ff and 0xe000 are valid
+    testValid("\xed\x9f\xbf", 0xd7ff);
+    testError("\xed\xa0\x80", error.Utf8EncodesSurrogateHalf);
+    testError("\xed\xbf\xbf", error.Utf8EncodesSurrogateHalf);
+    testValid("\xee\x80\x80", 0xe000);
+}
+
+fn testError(bytes: []const u8, expected_err: error) {
+    if (testDecode(bytes)) |_| {
+        unreachable; // decoding succeeded although an error was expected
+    } else |err| {
+        std.debug.assert(err == expected_err); // must fail with exactly the expected error
+    }
+}
+
+fn testValid(bytes: []const u8, expected_codepoint: u32) {
+    std.debug.assert(%%testDecode(bytes) == expected_codepoint); // %% unwraps the result, so a decode error is not tolerated
+}
+
+fn testDecode(bytes: []const u8) -> %u32 {
+    const length = %return utf8ByteSequenceLength(bytes[0]); // an invalid start byte is returned as the error
+    if (bytes.len < length) return error.UnexpectedEof; // fewer bytes than the start byte announces
+    std.debug.assert(bytes.len == length); // test inputs are expected to hold exactly one sequence
+    return utf8Decode(bytes);
+}
author	Andrew Kelley <superjoe30@gmail.com>	2018-01-03 00:42:00 -0500
committer	Andrew Kelley <superjoe30@gmail.com>	2018-01-03 00:42:00 -0500
commit	1d77f8db289e6eefce827fe37d27e72b68362943 (patch)
tree	62bcadb5abcf45076583c362cb68e254067020f0 /std
parent	6bfaf262d5a1d18482a813c7022ffb03a18f52a8 (diff)
parent	0ea50b3157f00ab56b2752dfa8c70edb4bce2af7 (diff)
download	zig-1d77f8db289e6eefce827fe37d27e72b68362943.tar.gz zig-1d77f8db289e6eefce827fe37d27e72b68362943.zip