diff options
| author | Andrew Kelley <superjoe30@gmail.com> | 2018-01-03 00:42:00 -0500 |
|---|---|---|
| committer | Andrew Kelley <superjoe30@gmail.com> | 2018-01-03 00:42:00 -0500 |
| commit | 1d77f8db289e6eefce827fe37d27e72b68362943 (patch) | |
| tree | 62bcadb5abcf45076583c362cb68e254067020f0 /std | |
| parent | 6bfaf262d5a1d18482a813c7022ffb03a18f52a8 (diff) | |
| parent | 0ea50b3157f00ab56b2752dfa8c70edb4bce2af7 (diff) | |
| download | zig-1d77f8db289e6eefce827fe37d27e72b68362943.tar.gz zig-1d77f8db289e6eefce827fe37d27e72b68362943.zip | |
Merge branch 'master' into llvm6
Diffstat (limited to 'std')
| -rw-r--r-- | std/fmt/index.zig | 103 | ||||
| -rw-r--r-- | std/index.zig | 2 | ||||
| -rw-r--r-- | std/io.zig | 7 | ||||
| -rw-r--r-- | std/math/acos.zig | 2 | ||||
| -rw-r--r-- | std/unicode.zig | 169 |
5 files changed, 280 insertions, 3 deletions
diff --git a/std/fmt/index.zig b/std/fmt/index.zig index 550fa1ce1f..432b43bfef 100644 --- a/std/fmt/index.zig +++ b/std/fmt/index.zig @@ -14,6 +14,8 @@ const State = enum { // TODO put inside format function and make sure the name a CloseBrace, Integer, IntegerWidth, + Float, + FloatWidth, Character, Buf, BufWidth, @@ -37,7 +39,6 @@ pub fn format(context: var, output: fn(@typeOf(context), []const u8)->%void, switch (state) { State.Start => switch (c) { '{' => { - // TODO if you make this an if statement with `and` then it breaks if (start_index < i) { %return output(context, fmt[start_index..i]); } @@ -85,6 +86,8 @@ pub fn format(context: var, output: fn(@typeOf(context), []const u8)->%void, }, 's' => { state = State.Buf; + },'.' => { + state = State.Float; }, else => @compileError("Unknown format character: " ++ []u8{c}), }, @@ -129,6 +132,30 @@ pub fn format(context: var, output: fn(@typeOf(context), []const u8)->%void, '0' ... '9' => {}, else => @compileError("Unexpected character in format string: " ++ []u8{c}), }, + State.Float => switch (c) { + '}' => { + %return formatFloatDecimal(args[next_arg], 0, context, output); + next_arg += 1; + state = State.Start; + start_index = i + 1; + }, + '0' ... '9' => { + width_start = i; + state = State.FloatWidth; + }, + else => @compileError("Unexpected character in format string: " ++ []u8{c}), + }, + State.FloatWidth => switch (c) { + '}' => { + width = comptime %%parseUnsigned(usize, fmt[width_start..i], 10); + %return formatFloatDecimal(args[next_arg], width, context, output); + next_arg += 1; + state = State.Start; + start_index = i + 1; + }, + '0' ... '9' => {}, + else => @compileError("Unexpected character in format string: " ++ []u8{c}), + }, State.BufWidth => switch (c) { '}' => { width = comptime %%parseUnsigned(usize, fmt[width_start..i], 10); @@ -267,6 +294,47 @@ pub fn formatFloat(value: var, context: var, output: fn(@typeOf(context), []cons } } +pub fn formatFloatDecimal(value: var, precision: usize, context: var, output: fn(@typeOf(context), []const u8)->%void) -> %void { + var x = f64(value); + + // Errol doesn't handle these special cases. + if (math.isNan(x)) { + return output(context, "NaN"); + } + if (math.signbit(x)) { + %return output(context, "-"); + x = -x; + } + if (math.isPositiveInf(x)) { + return output(context, "Infinity"); + } + if (x == 0.0) { + return output(context, "0.0"); + } + + var buffer: [32]u8 = undefined; + const float_decimal = errol3(x, buffer[0..]); + + const num_left_digits = if (float_decimal.exp > 0) usize(float_decimal.exp) else 1; + + %return output(context, float_decimal.digits[0 .. num_left_digits]); + %return output(context, "."); + if (float_decimal.digits.len > 1) { + const num_valid_digtis = if (@typeOf(value) == f32) math.min(usize(7), float_decimal.digits.len) + else + float_decimal.digits.len; + + const num_right_digits = if (precision != 0) + math.min(precision, (num_valid_digtis-num_left_digits)) + else + num_valid_digtis - num_left_digits; + %return output(context, float_decimal.digits[num_left_digits .. (num_left_digits + num_right_digits)]); + } else { + %return output(context, "0"); + } +} + + pub fn formatInt(value: var, base: u8, uppercase: bool, width: usize, context: var, output: fn(@typeOf(context), []const u8)->%void) -> %void { @@ -540,6 +608,39 @@ test "fmt.format" { const result = %%bufPrint(buf1[0..], "f64: {}\n", -math.inf_f64); assert(mem.eql(u8, result, "f64: -Infinity\n")); } + { + var buf1: [32]u8 = undefined; + const value: f32 = 1.1234; + const result = %%bufPrint(buf1[0..], "f32: {.1}\n", value); + assert(mem.eql(u8, result, "f32: 1.1\n")); + } + { + var buf1: [32]u8 = undefined; + const value: f32 = 1234.567; + const result = %%bufPrint(buf1[0..], "f32: {.2}\n", value); + assert(mem.eql(u8, result, "f32: 1234.56\n")); + } + { + var buf1: [32]u8 = undefined; + const value: f32 = -11.1234; + const result = %%bufPrint(buf1[0..], "f32: {.4}\n", value); + // -11.1234 is converted to f64 -11.12339... internally (errol3() function takes f64). + // -11.12339... is truncated to -11.1233 + assert(mem.eql(u8, result, "f32: -11.1233\n")); + } + { + var buf1: [32]u8 = undefined; + const value: f32 = 91.12345; + const result = %%bufPrint(buf1[0..], "f32: {.}\n", value); + assert(mem.eql(u8, result, "f32: 91.12345\n")); + } + { + var buf1: [32]u8 = undefined; + const value: f64 = 91.12345678901235; + const result = %%bufPrint(buf1[0..], "f64: {.10}\n", value); + assert(mem.eql(u8, result, "f64: 91.1234567890\n")); + } + } } diff --git a/std/index.zig b/std/index.zig index 07da469b5e..a9a0038e60 100644 --- a/std/index.zig +++ b/std/index.zig @@ -25,6 +25,7 @@ pub const net = @import("net.zig"); pub const os = @import("os/index.zig"); pub const rand = @import("rand.zig"); pub const sort = @import("sort.zig"); +pub const unicode = @import("unicode.zig"); test "std" { // run tests from these @@ -53,4 +54,5 @@ test "std" { _ = @import("os/index.zig"); _ = @import("rand.zig"); _ = @import("sort.zig"); + _ = @import("unicode.zig"); } diff --git a/std/io.zig b/std/io.zig index cbf2e0c216..44e5634ae0 100644 --- a/std/io.zig +++ b/std/io.zig @@ -500,11 +500,16 @@ pub fn writeFile(path: []const u8, data: []const u8, allocator: ?&mem.Allocator) /// On success, caller owns returned buffer. pub fn readFileAlloc(path: []const u8, allocator: &mem.Allocator) -> %[]u8 { + return readFileAllocExtra(path, allocator, 0); +} +/// On success, caller owns returned buffer. +/// Allocates extra_len extra bytes at the end of the file buffer, which are uninitialized. +pub fn readFileAllocExtra(path: []const u8, allocator: &mem.Allocator, extra_len: usize) -> %[]u8 { var file = %return File.openRead(path, allocator); defer file.close(); const size = %return file.getEndPos(); - const buf = %return allocator.alloc(u8, size); + const buf = %return allocator.alloc(u8, size + extra_len); %defer allocator.free(buf); var adapter = FileInStream.init(&file); diff --git a/std/math/acos.zig b/std/math/acos.zig index 478d5a846d..8adce39bff 100644 --- a/std/math/acos.zig +++ b/std/math/acos.zig @@ -39,7 +39,7 @@ fn acos32(x: f32) -> f32 { if (hx >> 31 != 0) { return 2.0 * pio2_hi + 0x1.0p-120; } else { - return 0; + return 0.0; } } else { return math.nan(f32); diff --git a/std/unicode.zig b/std/unicode.zig new file mode 100644 index 0000000000..6c06eeb73a --- /dev/null +++ b/std/unicode.zig @@ -0,0 +1,169 @@ +const std = @import("./index.zig"); + +error Utf8InvalidStartByte; + +/// Given the first byte of a UTF-8 codepoint, +/// returns a number 1-4 indicating the total length of the codepoint in bytes. +/// If this byte does not match the form of a UTF-8 start byte, returns Utf8InvalidStartByte. +pub fn utf8ByteSequenceLength(first_byte: u8) -> %u3 { + if (first_byte < 0b10000000) return u3(1); + if (first_byte & 0b11100000 == 0b11000000) return u3(2); + if (first_byte & 0b11110000 == 0b11100000) return u3(3); + if (first_byte & 0b11111000 == 0b11110000) return u3(4); + return error.Utf8InvalidStartByte; +} + +error Utf8OverlongEncoding; +error Utf8ExpectedContinuation; +error Utf8EncodesSurrogateHalf; +error Utf8CodepointTooLarge; + +/// Decodes the UTF-8 codepoint encoded in the given slice of bytes. +/// bytes.len must be equal to %%utf8ByteSequenceLength(bytes[0]). +/// If you already know the length at comptime, you can call one of +/// utf8Decode2,utf8Decode3,utf8Decode4 directly instead of this function. +pub fn utf8Decode(bytes: []const u8) -> %u32 { + return switch (bytes.len) { + 1 => u32(bytes[0]), + 2 => utf8Decode2(bytes), + 3 => utf8Decode3(bytes), + 4 => utf8Decode4(bytes), + else => unreachable, + }; +} +pub fn utf8Decode2(bytes: []const u8) -> %u32 { + std.debug.assert(bytes.len == 2); + std.debug.assert(bytes[0] & 0b11100000 == 0b11000000); + var value: u32 = bytes[0] & 0b00011111; + + if (bytes[1] & 0b11000000 != 0b10000000) return error.Utf8ExpectedContinuation; + value <<= 6; + value |= bytes[1] & 0b00111111; + + if (value < 0x80) return error.Utf8OverlongEncoding; + + return value; +} +pub fn utf8Decode3(bytes: []const u8) -> %u32 { + std.debug.assert(bytes.len == 3); + std.debug.assert(bytes[0] & 0b11110000 == 0b11100000); + var value: u32 = bytes[0] & 0b00001111; + + if (bytes[1] & 0b11000000 != 0b10000000) return error.Utf8ExpectedContinuation; + value <<= 6; + value |= bytes[1] & 0b00111111; + + if (bytes[2] & 0b11000000 != 0b10000000) return error.Utf8ExpectedContinuation; + value <<= 6; + value |= bytes[2] & 0b00111111; + + if (value < 0x800) return error.Utf8OverlongEncoding; + if (0xd800 <= value and value <= 0xdfff) return error.Utf8EncodesSurrogateHalf; + + return value; +} +pub fn utf8Decode4(bytes: []const u8) -> %u32 { + std.debug.assert(bytes.len == 4); + std.debug.assert(bytes[0] & 0b11111000 == 0b11110000); + var value: u32 = bytes[0] & 0b00000111; + + if (bytes[1] & 0b11000000 != 0b10000000) return error.Utf8ExpectedContinuation; + value <<= 6; + value |= bytes[1] & 0b00111111; + + if (bytes[2] & 0b11000000 != 0b10000000) return error.Utf8ExpectedContinuation; + value <<= 6; + value |= bytes[2] & 0b00111111; + + if (bytes[3] & 0b11000000 != 0b10000000) return error.Utf8ExpectedContinuation; + value <<= 6; + value |= bytes[3] & 0b00111111; + + if (value < 0x10000) return error.Utf8OverlongEncoding; + if (value > 0x10FFFF) return error.Utf8CodepointTooLarge; + + return value; +} + +error UnexpectedEof; +test "valid utf8" { + testValid("\x00", 0x0); + testValid("\x20", 0x20); + testValid("\x7f", 0x7f); + testValid("\xc2\x80", 0x80); + testValid("\xdf\xbf", 0x7ff); + testValid("\xe0\xa0\x80", 0x800); + testValid("\xe1\x80\x80", 0x1000); + testValid("\xef\xbf\xbf", 0xffff); + testValid("\xf0\x90\x80\x80", 0x10000); + testValid("\xf1\x80\x80\x80", 0x40000); + testValid("\xf3\xbf\xbf\xbf", 0xfffff); + testValid("\xf4\x8f\xbf\xbf", 0x10ffff); +} + +test "invalid utf8 continuation bytes" { + // unexpected continuation + testError("\x80", error.Utf8InvalidStartByte); + testError("\xbf", error.Utf8InvalidStartByte); + // too many leading 1's + testError("\xf8", error.Utf8InvalidStartByte); + testError("\xff", error.Utf8InvalidStartByte); + // expected continuation for 2 byte sequences + testError("\xc2", error.UnexpectedEof); + testError("\xc2\x00", error.Utf8ExpectedContinuation); + testError("\xc2\xc0", error.Utf8ExpectedContinuation); + // expected continuation for 3 byte sequences + testError("\xe0", error.UnexpectedEof); + testError("\xe0\x00", error.UnexpectedEof); + testError("\xe0\xc0", error.UnexpectedEof); + testError("\xe0\xa0", error.UnexpectedEof); + testError("\xe0\xa0\x00", error.Utf8ExpectedContinuation); + testError("\xe0\xa0\xc0", error.Utf8ExpectedContinuation); + // expected continuation for 4 byte sequences + testError("\xf0", error.UnexpectedEof); + testError("\xf0\x00", error.UnexpectedEof); + testError("\xf0\xc0", error.UnexpectedEof); + testError("\xf0\x90\x00", error.UnexpectedEof); + testError("\xf0\x90\xc0", error.UnexpectedEof); + testError("\xf0\x90\x80\x00", error.Utf8ExpectedContinuation); + testError("\xf0\x90\x80\xc0", error.Utf8ExpectedContinuation); +} + +test "overlong utf8 codepoint" { + testError("\xc0\x80", error.Utf8OverlongEncoding); + testError("\xc1\xbf", error.Utf8OverlongEncoding); + testError("\xe0\x80\x80", error.Utf8OverlongEncoding); + testError("\xe0\x9f\xbf", error.Utf8OverlongEncoding); + testError("\xf0\x80\x80\x80", error.Utf8OverlongEncoding); + testError("\xf0\x8f\xbf\xbf", error.Utf8OverlongEncoding); +} + +test "misc invalid utf8" { + // codepoint out of bounds + testError("\xf4\x90\x80\x80", error.Utf8CodepointTooLarge); + testError("\xf7\xbf\xbf\xbf", error.Utf8CodepointTooLarge); + // surrogate halves + testValid("\xed\x9f\xbf", 0xd7ff); + testError("\xed\xa0\x80", error.Utf8EncodesSurrogateHalf); + testError("\xed\xbf\xbf", error.Utf8EncodesSurrogateHalf); + testValid("\xee\x80\x80", 0xe000); +} + +fn testError(bytes: []const u8, expected_err: error) { + if (testDecode(bytes)) |_| { + unreachable; + } else |err| { + std.debug.assert(err == expected_err); + } +} + +fn testValid(bytes: []const u8, expected_codepoint: u32) { + std.debug.assert(%%testDecode(bytes) == expected_codepoint); +} + +fn testDecode(bytes: []const u8) -> %u32 { + const length = %return utf8ByteSequenceLength(bytes[0]); + if (bytes.len < length) return error.UnexpectedEof; + std.debug.assert(bytes.len == length); + return utf8Decode(bytes); +} |
