aboutsummaryrefslogtreecommitdiff
path: root/std
diff options
context:
space:
mode:
author Andrew Kelley <superjoe30@gmail.com> 2018-01-03 00:42:00 -0500
committer Andrew Kelley <superjoe30@gmail.com> 2018-01-03 00:42:00 -0500
commit 1d77f8db289e6eefce827fe37d27e72b68362943 (patch)
tree 62bcadb5abcf45076583c362cb68e254067020f0 /std
parent 6bfaf262d5a1d18482a813c7022ffb03a18f52a8 (diff)
parent 0ea50b3157f00ab56b2752dfa8c70edb4bce2af7 (diff)
download zig-1d77f8db289e6eefce827fe37d27e72b68362943.tar.gz
zig-1d77f8db289e6eefce827fe37d27e72b68362943.zip
Merge branch 'master' into llvm6
Diffstat (limited to 'std')
-rw-r--r-- std/fmt/index.zig 103
-rw-r--r-- std/index.zig 2
-rw-r--r-- std/io.zig 7
-rw-r--r-- std/math/acos.zig 2
-rw-r--r-- std/unicode.zig 169
5 files changed, 280 insertions, 3 deletions
diff --git a/std/fmt/index.zig b/std/fmt/index.zig
index 550fa1ce1f..432b43bfef 100644
--- a/std/fmt/index.zig
+++ b/std/fmt/index.zig
@@ -14,6 +14,8 @@ const State = enum { // TODO put inside format function and make sure the name a
CloseBrace,
Integer,
IntegerWidth,
+ Float,
+ FloatWidth,
Character,
Buf,
BufWidth,
@@ -37,7 +39,6 @@ pub fn format(context: var, output: fn(@typeOf(context), []const u8)->%void,
switch (state) {
State.Start => switch (c) {
'{' => {
- // TODO if you make this an if statement with `and` then it breaks
if (start_index < i) {
%return output(context, fmt[start_index..i]);
}
@@ -85,6 +86,8 @@ pub fn format(context: var, output: fn(@typeOf(context), []const u8)->%void,
},
's' => {
state = State.Buf;
+ },'.' => {
+ state = State.Float;
},
else => @compileError("Unknown format character: " ++ []u8{c}),
},
@@ -129,6 +132,30 @@ pub fn format(context: var, output: fn(@typeOf(context), []const u8)->%void,
'0' ... '9' => {},
else => @compileError("Unexpected character in format string: " ++ []u8{c}),
},
+ State.Float => switch (c) {
+ '}' => {
+ %return formatFloatDecimal(args[next_arg], 0, context, output);
+ next_arg += 1;
+ state = State.Start;
+ start_index = i + 1;
+ },
+ '0' ... '9' => {
+ width_start = i;
+ state = State.FloatWidth;
+ },
+ else => @compileError("Unexpected character in format string: " ++ []u8{c}),
+ },
+ State.FloatWidth => switch (c) {
+ '}' => {
+ width = comptime %%parseUnsigned(usize, fmt[width_start..i], 10);
+ %return formatFloatDecimal(args[next_arg], width, context, output);
+ next_arg += 1;
+ state = State.Start;
+ start_index = i + 1;
+ },
+ '0' ... '9' => {},
+ else => @compileError("Unexpected character in format string: " ++ []u8{c}),
+ },
State.BufWidth => switch (c) {
'}' => {
width = comptime %%parseUnsigned(usize, fmt[width_start..i], 10);
@@ -267,6 +294,47 @@ pub fn formatFloat(value: var, context: var, output: fn(@typeOf(context), []cons
}
}
+/// Writes `value` in plain decimal notation (no exponent) through `output`.
+///
+/// `precision` caps the number of digits emitted after the decimal point;
+/// a precision of 0 means "emit every available digit". Digits are truncated,
+/// not rounded. NaN, infinities and zero are written as "NaN", "Infinity"
+/// (with a leading "-" when negative) and "0.0".
+///
+/// Any error returned by `output` is propagated to the caller.
+pub fn formatFloatDecimal(value: var, precision: usize, context: var, output: fn(@typeOf(context), []const u8)->%void) -> %void {
+    var x = f64(value);
+
+    // Errol doesn't handle these special cases.
+    if (math.isNan(x)) {
+        return output(context, "NaN");
+    }
+    if (math.signbit(x)) {
+        %return output(context, "-");
+        x = -x;
+    }
+    if (math.isPositiveInf(x)) {
+        return output(context, "Infinity");
+    }
+    if (x == 0.0) {
+        return output(context, "0.0");
+    }
+
+    var buffer: [32]u8 = undefined;
+    const float_decimal = errol3(x, buffer[0..]);
+    const digits = float_decimal.digits;
+
+    // The value was widened to f64 before conversion, so for an f32 input only
+    // the first 7 digits are treated as meaningful.
+    const num_valid_digits = if (@typeOf(value) == f32)
+        math.min(usize(7), digits.len)
+    else
+        digits.len;
+
+    // NOTE(review): assumes errol3 yields value == 0.<digits> * 10^exp, i.e. `exp`
+    // is the position of the decimal point inside `digits`. The behavior for
+    // values >= 1 matches the existing tests; confirm for exp <= 0.
+    const point_pos = if (float_decimal.exp > 0) usize(float_decimal.exp) else 0;
+    const leading_zeros = if (float_decimal.exp < 0) usize(-float_decimal.exp) else 0;
+
+    // Never slice past the digits we actually have.
+    const num_left_digits = math.min(point_pos, num_valid_digits);
+
+    // Integer part.
+    if (num_left_digits == 0) {
+        // Magnitude below 1: the integer part is a single zero.
+        %return output(context, "0");
+    } else {
+        %return output(context, digits[0 .. num_left_digits]);
+        // Magnitude larger than the available digits: pad with zeros up to the point.
+        var pad_index = num_left_digits;
+        while (pad_index < point_pos) : (pad_index += 1) {
+            %return output(context, "0");
+        }
+    }
+    %return output(context, ".");
+
+    // Fractional part: zeros between the point and the first significant digit,
+    // followed by the remaining significant digits.
+    const num_frac_digits = num_valid_digits - num_left_digits;
+    const num_available = leading_zeros + num_frac_digits;
+    if (num_available == 0) {
+        // Always print at least one digit after the point.
+        return output(context, "0");
+    }
+
+    const num_right_digits = if (precision != 0)
+        math.min(precision, num_available)
+    else
+        num_available;
+
+    var emitted: usize = 0;
+    while (emitted < num_right_digits and emitted < leading_zeros) : (emitted += 1) {
+        %return output(context, "0");
+    }
+    if (emitted < num_right_digits) {
+        const remaining = num_right_digits - emitted;
+        %return output(context, digits[num_left_digits .. (num_left_digits + remaining)]);
+    }
+}
+
+
pub fn formatInt(value: var, base: u8, uppercase: bool, width: usize,
context: var, output: fn(@typeOf(context), []const u8)->%void) -> %void
{
@@ -540,6 +608,39 @@ test "fmt.format" {
const result = %%bufPrint(buf1[0..], "f64: {}\n", -math.inf_f64);
assert(mem.eql(u8, result, "f64: -Infinity\n"));
}
+ {
+ var buf1: [32]u8 = undefined;
+ const value: f32 = 1.1234;
+ const result = %%bufPrint(buf1[0..], "f32: {.1}\n", value);
+ assert(mem.eql(u8, result, "f32: 1.1\n"));
+ }
+ {
+ var buf1: [32]u8 = undefined;
+ const value: f32 = 1234.567;
+ const result = %%bufPrint(buf1[0..], "f32: {.2}\n", value);
+ assert(mem.eql(u8, result, "f32: 1234.56\n"));
+ }
+ {
+ var buf1: [32]u8 = undefined;
+ const value: f32 = -11.1234;
+ const result = %%bufPrint(buf1[0..], "f32: {.4}\n", value);
+ // -11.1234 is converted to f64 -11.12339... internally (errol3() function takes f64).
+ // -11.12339... is truncated to -11.1233
+ assert(mem.eql(u8, result, "f32: -11.1233\n"));
+ }
+ {
+ var buf1: [32]u8 = undefined;
+ const value: f32 = 91.12345;
+ const result = %%bufPrint(buf1[0..], "f32: {.}\n", value);
+ assert(mem.eql(u8, result, "f32: 91.12345\n"));
+ }
+ {
+ var buf1: [32]u8 = undefined;
+ const value: f64 = 91.12345678901235;
+ const result = %%bufPrint(buf1[0..], "f64: {.10}\n", value);
+ assert(mem.eql(u8, result, "f64: 91.1234567890\n"));
+ }
+
}
}
diff --git a/std/index.zig b/std/index.zig
index 07da469b5e..a9a0038e60 100644
--- a/std/index.zig
+++ b/std/index.zig
@@ -25,6 +25,7 @@ pub const net = @import("net.zig");
pub const os = @import("os/index.zig");
pub const rand = @import("rand.zig");
pub const sort = @import("sort.zig");
+pub const unicode = @import("unicode.zig");
test "std" {
// run tests from these
@@ -53,4 +54,5 @@ test "std" {
_ = @import("os/index.zig");
_ = @import("rand.zig");
_ = @import("sort.zig");
+ _ = @import("unicode.zig");
}
diff --git a/std/io.zig b/std/io.zig
index cbf2e0c216..44e5634ae0 100644
--- a/std/io.zig
+++ b/std/io.zig
@@ -500,11 +500,16 @@ pub fn writeFile(path: []const u8, data: []const u8, allocator: ?&mem.Allocator)
/// On success, caller owns returned buffer.
+/// Forwards to readFileAllocExtra with an extra_len of 0.
pub fn readFileAlloc(path: []const u8, allocator: &mem.Allocator) -> %[]u8 {
+ return readFileAllocExtra(path, allocator, 0);
+}
+/// On success, caller owns returned buffer.
+/// Allocates extra_len extra bytes at the end of the file buffer, which are uninitialized.
+pub fn readFileAllocExtra(path: []const u8, allocator: &mem.Allocator, extra_len: usize) -> %[]u8 {
var file = %return File.openRead(path, allocator);
defer file.close();
const size = %return file.getEndPos();
- const buf = %return allocator.alloc(u8, size);
+ const buf = %return allocator.alloc(u8, size + extra_len);
%defer allocator.free(buf);
var adapter = FileInStream.init(&file);
diff --git a/std/math/acos.zig b/std/math/acos.zig
index 478d5a846d..8adce39bff 100644
--- a/std/math/acos.zig
+++ b/std/math/acos.zig
@@ -39,7 +39,7 @@ fn acos32(x: f32) -> f32 {
if (hx >> 31 != 0) {
return 2.0 * pio2_hi + 0x1.0p-120;
} else {
- return 0;
+ return 0.0;
}
} else {
return math.nan(f32);
diff --git a/std/unicode.zig b/std/unicode.zig
new file mode 100644
index 0000000000..6c06eeb73a
--- /dev/null
+++ b/std/unicode.zig
@@ -0,0 +1,169 @@
+const std = @import("./index.zig");
+
+error Utf8InvalidStartByte;
+
+/// Classifies the leading byte of a UTF-8 sequence.
+/// Returns the total sequence length in bytes (1, 2, 3 or 4) implied by
+/// `first_byte`, or error.Utf8InvalidStartByte when the byte matches none of
+/// the patterns 0xxxxxxx, 110xxxxx, 1110xxxx or 11110xxx.
+pub fn utf8ByteSequenceLength(first_byte: u8) -> %u3 {
+    if ((first_byte & 0x80) == 0x00) {
+        // 0xxxxxxx
+        return u3(1);
+    } else if ((first_byte & 0xe0) == 0xc0) {
+        // 110xxxxx
+        return u3(2);
+    } else if ((first_byte & 0xf0) == 0xe0) {
+        // 1110xxxx
+        return u3(3);
+    } else if ((first_byte & 0xf8) == 0xf0) {
+        // 11110xxx
+        return u3(4);
+    } else {
+        return error.Utf8InvalidStartByte;
+    }
+}
+
+error Utf8OverlongEncoding;
+error Utf8ExpectedContinuation;
+error Utf8EncodesSurrogateHalf;
+error Utf8CodepointTooLarge;
+
+/// Decodes the single UTF-8 codepoint held in `bytes`.
+/// The caller must pass exactly %%utf8ByteSequenceLength(bytes[0]) bytes;
+/// any length other than 1 to 4 is treated as unreachable.
+/// When the length is known at comptime, utf8Decode2, utf8Decode3 or
+/// utf8Decode4 can be called directly instead.
+pub fn utf8Decode(bytes: []const u8) -> %u32 {
+    const len = bytes.len;
+    // A one-byte sequence is its own codepoint value.
+    if (len == 1) return u32(bytes[0]);
+    if (len == 2) return utf8Decode2(bytes);
+    if (len == 3) return utf8Decode3(bytes);
+    if (len == 4) return utf8Decode4(bytes);
+    unreachable;
+}
+/// Decodes a two-byte UTF-8 sequence.
+/// Asserts that `bytes` has length 2 and that bytes[0] has the form 110xxxxx.
+/// Returns error.Utf8ExpectedContinuation if bytes[1] is not of the form 10xxxxxx,
+/// and error.Utf8OverlongEncoding if the decoded value is below 0x80.
+pub fn utf8Decode2(bytes: []const u8) -> %u32 {
+    std.debug.assert(bytes.len == 2);
+    const lead = bytes[0];
+    std.debug.assert((lead & 0xe0) == 0xc0);
+
+    const trail = bytes[1];
+    if ((trail & 0xc0) != 0x80) return error.Utf8ExpectedContinuation;
+
+    // 5 payload bits from the lead byte, 6 from the continuation byte.
+    const codepoint = (u32(lead & 0x1f) << 6) | u32(trail & 0x3f);
+    if (codepoint < 0x80) return error.Utf8OverlongEncoding;
+
+    return codepoint;
+}
+/// Decodes a three-byte UTF-8 sequence.
+/// Asserts that `bytes` has length 3 and that bytes[0] has the form 1110xxxx.
+/// Returns error.Utf8ExpectedContinuation if bytes[1] or bytes[2] is not of the
+/// form 10xxxxxx, error.Utf8OverlongEncoding if the decoded value is below 0x800,
+/// and error.Utf8EncodesSurrogateHalf if it lies in 0xd800...0xdfff.
+pub fn utf8Decode3(bytes: []const u8) -> %u32 {
+    std.debug.assert(bytes.len == 3);
+    std.debug.assert((bytes[0] & 0xf0) == 0xe0);
+
+    // 4 payload bits from the lead byte, then 6 from each continuation byte.
+    var codepoint: u32 = bytes[0] & 0x0f;
+    for (bytes[1..]) |trail| {
+        if ((trail & 0xc0) != 0x80) return error.Utf8ExpectedContinuation;
+        codepoint = (codepoint << 6) | (trail & 0x3f);
+    }
+
+    if (codepoint < 0x800) return error.Utf8OverlongEncoding;
+    if (codepoint >= 0xd800 and codepoint <= 0xdfff) return error.Utf8EncodesSurrogateHalf;
+
+    return codepoint;
+}
+/// Decodes a four-byte UTF-8 sequence.
+/// Asserts that `bytes` has length 4 and that bytes[0] has the form 11110xxx.
+/// Returns error.Utf8ExpectedContinuation if any of bytes[1..4] is not of the
+/// form 10xxxxxx, error.Utf8OverlongEncoding if the decoded value is below
+/// 0x10000, and error.Utf8CodepointTooLarge if it is above 0x10FFFF.
+pub fn utf8Decode4(bytes: []const u8) -> %u32 {
+    std.debug.assert(bytes.len == 4);
+    std.debug.assert((bytes[0] & 0xf8) == 0xf0);
+
+    // 3 payload bits from the lead byte, then 6 from each continuation byte.
+    var codepoint: u32 = bytes[0] & 0x07;
+    for (bytes[1..]) |trail| {
+        if ((trail & 0xc0) != 0x80) return error.Utf8ExpectedContinuation;
+        codepoint = (codepoint << 6) | (trail & 0x3f);
+    }
+
+    if (codepoint < 0x10000) return error.Utf8OverlongEncoding;
+    if (codepoint > 0x10FFFF) return error.Utf8CodepointTooLarge;
+
+    return codepoint;
+}
+
+error UnexpectedEof;
+// Well-formed sequences of each length, checked against the codepoint they encode.
+test "valid utf8" {
+ // 1-byte sequences
+ testValid("\x00", 0x0);
+ testValid("\x20", 0x20);
+ testValid("\x7f", 0x7f);
+ // 2-byte sequences
+ testValid("\xc2\x80", 0x80);
+ testValid("\xdf\xbf", 0x7ff);
+ // 3-byte sequences
+ testValid("\xe0\xa0\x80", 0x800);
+ testValid("\xe1\x80\x80", 0x1000);
+ testValid("\xef\xbf\xbf", 0xffff);
+ // 4-byte sequences
+ testValid("\xf0\x90\x80\x80", 0x10000);
+ testValid("\xf1\x80\x80\x80", 0x40000);
+ testValid("\xf3\xbf\xbf\xbf", 0xfffff);
+ testValid("\xf4\x8f\xbf\xbf", 0x10ffff);
+}
+
+// Bad start bytes, truncated sequences, and bad continuation bytes.
+// Inputs shorter than the length implied by their start byte are rejected by
+// testDecode with UnexpectedEof before utf8Decode is called.
+test "invalid utf8 continuation bytes" {
+ // unexpected continuation
+ testError("\x80", error.Utf8InvalidStartByte);
+ testError("\xbf", error.Utf8InvalidStartByte);
+ // too many leading 1's
+ testError("\xf8", error.Utf8InvalidStartByte);
+ testError("\xff", error.Utf8InvalidStartByte);
+ // expected continuation for 2 byte sequences
+ testError("\xc2", error.UnexpectedEof);
+ testError("\xc2\x00", error.Utf8ExpectedContinuation);
+ testError("\xc2\xc0", error.Utf8ExpectedContinuation);
+ // expected continuation for 3 byte sequences
+ testError("\xe0", error.UnexpectedEof);
+ testError("\xe0\x00", error.UnexpectedEof);
+ testError("\xe0\xc0", error.UnexpectedEof);
+ testError("\xe0\xa0", error.UnexpectedEof);
+ testError("\xe0\xa0\x00", error.Utf8ExpectedContinuation);
+ testError("\xe0\xa0\xc0", error.Utf8ExpectedContinuation);
+ // expected continuation for 4 byte sequences
+ testError("\xf0", error.UnexpectedEof);
+ testError("\xf0\x00", error.UnexpectedEof);
+ testError("\xf0\xc0", error.UnexpectedEof);
+ testError("\xf0\x90\x00", error.UnexpectedEof);
+ testError("\xf0\x90\xc0", error.UnexpectedEof);
+ testError("\xf0\x90\x80\x00", error.Utf8ExpectedContinuation);
+ testError("\xf0\x90\x80\xc0", error.Utf8ExpectedContinuation);
+}
+
+// Sequences that encode a value below the minimum for their length
+// (0x80 for 2 bytes, 0x800 for 3 bytes, 0x10000 for 4 bytes).
+test "overlong utf8 codepoint" {
+ // 2-byte encodings of values below 0x80
+ testError("\xc0\x80", error.Utf8OverlongEncoding);
+ testError("\xc1\xbf", error.Utf8OverlongEncoding);
+ // 3-byte encodings of values below 0x800
+ testError("\xe0\x80\x80", error.Utf8OverlongEncoding);
+ testError("\xe0\x9f\xbf", error.Utf8OverlongEncoding);
+ // 4-byte encodings of values below 0x10000
+ testError("\xf0\x80\x80\x80", error.Utf8OverlongEncoding);
+ testError("\xf0\x8f\xbf\xbf", error.Utf8OverlongEncoding);
+}
+
+// Values above 0x10FFFF and the surrogate range 0xd800...0xdfff are rejected;
+// the values immediately below and above the surrogate range are accepted.
+test "misc invalid utf8" {
+ // codepoint out of bounds
+ testError("\xf4\x90\x80\x80", error.Utf8CodepointTooLarge);
+ testError("\xf7\xbf\xbf\xbf", error.Utf8CodepointTooLarge);
+ // surrogate halves
+ testValid("\xed\x9f\xbf", 0xd7ff);
+ testError("\xed\xa0\x80", error.Utf8EncodesSurrogateHalf);
+ testError("\xed\xbf\xbf", error.Utf8EncodesSurrogateHalf);
+ testValid("\xee\x80\x80", 0xe000);
+}
+
+// Test helper: asserts that decoding `bytes` fails with exactly `expected_err`.
+fn testError(bytes: []const u8, expected_err: error) {
+ if (testDecode(bytes)) |_| {
+ // Decoding succeeded although an error was expected.
+ unreachable;
+ } else |err| {
+ std.debug.assert(err == expected_err);
+ }
+}
+
+// Test helper: asserts that `bytes` decodes without error to `expected_codepoint`.
+fn testValid(bytes: []const u8, expected_codepoint: u32) {
+    const decoded = %%testDecode(bytes);
+    std.debug.assert(decoded == expected_codepoint);
+}
+
+// Test helper: decodes `bytes` as exactly one UTF-8 codepoint.
+// Returns error.UnexpectedEof when `bytes` is empty or shorter than the length
+// implied by its start byte, and propagates any error from
+// utf8ByteSequenceLength or utf8Decode. Asserts that no bytes are left over.
+fn testDecode(bytes: []const u8) -> %u32 {
+    // Without this guard an empty slice would be indexed out of bounds below.
+    if (bytes.len == 0) return error.UnexpectedEof;
+
+    const expected_len = %return utf8ByteSequenceLength(bytes[0]);
+    if (bytes.len < expected_len) return error.UnexpectedEof;
+    std.debug.assert(bytes.len == expected_len);
+    return utf8Decode(bytes);
+}