diff options
Diffstat (limited to 'lib/std')
| -rw-r--r-- | lib/std/Build.zig | 13 | ||||
| -rw-r--r-- | lib/std/Build/Step/Compile.zig | 8 | ||||
| -rw-r--r-- | lib/std/crypto/aes.zig | 2 | ||||
| -rw-r--r-- | lib/std/crypto/blake3.zig | 2 | ||||
| -rw-r--r-- | lib/std/crypto/salsa20.zig | 5 | ||||
| -rw-r--r-- | lib/std/crypto/sha2.zig | 2 | ||||
| -rw-r--r-- | lib/std/http/Client.zig | 13 | ||||
| -rw-r--r-- | lib/std/http/HeaderIterator.zig | 16 | ||||
| -rw-r--r-- | lib/std/http/Server.zig | 28 | ||||
| -rw-r--r-- | lib/std/http/test.zig | 135 | ||||
| -rw-r--r-- | lib/std/mem.zig | 21 | ||||
| -rw-r--r-- | lib/std/meta.zig | 3 | ||||
| -rw-r--r-- | lib/std/unicode.zig | 299 | ||||
| -rw-r--r-- | lib/std/zig/c_translation.zig | 8 |
14 files changed, 364 insertions, 191 deletions
diff --git a/lib/std/Build.zig b/lib/std/Build.zig index ca00a7d15a..3892b9ca73 100644 --- a/lib/std/Build.zig +++ b/lib/std/Build.zig @@ -855,7 +855,9 @@ pub const TestOptions = struct { optimize: std.builtin.OptimizeMode = .Debug, version: ?std.SemanticVersion = null, max_rss: usize = 0, + /// deprecated: use `.filters = &.{filter}` instead of `.filter = filter`. filter: ?[]const u8 = null, + filters: []const []const u8 = &.{}, test_runner: ?[]const u8 = null, link_libc: ?bool = null, single_threaded: ?bool = null, @@ -888,7 +890,12 @@ pub fn addTest(b: *Build, options: TestOptions) *Step.Compile { .error_tracing = options.error_tracing, }, .max_rss = options.max_rss, - .filter = options.filter, + .filters = if (options.filter != null and options.filters.len > 0) filters: { + const filters = b.allocator.alloc([]const u8, 1 + options.filters.len) catch @panic("OOM"); + filters[0] = b.dupe(options.filter.?); + for (filters[1..], options.filters) |*dest, source| dest.* = b.dupe(source); + break :filters filters; + } else b.dupeStrings(if (options.filter) |filter| &.{filter} else options.filters), .test_runner = options.test_runner, .use_llvm = options.use_llvm, .use_lld = options.use_lld, @@ -993,9 +1000,7 @@ pub fn dupe(self: *Build, bytes: []const u8) []u8 { /// Duplicates an array of strings without the need to handle out of memory. pub fn dupeStrings(self: *Build, strings: []const []const u8) [][]u8 { const array = self.allocator.alloc([]u8, strings.len) catch @panic("OOM"); - for (strings, 0..) |s, i| { - array[i] = self.dupe(s); - } + for (array, strings) |*dest, source| dest.* = self.dupe(source); return array; } diff --git a/lib/std/Build/Step/Compile.zig b/lib/std/Build/Step/Compile.zig index 51b5b2e52a..5ee92ffc22 100644 --- a/lib/std/Build/Step/Compile.zig +++ b/lib/std/Build/Step/Compile.zig @@ -54,7 +54,7 @@ global_base: ?u64 = null, /// Set via options; intended to be read-only after that. zig_lib_dir: ?LazyPath, exec_cmd_args: ?[]const ?[]const u8, -filter: ?[]const u8, +filters: []const []const u8, test_runner: ?[]const u8, test_server_mode: bool, wasi_exec_model: ?std.builtin.WasiExecModel = null, @@ -223,7 +223,7 @@ pub const Options = struct { linkage: ?Linkage = null, version: ?std.SemanticVersion = null, max_rss: usize = 0, - filter: ?[]const u8 = null, + filters: []const []const u8 = &.{}, test_runner: ?[]const u8 = null, use_llvm: ?bool = null, use_lld: ?bool = null, @@ -310,7 +310,7 @@ pub fn create(owner: *std.Build, options: Options) *Compile { .installed_headers = ArrayList(*Step).init(owner.allocator), .zig_lib_dir = null, .exec_cmd_args = null, - .filter = options.filter, + .filters = options.filters, .test_runner = options.test_runner, .test_server_mode = options.test_runner == null, .rdynamic = false, @@ -1297,7 +1297,7 @@ fn make(step: *Step, prog_node: *std.Progress.Node) !void { try zig_args.append(b.fmt("0x{x}", .{image_base})); } - if (self.filter) |filter| { + for (self.filters) |filter| { try zig_args.append("--test-filter"); try zig_args.append(filter); } diff --git a/lib/std/crypto/aes.zig b/lib/std/crypto/aes.zig index f5752888fc..5e5ae04b58 100644 --- a/lib/std/crypto/aes.zig +++ b/lib/std/crypto/aes.zig @@ -6,7 +6,7 @@ const has_aesni = std.Target.x86.featureSetHas(builtin.cpu.features, .aes); const has_avx = std.Target.x86.featureSetHas(builtin.cpu.features, .avx); const has_armaes = std.Target.aarch64.featureSetHas(builtin.cpu.features, .aes); // C backend doesn't currently support passing vectors to inline asm. -const impl = if (builtin.cpu.arch == .x86_64 and builtin.zig_backend != .stage2_c and builtin.zig_backend != .stage2_x86_64 and has_aesni and has_avx) impl: { +const impl = if (builtin.cpu.arch == .x86_64 and builtin.zig_backend != .stage2_c and has_aesni and has_avx) impl: { break :impl @import("aes/aesni.zig"); } else if (builtin.cpu.arch == .aarch64 and builtin.zig_backend != .stage2_c and has_armaes) impl: { diff --git a/lib/std/crypto/blake3.zig b/lib/std/crypto/blake3.zig index d87211fb1e..585c338417 100644 --- a/lib/std/crypto/blake3.zig +++ b/lib/std/crypto/blake3.zig @@ -200,7 +200,7 @@ const CompressGeneric = struct { } }; -const compress = if (builtin.cpu.arch == .x86_64 and builtin.zig_backend != .stage2_x86_64) +const compress = if (builtin.cpu.arch == .x86_64) CompressVectorized.compress else CompressGeneric.compress; diff --git a/lib/std/crypto/salsa20.zig b/lib/std/crypto/salsa20.zig index 7f4c1b0157..c791c6b773 100644 --- a/lib/std/crypto/salsa20.zig +++ b/lib/std/crypto/salsa20.zig @@ -302,7 +302,10 @@ fn SalsaNonVecImpl(comptime rounds: comptime_int) type { }; } -const SalsaImpl = if (builtin.cpu.arch == .x86_64 and builtin.zig_backend != .stage2_x86_64) SalsaVecImpl else SalsaNonVecImpl; +const SalsaImpl = if (builtin.cpu.arch == .x86_64) + SalsaVecImpl +else + SalsaNonVecImpl; fn keyToWords(key: [32]u8) [8]u32 { var k: [8]u32 = undefined; diff --git a/lib/std/crypto/sha2.zig b/lib/std/crypto/sha2.zig index 10909cfaec..31884c7381 100644 --- a/lib/std/crypto/sha2.zig +++ b/lib/std/crypto/sha2.zig @@ -238,7 +238,7 @@ fn Sha2x32(comptime params: Sha2Params32) type { return; }, // C backend doesn't currently support passing vectors to inline asm. - .x86_64 => if (builtin.zig_backend != .stage2_c and builtin.zig_backend != .stage2_x86_64 and comptime std.Target.x86.featureSetHasAll(builtin.cpu.features, .{ .sha, .avx2 })) { + .x86_64 => if (builtin.zig_backend != .stage2_c and comptime std.Target.x86.featureSetHasAll(builtin.cpu.features, .{ .sha, .avx2 })) { var x: v4u32 = [_]u32{ d.s[5], d.s[4], d.s[1], d.s[0] }; var y: v4u32 = [_]u32{ d.s[7], d.s[6], d.s[3], d.s[2] }; const s_v = @as(*[16]v4u32, @ptrCast(&s)); diff --git a/lib/std/http/Client.zig b/lib/std/http/Client.zig index 5f580bd53e..1ffe1e8ea3 100644 --- a/lib/std/http/Client.zig +++ b/lib/std/http/Client.zig @@ -488,7 +488,7 @@ pub const Response = struct { var line_it = mem.splitSequence(u8, line, ": "); const header_name = line_it.next().?; const header_value = line_it.rest(); - if (header_value.len == 0) return error.HttpHeadersInvalid; + if (header_name.len == 0) return error.HttpHeadersInvalid; if (std.ascii.eqlIgnoreCase(header_name, "connection")) { res.keep_alive = !std.ascii.eqlIgnoreCase(header_value, "close"); @@ -774,7 +774,7 @@ pub const Request = struct { } for (req.extra_headers) |header| { - assert(header.value.len != 0); + assert(header.name.len != 0); try w.writeAll(header.name); try w.writeAll(": "); @@ -857,9 +857,12 @@ pub const Request = struct { /// Must be called after `send` and, if any data was written to the request /// body, then also after `finish`. pub fn wait(req: *Request) WaitError!void { - const connection = req.connection.?; + while (true) { + // This while loop is for handling redirects, which means the request's + // connection may be different than the previous iteration. However, it + // is still guaranteed to be non-null with each iteration of this loop. + const connection = req.connection.?; - while (true) { // handle redirects while (true) { // read headers try connection.fill(); @@ -1515,11 +1518,13 @@ pub fn open( ) RequestError!Request { if (std.debug.runtime_safety) { for (options.extra_headers) |header| { + assert(header.name.len != 0); assert(std.mem.indexOfScalar(u8, header.name, ':') == null); assert(std.mem.indexOfPosLinear(u8, header.name, 0, "\r\n") == null); assert(std.mem.indexOfPosLinear(u8, header.value, 0, "\r\n") == null); } for (options.privileged_headers) |header| { + assert(header.name.len != 0); assert(std.mem.indexOfPosLinear(u8, header.name, 0, "\r\n") == null); assert(std.mem.indexOfPosLinear(u8, header.value, 0, "\r\n") == null); } diff --git a/lib/std/http/HeaderIterator.zig b/lib/std/http/HeaderIterator.zig index 8d36374f8c..515058859d 100644 --- a/lib/std/http/HeaderIterator.zig +++ b/lib/std/http/HeaderIterator.zig @@ -15,7 +15,7 @@ pub fn next(it: *HeaderIterator) ?std.http.Header { var kv_it = std.mem.splitSequence(u8, it.bytes[it.index..end], ": "); const name = kv_it.next().?; const value = kv_it.rest(); - if (value.len == 0) { + if (name.len == 0 and value.len == 0) { if (it.is_trailer) return null; const next_end = std.mem.indexOfPosLinear(u8, it.bytes, end + 2, "\r\n") orelse return null; @@ -35,7 +35,7 @@ pub fn next(it: *HeaderIterator) ?std.http.Header { } test next { - var it = HeaderIterator.init("200 OK\r\na: b\r\nc: d\r\n\r\ne: f\r\n\r\n"); + var it = HeaderIterator.init("200 OK\r\na: b\r\nc: \r\nd: e\r\n\r\nf: g\r\n\r\n"); try std.testing.expect(!it.is_trailer); { const header = it.next().?; @@ -47,13 +47,19 @@ test next { const header = it.next().?; try std.testing.expect(!it.is_trailer); try std.testing.expectEqualStrings("c", header.name); - try std.testing.expectEqualStrings("d", header.value); + try std.testing.expectEqualStrings("", header.value); + } + { + const header = it.next().?; + try std.testing.expect(!it.is_trailer); + try std.testing.expectEqualStrings("d", header.name); + try std.testing.expectEqualStrings("e", header.value); } { const header = it.next().?; try std.testing.expect(it.is_trailer); - try std.testing.expectEqualStrings("e", header.name); - try std.testing.expectEqualStrings("f", header.value); + try std.testing.expectEqualStrings("f", header.name); + try std.testing.expectEqualStrings("g", header.value); } try std.testing.expectEqual(null, it.next()); } diff --git a/lib/std/http/Server.zig b/lib/std/http/Server.zig index 2d360d40a4..0454fa739e 100644 --- a/lib/std/http/Server.zig +++ b/lib/std/http/Server.zig @@ -211,7 +211,7 @@ pub const Request = struct { var line_it = mem.splitSequence(u8, line, ": "); const header_name = line_it.next().?; const header_value = line_it.rest(); - if (header_value.len == 0) return error.HttpHeadersInvalid; + if (header_name.len == 0) return error.HttpHeadersInvalid; if (std.ascii.eqlIgnoreCase(header_name, "connection")) { head.keep_alive = !std.ascii.eqlIgnoreCase(header_value, "close"); @@ -311,6 +311,7 @@ pub const Request = struct { assert(options.extra_headers.len <= max_extra_headers); if (std.debug.runtime_safety) { for (options.extra_headers) |header| { + assert(header.name.len != 0); assert(std.mem.indexOfScalar(u8, header.name, ':') == null); assert(std.mem.indexOfPosLinear(u8, header.name, 0, "\r\n") == null); assert(std.mem.indexOfPosLinear(u8, header.value, 0, "\r\n") == null); @@ -370,11 +371,13 @@ pub const Request = struct { }; iovecs_len += 1; - iovecs[iovecs_len] = .{ - .iov_base = header.value.ptr, - .iov_len = header.value.len, - }; - iovecs_len += 1; + if (header.value.len != 0) { + iovecs[iovecs_len] = .{ + .iov_base = header.value.ptr, + .iov_len = header.value.len, + }; + iovecs_len += 1; + } iovecs[iovecs_len] = .{ .iov_base = "\r\n", @@ -496,6 +499,7 @@ pub const Request = struct { } for (o.extra_headers) |header| { + assert(header.name.len != 0); h.appendSliceAssumeCapacity(header.name); h.appendSliceAssumeCapacity(": "); h.appendSliceAssumeCapacity(header.value); @@ -986,11 +990,13 @@ pub const Response = struct { }; iovecs_len += 1; - iovecs[iovecs_len] = .{ - .iov_base = trailer.value.ptr, - .iov_len = trailer.value.len, - }; - iovecs_len += 1; + if (trailer.value.len != 0) { + iovecs[iovecs_len] = .{ + .iov_base = trailer.value.ptr, + .iov_len = trailer.value.len, + }; + iovecs_len += 1; + } iovecs[iovecs_len] = .{ .iov_base = "\r\n", diff --git a/lib/std/http/test.zig b/lib/std/http/test.zig index cfcfa5e5ac..ea766a8c20 100644 --- a/lib/std/http/test.zig +++ b/lib/std/http/test.zig @@ -490,6 +490,12 @@ test "general client/server API coverage" { .{ .name = "location", .value = location }, }, }); + } else if (mem.eql(u8, request.head.target, "/empty")) { + try request.respond("", .{ + .extra_headers = &.{ + .{ .name = "empty", .value = "" }, + }, + }); } else { try request.respond("", .{ .status = .not_found }); } @@ -502,7 +508,10 @@ test "general client/server API coverage" { return s.listen_address.in.getPort(); } }); - defer test_server.destroy(); + defer { + global.handle_new_requests = false; + test_server.destroy(); + } const log = std.log.scoped(.client); @@ -665,6 +674,56 @@ test "general client/server API coverage" { // connection has been closed try expect(client.connection_pool.free_len == 0); + { // handle empty header field value + const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}/empty", .{port}); + defer gpa.free(location); + const uri = try std.Uri.parse(location); + + log.info("{s}", .{location}); + var server_header_buffer: [1024]u8 = undefined; + var req = try client.open(.GET, uri, .{ + .server_header_buffer = &server_header_buffer, + .extra_headers = &.{ + .{ .name = "empty", .value = "" }, + }, + }); + defer req.deinit(); + + try req.send(.{}); + try req.wait(); + + try std.testing.expectEqual(.ok, req.response.status); + + const body = try req.reader().readAllAlloc(gpa, 8192); + defer gpa.free(body); + + try expectEqualStrings("", body); + + var it = req.response.iterateHeaders(); + { + const header = it.next().?; + try expect(!it.is_trailer); + try expectEqualStrings("connection", header.name); + try expectEqualStrings("keep-alive", header.value); + } + { + const header = it.next().?; + try expect(!it.is_trailer); + try expectEqualStrings("content-length", header.name); + try expectEqualStrings("0", header.value); + } + { + const header = it.next().?; + try expect(!it.is_trailer); + try expectEqualStrings("empty", header.name); + try expectEqualStrings("", header.value); + } + try expectEqual(null, it.next()); + } + + // connection has been kept alive + try expect(client.http_proxy != null or client.connection_pool.free_len == 1); + { // relative redirect const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}/redirect/1", .{port}); defer gpa.free(location); @@ -1004,3 +1063,77 @@ fn createTestServer(S: type) !*TestServer { test_server.server_thread = try std.Thread.spawn(.{}, S.run, .{&test_server.net_server}); return test_server; } + +test "redirect to different connection" { + const test_server_new = try createTestServer(struct { + fn run(net_server: *std.net.Server) anyerror!void { + var header_buffer: [888]u8 = undefined; + + const conn = try net_server.accept(); + defer conn.stream.close(); + + var server = http.Server.init(conn, &header_buffer); + var request = try server.receiveHead(); + try expectEqualStrings(request.head.target, "/ok"); + try request.respond("good job, you pass", .{}); + } + }); + defer test_server_new.destroy(); + + const global = struct { + var other_port: ?u16 = null; + }; + global.other_port = test_server_new.port(); + + const test_server_orig = try createTestServer(struct { + fn run(net_server: *std.net.Server) anyerror!void { + var header_buffer: [999]u8 = undefined; + var send_buffer: [100]u8 = undefined; + + const conn = try net_server.accept(); + defer conn.stream.close(); + + const new_loc = try std.fmt.bufPrint(&send_buffer, "http://127.0.0.1:{d}/ok", .{ + global.other_port.?, + }); + + var server = http.Server.init(conn, &header_buffer); + var request = try server.receiveHead(); + try expectEqualStrings(request.head.target, "/help"); + try request.respond("", .{ + .status = .found, + .extra_headers = &.{ + .{ .name = "location", .value = new_loc }, + }, + }); + } + }); + defer test_server_orig.destroy(); + + const gpa = std.testing.allocator; + + var client: http.Client = .{ .allocator = gpa }; + defer client.deinit(); + + var loc_buf: [100]u8 = undefined; + const location = try std.fmt.bufPrint(&loc_buf, "http://127.0.0.1:{d}/help", .{ + test_server_orig.port(), + }); + const uri = try std.Uri.parse(location); + + { + var server_header_buffer: [666]u8 = undefined; + var req = try client.open(.GET, uri, .{ + .server_header_buffer = &server_header_buffer, + }); + defer req.deinit(); + + try req.send(.{}); + try req.wait(); + + const body = try req.reader().readAllAlloc(gpa, 8192); + defer gpa.free(body); + + try expectEqualStrings("good job, you pass", body); + } +} diff --git a/lib/std/mem.zig b/lib/std/mem.zig index f263b3e851..fc0c226894 100644 --- a/lib/std/mem.zig +++ b/lib/std/mem.zig @@ -1346,6 +1346,7 @@ pub fn lastIndexOfLinear(comptime T: type, haystack: []const T, needle: []const /// Consider using `indexOfPos` instead of this, which will automatically use a /// more sophisticated algorithm on larger inputs. pub fn indexOfPosLinear(comptime T: type, haystack: []const T, start_index: usize, needle: []const T) ?usize { + if (needle.len > haystack.len) return null; var i: usize = start_index; const end = haystack.len - needle.len; while (i <= end) : (i += 1) { @@ -1354,6 +1355,26 @@ pub fn indexOfPosLinear(comptime T: type, haystack: []const T, start_index: usiz return null; } +test indexOfPosLinear { + try testing.expectEqual(0, indexOfPosLinear(u8, "", 0, "")); + try testing.expectEqual(0, indexOfPosLinear(u8, "123", 0, "")); + + try testing.expectEqual(null, indexOfPosLinear(u8, "", 0, "1")); + try testing.expectEqual(0, indexOfPosLinear(u8, "1", 0, "1")); + try testing.expectEqual(null, indexOfPosLinear(u8, "2", 0, "1")); + try testing.expectEqual(1, indexOfPosLinear(u8, "21", 0, "1")); + try testing.expectEqual(null, indexOfPosLinear(u8, "222", 0, "1")); + + try testing.expectEqual(null, indexOfPosLinear(u8, "", 0, "12")); + try testing.expectEqual(null, indexOfPosLinear(u8, "1", 0, "12")); + try testing.expectEqual(null, indexOfPosLinear(u8, "2", 0, "12")); + try testing.expectEqual(0, indexOfPosLinear(u8, "12", 0, "12")); + try testing.expectEqual(null, indexOfPosLinear(u8, "21", 0, "12")); + try testing.expectEqual(1, indexOfPosLinear(u8, "212", 0, "12")); + try testing.expectEqual(0, indexOfPosLinear(u8, "122", 0, "12")); + try testing.expectEqual(1, indexOfPosLinear(u8, "212112", 0, "12")); +} + fn boyerMooreHorspoolPreprocessReverse(pattern: []const u8, table: *[256]usize) void { for (table) |*c| { c.* = pattern.len; diff --git a/lib/std/meta.zig b/lib/std/meta.zig index e7dd4e5652..17df0650f3 100644 --- a/lib/std/meta.zig +++ b/lib/std/meta.zig @@ -1286,5 +1286,6 @@ test "hasUniqueRepresentation" { try testing.expect(!hasUniqueRepresentation([]u8)); try testing.expect(!hasUniqueRepresentation([]const u8)); - try testing.expect(hasUniqueRepresentation(@Vector(4, u16))); + try testing.expect(hasUniqueRepresentation(@Vector(std.simd.suggestVectorLength(u8) orelse 1, u8))); + try testing.expect(@sizeOf(@Vector(3, u8)) == 3 or !hasUniqueRepresentation(@Vector(3, u8))); } diff --git a/lib/std/unicode.zig b/lib/std/unicode.zig index 224b0b3801..b2067c4f8f 100644 --- a/lib/std/unicode.zig +++ b/lib/std/unicode.zig @@ -239,18 +239,19 @@ pub fn utf8ValidateSlice(input: []const u8) bool { fn utf8ValidateSliceImpl(input: []const u8, comptime surrogates: Surrogates) bool { var remaining = input; - const chunk_len = std.simd.suggestVectorLength(u8) orelse 1; - const Chunk = @Vector(chunk_len, u8); - - // Fast path. Check for and skip ASCII characters at the start of the input. - while (remaining.len >= chunk_len) { - const chunk: Chunk = remaining[0..chunk_len].*; - const mask: Chunk = @splat(0x80); - if (@reduce(.Or, chunk & mask == mask)) { - // found a non ASCII byte - break; + if (std.simd.suggestVectorLength(u8)) |chunk_len| { + const Chunk = @Vector(chunk_len, u8); + + // Fast path. Check for and skip ASCII characters at the start of the input. + while (remaining.len >= chunk_len) { + const chunk: Chunk = remaining[0..chunk_len].*; + const mask: Chunk = @splat(0x80); + if (@reduce(.Or, chunk & mask == mask)) { + // found a non ASCII byte + break; + } + remaining = remaining[chunk_len..]; } - remaining = remaining[chunk_len..]; } // default lowest and highest continuation byte @@ -601,9 +602,9 @@ fn testUtf8IteratorOnAscii() !void { const s = Utf8View.initComptime("abc"); var it1 = s.iterator(); - try testing.expect(std.mem.eql(u8, "a", it1.nextCodepointSlice().?)); - try testing.expect(std.mem.eql(u8, "b", it1.nextCodepointSlice().?)); - try testing.expect(std.mem.eql(u8, "c", it1.nextCodepointSlice().?)); + try testing.expect(mem.eql(u8, "a", it1.nextCodepointSlice().?)); + try testing.expect(mem.eql(u8, "b", it1.nextCodepointSlice().?)); + try testing.expect(mem.eql(u8, "c", it1.nextCodepointSlice().?)); try testing.expect(it1.nextCodepointSlice() == null); var it2 = s.iterator(); @@ -631,9 +632,9 @@ fn testUtf8ViewOk() !void { const s = Utf8View.initComptime("東京市"); var it1 = s.iterator(); - try testing.expect(std.mem.eql(u8, "東", it1.nextCodepointSlice().?)); - try testing.expect(std.mem.eql(u8, "京", it1.nextCodepointSlice().?)); - try testing.expect(std.mem.eql(u8, "市", it1.nextCodepointSlice().?)); + try testing.expect(mem.eql(u8, "東", it1.nextCodepointSlice().?)); + try testing.expect(mem.eql(u8, "京", it1.nextCodepointSlice().?)); + try testing.expect(mem.eql(u8, "市", it1.nextCodepointSlice().?)); try testing.expect(it1.nextCodepointSlice() == null); var it2 = s.iterator(); @@ -771,20 +772,20 @@ fn testUtf8Peeking() !void { const s = Utf8View.initComptime("noël"); var it = s.iterator(); - try testing.expect(std.mem.eql(u8, "n", it.nextCodepointSlice().?)); + try testing.expect(mem.eql(u8, "n", it.nextCodepointSlice().?)); - try testing.expect(std.mem.eql(u8, "o", it.peek(1))); - try testing.expect(std.mem.eql(u8, "oë", it.peek(2))); - try testing.expect(std.mem.eql(u8, "oël", it.peek(3))); - try testing.expect(std.mem.eql(u8, "oël", it.peek(4))); - try testing.expect(std.mem.eql(u8, "oël", it.peek(10))); + try testing.expect(mem.eql(u8, "o", it.peek(1))); + try testing.expect(mem.eql(u8, "oë", it.peek(2))); + try testing.expect(mem.eql(u8, "oël", it.peek(3))); + try testing.expect(mem.eql(u8, "oël", it.peek(4))); + try testing.expect(mem.eql(u8, "oël", it.peek(10))); - try testing.expect(std.mem.eql(u8, "o", it.nextCodepointSlice().?)); - try testing.expect(std.mem.eql(u8, "ë", it.nextCodepointSlice().?)); - try testing.expect(std.mem.eql(u8, "l", it.nextCodepointSlice().?)); + try testing.expect(mem.eql(u8, "o", it.nextCodepointSlice().?)); + try testing.expect(mem.eql(u8, "ë", it.nextCodepointSlice().?)); + try testing.expect(mem.eql(u8, "l", it.nextCodepointSlice().?)); try testing.expect(it.nextCodepointSlice() == null); - try testing.expect(std.mem.eql(u8, &[_]u8{}, it.peek(1))); + try testing.expect(mem.eql(u8, &[_]u8{}, it.peek(1))); } fn testError(bytes: []const u8, expected_err: anyerror) !void { @@ -926,59 +927,50 @@ test "fmtUtf8" { } fn utf16LeToUtf8ArrayListImpl( - array_list: *std.ArrayList(u8), + result: *std.ArrayList(u8), utf16le: []const u16, comptime surrogates: Surrogates, ) (switch (surrogates) { .cannot_encode_surrogate_half => Utf16LeToUtf8AllocError, .can_encode_surrogate_half => mem.Allocator.Error, })!void { - // optimistically guess that it will all be ascii. - try array_list.ensureTotalCapacityPrecise(utf16le.len); + assert(result.capacity >= utf16le.len); var remaining = utf16le; - if (builtin.zig_backend != .stage2_x86_64) { - const chunk_len = std.simd.suggestVectorLength(u16) orelse 1; + vectorized: { + const chunk_len = std.simd.suggestVectorLength(u16) orelse break :vectorized; const Chunk = @Vector(chunk_len, u16); // Fast path. Check for and encode ASCII characters at the start of the input. while (remaining.len >= chunk_len) { const chunk: Chunk = remaining[0..chunk_len].*; - const mask: Chunk = @splat(std.mem.nativeToLittle(u16, 0x7F)); + const mask: Chunk = @splat(mem.nativeToLittle(u16, 0x7F)); if (@reduce(.Or, chunk | mask != mask)) { // found a non ASCII code unit break; } - const chunk_byte_len = chunk_len * 2; - const chunk_bytes: @Vector(chunk_byte_len, u8) = (std.mem.sliceAsBytes(remaining)[0..chunk_byte_len]).*; - const deinterlaced_bytes = std.simd.deinterlace(2, chunk_bytes); - const ascii_bytes: [chunk_len]u8 = deinterlaced_bytes[0]; + const ascii_chunk: @Vector(chunk_len, u8) = @truncate(mem.nativeToLittle(Chunk, chunk)); // We allocated enough space to encode every UTF-16 code unit // as ASCII, so if the entire string is ASCII then we are // guaranteed to have enough space allocated - array_list.appendSliceAssumeCapacity(&ascii_bytes); + result.addManyAsArrayAssumeCapacity(chunk_len).* = ascii_chunk; remaining = remaining[chunk_len..]; } } - var out_index: usize = array_list.items.len; switch (surrogates) { .cannot_encode_surrogate_half => { var it = Utf16LeIterator.init(remaining); while (try it.nextCodepoint()) |codepoint| { const utf8_len = utf8CodepointSequenceLength(codepoint) catch unreachable; - try array_list.resize(array_list.items.len + utf8_len); - assert((utf8Encode(codepoint, array_list.items[out_index..]) catch unreachable) == utf8_len); - out_index += utf8_len; + assert((utf8Encode(codepoint, try result.addManyAsSlice(utf8_len)) catch unreachable) == utf8_len); } }, .can_encode_surrogate_half => { var it = Wtf16LeIterator.init(remaining); while (it.nextCodepoint()) |codepoint| { const utf8_len = utf8CodepointSequenceLength(codepoint) catch unreachable; - try array_list.resize(array_list.items.len + utf8_len); - assert((wtf8Encode(codepoint, array_list.items[out_index..]) catch unreachable) == utf8_len); - out_index += utf8_len; + assert((wtf8Encode(codepoint, try result.addManyAsSlice(utf8_len)) catch unreachable) == utf8_len); } }, } @@ -986,8 +978,9 @@ fn utf16LeToUtf8ArrayListImpl( pub const Utf16LeToUtf8AllocError = mem.Allocator.Error || Utf16LeToUtf8Error; -pub fn utf16LeToUtf8ArrayList(array_list: *std.ArrayList(u8), utf16le: []const u16) Utf16LeToUtf8AllocError!void { - return utf16LeToUtf8ArrayListImpl(array_list, utf16le, .cannot_encode_surrogate_half); +pub fn utf16LeToUtf8ArrayList(result: *std.ArrayList(u8), utf16le: []const u16) Utf16LeToUtf8AllocError!void { + try result.ensureTotalCapacityPrecise(utf16le.len); + return utf16LeToUtf8ArrayListImpl(result, utf16le, .cannot_encode_surrogate_half); } /// Deprecated; renamed to utf16LeToUtf8Alloc @@ -999,8 +992,7 @@ pub fn utf16LeToUtf8Alloc(allocator: mem.Allocator, utf16le: []const u16) Utf16L var result = try std.ArrayList(u8).initCapacity(allocator, utf16le.len); errdefer result.deinit(); - try utf16LeToUtf8ArrayList(&result, utf16le); - + try utf16LeToUtf8ArrayListImpl(&result, utf16le, .cannot_encode_surrogate_half); return result.toOwnedSlice(); } @@ -1013,8 +1005,7 @@ pub fn utf16LeToUtf8AllocZ(allocator: mem.Allocator, utf16le: []const u16) Utf16 var result = try std.ArrayList(u8).initCapacity(allocator, utf16le.len + 1); errdefer result.deinit(); - try utf16LeToUtf8ArrayList(&result, utf16le); - + try utf16LeToUtf8ArrayListImpl(&result, utf16le, .cannot_encode_surrogate_half); return result.toOwnedSliceSentinel(0); } @@ -1026,27 +1017,24 @@ fn utf16LeToUtf8Impl(utf8: []u8, utf16le: []const u16, comptime surrogates: Surr .cannot_encode_surrogate_half => Utf16LeToUtf8Error, .can_encode_surrogate_half => error{}, })!usize { - var end_index: usize = 0; + var dest_index: usize = 0; var remaining = utf16le; - if (builtin.zig_backend != .stage2_x86_64) { - const chunk_len = std.simd.suggestVectorLength(u16) orelse 1; + vectorized: { + const chunk_len = std.simd.suggestVectorLength(u16) orelse break :vectorized; const Chunk = @Vector(chunk_len, u16); // Fast path. Check for and encode ASCII characters at the start of the input. while (remaining.len >= chunk_len) { const chunk: Chunk = remaining[0..chunk_len].*; - const mask: Chunk = @splat(std.mem.nativeToLittle(u16, 0x7F)); + const mask: Chunk = @splat(mem.nativeToLittle(u16, 0x7F)); if (@reduce(.Or, chunk | mask != mask)) { // found a non ASCII code unit break; } - const chunk_byte_len = chunk_len * 2; - const chunk_bytes: @Vector(chunk_byte_len, u8) = (std.mem.sliceAsBytes(remaining)[0..chunk_byte_len]).*; - const deinterlaced_bytes = std.simd.deinterlace(2, chunk_bytes); - const ascii_bytes: [chunk_len]u8 = deinterlaced_bytes[0]; - @memcpy(utf8[end_index .. end_index + chunk_len], &ascii_bytes); - end_index += chunk_len; + const ascii_chunk: @Vector(chunk_len, u8) = @truncate(mem.nativeToLittle(Chunk, chunk)); + utf8[dest_index..][0..chunk_len].* = ascii_chunk; + dest_index += chunk_len; remaining = remaining[chunk_len..]; } } @@ -1055,7 +1043,7 @@ fn utf16LeToUtf8Impl(utf8: []u8, utf16le: []const u16, comptime surrogates: Surr .cannot_encode_surrogate_half => { var it = Utf16LeIterator.init(remaining); while (try it.nextCodepoint()) |codepoint| { - end_index += utf8Encode(codepoint, utf8[end_index..]) catch |err| switch (err) { + dest_index += utf8Encode(codepoint, utf8[dest_index..]) catch |err| switch (err) { // The maximum possible codepoint encoded by UTF-16 is U+10FFFF, // which is within the valid codepoint range. error.CodepointTooLarge => unreachable, @@ -1068,7 +1056,7 @@ fn utf16LeToUtf8Impl(utf8: []u8, utf16le: []const u16, comptime surrogates: Surr .can_encode_surrogate_half => { var it = Wtf16LeIterator.init(remaining); while (it.nextCodepoint()) |codepoint| { - end_index += wtf8Encode(codepoint, utf8[end_index..]) catch |err| switch (err) { + dest_index += wtf8Encode(codepoint, utf8[dest_index..]) catch |err| switch (err) { // The maximum possible codepoint encoded by UTF-16 is U+10FFFF, // which is within the valid codepoint range. error.CodepointTooLarge => unreachable, @@ -1076,7 +1064,7 @@ fn utf16LeToUtf8Impl(utf8: []u8, utf16le: []const u16, comptime surrogates: Surr } }, } - return end_index; + return dest_index; } /// Deprecated; renamed to utf16LeToUtf8 @@ -1149,14 +1137,12 @@ test utf16LeToUtf8 { } } -fn utf8ToUtf16LeArrayListImpl(array_list: *std.ArrayList(u16), utf8: []const u8, comptime surrogates: Surrogates) !void { - // optimistically guess that it will not require surrogate pairs - try array_list.ensureTotalCapacityPrecise(utf8.len); +fn utf8ToUtf16LeArrayListImpl(result: *std.ArrayList(u16), utf8: []const u8, comptime surrogates: Surrogates) !void { + assert(result.capacity >= utf8.len); var remaining = utf8; - // Need support for std.simd.interlace - if (builtin.zig_backend != .stage2_x86_64 and comptime !builtin.cpu.arch.isMIPS()) { - const chunk_len = std.simd.suggestVectorLength(u8) orelse 1; + vectorized: { + const chunk_len = std.simd.suggestVectorLength(u16) orelse break :vectorized; const Chunk = @Vector(chunk_len, u8); // Fast path. Check for and encode ASCII characters at the start of the input. @@ -1167,9 +1153,8 @@ fn utf8ToUtf16LeArrayListImpl(array_list: *std.ArrayList(u16), utf8: []const u8, // found a non ASCII code unit break; } - const zeroes: Chunk = @splat(0); - const utf16_chunk: [chunk_len * 2]u8 align(@alignOf(u16)) = std.simd.interlace(.{ chunk, zeroes }); - array_list.appendSliceAssumeCapacity(std.mem.bytesAsSlice(u16, &utf16_chunk)); + const utf16_chunk = mem.nativeToLittle(@Vector(chunk_len, u16), chunk); + result.addManyAsArrayAssumeCapacity(chunk_len).* = utf16_chunk; remaining = remaining[chunk_len..]; } } @@ -1181,21 +1166,18 @@ fn utf8ToUtf16LeArrayListImpl(array_list: *std.ArrayList(u16), utf8: []const u8, var it = view.iterator(); while (it.nextCodepoint()) |codepoint| { if (codepoint < 0x10000) { - const short = @as(u16, @intCast(codepoint)); - try array_list.append(mem.nativeToLittle(u16, short)); + try result.append(mem.nativeToLittle(u16, @intCast(codepoint))); } else { const high = @as(u16, @intCast((codepoint - 0x10000) >> 10)) + 0xD800; const low = @as(u16, @intCast(codepoint & 0x3FF)) + 0xDC00; - var out: [2]u16 = undefined; - out[0] = mem.nativeToLittle(u16, high); - out[1] = mem.nativeToLittle(u16, low); - try array_list.appendSlice(out[0..]); + try result.appendSlice(&.{ mem.nativeToLittle(u16, high), mem.nativeToLittle(u16, low) }); } } } -pub fn utf8ToUtf16LeArrayList(array_list: *std.ArrayList(u16), utf8: []const u8) error{ InvalidUtf8, OutOfMemory }!void { - return utf8ToUtf16LeArrayListImpl(array_list, utf8, .cannot_encode_surrogate_half); +pub fn utf8ToUtf16LeArrayList(result: *std.ArrayList(u16), utf8: []const u8) error{ InvalidUtf8, OutOfMemory }!void { + try result.ensureTotalCapacityPrecise(utf8.len); + return utf8ToUtf16LeArrayListImpl(result, utf8, .cannot_encode_surrogate_half); } pub fn utf8ToUtf16LeAlloc(allocator: mem.Allocator, utf8: []const u8) error{ InvalidUtf8, OutOfMemory }![]u16 { @@ -1204,7 +1186,6 @@ pub fn utf8ToUtf16LeAlloc(allocator: mem.Allocator, utf8: []const u8) error{ Inv errdefer result.deinit(); try utf8ToUtf16LeArrayListImpl(&result, utf8, .cannot_encode_surrogate_half); - return result.toOwnedSlice(); } @@ -1217,7 +1198,6 @@ pub fn utf8ToUtf16LeAllocZ(allocator: mem.Allocator, utf8: []const u8) error{ In errdefer result.deinit(); try utf8ToUtf16LeArrayListImpl(&result, utf8, .cannot_encode_surrogate_half); - return result.toOwnedSliceSentinel(0); } @@ -1228,12 +1208,11 @@ pub fn utf8ToUtf16Le(utf16le: []u16, utf8: []const u8) error{InvalidUtf8}!usize } pub fn utf8ToUtf16LeImpl(utf16le: []u16, utf8: []const u8, comptime surrogates: Surrogates) !usize { - var dest_i: usize = 0; + var dest_index: usize = 0; var remaining = utf8; - // Need support for std.simd.interlace - if (builtin.zig_backend != .stage2_x86_64 and comptime !builtin.cpu.arch.isMIPS()) { - const chunk_len = std.simd.suggestVectorLength(u8) orelse 1; + vectorized: { + const chunk_len = std.simd.suggestVectorLength(u16) orelse break :vectorized; const Chunk = @Vector(chunk_len, u8); // Fast path. Check for and encode ASCII characters at the start of the input. @@ -1244,57 +1223,60 @@ pub fn utf8ToUtf16LeImpl(utf16le: []u16, utf8: []const u8, comptime surrogates: // found a non ASCII code unit break; } - const zeroes: Chunk = @splat(0); - const utf16_bytes: [chunk_len * 2]u8 align(@alignOf(u16)) = std.simd.interlace(.{ chunk, zeroes }); - @memcpy(utf16le[dest_i..][0..chunk_len], std.mem.bytesAsSlice(u16, &utf16_bytes)); - dest_i += chunk_len; + const utf16_chunk = mem.nativeToLittle(@Vector(chunk_len, u16), chunk); + utf16le[dest_index..][0..chunk_len].* = utf16_chunk; + dest_index += chunk_len; remaining = remaining[chunk_len..]; } } - var src_i: usize = 0; - while (src_i < remaining.len) { - const n = utf8ByteSequenceLength(remaining[src_i]) catch return switch (surrogates) { - .cannot_encode_surrogate_half => error.InvalidUtf8, - .can_encode_surrogate_half => error.InvalidWtf8, - }; - const next_src_i = src_i + n; - const codepoint = switch (surrogates) { - .cannot_encode_surrogate_half => utf8Decode(remaining[src_i..next_src_i]) catch return error.InvalidUtf8, - .can_encode_surrogate_half => wtf8Decode(remaining[src_i..next_src_i]) catch return error.InvalidWtf8, - }; + const view = switch (surrogates) { + .cannot_encode_surrogate_half => try Utf8View.init(remaining), + .can_encode_surrogate_half => try Wtf8View.init(remaining), + }; + var it = view.iterator(); + while (it.nextCodepoint()) |codepoint| { if (codepoint < 0x10000) { - const short = @as(u16, @intCast(codepoint)); - utf16le[dest_i] = mem.nativeToLittle(u16, short); - dest_i += 1; + utf16le[dest_index] = mem.nativeToLittle(u16, @intCast(codepoint)); + dest_index += 1; } else { const high = @as(u16, @intCast((codepoint - 0x10000) >> 10)) + 0xD800; const low = @as(u16, @intCast(codepoint & 0x3FF)) + 0xDC00; - utf16le[dest_i] = mem.nativeToLittle(u16, high); - utf16le[dest_i + 1] = mem.nativeToLittle(u16, low); - dest_i += 2; + utf16le[dest_index..][0..2].* = .{ mem.nativeToLittle(u16, high), mem.nativeToLittle(u16, low) }; + dest_index += 2; } - src_i = next_src_i; } - return dest_i; + return dest_index; } test "utf8ToUtf16Le" { - var utf16le: [2]u16 = [_]u16{0} ** 2; + var utf16le: [128]u16 = undefined; { const length = try utf8ToUtf16Le(utf16le[0..], "𐐷"); - try testing.expectEqual(@as(usize, 2), length); - try testing.expectEqualSlices(u8, "\x01\xd8\x37\xdc", mem.sliceAsBytes(utf16le[0..])); + try testing.expectEqualSlices(u8, "\x01\xd8\x37\xdc", mem.sliceAsBytes(utf16le[0..length])); } { const length = try utf8ToUtf16Le(utf16le[0..], "\u{10FFFF}"); - try testing.expectEqual(@as(usize, 2), length); - try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16le[0..])); + try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16le[0..length])); } { const result = utf8ToUtf16Le(utf16le[0..], "\xf4\x90\x80\x80"); try testing.expectError(error.InvalidUtf8, result); } + { + const length = try utf8ToUtf16Le(utf16le[0..], "This string has been designed to test the vectorized implementat" ++ + "ion by beginning with one hundred twenty-seven ASCII characters¡"); + try testing.expectEqualSlices(u8, &.{ + 'T', 0, 'h', 0, 'i', 0, 's', 0, ' ', 0, 's', 0, 't', 0, 'r', 0, 'i', 0, 'n', 0, 'g', 0, ' ', 0, 'h', 0, 'a', 0, 's', 0, ' ', 0, + 'b', 0, 'e', 0, 'e', 0, 'n', 0, ' ', 0, 'd', 0, 'e', 0, 's', 0, 'i', 0, 'g', 0, 'n', 0, 'e', 0, 'd', 0, ' ', 0, 't', 0, 'o', 0, + ' ', 0, 't', 0, 'e', 0, 's', 0, 't', 0, ' ', 0, 't', 0, 'h', 0, 'e', 0, ' ', 0, 'v', 0, 'e', 0, 'c', 0, 't', 0, 'o', 0, 'r', 0, + 'i', 0, 'z', 0, 'e', 0, 'd', 0, ' ', 0, 'i', 0, 'm', 0, 'p', 0, 'l', 0, 'e', 0, 'm', 0, 'e', 0, 'n', 0, 't', 0, 'a', 0, 't', 0, + 'i', 0, 'o', 0, 'n', 0, ' ', 0, 'b', 0, 'y', 0, ' ', 0, 'b', 0, 'e', 0, 'g', 0, 'i', 0, 'n', 0, 'n', 0, 'i', 0, 'n', 0, 'g', 0, + ' ', 0, 'w', 0, 'i', 0, 't', 0, 'h', 0, ' ', 0, 'o', 0, 'n', 0, 'e', 0, ' ', 0, 'h', 0, 'u', 0, 'n', 0, 'd', 0, 'r', 0, 'e', 0, + 'd', 0, ' ', 0, 't', 0, 'w', 0, 'e', 0, 'n', 0, 't', 0, 'y', 0, '-', 0, 's', 0, 'e', 0, 'v', 0, 'e', 0, 'n', 0, ' ', 0, 'A', 0, + 'S', 0, 'C', 0, 'I', 0, 'I', 0, ' ', 0, 'c', 0, 'h', 0, 'a', 0, 'r', 0, 'a', 0, 'c', 0, 't', 0, 'e', 0, 'r', 0, 's', 0, '¡', 0, + }, mem.sliceAsBytes(utf16le[0..length])); + } } test utf8ToUtf16LeArrayList { @@ -1339,25 +1321,40 @@ test utf8ToUtf16LeAllocZ { { const utf16 = try utf8ToUtf16LeAllocZ(testing.allocator, "𐐷"); defer testing.allocator.free(utf16); - try testing.expectEqualSlices(u8, "\x01\xd8\x37\xdc", mem.sliceAsBytes(utf16[0..])); + try testing.expectEqualSlices(u8, "\x01\xd8\x37\xdc", mem.sliceAsBytes(utf16)); try testing.expect(utf16[2] == 0); } { const utf16 = try utf8ToUtf16LeAllocZ(testing.allocator, "\u{10FFFF}"); defer testing.allocator.free(utf16); - try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16[0..])); + try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16)); try testing.expect(utf16[2] == 0); } { const result = utf8ToUtf16LeAllocZ(testing.allocator, "\xf4\x90\x80\x80"); try testing.expectError(error.InvalidUtf8, result); } + { + const utf16 = try utf8ToUtf16LeWithNull(testing.allocator, "This string has been designed to test the vectorized implementat" ++ + "ion by beginning with one hundred twenty-seven ASCII characters¡"); + defer testing.allocator.free(utf16); + try testing.expectEqualSlices(u8, &.{ + 'T', 0, 'h', 0, 'i', 0, 's', 0, ' ', 0, 's', 0, 't', 0, 'r', 0, 'i', 0, 'n', 0, 'g', 0, ' ', 0, 'h', 0, 'a', 0, 's', 0, ' ', 0, + 'b', 0, 'e', 0, 'e', 0, 'n', 0, ' ', 0, 'd', 0, 'e', 0, 's', 0, 'i', 0, 'g', 0, 'n', 0, 'e', 0, 'd', 0, ' ', 0, 't', 0, 'o', 0, + ' ', 0, 't', 0, 'e', 0, 's', 0, 't', 0, ' ', 0, 't', 0, 'h', 0, 'e', 0, ' ', 0, 'v', 0, 'e', 0, 'c', 0, 't', 0, 'o', 0, 'r', 0, + 'i', 0, 'z', 0, 'e', 0, 'd', 0, ' ', 0, 'i', 0, 'm', 0, 'p', 0, 'l', 0, 'e', 0, 'm', 0, 'e', 0, 'n', 0, 't', 0, 'a', 0, 't', 0, + 'i', 0, 'o', 0, 'n', 0, ' ', 0, 'b', 0, 'y', 0, ' ', 0, 'b', 0, 'e', 0, 'g', 0, 'i', 0, 'n', 0, 'n', 0, 'i', 0, 'n', 0, 'g', 0, + ' ', 0, 'w', 0, 'i', 0, 't', 0, 'h', 0, ' ', 0, 'o', 0, 'n', 0, 'e', 0, ' ', 0, 'h', 0, 'u', 0, 'n', 0, 'd', 0, 'r', 0, 'e', 0, + 'd', 0, ' ', 0, 't', 0, 'w', 0, 'e', 0, 'n', 0, 't', 0, 'y', 0, '-', 0, 's', 0, 'e', 0, 'v', 0, 'e', 0, 'n', 0, ' ', 0, 'A', 0, + 'S', 0, 'C', 0, 'I', 0, 'I', 0, ' ', 0, 'c', 0, 'h', 0, 'a', 0, 'r', 0, 'a', 0, 'c', 0, 't', 0, 'e', 0, 'r', 0, 's', 0, '¡', 0, + }, mem.sliceAsBytes(utf16)); + } } /// Converts a UTF-8 string literal into a UTF-16LE string literal. -pub fn utf8ToUtf16LeStringLiteral(comptime utf8: []const u8) *const [calcUtf16LeLen(utf8) catch unreachable:0]u16 { +pub fn utf8ToUtf16LeStringLiteral(comptime utf8: []const u8) *const [calcUtf16LeLen(utf8) catch |err| @compileError(err):0]u16 { return comptime blk: { - const len: usize = calcUtf16LeLen(utf8) catch |err| @compileError(err); + const len: usize = calcUtf16LeLen(utf8) catch unreachable; var utf16le: [len:0]u16 = [_:0]u16{0} ** len; const utf16le_len = utf8ToUtf16Le(&utf16le, utf8[0..]) catch |err| @compileError(err); assert(len == utf16le_len); @@ -1438,12 +1435,12 @@ test "fmtUtf16Le" { try expectFmt("", "{}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral(""))}); try expectFmt("foo", "{}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral("foo"))}); try expectFmt("𐐷", "{}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral("𐐷"))}); - try expectFmt("", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\xff\xd7", native_endian)})}); - try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\x00\xd8", native_endian)})}); - try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\xff\xdb", native_endian)})}); - try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\x00\xdc", native_endian)})}); - try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\xff\xdf", native_endian)})}); - try expectFmt("", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\x00\xe0", native_endian)})}); + try expectFmt("", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xd7", native_endian)})}); + try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xd8", native_endian)})}); + try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xdb", native_endian)})}); + try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xdc", native_endian)})}); + try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xdf", native_endian)})}); + try expectFmt("", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xe0", native_endian)})}); } test "utf8ToUtf16LeStringLiteral" { @@ -1686,8 +1683,9 @@ pub const Wtf8Iterator = struct { } }; -pub fn wtf16LeToWtf8ArrayList(array_list: *std.ArrayList(u8), utf16le: []const u16) mem.Allocator.Error!void { - return utf16LeToUtf8ArrayListImpl(array_list, utf16le, .can_encode_surrogate_half); +pub fn wtf16LeToWtf8ArrayList(result: *std.ArrayList(u8), utf16le: []const u16) mem.Allocator.Error!void { + try result.ensureTotalCapacityPrecise(utf16le.len); + return utf16LeToUtf8ArrayListImpl(result, utf16le, .can_encode_surrogate_half); } /// Caller must free returned memory. @@ -1696,8 +1694,7 @@ pub fn wtf16LeToWtf8Alloc(allocator: mem.Allocator, wtf16le: []const u16) mem.Al var result = try std.ArrayList(u8).initCapacity(allocator, wtf16le.len); errdefer result.deinit(); - try wtf16LeToWtf8ArrayList(&result, wtf16le); - + try utf16LeToUtf8ArrayListImpl(&result, wtf16le, .can_encode_surrogate_half); return result.toOwnedSlice(); } @@ -1707,8 +1704,7 @@ pub fn wtf16LeToWtf8AllocZ(allocator: mem.Allocator, wtf16le: []const u16) mem.A var result = try std.ArrayList(u8).initCapacity(allocator, wtf16le.len + 1); errdefer result.deinit(); - try wtf16LeToWtf8ArrayList(&result, wtf16le); - + try utf16LeToUtf8ArrayListImpl(&result, wtf16le, .can_encode_surrogate_half); return result.toOwnedSliceSentinel(0); } @@ -1716,8 +1712,9 @@ pub fn wtf16LeToWtf8(wtf8: []u8, wtf16le: []const u16) usize { return utf16LeToUtf8Impl(wtf8, wtf16le, .can_encode_surrogate_half) catch |err| switch (err) {}; } -pub fn wtf8ToWtf16LeArrayList(array_list: *std.ArrayList(u16), wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }!void { - return utf8ToUtf16LeArrayListImpl(array_list, wtf8, .can_encode_surrogate_half); +pub fn wtf8ToWtf16LeArrayList(result: *std.ArrayList(u16), wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }!void { + try result.ensureTotalCapacityPrecise(wtf8.len); + return utf8ToUtf16LeArrayListImpl(result, wtf8, .can_encode_surrogate_half); } pub fn wtf8ToWtf16LeAlloc(allocator: mem.Allocator, wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }![]u16 { @@ -1726,7 +1723,6 @@ pub fn wtf8ToWtf16LeAlloc(allocator: mem.Allocator, wtf8: []const u8) error{ Inv errdefer result.deinit(); try utf8ToUtf16LeArrayListImpl(&result, wtf8, .can_encode_surrogate_half); - return result.toOwnedSlice(); } @@ -1736,7 +1732,6 @@ pub fn wtf8ToWtf16LeAllocZ(allocator: mem.Allocator, wtf8: []const u8) error{ In errdefer result.deinit(); try utf8ToUtf16LeArrayListImpl(&result, wtf8, .can_encode_surrogate_half); - return result.toOwnedSliceSentinel(0); } @@ -1895,7 +1890,7 @@ pub const Wtf16LeIterator = struct { pub fn init(s: []const u16) Wtf16LeIterator { return Wtf16LeIterator{ - .bytes = std.mem.sliceAsBytes(s), + .bytes = mem.sliceAsBytes(s), .i = 0, }; } @@ -1908,12 +1903,12 @@ pub const Wtf16LeIterator = struct { assert(it.i <= it.bytes.len); if (it.i == it.bytes.len) return null; var code_units: [2]u16 = undefined; - code_units[0] = std.mem.readInt(u16, it.bytes[it.i..][0..2], .little); + code_units[0] = mem.readInt(u16, it.bytes[it.i..][0..2], .little); it.i += 2; surrogate_pair: { if (utf16IsHighSurrogate(code_units[0])) { if (it.i >= it.bytes.len) break :surrogate_pair; - code_units[1] = std.mem.readInt(u16, it.bytes[it.i..][0..2], .little); + code_units[1] = mem.readInt(u16, it.bytes[it.i..][0..2], .little); const codepoint = utf16DecodeSurrogatePair(&code_units) catch break :surrogate_pair; it.i += 2; return codepoint; @@ -2030,31 +2025,31 @@ fn testRoundtripWtf16(wtf16le: []const u16) !void { test "well-formed WTF-16 roundtrips" { try testRoundtripWtf16(&[_]u16{ - std.mem.nativeToLittle(u16, 0xD83D), // high surrogate - std.mem.nativeToLittle(u16, 0xDCA9), // low surrogate + mem.nativeToLittle(u16, 0xD83D), // high surrogate + mem.nativeToLittle(u16, 0xDCA9), // low surrogate }); try testRoundtripWtf16(&[_]u16{ - std.mem.nativeToLittle(u16, 0xD83D), // high surrogate - std.mem.nativeToLittle(u16, ' '), // not surrogate - std.mem.nativeToLittle(u16, 0xDCA9), // low surrogate + mem.nativeToLittle(u16, 0xD83D), // high surrogate + mem.nativeToLittle(u16, ' '), // not surrogate + mem.nativeToLittle(u16, 0xDCA9), // low surrogate }); try testRoundtripWtf16(&[_]u16{ - std.mem.nativeToLittle(u16, 0xD800), // high surrogate - std.mem.nativeToLittle(u16, 0xDBFF), // high surrogate + mem.nativeToLittle(u16, 0xD800), // high surrogate + mem.nativeToLittle(u16, 0xDBFF), // high surrogate }); try testRoundtripWtf16(&[_]u16{ - std.mem.nativeToLittle(u16, 0xD800), // high surrogate - std.mem.nativeToLittle(u16, 0xE000), // not surrogate + mem.nativeToLittle(u16, 0xD800), // high surrogate + mem.nativeToLittle(u16, 0xE000), // not surrogate }); try testRoundtripWtf16(&[_]u16{ - std.mem.nativeToLittle(u16, 0xD7FF), // not surrogate - std.mem.nativeToLittle(u16, 0xDC00), // low surrogate + mem.nativeToLittle(u16, 0xD7FF), // not surrogate + mem.nativeToLittle(u16, 0xDC00), // low surrogate }); try testRoundtripWtf16(&[_]u16{ - std.mem.nativeToLittle(u16, 0x61), // not surrogate - std.mem.nativeToLittle(u16, 0xDC00), // low surrogate + mem.nativeToLittle(u16, 0x61), // not surrogate + mem.nativeToLittle(u16, 0xDC00), // low surrogate }); try testRoundtripWtf16(&[_]u16{ - std.mem.nativeToLittle(u16, 0xDC00), // low surrogate + mem.nativeToLittle(u16, 0xDC00), // low surrogate }); } diff --git a/lib/std/zig/c_translation.zig b/lib/std/zig/c_translation.zig index dfa888e94b..337149e97d 100644 --- a/lib/std/zig/c_translation.zig +++ b/lib/std/zig/c_translation.zig @@ -308,14 +308,12 @@ test "promoteIntLiteral" { /// Convert from clang __builtin_shufflevector index to Zig @shuffle index /// clang requires __builtin_shufflevector index arguments to be integer constants. -/// negative values for `this_index` indicate "don't care" so we arbitrarily choose 0 +/// negative values for `this_index` indicate "don't care". /// clang enforces that `this_index` is less than the total number of vector elements /// See https://ziglang.org/documentation/master/#shuffle /// See https://clang.llvm.org/docs/LanguageExtensions.html#langext-builtin-shufflevector pub fn shuffleVectorIndex(comptime this_index: c_int, comptime source_vector_len: usize) i32 { - if (this_index <= 0) return 0; - - const positive_index = @as(usize, @intCast(this_index)); + const positive_index = std.math.cast(usize, this_index) orelse return undefined; if (positive_index < source_vector_len) return @as(i32, @intCast(this_index)); const b_index = positive_index - source_vector_len; return ~@as(i32, @intCast(b_index)); @@ -324,7 +322,7 @@ pub fn shuffleVectorIndex(comptime this_index: c_int, comptime source_vector_len test "shuffleVectorIndex" { const vector_len: usize = 4; - try testing.expect(shuffleVectorIndex(-1, vector_len) == 0); + _ = shuffleVectorIndex(-1, vector_len); try testing.expect(shuffleVectorIndex(0, vector_len) == 0); try testing.expect(shuffleVectorIndex(1, vector_len) == 1); |
