about | summary | refs | log | tree | commit | diff
path: root/lib/std
diff options
context:
space:
mode:
Diffstat (limited to 'lib/std')
-rw-r--r-- lib/std/Build.zig 13
-rw-r--r-- lib/std/Build/Step/Compile.zig 8
-rw-r--r-- lib/std/crypto/aes.zig 2
-rw-r--r-- lib/std/crypto/blake3.zig 2
-rw-r--r-- lib/std/crypto/salsa20.zig 5
-rw-r--r-- lib/std/crypto/sha2.zig 2
-rw-r--r-- lib/std/http/Client.zig 13
-rw-r--r-- lib/std/http/HeaderIterator.zig 16
-rw-r--r-- lib/std/http/Server.zig 28
-rw-r--r-- lib/std/http/test.zig 135
-rw-r--r-- lib/std/mem.zig 21
-rw-r--r-- lib/std/meta.zig 3
-rw-r--r-- lib/std/unicode.zig 299
-rw-r--r-- lib/std/zig/c_translation.zig 8
14 files changed, 364 insertions, 191 deletions
diff --git a/lib/std/Build.zig b/lib/std/Build.zig
index ca00a7d15a..3892b9ca73 100644
--- a/lib/std/Build.zig
+++ b/lib/std/Build.zig
@@ -855,7 +855,9 @@ pub const TestOptions = struct {
optimize: std.builtin.OptimizeMode = .Debug,
version: ?std.SemanticVersion = null,
max_rss: usize = 0,
+ /// deprecated: use `.filters = &.{filter}` instead of `.filter = filter`.
filter: ?[]const u8 = null,
+ filters: []const []const u8 = &.{},
test_runner: ?[]const u8 = null,
link_libc: ?bool = null,
single_threaded: ?bool = null,
@@ -888,7 +890,12 @@ pub fn addTest(b: *Build, options: TestOptions) *Step.Compile {
.error_tracing = options.error_tracing,
},
.max_rss = options.max_rss,
- .filter = options.filter,
+ .filters = if (options.filter != null and options.filters.len > 0) filters: {
+ const filters = b.allocator.alloc([]const u8, 1 + options.filters.len) catch @panic("OOM");
+ filters[0] = b.dupe(options.filter.?);
+ for (filters[1..], options.filters) |*dest, source| dest.* = b.dupe(source);
+ break :filters filters;
+ } else b.dupeStrings(if (options.filter) |filter| &.{filter} else options.filters),
.test_runner = options.test_runner,
.use_llvm = options.use_llvm,
.use_lld = options.use_lld,
@@ -993,9 +1000,7 @@ pub fn dupe(self: *Build, bytes: []const u8) []u8 {
/// Duplicates an array of strings without the need to handle out of memory.
pub fn dupeStrings(self: *Build, strings: []const []const u8) [][]u8 {
const array = self.allocator.alloc([]u8, strings.len) catch @panic("OOM");
- for (strings, 0..) |s, i| {
- array[i] = self.dupe(s);
- }
+ for (array, strings) |*dest, source| dest.* = self.dupe(source);
return array;
}
diff --git a/lib/std/Build/Step/Compile.zig b/lib/std/Build/Step/Compile.zig
index 51b5b2e52a..5ee92ffc22 100644
--- a/lib/std/Build/Step/Compile.zig
+++ b/lib/std/Build/Step/Compile.zig
@@ -54,7 +54,7 @@ global_base: ?u64 = null,
/// Set via options; intended to be read-only after that.
zig_lib_dir: ?LazyPath,
exec_cmd_args: ?[]const ?[]const u8,
-filter: ?[]const u8,
+filters: []const []const u8,
test_runner: ?[]const u8,
test_server_mode: bool,
wasi_exec_model: ?std.builtin.WasiExecModel = null,
@@ -223,7 +223,7 @@ pub const Options = struct {
linkage: ?Linkage = null,
version: ?std.SemanticVersion = null,
max_rss: usize = 0,
- filter: ?[]const u8 = null,
+ filters: []const []const u8 = &.{},
test_runner: ?[]const u8 = null,
use_llvm: ?bool = null,
use_lld: ?bool = null,
@@ -310,7 +310,7 @@ pub fn create(owner: *std.Build, options: Options) *Compile {
.installed_headers = ArrayList(*Step).init(owner.allocator),
.zig_lib_dir = null,
.exec_cmd_args = null,
- .filter = options.filter,
+ .filters = options.filters,
.test_runner = options.test_runner,
.test_server_mode = options.test_runner == null,
.rdynamic = false,
@@ -1297,7 +1297,7 @@ fn make(step: *Step, prog_node: *std.Progress.Node) !void {
try zig_args.append(b.fmt("0x{x}", .{image_base}));
}
- if (self.filter) |filter| {
+ for (self.filters) |filter| {
try zig_args.append("--test-filter");
try zig_args.append(filter);
}
diff --git a/lib/std/crypto/aes.zig b/lib/std/crypto/aes.zig
index f5752888fc..5e5ae04b58 100644
--- a/lib/std/crypto/aes.zig
+++ b/lib/std/crypto/aes.zig
@@ -6,7 +6,7 @@ const has_aesni = std.Target.x86.featureSetHas(builtin.cpu.features, .aes);
const has_avx = std.Target.x86.featureSetHas(builtin.cpu.features, .avx);
const has_armaes = std.Target.aarch64.featureSetHas(builtin.cpu.features, .aes);
// C backend doesn't currently support passing vectors to inline asm.
-const impl = if (builtin.cpu.arch == .x86_64 and builtin.zig_backend != .stage2_c and builtin.zig_backend != .stage2_x86_64 and has_aesni and has_avx) impl: {
+const impl = if (builtin.cpu.arch == .x86_64 and builtin.zig_backend != .stage2_c and has_aesni and has_avx) impl: {
break :impl @import("aes/aesni.zig");
} else if (builtin.cpu.arch == .aarch64 and builtin.zig_backend != .stage2_c and has_armaes)
impl: {
diff --git a/lib/std/crypto/blake3.zig b/lib/std/crypto/blake3.zig
index d87211fb1e..585c338417 100644
--- a/lib/std/crypto/blake3.zig
+++ b/lib/std/crypto/blake3.zig
@@ -200,7 +200,7 @@ const CompressGeneric = struct {
}
};
-const compress = if (builtin.cpu.arch == .x86_64 and builtin.zig_backend != .stage2_x86_64)
+const compress = if (builtin.cpu.arch == .x86_64)
CompressVectorized.compress
else
CompressGeneric.compress;
diff --git a/lib/std/crypto/salsa20.zig b/lib/std/crypto/salsa20.zig
index 7f4c1b0157..c791c6b773 100644
--- a/lib/std/crypto/salsa20.zig
+++ b/lib/std/crypto/salsa20.zig
@@ -302,7 +302,10 @@ fn SalsaNonVecImpl(comptime rounds: comptime_int) type {
};
}
-const SalsaImpl = if (builtin.cpu.arch == .x86_64 and builtin.zig_backend != .stage2_x86_64) SalsaVecImpl else SalsaNonVecImpl;
+const SalsaImpl = if (builtin.cpu.arch == .x86_64)
+ SalsaVecImpl
+else
+ SalsaNonVecImpl;
fn keyToWords(key: [32]u8) [8]u32 {
var k: [8]u32 = undefined;
diff --git a/lib/std/crypto/sha2.zig b/lib/std/crypto/sha2.zig
index 10909cfaec..31884c7381 100644
--- a/lib/std/crypto/sha2.zig
+++ b/lib/std/crypto/sha2.zig
@@ -238,7 +238,7 @@ fn Sha2x32(comptime params: Sha2Params32) type {
return;
},
// C backend doesn't currently support passing vectors to inline asm.
- .x86_64 => if (builtin.zig_backend != .stage2_c and builtin.zig_backend != .stage2_x86_64 and comptime std.Target.x86.featureSetHasAll(builtin.cpu.features, .{ .sha, .avx2 })) {
+ .x86_64 => if (builtin.zig_backend != .stage2_c and comptime std.Target.x86.featureSetHasAll(builtin.cpu.features, .{ .sha, .avx2 })) {
var x: v4u32 = [_]u32{ d.s[5], d.s[4], d.s[1], d.s[0] };
var y: v4u32 = [_]u32{ d.s[7], d.s[6], d.s[3], d.s[2] };
const s_v = @as(*[16]v4u32, @ptrCast(&s));
diff --git a/lib/std/http/Client.zig b/lib/std/http/Client.zig
index 5f580bd53e..1ffe1e8ea3 100644
--- a/lib/std/http/Client.zig
+++ b/lib/std/http/Client.zig
@@ -488,7 +488,7 @@ pub const Response = struct {
var line_it = mem.splitSequence(u8, line, ": ");
const header_name = line_it.next().?;
const header_value = line_it.rest();
- if (header_value.len == 0) return error.HttpHeadersInvalid;
+ if (header_name.len == 0) return error.HttpHeadersInvalid;
if (std.ascii.eqlIgnoreCase(header_name, "connection")) {
res.keep_alive = !std.ascii.eqlIgnoreCase(header_value, "close");
@@ -774,7 +774,7 @@ pub const Request = struct {
}
for (req.extra_headers) |header| {
- assert(header.value.len != 0);
+ assert(header.name.len != 0);
try w.writeAll(header.name);
try w.writeAll(": ");
@@ -857,9 +857,12 @@ pub const Request = struct {
/// Must be called after `send` and, if any data was written to the request
/// body, then also after `finish`.
pub fn wait(req: *Request) WaitError!void {
- const connection = req.connection.?;
+ while (true) {
+ // This while loop is for handling redirects, which means the request's
+ // connection may be different than the previous iteration. However, it
+ // is still guaranteed to be non-null with each iteration of this loop.
+ const connection = req.connection.?;
- while (true) { // handle redirects
while (true) { // read headers
try connection.fill();
@@ -1515,11 +1518,13 @@ pub fn open(
) RequestError!Request {
if (std.debug.runtime_safety) {
for (options.extra_headers) |header| {
+ assert(header.name.len != 0);
assert(std.mem.indexOfScalar(u8, header.name, ':') == null);
assert(std.mem.indexOfPosLinear(u8, header.name, 0, "\r\n") == null);
assert(std.mem.indexOfPosLinear(u8, header.value, 0, "\r\n") == null);
}
for (options.privileged_headers) |header| {
+ assert(header.name.len != 0);
assert(std.mem.indexOfPosLinear(u8, header.name, 0, "\r\n") == null);
assert(std.mem.indexOfPosLinear(u8, header.value, 0, "\r\n") == null);
}
diff --git a/lib/std/http/HeaderIterator.zig b/lib/std/http/HeaderIterator.zig
index 8d36374f8c..515058859d 100644
--- a/lib/std/http/HeaderIterator.zig
+++ b/lib/std/http/HeaderIterator.zig
@@ -15,7 +15,7 @@ pub fn next(it: *HeaderIterator) ?std.http.Header {
var kv_it = std.mem.splitSequence(u8, it.bytes[it.index..end], ": ");
const name = kv_it.next().?;
const value = kv_it.rest();
- if (value.len == 0) {
+ if (name.len == 0 and value.len == 0) {
if (it.is_trailer) return null;
const next_end = std.mem.indexOfPosLinear(u8, it.bytes, end + 2, "\r\n") orelse
return null;
@@ -35,7 +35,7 @@ pub fn next(it: *HeaderIterator) ?std.http.Header {
}
test next {
- var it = HeaderIterator.init("200 OK\r\na: b\r\nc: d\r\n\r\ne: f\r\n\r\n");
+ var it = HeaderIterator.init("200 OK\r\na: b\r\nc: \r\nd: e\r\n\r\nf: g\r\n\r\n");
try std.testing.expect(!it.is_trailer);
{
const header = it.next().?;
@@ -47,13 +47,19 @@ test next {
const header = it.next().?;
try std.testing.expect(!it.is_trailer);
try std.testing.expectEqualStrings("c", header.name);
- try std.testing.expectEqualStrings("d", header.value);
+ try std.testing.expectEqualStrings("", header.value);
+ }
+ {
+ const header = it.next().?;
+ try std.testing.expect(!it.is_trailer);
+ try std.testing.expectEqualStrings("d", header.name);
+ try std.testing.expectEqualStrings("e", header.value);
}
{
const header = it.next().?;
try std.testing.expect(it.is_trailer);
- try std.testing.expectEqualStrings("e", header.name);
- try std.testing.expectEqualStrings("f", header.value);
+ try std.testing.expectEqualStrings("f", header.name);
+ try std.testing.expectEqualStrings("g", header.value);
}
try std.testing.expectEqual(null, it.next());
}
diff --git a/lib/std/http/Server.zig b/lib/std/http/Server.zig
index 2d360d40a4..0454fa739e 100644
--- a/lib/std/http/Server.zig
+++ b/lib/std/http/Server.zig
@@ -211,7 +211,7 @@ pub const Request = struct {
var line_it = mem.splitSequence(u8, line, ": ");
const header_name = line_it.next().?;
const header_value = line_it.rest();
- if (header_value.len == 0) return error.HttpHeadersInvalid;
+ if (header_name.len == 0) return error.HttpHeadersInvalid;
if (std.ascii.eqlIgnoreCase(header_name, "connection")) {
head.keep_alive = !std.ascii.eqlIgnoreCase(header_value, "close");
@@ -311,6 +311,7 @@ pub const Request = struct {
assert(options.extra_headers.len <= max_extra_headers);
if (std.debug.runtime_safety) {
for (options.extra_headers) |header| {
+ assert(header.name.len != 0);
assert(std.mem.indexOfScalar(u8, header.name, ':') == null);
assert(std.mem.indexOfPosLinear(u8, header.name, 0, "\r\n") == null);
assert(std.mem.indexOfPosLinear(u8, header.value, 0, "\r\n") == null);
@@ -370,11 +371,13 @@ pub const Request = struct {
};
iovecs_len += 1;
- iovecs[iovecs_len] = .{
- .iov_base = header.value.ptr,
- .iov_len = header.value.len,
- };
- iovecs_len += 1;
+ if (header.value.len != 0) {
+ iovecs[iovecs_len] = .{
+ .iov_base = header.value.ptr,
+ .iov_len = header.value.len,
+ };
+ iovecs_len += 1;
+ }
iovecs[iovecs_len] = .{
.iov_base = "\r\n",
@@ -496,6 +499,7 @@ pub const Request = struct {
}
for (o.extra_headers) |header| {
+ assert(header.name.len != 0);
h.appendSliceAssumeCapacity(header.name);
h.appendSliceAssumeCapacity(": ");
h.appendSliceAssumeCapacity(header.value);
@@ -986,11 +990,13 @@ pub const Response = struct {
};
iovecs_len += 1;
- iovecs[iovecs_len] = .{
- .iov_base = trailer.value.ptr,
- .iov_len = trailer.value.len,
- };
- iovecs_len += 1;
+ if (trailer.value.len != 0) {
+ iovecs[iovecs_len] = .{
+ .iov_base = trailer.value.ptr,
+ .iov_len = trailer.value.len,
+ };
+ iovecs_len += 1;
+ }
iovecs[iovecs_len] = .{
.iov_base = "\r\n",
diff --git a/lib/std/http/test.zig b/lib/std/http/test.zig
index cfcfa5e5ac..ea766a8c20 100644
--- a/lib/std/http/test.zig
+++ b/lib/std/http/test.zig
@@ -490,6 +490,12 @@ test "general client/server API coverage" {
.{ .name = "location", .value = location },
},
});
+ } else if (mem.eql(u8, request.head.target, "/empty")) {
+ try request.respond("", .{
+ .extra_headers = &.{
+ .{ .name = "empty", .value = "" },
+ },
+ });
} else {
try request.respond("", .{ .status = .not_found });
}
@@ -502,7 +508,10 @@ test "general client/server API coverage" {
return s.listen_address.in.getPort();
}
});
- defer test_server.destroy();
+ defer {
+ global.handle_new_requests = false;
+ test_server.destroy();
+ }
const log = std.log.scoped(.client);
@@ -665,6 +674,56 @@ test "general client/server API coverage" {
// connection has been closed
try expect(client.connection_pool.free_len == 0);
+ { // handle empty header field value
+ const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}/empty", .{port});
+ defer gpa.free(location);
+ const uri = try std.Uri.parse(location);
+
+ log.info("{s}", .{location});
+ var server_header_buffer: [1024]u8 = undefined;
+ var req = try client.open(.GET, uri, .{
+ .server_header_buffer = &server_header_buffer,
+ .extra_headers = &.{
+ .{ .name = "empty", .value = "" },
+ },
+ });
+ defer req.deinit();
+
+ try req.send(.{});
+ try req.wait();
+
+ try std.testing.expectEqual(.ok, req.response.status);
+
+ const body = try req.reader().readAllAlloc(gpa, 8192);
+ defer gpa.free(body);
+
+ try expectEqualStrings("", body);
+
+ var it = req.response.iterateHeaders();
+ {
+ const header = it.next().?;
+ try expect(!it.is_trailer);
+ try expectEqualStrings("connection", header.name);
+ try expectEqualStrings("keep-alive", header.value);
+ }
+ {
+ const header = it.next().?;
+ try expect(!it.is_trailer);
+ try expectEqualStrings("content-length", header.name);
+ try expectEqualStrings("0", header.value);
+ }
+ {
+ const header = it.next().?;
+ try expect(!it.is_trailer);
+ try expectEqualStrings("empty", header.name);
+ try expectEqualStrings("", header.value);
+ }
+ try expectEqual(null, it.next());
+ }
+
+ // connection has been kept alive
+ try expect(client.http_proxy != null or client.connection_pool.free_len == 1);
+
{ // relative redirect
const location = try std.fmt.allocPrint(gpa, "http://127.0.0.1:{d}/redirect/1", .{port});
defer gpa.free(location);
@@ -1004,3 +1063,77 @@ fn createTestServer(S: type) !*TestServer {
test_server.server_thread = try std.Thread.spawn(.{}, S.run, .{&test_server.net_server});
return test_server;
}
+
+test "redirect to different connection" {
+ const test_server_new = try createTestServer(struct {
+ fn run(net_server: *std.net.Server) anyerror!void {
+ var header_buffer: [888]u8 = undefined;
+
+ const conn = try net_server.accept();
+ defer conn.stream.close();
+
+ var server = http.Server.init(conn, &header_buffer);
+ var request = try server.receiveHead();
+ try expectEqualStrings(request.head.target, "/ok");
+ try request.respond("good job, you pass", .{});
+ }
+ });
+ defer test_server_new.destroy();
+
+ const global = struct {
+ var other_port: ?u16 = null;
+ };
+ global.other_port = test_server_new.port();
+
+ const test_server_orig = try createTestServer(struct {
+ fn run(net_server: *std.net.Server) anyerror!void {
+ var header_buffer: [999]u8 = undefined;
+ var send_buffer: [100]u8 = undefined;
+
+ const conn = try net_server.accept();
+ defer conn.stream.close();
+
+ const new_loc = try std.fmt.bufPrint(&send_buffer, "http://127.0.0.1:{d}/ok", .{
+ global.other_port.?,
+ });
+
+ var server = http.Server.init(conn, &header_buffer);
+ var request = try server.receiveHead();
+ try expectEqualStrings(request.head.target, "/help");
+ try request.respond("", .{
+ .status = .found,
+ .extra_headers = &.{
+ .{ .name = "location", .value = new_loc },
+ },
+ });
+ }
+ });
+ defer test_server_orig.destroy();
+
+ const gpa = std.testing.allocator;
+
+ var client: http.Client = .{ .allocator = gpa };
+ defer client.deinit();
+
+ var loc_buf: [100]u8 = undefined;
+ const location = try std.fmt.bufPrint(&loc_buf, "http://127.0.0.1:{d}/help", .{
+ test_server_orig.port(),
+ });
+ const uri = try std.Uri.parse(location);
+
+ {
+ var server_header_buffer: [666]u8 = undefined;
+ var req = try client.open(.GET, uri, .{
+ .server_header_buffer = &server_header_buffer,
+ });
+ defer req.deinit();
+
+ try req.send(.{});
+ try req.wait();
+
+ const body = try req.reader().readAllAlloc(gpa, 8192);
+ defer gpa.free(body);
+
+ try expectEqualStrings("good job, you pass", body);
+ }
+}
diff --git a/lib/std/mem.zig b/lib/std/mem.zig
index f263b3e851..fc0c226894 100644
--- a/lib/std/mem.zig
+++ b/lib/std/mem.zig
@@ -1346,6 +1346,7 @@ pub fn lastIndexOfLinear(comptime T: type, haystack: []const T, needle: []const
/// Consider using `indexOfPos` instead of this, which will automatically use a
/// more sophisticated algorithm on larger inputs.
pub fn indexOfPosLinear(comptime T: type, haystack: []const T, start_index: usize, needle: []const T) ?usize {
+ if (needle.len > haystack.len) return null;
var i: usize = start_index;
const end = haystack.len - needle.len;
while (i <= end) : (i += 1) {
@@ -1354,6 +1355,26 @@ pub fn indexOfPosLinear(comptime T: type, haystack: []const T, start_index: usiz
return null;
}
+test indexOfPosLinear {
+ try testing.expectEqual(0, indexOfPosLinear(u8, "", 0, ""));
+ try testing.expectEqual(0, indexOfPosLinear(u8, "123", 0, ""));
+
+ try testing.expectEqual(null, indexOfPosLinear(u8, "", 0, "1"));
+ try testing.expectEqual(0, indexOfPosLinear(u8, "1", 0, "1"));
+ try testing.expectEqual(null, indexOfPosLinear(u8, "2", 0, "1"));
+ try testing.expectEqual(1, indexOfPosLinear(u8, "21", 0, "1"));
+ try testing.expectEqual(null, indexOfPosLinear(u8, "222", 0, "1"));
+
+ try testing.expectEqual(null, indexOfPosLinear(u8, "", 0, "12"));
+ try testing.expectEqual(null, indexOfPosLinear(u8, "1", 0, "12"));
+ try testing.expectEqual(null, indexOfPosLinear(u8, "2", 0, "12"));
+ try testing.expectEqual(0, indexOfPosLinear(u8, "12", 0, "12"));
+ try testing.expectEqual(null, indexOfPosLinear(u8, "21", 0, "12"));
+ try testing.expectEqual(1, indexOfPosLinear(u8, "212", 0, "12"));
+ try testing.expectEqual(0, indexOfPosLinear(u8, "122", 0, "12"));
+ try testing.expectEqual(1, indexOfPosLinear(u8, "212112", 0, "12"));
+}
+
fn boyerMooreHorspoolPreprocessReverse(pattern: []const u8, table: *[256]usize) void {
for (table) |*c| {
c.* = pattern.len;
diff --git a/lib/std/meta.zig b/lib/std/meta.zig
index e7dd4e5652..17df0650f3 100644
--- a/lib/std/meta.zig
+++ b/lib/std/meta.zig
@@ -1286,5 +1286,6 @@ test "hasUniqueRepresentation" {
try testing.expect(!hasUniqueRepresentation([]u8));
try testing.expect(!hasUniqueRepresentation([]const u8));
- try testing.expect(hasUniqueRepresentation(@Vector(4, u16)));
+ try testing.expect(hasUniqueRepresentation(@Vector(std.simd.suggestVectorLength(u8) orelse 1, u8)));
+ try testing.expect(@sizeOf(@Vector(3, u8)) == 3 or !hasUniqueRepresentation(@Vector(3, u8)));
}
diff --git a/lib/std/unicode.zig b/lib/std/unicode.zig
index 224b0b3801..b2067c4f8f 100644
--- a/lib/std/unicode.zig
+++ b/lib/std/unicode.zig
@@ -239,18 +239,19 @@ pub fn utf8ValidateSlice(input: []const u8) bool {
fn utf8ValidateSliceImpl(input: []const u8, comptime surrogates: Surrogates) bool {
var remaining = input;
- const chunk_len = std.simd.suggestVectorLength(u8) orelse 1;
- const Chunk = @Vector(chunk_len, u8);
-
- // Fast path. Check for and skip ASCII characters at the start of the input.
- while (remaining.len >= chunk_len) {
- const chunk: Chunk = remaining[0..chunk_len].*;
- const mask: Chunk = @splat(0x80);
- if (@reduce(.Or, chunk & mask == mask)) {
- // found a non ASCII byte
- break;
+ if (std.simd.suggestVectorLength(u8)) |chunk_len| {
+ const Chunk = @Vector(chunk_len, u8);
+
+ // Fast path. Check for and skip ASCII characters at the start of the input.
+ while (remaining.len >= chunk_len) {
+ const chunk: Chunk = remaining[0..chunk_len].*;
+ const mask: Chunk = @splat(0x80);
+ if (@reduce(.Or, chunk & mask == mask)) {
+ // found a non ASCII byte
+ break;
+ }
+ remaining = remaining[chunk_len..];
}
- remaining = remaining[chunk_len..];
}
// default lowest and highest continuation byte
@@ -601,9 +602,9 @@ fn testUtf8IteratorOnAscii() !void {
const s = Utf8View.initComptime("abc");
var it1 = s.iterator();
- try testing.expect(std.mem.eql(u8, "a", it1.nextCodepointSlice().?));
- try testing.expect(std.mem.eql(u8, "b", it1.nextCodepointSlice().?));
- try testing.expect(std.mem.eql(u8, "c", it1.nextCodepointSlice().?));
+ try testing.expect(mem.eql(u8, "a", it1.nextCodepointSlice().?));
+ try testing.expect(mem.eql(u8, "b", it1.nextCodepointSlice().?));
+ try testing.expect(mem.eql(u8, "c", it1.nextCodepointSlice().?));
try testing.expect(it1.nextCodepointSlice() == null);
var it2 = s.iterator();
@@ -631,9 +632,9 @@ fn testUtf8ViewOk() !void {
const s = Utf8View.initComptime("東京市");
var it1 = s.iterator();
- try testing.expect(std.mem.eql(u8, "東", it1.nextCodepointSlice().?));
- try testing.expect(std.mem.eql(u8, "京", it1.nextCodepointSlice().?));
- try testing.expect(std.mem.eql(u8, "市", it1.nextCodepointSlice().?));
+ try testing.expect(mem.eql(u8, "東", it1.nextCodepointSlice().?));
+ try testing.expect(mem.eql(u8, "京", it1.nextCodepointSlice().?));
+ try testing.expect(mem.eql(u8, "市", it1.nextCodepointSlice().?));
try testing.expect(it1.nextCodepointSlice() == null);
var it2 = s.iterator();
@@ -771,20 +772,20 @@ fn testUtf8Peeking() !void {
const s = Utf8View.initComptime("noël");
var it = s.iterator();
- try testing.expect(std.mem.eql(u8, "n", it.nextCodepointSlice().?));
+ try testing.expect(mem.eql(u8, "n", it.nextCodepointSlice().?));
- try testing.expect(std.mem.eql(u8, "o", it.peek(1)));
- try testing.expect(std.mem.eql(u8, "oë", it.peek(2)));
- try testing.expect(std.mem.eql(u8, "oël", it.peek(3)));
- try testing.expect(std.mem.eql(u8, "oël", it.peek(4)));
- try testing.expect(std.mem.eql(u8, "oël", it.peek(10)));
+ try testing.expect(mem.eql(u8, "o", it.peek(1)));
+ try testing.expect(mem.eql(u8, "oë", it.peek(2)));
+ try testing.expect(mem.eql(u8, "oël", it.peek(3)));
+ try testing.expect(mem.eql(u8, "oël", it.peek(4)));
+ try testing.expect(mem.eql(u8, "oël", it.peek(10)));
- try testing.expect(std.mem.eql(u8, "o", it.nextCodepointSlice().?));
- try testing.expect(std.mem.eql(u8, "ë", it.nextCodepointSlice().?));
- try testing.expect(std.mem.eql(u8, "l", it.nextCodepointSlice().?));
+ try testing.expect(mem.eql(u8, "o", it.nextCodepointSlice().?));
+ try testing.expect(mem.eql(u8, "ë", it.nextCodepointSlice().?));
+ try testing.expect(mem.eql(u8, "l", it.nextCodepointSlice().?));
try testing.expect(it.nextCodepointSlice() == null);
- try testing.expect(std.mem.eql(u8, &[_]u8{}, it.peek(1)));
+ try testing.expect(mem.eql(u8, &[_]u8{}, it.peek(1)));
}
fn testError(bytes: []const u8, expected_err: anyerror) !void {
@@ -926,59 +927,50 @@ test "fmtUtf8" {
}
fn utf16LeToUtf8ArrayListImpl(
- array_list: *std.ArrayList(u8),
+ result: *std.ArrayList(u8),
utf16le: []const u16,
comptime surrogates: Surrogates,
) (switch (surrogates) {
.cannot_encode_surrogate_half => Utf16LeToUtf8AllocError,
.can_encode_surrogate_half => mem.Allocator.Error,
})!void {
- // optimistically guess that it will all be ascii.
- try array_list.ensureTotalCapacityPrecise(utf16le.len);
+ assert(result.capacity >= utf16le.len);
var remaining = utf16le;
- if (builtin.zig_backend != .stage2_x86_64) {
- const chunk_len = std.simd.suggestVectorLength(u16) orelse 1;
+ vectorized: {
+ const chunk_len = std.simd.suggestVectorLength(u16) orelse break :vectorized;
const Chunk = @Vector(chunk_len, u16);
// Fast path. Check for and encode ASCII characters at the start of the input.
while (remaining.len >= chunk_len) {
const chunk: Chunk = remaining[0..chunk_len].*;
- const mask: Chunk = @splat(std.mem.nativeToLittle(u16, 0x7F));
+ const mask: Chunk = @splat(mem.nativeToLittle(u16, 0x7F));
if (@reduce(.Or, chunk | mask != mask)) {
// found a non ASCII code unit
break;
}
- const chunk_byte_len = chunk_len * 2;
- const chunk_bytes: @Vector(chunk_byte_len, u8) = (std.mem.sliceAsBytes(remaining)[0..chunk_byte_len]).*;
- const deinterlaced_bytes = std.simd.deinterlace(2, chunk_bytes);
- const ascii_bytes: [chunk_len]u8 = deinterlaced_bytes[0];
+ const ascii_chunk: @Vector(chunk_len, u8) = @truncate(mem.nativeToLittle(Chunk, chunk));
// We allocated enough space to encode every UTF-16 code unit
// as ASCII, so if the entire string is ASCII then we are
// guaranteed to have enough space allocated
- array_list.appendSliceAssumeCapacity(&ascii_bytes);
+ result.addManyAsArrayAssumeCapacity(chunk_len).* = ascii_chunk;
remaining = remaining[chunk_len..];
}
}
- var out_index: usize = array_list.items.len;
switch (surrogates) {
.cannot_encode_surrogate_half => {
var it = Utf16LeIterator.init(remaining);
while (try it.nextCodepoint()) |codepoint| {
const utf8_len = utf8CodepointSequenceLength(codepoint) catch unreachable;
- try array_list.resize(array_list.items.len + utf8_len);
- assert((utf8Encode(codepoint, array_list.items[out_index..]) catch unreachable) == utf8_len);
- out_index += utf8_len;
+ assert((utf8Encode(codepoint, try result.addManyAsSlice(utf8_len)) catch unreachable) == utf8_len);
}
},
.can_encode_surrogate_half => {
var it = Wtf16LeIterator.init(remaining);
while (it.nextCodepoint()) |codepoint| {
const utf8_len = utf8CodepointSequenceLength(codepoint) catch unreachable;
- try array_list.resize(array_list.items.len + utf8_len);
- assert((wtf8Encode(codepoint, array_list.items[out_index..]) catch unreachable) == utf8_len);
- out_index += utf8_len;
+ assert((wtf8Encode(codepoint, try result.addManyAsSlice(utf8_len)) catch unreachable) == utf8_len);
}
},
}
@@ -986,8 +978,9 @@ fn utf16LeToUtf8ArrayListImpl(
pub const Utf16LeToUtf8AllocError = mem.Allocator.Error || Utf16LeToUtf8Error;
-pub fn utf16LeToUtf8ArrayList(array_list: *std.ArrayList(u8), utf16le: []const u16) Utf16LeToUtf8AllocError!void {
- return utf16LeToUtf8ArrayListImpl(array_list, utf16le, .cannot_encode_surrogate_half);
+pub fn utf16LeToUtf8ArrayList(result: *std.ArrayList(u8), utf16le: []const u16) Utf16LeToUtf8AllocError!void {
+ try result.ensureTotalCapacityPrecise(utf16le.len);
+ return utf16LeToUtf8ArrayListImpl(result, utf16le, .cannot_encode_surrogate_half);
}
/// Deprecated; renamed to utf16LeToUtf8Alloc
@@ -999,8 +992,7 @@ pub fn utf16LeToUtf8Alloc(allocator: mem.Allocator, utf16le: []const u16) Utf16L
var result = try std.ArrayList(u8).initCapacity(allocator, utf16le.len);
errdefer result.deinit();
- try utf16LeToUtf8ArrayList(&result, utf16le);
-
+ try utf16LeToUtf8ArrayListImpl(&result, utf16le, .cannot_encode_surrogate_half);
return result.toOwnedSlice();
}
@@ -1013,8 +1005,7 @@ pub fn utf16LeToUtf8AllocZ(allocator: mem.Allocator, utf16le: []const u16) Utf16
var result = try std.ArrayList(u8).initCapacity(allocator, utf16le.len + 1);
errdefer result.deinit();
- try utf16LeToUtf8ArrayList(&result, utf16le);
-
+ try utf16LeToUtf8ArrayListImpl(&result, utf16le, .cannot_encode_surrogate_half);
return result.toOwnedSliceSentinel(0);
}
@@ -1026,27 +1017,24 @@ fn utf16LeToUtf8Impl(utf8: []u8, utf16le: []const u16, comptime surrogates: Surr
.cannot_encode_surrogate_half => Utf16LeToUtf8Error,
.can_encode_surrogate_half => error{},
})!usize {
- var end_index: usize = 0;
+ var dest_index: usize = 0;
var remaining = utf16le;
- if (builtin.zig_backend != .stage2_x86_64) {
- const chunk_len = std.simd.suggestVectorLength(u16) orelse 1;
+ vectorized: {
+ const chunk_len = std.simd.suggestVectorLength(u16) orelse break :vectorized;
const Chunk = @Vector(chunk_len, u16);
// Fast path. Check for and encode ASCII characters at the start of the input.
while (remaining.len >= chunk_len) {
const chunk: Chunk = remaining[0..chunk_len].*;
- const mask: Chunk = @splat(std.mem.nativeToLittle(u16, 0x7F));
+ const mask: Chunk = @splat(mem.nativeToLittle(u16, 0x7F));
if (@reduce(.Or, chunk | mask != mask)) {
// found a non ASCII code unit
break;
}
- const chunk_byte_len = chunk_len * 2;
- const chunk_bytes: @Vector(chunk_byte_len, u8) = (std.mem.sliceAsBytes(remaining)[0..chunk_byte_len]).*;
- const deinterlaced_bytes = std.simd.deinterlace(2, chunk_bytes);
- const ascii_bytes: [chunk_len]u8 = deinterlaced_bytes[0];
- @memcpy(utf8[end_index .. end_index + chunk_len], &ascii_bytes);
- end_index += chunk_len;
+ const ascii_chunk: @Vector(chunk_len, u8) = @truncate(mem.nativeToLittle(Chunk, chunk));
+ utf8[dest_index..][0..chunk_len].* = ascii_chunk;
+ dest_index += chunk_len;
remaining = remaining[chunk_len..];
}
}
@@ -1055,7 +1043,7 @@ fn utf16LeToUtf8Impl(utf8: []u8, utf16le: []const u16, comptime surrogates: Surr
.cannot_encode_surrogate_half => {
var it = Utf16LeIterator.init(remaining);
while (try it.nextCodepoint()) |codepoint| {
- end_index += utf8Encode(codepoint, utf8[end_index..]) catch |err| switch (err) {
+ dest_index += utf8Encode(codepoint, utf8[dest_index..]) catch |err| switch (err) {
// The maximum possible codepoint encoded by UTF-16 is U+10FFFF,
// which is within the valid codepoint range.
error.CodepointTooLarge => unreachable,
@@ -1068,7 +1056,7 @@ fn utf16LeToUtf8Impl(utf8: []u8, utf16le: []const u16, comptime surrogates: Surr
.can_encode_surrogate_half => {
var it = Wtf16LeIterator.init(remaining);
while (it.nextCodepoint()) |codepoint| {
- end_index += wtf8Encode(codepoint, utf8[end_index..]) catch |err| switch (err) {
+ dest_index += wtf8Encode(codepoint, utf8[dest_index..]) catch |err| switch (err) {
// The maximum possible codepoint encoded by UTF-16 is U+10FFFF,
// which is within the valid codepoint range.
error.CodepointTooLarge => unreachable,
@@ -1076,7 +1064,7 @@ fn utf16LeToUtf8Impl(utf8: []u8, utf16le: []const u16, comptime surrogates: Surr
}
},
}
- return end_index;
+ return dest_index;
}
/// Deprecated; renamed to utf16LeToUtf8
@@ -1149,14 +1137,12 @@ test utf16LeToUtf8 {
}
}
-fn utf8ToUtf16LeArrayListImpl(array_list: *std.ArrayList(u16), utf8: []const u8, comptime surrogates: Surrogates) !void {
- // optimistically guess that it will not require surrogate pairs
- try array_list.ensureTotalCapacityPrecise(utf8.len);
+fn utf8ToUtf16LeArrayListImpl(result: *std.ArrayList(u16), utf8: []const u8, comptime surrogates: Surrogates) !void {
+ assert(result.capacity >= utf8.len);
var remaining = utf8;
- // Need support for std.simd.interlace
- if (builtin.zig_backend != .stage2_x86_64 and comptime !builtin.cpu.arch.isMIPS()) {
- const chunk_len = std.simd.suggestVectorLength(u8) orelse 1;
+ vectorized: {
+ const chunk_len = std.simd.suggestVectorLength(u16) orelse break :vectorized;
const Chunk = @Vector(chunk_len, u8);
// Fast path. Check for and encode ASCII characters at the start of the input.
@@ -1167,9 +1153,8 @@ fn utf8ToUtf16LeArrayListImpl(array_list: *std.ArrayList(u16), utf8: []const u8,
// found a non ASCII code unit
break;
}
- const zeroes: Chunk = @splat(0);
- const utf16_chunk: [chunk_len * 2]u8 align(@alignOf(u16)) = std.simd.interlace(.{ chunk, zeroes });
- array_list.appendSliceAssumeCapacity(std.mem.bytesAsSlice(u16, &utf16_chunk));
+ const utf16_chunk = mem.nativeToLittle(@Vector(chunk_len, u16), chunk);
+ result.addManyAsArrayAssumeCapacity(chunk_len).* = utf16_chunk;
remaining = remaining[chunk_len..];
}
}
@@ -1181,21 +1166,18 @@ fn utf8ToUtf16LeArrayListImpl(array_list: *std.ArrayList(u16), utf8: []const u8,
var it = view.iterator();
while (it.nextCodepoint()) |codepoint| {
if (codepoint < 0x10000) {
- const short = @as(u16, @intCast(codepoint));
- try array_list.append(mem.nativeToLittle(u16, short));
+ try result.append(mem.nativeToLittle(u16, @intCast(codepoint)));
} else {
const high = @as(u16, @intCast((codepoint - 0x10000) >> 10)) + 0xD800;
const low = @as(u16, @intCast(codepoint & 0x3FF)) + 0xDC00;
- var out: [2]u16 = undefined;
- out[0] = mem.nativeToLittle(u16, high);
- out[1] = mem.nativeToLittle(u16, low);
- try array_list.appendSlice(out[0..]);
+ try result.appendSlice(&.{ mem.nativeToLittle(u16, high), mem.nativeToLittle(u16, low) });
}
}
}
-pub fn utf8ToUtf16LeArrayList(array_list: *std.ArrayList(u16), utf8: []const u8) error{ InvalidUtf8, OutOfMemory }!void {
- return utf8ToUtf16LeArrayListImpl(array_list, utf8, .cannot_encode_surrogate_half);
+/// Appends the UTF-16LE encoding of `utf8` to `result`, keeping any items already in the list.
+/// Fails with `error.InvalidUtf8` or `error.OutOfMemory` (see the error set).
+pub fn utf8ToUtf16LeArrayList(result: *std.ArrayList(u16), utf8: []const u8) error{ InvalidUtf8, OutOfMemory }!void {
+ // Reserve *unused* capacity rather than total capacity: the implementation's ASCII fast
+ // path appends with `addManyAsArrayAssumeCapacity`, so a list that already holds items
+ // must still have room for up to `utf8.len` additional code units.
+ try result.ensureUnusedCapacity(utf8.len);
+ return utf8ToUtf16LeArrayListImpl(result, utf8, .cannot_encode_surrogate_half);
}
pub fn utf8ToUtf16LeAlloc(allocator: mem.Allocator, utf8: []const u8) error{ InvalidUtf8, OutOfMemory }![]u16 {
@@ -1204,7 +1186,6 @@ pub fn utf8ToUtf16LeAlloc(allocator: mem.Allocator, utf8: []const u8) error{ Inv
errdefer result.deinit();
try utf8ToUtf16LeArrayListImpl(&result, utf8, .cannot_encode_surrogate_half);
-
return result.toOwnedSlice();
}
@@ -1217,7 +1198,6 @@ pub fn utf8ToUtf16LeAllocZ(allocator: mem.Allocator, utf8: []const u8) error{ In
errdefer result.deinit();
try utf8ToUtf16LeArrayListImpl(&result, utf8, .cannot_encode_surrogate_half);
-
return result.toOwnedSliceSentinel(0);
}
@@ -1228,12 +1208,11 @@ pub fn utf8ToUtf16Le(utf16le: []u16, utf8: []const u8) error{InvalidUtf8}!usize
}
pub fn utf8ToUtf16LeImpl(utf16le: []u16, utf8: []const u8, comptime surrogates: Surrogates) !usize {
- var dest_i: usize = 0;
+ var dest_index: usize = 0;
var remaining = utf8;
- // Need support for std.simd.interlace
- if (builtin.zig_backend != .stage2_x86_64 and comptime !builtin.cpu.arch.isMIPS()) {
- const chunk_len = std.simd.suggestVectorLength(u8) orelse 1;
+ vectorized: {
+ const chunk_len = std.simd.suggestVectorLength(u16) orelse break :vectorized;
const Chunk = @Vector(chunk_len, u8);
// Fast path. Check for and encode ASCII characters at the start of the input.
@@ -1244,57 +1223,60 @@ pub fn utf8ToUtf16LeImpl(utf16le: []u16, utf8: []const u8, comptime surrogates:
// found a non ASCII code unit
break;
}
- const zeroes: Chunk = @splat(0);
- const utf16_bytes: [chunk_len * 2]u8 align(@alignOf(u16)) = std.simd.interlace(.{ chunk, zeroes });
- @memcpy(utf16le[dest_i..][0..chunk_len], std.mem.bytesAsSlice(u16, &utf16_bytes));
- dest_i += chunk_len;
+ const utf16_chunk = mem.nativeToLittle(@Vector(chunk_len, u16), chunk);
+ utf16le[dest_index..][0..chunk_len].* = utf16_chunk;
+ dest_index += chunk_len;
remaining = remaining[chunk_len..];
}
}
- var src_i: usize = 0;
- while (src_i < remaining.len) {
- const n = utf8ByteSequenceLength(remaining[src_i]) catch return switch (surrogates) {
- .cannot_encode_surrogate_half => error.InvalidUtf8,
- .can_encode_surrogate_half => error.InvalidWtf8,
- };
- const next_src_i = src_i + n;
- const codepoint = switch (surrogates) {
- .cannot_encode_surrogate_half => utf8Decode(remaining[src_i..next_src_i]) catch return error.InvalidUtf8,
- .can_encode_surrogate_half => wtf8Decode(remaining[src_i..next_src_i]) catch return error.InvalidWtf8,
- };
+ const view = switch (surrogates) {
+ .cannot_encode_surrogate_half => try Utf8View.init(remaining),
+ .can_encode_surrogate_half => try Wtf8View.init(remaining),
+ };
+ var it = view.iterator();
+ while (it.nextCodepoint()) |codepoint| {
if (codepoint < 0x10000) {
- const short = @as(u16, @intCast(codepoint));
- utf16le[dest_i] = mem.nativeToLittle(u16, short);
- dest_i += 1;
+ utf16le[dest_index] = mem.nativeToLittle(u16, @intCast(codepoint));
+ dest_index += 1;
} else {
const high = @as(u16, @intCast((codepoint - 0x10000) >> 10)) + 0xD800;
const low = @as(u16, @intCast(codepoint & 0x3FF)) + 0xDC00;
- utf16le[dest_i] = mem.nativeToLittle(u16, high);
- utf16le[dest_i + 1] = mem.nativeToLittle(u16, low);
- dest_i += 2;
+ utf16le[dest_index..][0..2].* = .{ mem.nativeToLittle(u16, high), mem.nativeToLittle(u16, low) };
+ dest_index += 2;
}
- src_i = next_src_i;
}
- return dest_i;
+ return dest_index;
}
test "utf8ToUtf16Le" {
- var utf16le: [2]u16 = [_]u16{0} ** 2;
+ var utf16le: [128]u16 = undefined; // large enough for the longest case below (128 code units)
{
const length = try utf8ToUtf16Le(utf16le[0..], "𐐷");
- try testing.expectEqual(@as(usize, 2), length);
- try testing.expectEqualSlices(u8, "\x01\xd8\x37\xdc", mem.sliceAsBytes(utf16le[0..]));
+ try testing.expectEqualSlices(u8, "\x01\xd8\x37\xdc", mem.sliceAsBytes(utf16le[0..length]));
}
{
const length = try utf8ToUtf16Le(utf16le[0..], "\u{10FFFF}");
- try testing.expectEqual(@as(usize, 2), length);
- try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16le[0..]));
+ try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16le[0..length]));
}
{
const result = utf8ToUtf16Le(utf16le[0..], "\xf4\x90\x80\x80");
try testing.expectError(error.InvalidUtf8, result);
}
+ { // long ASCII run ending in a single non-ASCII character; expected output is 128 code units
+ const length = try utf8ToUtf16Le(utf16le[0..], "This string has been designed to test the vectorized implementat" ++
+ "ion by beginning with one hundred twenty-seven ASCII characters¡");
+ try testing.expectEqualSlices(u8, &.{
+ 'T', 0, 'h', 0, 'i', 0, 's', 0, ' ', 0, 's', 0, 't', 0, 'r', 0, 'i', 0, 'n', 0, 'g', 0, ' ', 0, 'h', 0, 'a', 0, 's', 0, ' ', 0,
+ 'b', 0, 'e', 0, 'e', 0, 'n', 0, ' ', 0, 'd', 0, 'e', 0, 's', 0, 'i', 0, 'g', 0, 'n', 0, 'e', 0, 'd', 0, ' ', 0, 't', 0, 'o', 0,
+ ' ', 0, 't', 0, 'e', 0, 's', 0, 't', 0, ' ', 0, 't', 0, 'h', 0, 'e', 0, ' ', 0, 'v', 0, 'e', 0, 'c', 0, 't', 0, 'o', 0, 'r', 0,
+ 'i', 0, 'z', 0, 'e', 0, 'd', 0, ' ', 0, 'i', 0, 'm', 0, 'p', 0, 'l', 0, 'e', 0, 'm', 0, 'e', 0, 'n', 0, 't', 0, 'a', 0, 't', 0,
+ 'i', 0, 'o', 0, 'n', 0, ' ', 0, 'b', 0, 'y', 0, ' ', 0, 'b', 0, 'e', 0, 'g', 0, 'i', 0, 'n', 0, 'n', 0, 'i', 0, 'n', 0, 'g', 0,
+ ' ', 0, 'w', 0, 'i', 0, 't', 0, 'h', 0, ' ', 0, 'o', 0, 'n', 0, 'e', 0, ' ', 0, 'h', 0, 'u', 0, 'n', 0, 'd', 0, 'r', 0, 'e', 0,
+ 'd', 0, ' ', 0, 't', 0, 'w', 0, 'e', 0, 'n', 0, 't', 0, 'y', 0, '-', 0, 's', 0, 'e', 0, 'v', 0, 'e', 0, 'n', 0, ' ', 0, 'A', 0,
+ 'S', 0, 'C', 0, 'I', 0, 'I', 0, ' ', 0, 'c', 0, 'h', 0, 'a', 0, 'r', 0, 'a', 0, 'c', 0, 't', 0, 'e', 0, 'r', 0, 's', 0, '¡', 0,
+ }, mem.sliceAsBytes(utf16le[0..length]));
+ }
}
test utf8ToUtf16LeArrayList {
@@ -1339,25 +1321,40 @@ test utf8ToUtf16LeAllocZ {
{
const utf16 = try utf8ToUtf16LeAllocZ(testing.allocator, "𐐷");
defer testing.allocator.free(utf16);
- try testing.expectEqualSlices(u8, "\x01\xd8\x37\xdc", mem.sliceAsBytes(utf16[0..]));
+ try testing.expectEqualSlices(u8, "\x01\xd8\x37\xdc", mem.sliceAsBytes(utf16));
try testing.expect(utf16[2] == 0);
}
{
const utf16 = try utf8ToUtf16LeAllocZ(testing.allocator, "\u{10FFFF}");
defer testing.allocator.free(utf16);
- try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16[0..]));
+ try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16));
try testing.expect(utf16[2] == 0);
}
{
const result = utf8ToUtf16LeAllocZ(testing.allocator, "\xf4\x90\x80\x80");
try testing.expectError(error.InvalidUtf8, result);
}
+ {
+ const utf16 = try utf8ToUtf16LeWithNull(testing.allocator, "This string has been designed to test the vectorized implementat" ++
+ "ion by beginning with one hundred twenty-seven ASCII characters¡");
+ defer testing.allocator.free(utf16);
+ try testing.expectEqualSlices(u8, &.{
+ 'T', 0, 'h', 0, 'i', 0, 's', 0, ' ', 0, 's', 0, 't', 0, 'r', 0, 'i', 0, 'n', 0, 'g', 0, ' ', 0, 'h', 0, 'a', 0, 's', 0, ' ', 0,
+ 'b', 0, 'e', 0, 'e', 0, 'n', 0, ' ', 0, 'd', 0, 'e', 0, 's', 0, 'i', 0, 'g', 0, 'n', 0, 'e', 0, 'd', 0, ' ', 0, 't', 0, 'o', 0,
+ ' ', 0, 't', 0, 'e', 0, 's', 0, 't', 0, ' ', 0, 't', 0, 'h', 0, 'e', 0, ' ', 0, 'v', 0, 'e', 0, 'c', 0, 't', 0, 'o', 0, 'r', 0,
+ 'i', 0, 'z', 0, 'e', 0, 'd', 0, ' ', 0, 'i', 0, 'm', 0, 'p', 0, 'l', 0, 'e', 0, 'm', 0, 'e', 0, 'n', 0, 't', 0, 'a', 0, 't', 0,
+ 'i', 0, 'o', 0, 'n', 0, ' ', 0, 'b', 0, 'y', 0, ' ', 0, 'b', 0, 'e', 0, 'g', 0, 'i', 0, 'n', 0, 'n', 0, 'i', 0, 'n', 0, 'g', 0,
+ ' ', 0, 'w', 0, 'i', 0, 't', 0, 'h', 0, ' ', 0, 'o', 0, 'n', 0, 'e', 0, ' ', 0, 'h', 0, 'u', 0, 'n', 0, 'd', 0, 'r', 0, 'e', 0,
+ 'd', 0, ' ', 0, 't', 0, 'w', 0, 'e', 0, 'n', 0, 't', 0, 'y', 0, '-', 0, 's', 0, 'e', 0, 'v', 0, 'e', 0, 'n', 0, ' ', 0, 'A', 0,
+ 'S', 0, 'C', 0, 'I', 0, 'I', 0, ' ', 0, 'c', 0, 'h', 0, 'a', 0, 'r', 0, 'a', 0, 'c', 0, 't', 0, 'e', 0, 'r', 0, 's', 0, '¡', 0,
+ }, mem.sliceAsBytes(utf16));
+ }
}
/// Converts a UTF-8 string literal into a UTF-16LE string literal.
-pub fn utf8ToUtf16LeStringLiteral(comptime utf8: []const u8) *const [calcUtf16LeLen(utf8) catch unreachable:0]u16 {
+pub fn utf8ToUtf16LeStringLiteral(comptime utf8: []const u8) *const [calcUtf16LeLen(utf8) catch |err| @compileError(err):0]u16 {
return comptime blk: {
- const len: usize = calcUtf16LeLen(utf8) catch |err| @compileError(err);
+ const len: usize = calcUtf16LeLen(utf8) catch unreachable;
var utf16le: [len:0]u16 = [_:0]u16{0} ** len;
const utf16le_len = utf8ToUtf16Le(&utf16le, utf8[0..]) catch |err| @compileError(err);
assert(len == utf16le_len);
@@ -1438,12 +1435,12 @@ test "fmtUtf16Le" {
try expectFmt("", "{}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral(""))});
try expectFmt("foo", "{}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral("foo"))});
try expectFmt("𐐷", "{}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral("𐐷"))});
- try expectFmt("퟿", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\xff\xd7", native_endian)})});
- try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\x00\xd8", native_endian)})});
- try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\xff\xdb", native_endian)})});
- try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\x00\xdc", native_endian)})});
- try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\xff\xdf", native_endian)})});
- try expectFmt("", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\x00\xe0", native_endian)})});
+ try expectFmt("퟿", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xd7", native_endian)})});
+ try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xd8", native_endian)})});
+ try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xdb", native_endian)})});
+ try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xdc", native_endian)})});
+ try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\xff\xdf", native_endian)})});
+ try expectFmt("", "{}", .{fmtUtf16Le(&[_]u16{mem.readInt(u16, "\x00\xe0", native_endian)})});
}
test "utf8ToUtf16LeStringLiteral" {
@@ -1686,8 +1683,9 @@ pub const Wtf8Iterator = struct {
}
};
-pub fn wtf16LeToWtf8ArrayList(array_list: *std.ArrayList(u8), utf16le: []const u16) mem.Allocator.Error!void {
- return utf16LeToUtf8ArrayListImpl(array_list, utf16le, .can_encode_surrogate_half);
+/// Appends the WTF-8 encoding of the WTF-16LE input `utf16le` to `result`, keeping any
+/// items already in the list. Can only fail with an allocation error.
+pub fn wtf16LeToWtf8ArrayList(result: *std.ArrayList(u8), utf16le: []const u16) mem.Allocator.Error!void {
+ // Reserve *unused* capacity rather than total capacity, so the reservation still holds
+ // when `result` is not empty.
+ // NOTE(review): the implementation is presumably relying on `utf16le.len` free slots
+ // (the Alloc variants pre-size the list the same way) — confirm against the Impl.
+ try result.ensureUnusedCapacity(utf16le.len);
+ return utf16LeToUtf8ArrayListImpl(result, utf16le, .can_encode_surrogate_half);
}
/// Caller must free returned memory.
@@ -1696,8 +1694,7 @@ pub fn wtf16LeToWtf8Alloc(allocator: mem.Allocator, wtf16le: []const u16) mem.Al
var result = try std.ArrayList(u8).initCapacity(allocator, wtf16le.len);
errdefer result.deinit();
- try wtf16LeToWtf8ArrayList(&result, wtf16le);
-
+ try utf16LeToUtf8ArrayListImpl(&result, wtf16le, .can_encode_surrogate_half);
return result.toOwnedSlice();
}
@@ -1707,8 +1704,7 @@ pub fn wtf16LeToWtf8AllocZ(allocator: mem.Allocator, wtf16le: []const u16) mem.A
var result = try std.ArrayList(u8).initCapacity(allocator, wtf16le.len + 1);
errdefer result.deinit();
- try wtf16LeToWtf8ArrayList(&result, wtf16le);
-
+ try utf16LeToUtf8ArrayListImpl(&result, wtf16le, .can_encode_surrogate_half);
return result.toOwnedSliceSentinel(0);
}
@@ -1716,8 +1712,9 @@ pub fn wtf16LeToWtf8(wtf8: []u8, wtf16le: []const u16) usize {
return utf16LeToUtf8Impl(wtf8, wtf16le, .can_encode_surrogate_half) catch |err| switch (err) {};
}
-pub fn wtf8ToWtf16LeArrayList(array_list: *std.ArrayList(u16), wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }!void {
- return utf8ToUtf16LeArrayListImpl(array_list, wtf8, .can_encode_surrogate_half);
+/// Appends the WTF-16LE encoding of `wtf8` to `result`, keeping any items already in the list.
+/// Fails with `error.InvalidWtf8` or `error.OutOfMemory` (see the error set).
+pub fn wtf8ToWtf16LeArrayList(result: *std.ArrayList(u16), wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }!void {
+ // Reserve *unused* capacity rather than total capacity: the implementation's ASCII fast
+ // path appends with `addManyAsArrayAssumeCapacity`, so a list that already holds items
+ // must still have room for up to `wtf8.len` additional code units.
+ try result.ensureUnusedCapacity(wtf8.len);
+ return utf8ToUtf16LeArrayListImpl(result, wtf8, .can_encode_surrogate_half);
}
pub fn wtf8ToWtf16LeAlloc(allocator: mem.Allocator, wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }![]u16 {
@@ -1726,7 +1723,6 @@ pub fn wtf8ToWtf16LeAlloc(allocator: mem.Allocator, wtf8: []const u8) error{ Inv
errdefer result.deinit();
try utf8ToUtf16LeArrayListImpl(&result, wtf8, .can_encode_surrogate_half);
-
return result.toOwnedSlice();
}
@@ -1736,7 +1732,6 @@ pub fn wtf8ToWtf16LeAllocZ(allocator: mem.Allocator, wtf8: []const u8) error{ In
errdefer result.deinit();
try utf8ToUtf16LeArrayListImpl(&result, wtf8, .can_encode_surrogate_half);
-
return result.toOwnedSliceSentinel(0);
}
@@ -1895,7 +1890,7 @@ pub const Wtf16LeIterator = struct {
/// Creates an iterator positioned at the start of `s`.
pub fn init(s: []const u16) Wtf16LeIterator {
return Wtf16LeIterator{
- .bytes = std.mem.sliceAsBytes(s),
+ .bytes = mem.sliceAsBytes(s), // the iterator walks the code units as raw bytes
.i = 0, // byte offset into `bytes`
};
}
@@ -1908,12 +1903,12 @@ pub const Wtf16LeIterator = struct {
assert(it.i <= it.bytes.len);
if (it.i == it.bytes.len) return null;
var code_units: [2]u16 = undefined;
- code_units[0] = std.mem.readInt(u16, it.bytes[it.i..][0..2], .little);
+ code_units[0] = mem.readInt(u16, it.bytes[it.i..][0..2], .little);
it.i += 2;
surrogate_pair: {
if (utf16IsHighSurrogate(code_units[0])) {
if (it.i >= it.bytes.len) break :surrogate_pair;
- code_units[1] = std.mem.readInt(u16, it.bytes[it.i..][0..2], .little);
+ code_units[1] = mem.readInt(u16, it.bytes[it.i..][0..2], .little);
const codepoint = utf16DecodeSurrogatePair(&code_units) catch break :surrogate_pair;
it.i += 2;
return codepoint;
@@ -2030,31 +2025,31 @@ fn testRoundtripWtf16(wtf16le: []const u16) !void {
test "well-formed WTF-16 roundtrips" {
// Each case is passed to testRoundtripWtf16; the inputs mix paired, unpaired and
// out-of-order surrogates with ordinary code units.
try testRoundtripWtf16(&[_]u16{
- std.mem.nativeToLittle(u16, 0xD83D), // high surrogate
- std.mem.nativeToLittle(u16, 0xDCA9), // low surrogate
+ mem.nativeToLittle(u16, 0xD83D), // high surrogate
+ mem.nativeToLittle(u16, 0xDCA9), // low surrogate
});
try testRoundtripWtf16(&[_]u16{
- std.mem.nativeToLittle(u16, 0xD83D), // high surrogate
- std.mem.nativeToLittle(u16, ' '), // not surrogate
- std.mem.nativeToLittle(u16, 0xDCA9), // low surrogate
+ mem.nativeToLittle(u16, 0xD83D), // high surrogate
+ mem.nativeToLittle(u16, ' '), // not surrogate
+ mem.nativeToLittle(u16, 0xDCA9), // low surrogate
});
try testRoundtripWtf16(&[_]u16{
- std.mem.nativeToLittle(u16, 0xD800), // high surrogate
- std.mem.nativeToLittle(u16, 0xDBFF), // high surrogate
+ mem.nativeToLittle(u16, 0xD800), // high surrogate
+ mem.nativeToLittle(u16, 0xDBFF), // high surrogate
});
try testRoundtripWtf16(&[_]u16{
- std.mem.nativeToLittle(u16, 0xD800), // high surrogate
- std.mem.nativeToLittle(u16, 0xE000), // not surrogate
+ mem.nativeToLittle(u16, 0xD800), // high surrogate
+ mem.nativeToLittle(u16, 0xE000), // not surrogate
});
try testRoundtripWtf16(&[_]u16{
- std.mem.nativeToLittle(u16, 0xD7FF), // not surrogate
- std.mem.nativeToLittle(u16, 0xDC00), // low surrogate
+ mem.nativeToLittle(u16, 0xD7FF), // not surrogate
+ mem.nativeToLittle(u16, 0xDC00), // low surrogate
});
try testRoundtripWtf16(&[_]u16{
- std.mem.nativeToLittle(u16, 0x61), // not surrogate
- std.mem.nativeToLittle(u16, 0xDC00), // low surrogate
+ mem.nativeToLittle(u16, 0x61), // not surrogate
+ mem.nativeToLittle(u16, 0xDC00), // low surrogate
});
try testRoundtripWtf16(&[_]u16{
- std.mem.nativeToLittle(u16, 0xDC00), // low surrogate
+ mem.nativeToLittle(u16, 0xDC00), // low surrogate
});
}
diff --git a/lib/std/zig/c_translation.zig b/lib/std/zig/c_translation.zig
index dfa888e94b..337149e97d 100644
--- a/lib/std/zig/c_translation.zig
+++ b/lib/std/zig/c_translation.zig
@@ -308,14 +308,12 @@ test "promoteIntLiteral" {
/// Convert from clang __builtin_shufflevector index to Zig @shuffle index
/// clang requires __builtin_shufflevector index arguments to be integer constants.
-/// negative values for `this_index` indicate "don't care" so we arbitrarily choose 0
+/// negative values for `this_index` indicate "don't care".
/// clang enforces that `this_index` is less than the total number of vector elements
/// See https://ziglang.org/documentation/master/#shuffle
/// See https://clang.llvm.org/docs/LanguageExtensions.html#langext-builtin-shufflevector
pub fn shuffleVectorIndex(comptime this_index: c_int, comptime source_vector_len: usize) i32 {
- if (this_index <= 0) return 0;
-
- const positive_index = @as(usize, @intCast(this_index));
+ const positive_index = std.math.cast(usize, this_index) orelse return undefined;
if (positive_index < source_vector_len) return @as(i32, @intCast(this_index));
const b_index = positive_index - source_vector_len;
return ~@as(i32, @intCast(b_index));
@@ -324,7 +322,7 @@ pub fn shuffleVectorIndex(comptime this_index: c_int, comptime source_vector_len
test "shuffleVectorIndex" {
const vector_len: usize = 4;
- try testing.expect(shuffleVectorIndex(-1, vector_len) == 0);
+ _ = shuffleVectorIndex(-1, vector_len);
try testing.expect(shuffleVectorIndex(0, vector_len) == 0);
try testing.expect(shuffleVectorIndex(1, vector_len) == 1);