From 27d07c6c4de36af1186392d4bec321825403860e Mon Sep 17 00:00:00 2001 From: LemonBoy Date: Sat, 13 Mar 2021 13:08:46 +0100 Subject: std: Replace testing fns for floating-point values Beside handling NaNs and other non-numeric values better we finally offer the same pair of testing predicates in math and testing. --- lib/std/testing.zig | 74 +++++++++++++++++++++++++++-------------------------- 1 file changed, 38 insertions(+), 36 deletions(-) (limited to 'lib/std/testing.zig') diff --git a/lib/std/testing.zig b/lib/std/testing.zig index 1d89155a58..67831c92fb 100644 --- a/lib/std/testing.zig +++ b/lib/std/testing.zig @@ -200,67 +200,69 @@ pub fn expectFmt(expected: []const u8, comptime template: []const u8, args: anyt return error.TestFailed; } -/// This function is intended to be used only in tests. When the actual value is not -/// within the margin of the expected value, -/// prints diagnostics to stderr to show exactly how they are not equal, then aborts. +pub const expectWithinMargin = @compileError("expectWithinMargin is deprecated, use expectApproxEqAbs or expectApproxEqRel"); +pub const expectWithinEpsilon = @compileError("expectWithinEpsilon is deprecated, use expectApproxEqAbs or expectApproxEqRel"); + +/// This function is intended to be used only in tests. When the actual value is +/// not approximately equal to the expected value, prints diagnostics to stderr +/// to show exactly how they are not equal, then aborts. +/// See `math.approxEqAbs` for more informations on the tolerance parameter. 
/// The types must be floating point -pub fn expectWithinMargin(expected: anytype, actual: @TypeOf(expected), margin: @TypeOf(expected)) void { - std.debug.assert(margin >= 0.0); +pub fn expectApproxEqAbs(expected: anytype, actual: @TypeOf(expected), tolerance: @TypeOf(expected)) void { + const T = @TypeOf(expected); + + switch (@typeInfo(T)) { + .Float => if (!math.approxEqAbs(T, expected, actual, tolerance)) + std.debug.panic("actual {}, not within absolute tolerance {} of expected {}", .{ actual, tolerance, expected }), + + .ComptimeFloat => @compileError("Cannot approximately compare two comptime_float values"), - switch (@typeInfo(@TypeOf(actual))) { - .Float, - .ComptimeFloat, - => { - if (@fabs(expected - actual) > margin) { - std.debug.panic("actual {}, not within margin {} of expected {}", .{ actual, margin, expected }); - } - }, else => @compileError("Unable to compare non floating point values"), } } -test "expectWithinMargin" { +test "expectApproxEqAbs" { inline for ([_]type{ f16, f32, f64, f128 }) |T| { const pos_x: T = 12.0; const pos_y: T = 12.06; const neg_x: T = -12.0; const neg_y: T = -12.06; - expectWithinMargin(pos_x, pos_y, 0.1); - expectWithinMargin(neg_x, neg_y, 0.1); + expectApproxEqAbs(pos_x, pos_y, 0.1); + expectApproxEqAbs(neg_x, neg_y, 0.1); } } -/// This function is intended to be used only in tests. When the actual value is not -/// within the epsilon of the expected value, -/// prints diagnostics to stderr to show exactly how they are not equal, then aborts. +/// This function is intended to be used only in tests. When the actual value is +/// not approximately equal to the expected value, prints diagnostics to stderr +/// to show exactly how they are not equal, then aborts. +/// See `math.approxEqRel` for more informations on the tolerance parameter. 
/// The types must be floating point -pub fn expectWithinEpsilon(expected: anytype, actual: @TypeOf(expected), epsilon: @TypeOf(expected)) void { - std.debug.assert(epsilon >= 0.0 and epsilon <= 1.0); +pub fn expectApproxEqRel(expected: anytype, actual: @TypeOf(expected), tolerance: @TypeOf(expected)) void { + const T = @TypeOf(expected); + + switch (@typeInfo(T)) { + .Float => if (!math.approxEqRel(T, expected, actual, tolerance)) + std.debug.panic("actual {}, not within relative tolerance {} of expected {}", .{ actual, tolerance, expected }), + + .ComptimeFloat => @compileError("Cannot approximately compare two comptime_float values"), - // Relative epsilon test. - const margin = math.max(math.fabs(expected), math.fabs(actual)) * epsilon; - switch (@typeInfo(@TypeOf(actual))) { - .Float, - .ComptimeFloat, - => { - if (@fabs(expected - actual) > margin) { - std.debug.panic("actual {}, not within epsilon {}, of expected {}", .{ actual, epsilon, expected }); - } - }, else => @compileError("Unable to compare non floating point values"), } } -test "expectWithinEpsilon" { +test "expectApproxEqRel" { inline for ([_]type{ f16, f32, f64, f128 }) |T| { + const eps_value = comptime math.epsilon(T); + const sqrt_eps_value = comptime math.sqrt(eps_value); + const pos_x: T = 12.0; - const pos_y: T = 13.2; + const pos_y: T = pos_x + 2 * eps_value; const neg_x: T = -12.0; - const neg_y: T = -13.2; + const neg_y: T = neg_x - 2 * eps_value; - expectWithinEpsilon(pos_x, pos_y, 0.1); - expectWithinEpsilon(neg_x, neg_y, 0.1); + expectApproxEqRel(pos_x, pos_y, sqrt_eps_value); + expectApproxEqRel(neg_x, neg_y, sqrt_eps_value); } } -- cgit v1.2.3 From b8c019ef49be54d76acc6721d5d8d493193bcf5d Mon Sep 17 00:00:00 2001 From: Frank Denis Date: Fri, 19 Mar 2021 19:26:30 +0100 Subject: std/base64: cleanups & support url-safe and other non-padded variants This makes a few changes to the base64 codecs. * The padding character is optional. 
The common "URL-safe" variant, in particular, is generally not used with padding. This is also the case for password hashes, so having this will avoid code duplication with bcrypt, scrypt and other functions. * The URL-safe variant is added. Instead of having individual constants for each parameter of each variant, we are now grouping these in a struct. So, `standard_pad_char` just becomes `standard.pad_char`. * Types are not `snake_case`'d any more. So, `standard_encoder` becomes `standard.Encoder`, as it is a type. * Creating a decoder with ignored characters required the alphabet and padding. Now, `standard.decoderWithIgnore()` returns a decoder with the standard parameters and the set of ignored chars. * Whatever applies to `standard.*` obviously also works with `url_safe.*`. * The `calcSize()` interface was inconsistent, taking a length in the encoder, and a slice in the decoder. Rename the variant that takes a slice to `calcSizeForSlice()`. * In the decoder with ignored characters, add `calcSizeUpperBound()`, which is more useful than the one that takes a slice in order to size a fixed buffer before we have the data. * Return `error.InvalidCharacter` when the input actually contains characters that are neither padding nor part of the alphabet. If we hit a padding issue (which includes extra bits at the end), consistently return `error.InvalidPadding`. * Don't keep the `char_in_alphabet` array permanently in a decoder; it is only required for sanity checks during initialization. * Tests are unchanged, but now cover both the standard (padded) and the url-safe (non-padded) variants. * Add an error set, rename `OutputTooSmallError` to `NoSpaceLeft` to match the `hex2bin` equivalent. 
--- doc/langref.html.in | 4 +- lib/std/base64.zig | 592 ++++++++++++++++++--------------- lib/std/fs.zig | 10 +- lib/std/testing.zig | 2 +- test/standalone/mix_o_files/base64.zig | 4 +- 5 files changed, 337 insertions(+), 275 deletions(-) (limited to 'lib/std/testing.zig') diff --git a/doc/langref.html.in b/doc/langref.html.in index aca09c55fe..b385647b99 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -9952,8 +9952,8 @@ export fn decode_base_64( ) usize { const src = source_ptr[0..source_len]; const dest = dest_ptr[0..dest_len]; - const base64_decoder = base64.standard_decoder_unsafe; - const decoded_size = base64_decoder.calcSize(src); + const base64_decoder = base64.standard.DecoderUnsafe; + const decoded_size = base64_decoder.calcSizeForSlice(src) catch unreachable; base64_decoder.decode(dest[0..decoded_size], src); return decoded_size; } diff --git a/lib/std/base64.zig b/lib/std/base64.zig index e6a780c239..3963436982 100644 --- a/lib/std/base64.zig +++ b/lib/std/base64.zig @@ -8,308 +8,339 @@ const assert = std.debug.assert; const testing = std.testing; const mem = std.mem; -pub const standard_alphabet_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; -pub const standard_pad_char = '='; -pub const standard_encoder = Base64Encoder.init(standard_alphabet_chars, standard_pad_char); +pub const Error = error{ + InvalidCharacter, + InvalidPadding, + NoSpaceLeft, +}; + +/// Base64 codecs +pub const Codecs = struct { + alphabet_chars: [64]u8, + pad_char: ?u8, + decoderWithIgnore: fn (ignore: []const u8) Base64DecoderWithIgnore, + Encoder: Base64Encoder, + Decoder: Base64Decoder, + DecoderUnsafe: Base64DecoderUnsafe, +}; + +pub const standard_alphabet_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".*; +fn standardBase64DecoderWithIgnore(ignore: []const u8) Base64DecoderWithIgnore { + return Base64DecoderWithIgnore.init(standard_alphabet_chars, '=', ignore); +} + +/// Standard Base64 codecs, with 
padding +pub const standard = Codecs{ + .alphabet_chars = standard_alphabet_chars, + .pad_char = '=', + .decoderWithIgnore = standardBase64DecoderWithIgnore, + .Encoder = Base64Encoder.init(standard_alphabet_chars, '='), + .Decoder = Base64Decoder.init(standard_alphabet_chars, '='), + .DecoderUnsafe = Base64DecoderUnsafe.init(standard_alphabet_chars, '='), +}; + +pub const url_safe_alphabet_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_".*; +fn urlSafeBase64DecoderWithIgnore(ignore: []const u8) Base64DecoderWithIgnore { + return Base64DecoderWithIgnore.init(url_safe_alphabet_chars, null, ignore); +} + +/// URL-safe Base64 codecs, without padding +pub const url_safe = Codecs{ + .alphabet_chars = url_safe_alphabet_chars, + .pad_char = null, + .decoderWithIgnore = urlSafeBase64DecoderWithIgnore, + .Encoder = Base64Encoder.init(url_safe_alphabet_chars, null), + .Decoder = Base64Decoder.init(url_safe_alphabet_chars, null), + .DecoderUnsafe = Base64DecoderUnsafe.init(url_safe_alphabet_chars, null), +}; + +// Backwards compatibility + +/// Deprecated - Use `standard.pad_char` +pub const standard_pad_char = standard.pad_char; +/// Deprecated - Use `standard.Encoder` +pub const standard_encoder = standard.Encoder; +/// Deprecated - Use `standard.Decoder` +pub const standard_decoder = standard.Decoder; +/// Deprecated - Use `standard.DecoderUnsafe` +pub const standard_decoder_unsafe = standard.DecoderUnsafe; pub const Base64Encoder = struct { - alphabet_chars: []const u8, - pad_char: u8, + alphabet_chars: [64]u8, + pad_char: ?u8, - /// a bunch of assertions, then simply pass the data right through. - pub fn init(alphabet_chars: []const u8, pad_char: u8) Base64Encoder { + /// A bunch of assertions, then simply pass the data right through. 
+ pub fn init(alphabet_chars: [64]u8, pad_char: ?u8) Base64Encoder { assert(alphabet_chars.len == 64); var char_in_alphabet = [_]bool{false} ** 256; for (alphabet_chars) |c| { assert(!char_in_alphabet[c]); - assert(c != pad_char); + assert(pad_char == null or c != pad_char.?); char_in_alphabet[c] = true; } - return Base64Encoder{ .alphabet_chars = alphabet_chars, .pad_char = pad_char, }; } - /// ceil(source_len * 4/3) - pub fn calcSize(source_len: usize) usize { - return @divTrunc(source_len + 2, 3) * 4; + /// Compute the encoded length + pub fn calcSize(encoder: *const Base64Encoder, source_len: usize) usize { + if (encoder.pad_char != null) { + return @divTrunc(source_len + 2, 3) * 4; + } else { + const leftover = source_len % 3; + return @divTrunc(source_len, 3) * 4 + @divTrunc(leftover * 4 + 2, 3); + } } - /// dest.len must be what you get from ::calcSize. + /// dest.len must at least be what you get from ::calcSize. pub fn encode(encoder: *const Base64Encoder, dest: []u8, source: []const u8) []const u8 { - assert(dest.len >= Base64Encoder.calcSize(source.len)); - - var i: usize = 0; - var out_index: usize = 0; - while (i + 2 < source.len) : (i += 3) { - dest[out_index] = encoder.alphabet_chars[(source[i] >> 2) & 0x3f]; - out_index += 1; - - dest[out_index] = encoder.alphabet_chars[((source[i] & 0x3) << 4) | ((source[i + 1] & 0xf0) >> 4)]; - out_index += 1; - - dest[out_index] = encoder.alphabet_chars[((source[i + 1] & 0xf) << 2) | ((source[i + 2] & 0xc0) >> 6)]; - out_index += 1; - - dest[out_index] = encoder.alphabet_chars[source[i + 2] & 0x3f]; - out_index += 1; + const out_len = encoder.calcSize(source.len); + assert(dest.len >= out_len); + + const nibbles = source.len / 3; + const leftover = source.len - 3 * nibbles; + + var acc: u12 = 0; + var acc_len: u4 = 0; + var out_idx: usize = 0; + for (source) |v| { + acc = (acc << 8) + v; + acc_len += 8; + while (acc_len >= 6) { + acc_len -= 6; + dest[out_idx] = encoder.alphabet_chars[@truncate(u6, (acc >> 
acc_len))]; + out_idx += 1; + } } - - if (i < source.len) { - dest[out_index] = encoder.alphabet_chars[(source[i] >> 2) & 0x3f]; - out_index += 1; - - if (i + 1 == source.len) { - dest[out_index] = encoder.alphabet_chars[(source[i] & 0x3) << 4]; - out_index += 1; - - dest[out_index] = encoder.pad_char; - out_index += 1; - } else { - dest[out_index] = encoder.alphabet_chars[((source[i] & 0x3) << 4) | ((source[i + 1] & 0xf0) >> 4)]; - out_index += 1; - - dest[out_index] = encoder.alphabet_chars[(source[i + 1] & 0xf) << 2]; - out_index += 1; + if (acc_len > 0) { + dest[out_idx] = encoder.alphabet_chars[@truncate(u6, (acc << 6 - acc_len))]; + out_idx += 1; + } + if (encoder.pad_char) |pad_char| { + for (dest[out_idx..]) |*pad| { + pad.* = pad_char; } - - dest[out_index] = encoder.pad_char; - out_index += 1; } - return dest[0..out_index]; + return dest[0..out_len]; } }; -pub const standard_decoder = Base64Decoder.init(standard_alphabet_chars, standard_pad_char); - pub const Base64Decoder = struct { + const invalid_char: u8 = 0xff; + /// e.g. 'A' => 0. - /// undefined for any value not in the 64 alphabet chars. + /// `invalid_char` for any value not in the 64 alphabet chars. char_to_index: [256]u8, + pad_char: ?u8, - /// true only for the 64 chars in the alphabet, not the pad char. 
- char_in_alphabet: [256]bool, - pad_char: u8, - - pub fn init(alphabet_chars: []const u8, pad_char: u8) Base64Decoder { - assert(alphabet_chars.len == 64); - + pub fn init(alphabet_chars: [64]u8, pad_char: ?u8) Base64Decoder { var result = Base64Decoder{ - .char_to_index = undefined, - .char_in_alphabet = [_]bool{false} ** 256, + .char_to_index = [_]u8{invalid_char} ** 256, .pad_char = pad_char, }; + var char_in_alphabet = [_]bool{false} ** 256; for (alphabet_chars) |c, i| { - assert(!result.char_in_alphabet[c]); - assert(c != pad_char); + assert(!char_in_alphabet[c]); + assert(pad_char == null or c != pad_char.?); result.char_to_index[c] = @intCast(u8, i); - result.char_in_alphabet[c] = true; + char_in_alphabet[c] = true; } + return result; + } + /// Return the maximum possible decoded size for a given input length - The actual length may be less if the input includes padding. + /// `InvalidPadding` is returned if the input length is not valid. + pub fn calcSizeUpperBound(decoder: *const Base64Decoder, source_len: usize) Error!usize { + var result = source_len / 4 * 3; + const leftover = source_len % 4; + if (decoder.pad_char != null) { + if (leftover % 4 != 0) return error.InvalidPadding; + } else { + if (leftover % 4 == 1) return error.InvalidPadding; + result += leftover * 3 / 4; + } return result; } - /// If the encoded buffer is detected to be invalid, returns error.InvalidPadding. - pub fn calcSize(decoder: *const Base64Decoder, source: []const u8) !usize { - if (source.len % 4 != 0) return error.InvalidPadding; - return calcDecodedSizeExactUnsafe(source, decoder.pad_char); + /// Return the exact decoded size for a slice. + /// `InvalidPadding` is returned if the input length is not valid. 
+ pub fn calcSizeForSlice(decoder: *const Base64Decoder, source: []const u8) Error!usize { + const source_len = source.len; + var result = try decoder.calcSizeUpperBound(source_len); + if (decoder.pad_char) |pad_char| { + if (source_len >= 1 and source[source_len - 1] == pad_char) result -= 1; + if (source_len >= 2 and source[source_len - 2] == pad_char) result -= 1; + } + return result; } /// dest.len must be what you get from ::calcSize. /// invalid characters result in error.InvalidCharacter. /// invalid padding results in error.InvalidPadding. - pub fn decode(decoder: *const Base64Decoder, dest: []u8, source: []const u8) !void { - assert(dest.len == (decoder.calcSize(source) catch unreachable)); - assert(source.len % 4 == 0); - - var src_cursor: usize = 0; - var dest_cursor: usize = 0; - - while (src_cursor < source.len) : (src_cursor += 4) { - if (!decoder.char_in_alphabet[source[src_cursor + 0]]) return error.InvalidCharacter; - if (!decoder.char_in_alphabet[source[src_cursor + 1]]) return error.InvalidCharacter; - if (src_cursor < source.len - 4 or source[src_cursor + 3] != decoder.pad_char) { - // common case - if (!decoder.char_in_alphabet[source[src_cursor + 2]]) return error.InvalidCharacter; - if (!decoder.char_in_alphabet[source[src_cursor + 3]]) return error.InvalidCharacter; - dest[dest_cursor + 0] = decoder.char_to_index[source[src_cursor + 0]] << 2 | decoder.char_to_index[source[src_cursor + 1]] >> 4; - dest[dest_cursor + 1] = decoder.char_to_index[source[src_cursor + 1]] << 4 | decoder.char_to_index[source[src_cursor + 2]] >> 2; - dest[dest_cursor + 2] = decoder.char_to_index[source[src_cursor + 2]] << 6 | decoder.char_to_index[source[src_cursor + 3]]; - dest_cursor += 3; - } else if (source[src_cursor + 2] != decoder.pad_char) { - // one pad char - if (!decoder.char_in_alphabet[source[src_cursor + 2]]) return error.InvalidCharacter; - dest[dest_cursor + 0] = decoder.char_to_index[source[src_cursor + 0]] << 2 | 
decoder.char_to_index[source[src_cursor + 1]] >> 4; - dest[dest_cursor + 1] = decoder.char_to_index[source[src_cursor + 1]] << 4 | decoder.char_to_index[source[src_cursor + 2]] >> 2; - if (decoder.char_to_index[source[src_cursor + 2]] << 6 != 0) return error.InvalidPadding; - dest_cursor += 2; - } else { - // two pad chars - dest[dest_cursor + 0] = decoder.char_to_index[source[src_cursor + 0]] << 2 | decoder.char_to_index[source[src_cursor + 1]] >> 4; - if (decoder.char_to_index[source[src_cursor + 1]] << 4 != 0) return error.InvalidPadding; - dest_cursor += 1; + pub fn decode(decoder: *const Base64Decoder, dest: []u8, source: []const u8) Error!void { + if (decoder.pad_char != null and source.len % 4 != 0) return error.InvalidPadding; + var acc: u12 = 0; + var acc_len: u4 = 0; + var dest_idx: usize = 0; + var leftover_idx: ?usize = null; + for (source) |c, src_idx| { + const d = decoder.char_to_index[c]; + if (d == invalid_char) { + if (decoder.pad_char == null or c != decoder.pad_char.?) 
return error.InvalidCharacter; + leftover_idx = src_idx; + break; + } + acc = (acc << 6) + d; + acc_len += 6; + if (acc_len >= 8) { + acc_len -= 8; + dest[dest_idx] = @truncate(u8, acc >> acc_len); + dest_idx += 1; } } - - assert(src_cursor == source.len); - assert(dest_cursor == dest.len); + if (acc_len > 4 or (acc & (@as(u12, 1) << acc_len) - 1) != 0) { + return error.InvalidPadding; + } + if (leftover_idx == null) return; + var leftover = source[leftover_idx.?..]; + if (decoder.pad_char) |pad_char| { + const padding_len = acc_len / 2; + var padding_chars: usize = 0; + var i: usize = 0; + for (leftover) |c| { + if (c != pad_char) { + return if (c == Base64Decoder.invalid_char) error.InvalidCharacter else error.InvalidPadding; + } + padding_chars += 1; + } + if (padding_chars != padding_len) return error.InvalidPadding; + } } }; pub const Base64DecoderWithIgnore = struct { decoder: Base64Decoder, char_is_ignored: [256]bool, - pub fn init(alphabet_chars: []const u8, pad_char: u8, ignore_chars: []const u8) Base64DecoderWithIgnore { + + pub fn init(alphabet_chars: [64]u8, pad_char: ?u8, ignore_chars: []const u8) Base64DecoderWithIgnore { var result = Base64DecoderWithIgnore{ .decoder = Base64Decoder.init(alphabet_chars, pad_char), .char_is_ignored = [_]bool{false} ** 256, }; - for (ignore_chars) |c| { - assert(!result.decoder.char_in_alphabet[c]); + assert(result.decoder.char_to_index[c] == Base64Decoder.invalid_char); assert(!result.char_is_ignored[c]); assert(result.decoder.pad_char != c); result.char_is_ignored[c] = true; } - return result; } - /// If no characters end up being ignored or padding, this will be the exact decoded size. - pub fn calcSizeUpperBound(encoded_len: usize) usize { - return @divTrunc(encoded_len, 4) * 3; + /// Return the maximum possible decoded size for a given input length - The actual length may be less if the input includes padding + /// `InvalidPadding` is returned if the input length is not valid. 
+ pub fn calcSizeUpperBound(decoder_with_ignore: *const Base64DecoderWithIgnore, source_len: usize) Error!usize { + var result = source_len / 4 * 3; + if (decoder_with_ignore.decoder.pad_char == null) { + const leftover = source_len % 4; + result += leftover * 3 / 4; + } + return result; } /// Invalid characters that are not ignored result in error.InvalidCharacter. /// Invalid padding results in error.InvalidPadding. - /// Decoding more data than can fit in dest results in error.OutputTooSmall. See also ::calcSizeUpperBound. + /// Decoding more data than can fit in dest results in error.NoSpaceLeft. See also ::calcSizeUpperBound. /// Returns the number of bytes written to dest. - pub fn decode(decoder_with_ignore: *const Base64DecoderWithIgnore, dest: []u8, source: []const u8) !usize { + pub fn decode(decoder_with_ignore: *const Base64DecoderWithIgnore, dest: []u8, source: []const u8) Error!usize { const decoder = &decoder_with_ignore.decoder; - - var src_cursor: usize = 0; - var dest_cursor: usize = 0; - - while (true) { - // get the next 4 chars, if available - var next_4_chars: [4]u8 = undefined; - var available_chars: usize = 0; - var pad_char_count: usize = 0; - while (available_chars < 4 and src_cursor < source.len) { - var c = source[src_cursor]; - src_cursor += 1; - - if (decoder.char_in_alphabet[c]) { - // normal char - next_4_chars[available_chars] = c; - available_chars += 1; - } else if (decoder_with_ignore.char_is_ignored[c]) { - // we're told to skip this one - continue; - } else if (c == decoder.pad_char) { - // the padding has begun. count the pad chars. 
- pad_char_count += 1; - while (src_cursor < source.len) { - c = source[src_cursor]; - src_cursor += 1; - if (c == decoder.pad_char) { - pad_char_count += 1; - if (pad_char_count > 2) return error.InvalidCharacter; - } else if (decoder_with_ignore.char_is_ignored[c]) { - // we can even ignore chars during the padding - continue; - } else return error.InvalidCharacter; - } - break; - } else return error.InvalidCharacter; + var acc: u12 = 0; + var acc_len: u4 = 0; + var dest_idx: usize = 0; + var leftover_idx: ?usize = null; + for (source) |c, src_idx| { + if (decoder_with_ignore.char_is_ignored[c]) continue; + const d = decoder.char_to_index[c]; + if (d == Base64Decoder.invalid_char) { + if (decoder.pad_char == null or c != decoder.pad_char.?) return error.InvalidCharacter; + leftover_idx = src_idx; + break; } - - switch (available_chars) { - 4 => { - // common case - if (dest_cursor + 3 > dest.len) return error.OutputTooSmall; - assert(pad_char_count == 0); - dest[dest_cursor + 0] = decoder.char_to_index[next_4_chars[0]] << 2 | decoder.char_to_index[next_4_chars[1]] >> 4; - dest[dest_cursor + 1] = decoder.char_to_index[next_4_chars[1]] << 4 | decoder.char_to_index[next_4_chars[2]] >> 2; - dest[dest_cursor + 2] = decoder.char_to_index[next_4_chars[2]] << 6 | decoder.char_to_index[next_4_chars[3]]; - dest_cursor += 3; - continue; - }, - 3 => { - if (dest_cursor + 2 > dest.len) return error.OutputTooSmall; - if (pad_char_count != 1) return error.InvalidPadding; - dest[dest_cursor + 0] = decoder.char_to_index[next_4_chars[0]] << 2 | decoder.char_to_index[next_4_chars[1]] >> 4; - dest[dest_cursor + 1] = decoder.char_to_index[next_4_chars[1]] << 4 | decoder.char_to_index[next_4_chars[2]] >> 2; - if (decoder.char_to_index[next_4_chars[2]] << 6 != 0) return error.InvalidPadding; - dest_cursor += 2; - break; - }, - 2 => { - if (dest_cursor + 1 > dest.len) return error.OutputTooSmall; - if (pad_char_count != 2) return error.InvalidPadding; - dest[dest_cursor + 0] = 
decoder.char_to_index[next_4_chars[0]] << 2 | decoder.char_to_index[next_4_chars[1]] >> 4; - if (decoder.char_to_index[next_4_chars[1]] << 4 != 0) return error.InvalidPadding; - dest_cursor += 1; - break; - }, - 1 => { - return error.InvalidPadding; - }, - 0 => { - if (pad_char_count != 0) return error.InvalidPadding; - break; - }, - else => unreachable, + acc = (acc << 6) + d; + acc_len += 6; + if (acc_len >= 8) { + if (dest_idx == dest.len) return error.NoSpaceLeft; + acc_len -= 8; + dest[dest_idx] = @truncate(u8, acc >> acc_len); + dest_idx += 1; } } - - assert(src_cursor == source.len); - - return dest_cursor; + if (acc_len > 4 or (acc & (@as(u12, 1) << acc_len) - 1) != 0) { + return error.InvalidPadding; + } + const padding_len = acc_len / 2; + if (leftover_idx == null) { + if (decoder.pad_char != null and padding_len != 0) return error.InvalidPadding; + return dest_idx; + } + var leftover = source[leftover_idx.?..]; + if (decoder.pad_char) |pad_char| { + var padding_chars: usize = 0; + var i: usize = 0; + for (leftover) |c| { + if (decoder_with_ignore.char_is_ignored[c]) continue; + if (c != pad_char) { + return if (c == Base64Decoder.invalid_char) error.InvalidCharacter else error.InvalidPadding; + } + padding_chars += 1; + } + if (padding_chars != padding_len) return error.InvalidPadding; + } + return dest_idx; } }; -pub const standard_decoder_unsafe = Base64DecoderUnsafe.init(standard_alphabet_chars, standard_pad_char); - pub const Base64DecoderUnsafe = struct { /// e.g. 'A' => 0. /// undefined for any value not in the 64 alphabet chars. 
char_to_index: [256]u8, - pad_char: u8, + pad_char: ?u8, - pub fn init(alphabet_chars: []const u8, pad_char: u8) Base64DecoderUnsafe { - assert(alphabet_chars.len == 64); + pub fn init(alphabet_chars: [64]u8, pad_char: ?u8) Base64DecoderUnsafe { var result = Base64DecoderUnsafe{ .char_to_index = undefined, .pad_char = pad_char, }; for (alphabet_chars) |c, i| { - assert(c != pad_char); + assert(pad_char == null or c != pad_char.?); result.char_to_index[c] = @intCast(u8, i); } return result; } - /// The source buffer must be valid. - pub fn calcSize(decoder: *const Base64DecoderUnsafe, source: []const u8) usize { - return calcDecodedSizeExactUnsafe(source, decoder.pad_char); + /// Return the exact decoded size for a slice. + /// `InvalidPadding` is returned if the input length is not valid. + pub fn calcSizeForSlice(decoder: *const Base64DecoderUnsafe, source: []const u8) Error!usize { + const safe_decoder = Base64Decoder{ .char_to_index = undefined, .pad_char = decoder.pad_char }; + return safe_decoder.calcSizeForSlice(source); } /// dest.len must be what you get from ::calcDecodedSizeExactUnsafe. /// invalid characters or padding will result in undefined values. 
pub fn decode(decoder: *const Base64DecoderUnsafe, dest: []u8, source: []const u8) void { - assert(dest.len == decoder.calcSize(source)); + assert(dest.len == decoder.calcSizeForSlice(source) catch unreachable); var src_index: usize = 0; var dest_index: usize = 0; var in_buf_len: usize = source.len; - while (in_buf_len > 0 and source[in_buf_len - 1] == decoder.pad_char) { - in_buf_len -= 1; + if (decoder.pad_char) |pad_char| { + while (in_buf_len > 0 and source[in_buf_len - 1] == pad_char) { + in_buf_len -= 1; + } } while (in_buf_len > 4) { @@ -341,80 +372,111 @@ pub const Base64DecoderUnsafe = struct { } }; -fn calcDecodedSizeExactUnsafe(source: []const u8, pad_char: u8) usize { - if (source.len == 0) return 0; - var result = @divExact(source.len, 4) * 3; - if (source[source.len - 1] == pad_char) { - result -= 1; - if (source[source.len - 2] == pad_char) { - result -= 1; - } - } - return result; -} - test "base64" { @setEvalBranchQuota(8000); testBase64() catch unreachable; - comptime (testBase64() catch unreachable); + comptime testAllApis(standard, "comptime", "Y29tcHRpbWU=") catch unreachable; +} + +test "base64 url_safe" { + @setEvalBranchQuota(8000); + testBase64UrlSafe() catch unreachable; + comptime testAllApis(url_safe, "comptime", "Y29tcHRpbWU") catch unreachable; } fn testBase64() !void { - try testAllApis("", ""); - try testAllApis("f", "Zg=="); - try testAllApis("fo", "Zm8="); - try testAllApis("foo", "Zm9v"); - try testAllApis("foob", "Zm9vYg=="); - try testAllApis("fooba", "Zm9vYmE="); - try testAllApis("foobar", "Zm9vYmFy"); - - try testDecodeIgnoreSpace("", " "); - try testDecodeIgnoreSpace("f", "Z g= ="); - try testDecodeIgnoreSpace("fo", " Zm8="); - try testDecodeIgnoreSpace("foo", "Zm9v "); - try testDecodeIgnoreSpace("foob", "Zm9vYg = = "); - try testDecodeIgnoreSpace("fooba", "Zm9v YmE="); - try testDecodeIgnoreSpace("foobar", " Z m 9 v Y m F y "); + const codecs = standard; + + try testAllApis(codecs, "", ""); + try testAllApis(codecs, "f", 
"Zg=="); + try testAllApis(codecs, "fo", "Zm8="); + try testAllApis(codecs, "foo", "Zm9v"); + try testAllApis(codecs, "foob", "Zm9vYg=="); + try testAllApis(codecs, "fooba", "Zm9vYmE="); + try testAllApis(codecs, "foobar", "Zm9vYmFy"); + + try testDecodeIgnoreSpace(codecs, "", " "); + try testDecodeIgnoreSpace(codecs, "f", "Z g= ="); + try testDecodeIgnoreSpace(codecs, "fo", " Zm8="); + try testDecodeIgnoreSpace(codecs, "foo", "Zm9v "); + try testDecodeIgnoreSpace(codecs, "foob", "Zm9vYg = = "); + try testDecodeIgnoreSpace(codecs, "fooba", "Zm9v YmE="); + try testDecodeIgnoreSpace(codecs, "foobar", " Z m 9 v Y m F y "); + + // test getting some api errors + try testError(codecs, "A", error.InvalidPadding); + try testError(codecs, "AA", error.InvalidPadding); + try testError(codecs, "AAA", error.InvalidPadding); + try testError(codecs, "A..A", error.InvalidCharacter); + try testError(codecs, "AA=A", error.InvalidPadding); + try testError(codecs, "AA/=", error.InvalidPadding); + try testError(codecs, "A/==", error.InvalidPadding); + try testError(codecs, "A===", error.InvalidPadding); + try testError(codecs, "====", error.InvalidPadding); + + try testNoSpaceLeftError(codecs, "AA=="); + try testNoSpaceLeftError(codecs, "AAA="); + try testNoSpaceLeftError(codecs, "AAAA"); + try testNoSpaceLeftError(codecs, "AAAAAA=="); +} + +fn testBase64UrlSafe() !void { + const codecs = url_safe; + + try testAllApis(codecs, "", ""); + try testAllApis(codecs, "f", "Zg"); + try testAllApis(codecs, "fo", "Zm8"); + try testAllApis(codecs, "foo", "Zm9v"); + try testAllApis(codecs, "foob", "Zm9vYg"); + try testAllApis(codecs, "fooba", "Zm9vYmE"); + try testAllApis(codecs, "foobar", "Zm9vYmFy"); + + try testDecodeIgnoreSpace(codecs, "", " "); + try testDecodeIgnoreSpace(codecs, "f", "Z g "); + try testDecodeIgnoreSpace(codecs, "fo", " Zm8"); + try testDecodeIgnoreSpace(codecs, "foo", "Zm9v "); + try testDecodeIgnoreSpace(codecs, "foob", "Zm9vYg "); + try testDecodeIgnoreSpace(codecs, 
"fooba", "Zm9v YmE"); + try testDecodeIgnoreSpace(codecs, "foobar", " Z m 9 v Y m F y "); // test getting some api errors - try testError("A", error.InvalidPadding); - try testError("AA", error.InvalidPadding); - try testError("AAA", error.InvalidPadding); - try testError("A..A", error.InvalidCharacter); - try testError("AA=A", error.InvalidCharacter); - try testError("AA/=", error.InvalidPadding); - try testError("A/==", error.InvalidPadding); - try testError("A===", error.InvalidCharacter); - try testError("====", error.InvalidCharacter); - - try testOutputTooSmallError("AA=="); - try testOutputTooSmallError("AAA="); - try testOutputTooSmallError("AAAA"); - try testOutputTooSmallError("AAAAAA=="); + try testError(codecs, "A", error.InvalidPadding); + try testError(codecs, "AAA=", error.InvalidCharacter); + try testError(codecs, "A..A", error.InvalidCharacter); + try testError(codecs, "AA=A", error.InvalidCharacter); + try testError(codecs, "AA/=", error.InvalidCharacter); + try testError(codecs, "A/==", error.InvalidCharacter); + try testError(codecs, "A===", error.InvalidCharacter); + try testError(codecs, "====", error.InvalidCharacter); + + try testNoSpaceLeftError(codecs, "AA"); + try testNoSpaceLeftError(codecs, "AAA"); + try testNoSpaceLeftError(codecs, "AAAA"); + try testNoSpaceLeftError(codecs, "AAAAAA"); } -fn testAllApis(expected_decoded: []const u8, expected_encoded: []const u8) !void { +fn testAllApis(codecs: Codecs, expected_decoded: []const u8, expected_encoded: []const u8) !void { // Base64Encoder { var buffer: [0x100]u8 = undefined; - const encoded = standard_encoder.encode(&buffer, expected_decoded); + const encoded = codecs.Encoder.encode(&buffer, expected_decoded); testing.expectEqualSlices(u8, expected_encoded, encoded); } // Base64Decoder { var buffer: [0x100]u8 = undefined; - var decoded = buffer[0..try standard_decoder.calcSize(expected_encoded)]; - try standard_decoder.decode(decoded, expected_encoded); + var decoded = buffer[0..try 
codecs.Decoder.calcSizeForSlice(expected_encoded)]; + try codecs.Decoder.decode(decoded, expected_encoded); testing.expectEqualSlices(u8, expected_decoded, decoded); } // Base64DecoderWithIgnore { - const standard_decoder_ignore_nothing = Base64DecoderWithIgnore.init(standard_alphabet_chars, standard_pad_char, ""); + const decoder_ignore_nothing = codecs.decoderWithIgnore(""); var buffer: [0x100]u8 = undefined; - var decoded = buffer[0..Base64DecoderWithIgnore.calcSizeUpperBound(expected_encoded.len)]; - var written = try standard_decoder_ignore_nothing.decode(decoded, expected_encoded); + var decoded = buffer[0..try decoder_ignore_nothing.calcSizeUpperBound(expected_encoded.len)]; + var written = try decoder_ignore_nothing.decode(decoded, expected_encoded); testing.expect(written <= decoded.len); testing.expectEqualSlices(u8, expected_decoded, decoded[0..written]); } @@ -422,40 +484,40 @@ fn testAllApis(expected_decoded: []const u8, expected_encoded: []const u8) !void // Base64DecoderUnsafe { var buffer: [0x100]u8 = undefined; - var decoded = buffer[0..standard_decoder_unsafe.calcSize(expected_encoded)]; - standard_decoder_unsafe.decode(decoded, expected_encoded); + var decoded = buffer[0..try codecs.DecoderUnsafe.calcSizeForSlice(expected_encoded)]; + codecs.DecoderUnsafe.decode(decoded, expected_encoded); testing.expectEqualSlices(u8, expected_decoded, decoded); } } -fn testDecodeIgnoreSpace(expected_decoded: []const u8, encoded: []const u8) !void { - const standard_decoder_ignore_space = Base64DecoderWithIgnore.init(standard_alphabet_chars, standard_pad_char, " "); +fn testDecodeIgnoreSpace(codecs: Codecs, expected_decoded: []const u8, encoded: []const u8) !void { + const decoder_ignore_space = codecs.decoderWithIgnore(" "); var buffer: [0x100]u8 = undefined; - var decoded = buffer[0..Base64DecoderWithIgnore.calcSizeUpperBound(encoded.len)]; - var written = try standard_decoder_ignore_space.decode(decoded, encoded); + var decoded = buffer[0..try 
decoder_ignore_space.calcSizeUpperBound(encoded.len)]; + var written = try decoder_ignore_space.decode(decoded, encoded); testing.expectEqualSlices(u8, expected_decoded, decoded[0..written]); } -fn testError(encoded: []const u8, expected_err: anyerror) !void { - const standard_decoder_ignore_space = Base64DecoderWithIgnore.init(standard_alphabet_chars, standard_pad_char, " "); +fn testError(codecs: Codecs, encoded: []const u8, expected_err: anyerror) !void { + const decoder_ignore_space = codecs.decoderWithIgnore(" "); var buffer: [0x100]u8 = undefined; - if (standard_decoder.calcSize(encoded)) |decoded_size| { + if (codecs.Decoder.calcSizeForSlice(encoded)) |decoded_size| { var decoded = buffer[0..decoded_size]; - if (standard_decoder.decode(decoded, encoded)) |_| { + if (codecs.Decoder.decode(decoded, encoded)) |_| { return error.ExpectedError; } else |err| if (err != expected_err) return err; } else |err| if (err != expected_err) return err; - if (standard_decoder_ignore_space.decode(buffer[0..], encoded)) |_| { + if (decoder_ignore_space.decode(buffer[0..], encoded)) |_| { return error.ExpectedError; } else |err| if (err != expected_err) return err; } -fn testOutputTooSmallError(encoded: []const u8) !void { - const standard_decoder_ignore_space = Base64DecoderWithIgnore.init(standard_alphabet_chars, standard_pad_char, " "); +fn testNoSpaceLeftError(codecs: Codecs, encoded: []const u8) !void { + const decoder_ignore_space = codecs.decoderWithIgnore(" "); var buffer: [0x100]u8 = undefined; - var decoded = buffer[0 .. calcDecodedSizeExactUnsafe(encoded, standard_pad_char) - 1]; - if (standard_decoder_ignore_space.decode(decoded, encoded)) |_| { + var decoded = buffer[0 .. 
(try codecs.Decoder.calcSizeForSlice(encoded)) - 1]; + if (decoder_ignore_space.decode(decoded, encoded)) |_| { return error.ExpectedError; - } else |err| if (err != error.OutputTooSmall) return err; + } else |err| if (err != error.NoSpaceLeft) return err; } diff --git a/lib/std/fs.zig b/lib/std/fs.zig index 79385708af..1a02cd5b6b 100644 --- a/lib/std/fs.zig +++ b/lib/std/fs.zig @@ -50,13 +50,13 @@ pub const MAX_PATH_BYTES = switch (builtin.os.tag) { else => @compileError("Unsupported OS"), }; -pub const base64_alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; +pub const base64_alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_".*; /// Base64 encoder, replacing the standard `+/` with `-_` so that it can be used in a file name on any filesystem. -pub const base64_encoder = base64.Base64Encoder.init(base64_alphabet, base64.standard_pad_char); +pub const base64_encoder = base64.Base64Encoder.init(base64_alphabet, null); /// Base64 decoder, replacing the standard `+/` with `-_` so that it can be used in a file name on any filesystem. -pub const base64_decoder = base64.Base64Decoder.init(base64_alphabet, base64.standard_pad_char); +pub const base64_decoder = base64.Base64Decoder.init(base64_alphabet, null); /// Whether or not async file system syscalls need a dedicated thread because the operating /// system does not support non-blocking I/O on the file system. 
@@ -77,7 +77,7 @@ pub fn atomicSymLink(allocator: *Allocator, existing_path: []const u8, new_path: const dirname = path.dirname(new_path) orelse "."; var rand_buf: [AtomicFile.RANDOM_BYTES]u8 = undefined; - const tmp_path = try allocator.alloc(u8, dirname.len + 1 + base64.Base64Encoder.calcSize(rand_buf.len)); + const tmp_path = try allocator.alloc(u8, dirname.len + 1 + base64_encoder.calcSize(rand_buf.len)); defer allocator.free(tmp_path); mem.copy(u8, tmp_path[0..], dirname); tmp_path[dirname.len] = path.sep; @@ -142,7 +142,7 @@ pub const AtomicFile = struct { const InitError = File.OpenError; const RANDOM_BYTES = 12; - const TMP_PATH_LEN = base64.Base64Encoder.calcSize(RANDOM_BYTES); + const TMP_PATH_LEN = base64_encoder.calcSize(RANDOM_BYTES); /// Note that the `Dir.atomicFile` API may be more handy than this lower-level function. pub fn init( diff --git a/lib/std/testing.zig b/lib/std/testing.zig index 67831c92fb..eb2b6e87b3 100644 --- a/lib/std/testing.zig +++ b/lib/std/testing.zig @@ -298,7 +298,7 @@ pub const TmpDir = struct { sub_path: [sub_path_len]u8, const random_bytes_count = 12; - const sub_path_len = std.base64.Base64Encoder.calcSize(random_bytes_count); + const sub_path_len = std.fs.base64_encoder.calcSize(random_bytes_count); pub fn cleanup(self: *TmpDir) void { self.dir.close(); diff --git a/test/standalone/mix_o_files/base64.zig b/test/standalone/mix_o_files/base64.zig index 7ded9824a0..aa308c1b1e 100644 --- a/test/standalone/mix_o_files/base64.zig +++ b/test/standalone/mix_o_files/base64.zig @@ -3,8 +3,8 @@ const base64 = @import("std").base64; export fn decode_base_64(dest_ptr: [*]u8, dest_len: usize, source_ptr: [*]const u8, source_len: usize) usize { const src = source_ptr[0..source_len]; const dest = dest_ptr[0..dest_len]; - const base64_decoder = base64.standard_decoder_unsafe; - const decoded_size = base64_decoder.calcSize(src); + const base64_decoder = base64.standard.DecoderUnsafe; + const decoded_size = 
base64_decoder.calcSizeForSlice(src) catch unreachable; base64_decoder.decode(dest[0..decoded_size], src); return decoded_size; } -- cgit v1.2.3