diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2019-09-26 01:54:45 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-09-26 01:54:45 -0400 |
| commit | 68bb3945708c43109c48bda3664176307d45b62c (patch) | |
| tree | afb9731e10cef9d192560b52cd9ae2cf179775c4 /lib/std/base64.zig | |
| parent | 6128bc728d1e1024a178c16c2149f5b1a167a013 (diff) | |
| parent | 4637e8f9699af9c3c6cf4df50ef5bb67c7a318a4 (diff) | |
| download | zig-68bb3945708c43109c48bda3664176307d45b62c.tar.gz zig-68bb3945708c43109c48bda3664176307d45b62c.zip | |
Merge pull request #3315 from ziglang/mv-std-lib
Move std/ to lib/std/
Diffstat (limited to 'lib/std/base64.zig')
| -rw-r--r-- | lib/std/base64.zig | 458 |
1 files changed, 458 insertions, 0 deletions
// Base64 encoding and decoding.
//
// This file provides one encoder and three decoders that share the same
// alphabet/pad-character configuration:
//   * Base64Encoder           - encodes bytes into padded base64 text.
//   * Base64Decoder           - strict decoder; reports invalid characters and padding.
//   * Base64DecoderWithIgnore - strict decoder that skips a configurable set of characters.
//   * Base64DecoderUnsafe     - performs no validation; invalid input yields undefined output.

const std = @import("std.zig");
const assert = std.debug.assert;
const testing = std.testing;
const mem = std.mem;

/// The 64 characters of the standard base64 alphabet, indexed by 6-bit value.
pub const standard_alphabet_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/// The padding character of the standard base64 encoding.
pub const standard_pad_char = '=';
/// Encoder configured with the standard alphabet and pad character.
pub const standard_encoder = Base64Encoder.init(standard_alphabet_chars, standard_pad_char);

/// Encodes bytes into base64 text, always padding the output to a multiple of 4 characters.
pub const Base64Encoder = struct {
    alphabet_chars: []const u8,
    pad_char: u8,

    /// Validates the configuration with assertions, then stores it unchanged.
    /// Asserts that `alphabet_chars` has exactly 64 entries, that no entry is
    /// repeated, and that no entry equals `pad_char`.
    pub fn init(alphabet_chars: []const u8, pad_char: u8) Base64Encoder {
        assert(alphabet_chars.len == 64);
        // Only used to detect duplicates; it is not stored in the encoder.
        var char_in_alphabet = [_]bool{false} ** 256;
        for (alphabet_chars) |c| {
            assert(!char_in_alphabet[c]);
            assert(c != pad_char);
            char_in_alphabet[c] = true;
        }

        return Base64Encoder{
            .alphabet_chars = alphabet_chars,
            .pad_char = pad_char,
        };
    }

    /// Returns the encoded length for `source_len` input bytes:
    /// ceil(source_len / 3) * 4, i.e. 4 output characters (padding included)
    /// for every started group of 3 input bytes.
    pub fn calcSize(source_len: usize) usize {
        return @divTrunc(source_len + 2, 3) * 4;
    }

    /// Encodes `source` into `dest`.
    /// Asserts that dest.len equals calcSize(source.len).
    pub fn encode(encoder: *const Base64Encoder, dest: []u8, source: []const u8) void {
        assert(dest.len == Base64Encoder.calcSize(source.len));

        var i: usize = 0;
        var out_index: usize = 0;
        // Full groups: 3 input bytes become 4 output characters.
        while (i + 2 < source.len) : (i += 3) {
            dest[out_index] = encoder.alphabet_chars[(source[i] >> 2) & 0x3f];
            out_index += 1;

            dest[out_index] = encoder.alphabet_chars[((source[i] & 0x3) << 4) | ((source[i + 1] & 0xf0) >> 4)];
            out_index += 1;

            dest[out_index] = encoder.alphabet_chars[((source[i + 1] & 0xf) << 2) | ((source[i + 2] & 0xc0) >> 6)];
            out_index += 1;

            dest[out_index] = encoder.alphabet_chars[source[i + 2] & 0x3f];
            out_index += 1;
        }

        // Trailing partial group of 1 or 2 bytes.
        if (i < source.len) {
            dest[out_index] = encoder.alphabet_chars[(source[i] >> 2) & 0x3f];
            out_index += 1;

            if (i + 1 == source.len) {
                // 1 byte left: 2 characters followed by 2 pad chars
                // (the second pad char is written after this if/else).
                dest[out_index] = encoder.alphabet_chars[(source[i] & 0x3) << 4];
                out_index += 1;

                dest[out_index] = encoder.pad_char;
                out_index += 1;
            } else {
                // 2 bytes left: 3 characters followed by 1 pad char.
                dest[out_index] = encoder.alphabet_chars[((source[i] & 0x3) << 4) | ((source[i + 1] & 0xf0) >> 4)];
                out_index += 1;

                dest[out_index] = encoder.alphabet_chars[(source[i + 1] & 0xf) << 2];
                out_index += 1;
            }

            dest[out_index] = encoder.pad_char;
            out_index += 1;
        }
    }
};

/// Strict decoder configured with the standard alphabet and pad character.
pub const standard_decoder = Base64Decoder.init(standard_alphabet_chars, standard_pad_char);

/// Decodes padded base64 text, rejecting characters outside the alphabet and
/// malformed padding.
pub const Base64Decoder = struct {
    /// e.g. 'A' => 0.
    /// undefined for any value not in the 64 alphabet chars.
    char_to_index: [256]u8,

    /// true only for the 64 chars in the alphabet, not the pad char.
    char_in_alphabet: [256]bool,
    pad_char: u8,

    /// Builds the lookup tables for the given alphabet.
    /// Asserts that `alphabet_chars` has exactly 64 entries, that no entry is
    /// repeated, and that no entry equals `pad_char`.
    pub fn init(alphabet_chars: []const u8, pad_char: u8) Base64Decoder {
        assert(alphabet_chars.len == 64);

        var result = Base64Decoder{
            .char_to_index = undefined,
            .char_in_alphabet = [_]bool{false} ** 256,
            .pad_char = pad_char,
        };

        for (alphabet_chars) |c, i| {
            assert(!result.char_in_alphabet[c]);
            assert(c != pad_char);

            result.char_to_index[c] = @intCast(u8, i);
            result.char_in_alphabet[c] = true;
        }

        return result;
    }

    /// Returns the decoded length of `source`, derived from its length and its
    /// trailing pad characters only; the other characters are not inspected.
    /// Returns error.InvalidPadding if source.len is not a multiple of 4.
    pub fn calcSize(decoder: *const Base64Decoder, source: []const u8) !usize {
        if (source.len % 4 != 0) return error.InvalidPadding;
        return calcDecodedSizeExactUnsafe(source, decoder.pad_char);
    }

    /// Decodes `source` into `dest`.
    /// Asserts that dest.len equals what calcSize returns for `source`, which
    /// also requires source.len to be a multiple of 4.
    /// invalid characters result in error.InvalidCharacter.
    /// invalid padding results in error.InvalidPadding.
    pub fn decode(decoder: *const Base64Decoder, dest: []u8, source: []const u8) !void {
        assert(dest.len == (decoder.calcSize(source) catch unreachable));
        assert(source.len % 4 == 0);

        var src_cursor: usize = 0;
        var dest_cursor: usize = 0;

        while (src_cursor < source.len) : (src_cursor += 4) {
            if (!decoder.char_in_alphabet[source[src_cursor + 0]]) return error.InvalidCharacter;
            if (!decoder.char_in_alphabet[source[src_cursor + 1]]) return error.InvalidCharacter;
            // Padding is only accepted in the final group; a pad char anywhere
            // else fails the alphabet checks in the common case below.
            if (src_cursor < source.len - 4 or source[src_cursor + 3] != decoder.pad_char) {
                // common case
                if (!decoder.char_in_alphabet[source[src_cursor + 2]]) return error.InvalidCharacter;
                if (!decoder.char_in_alphabet[source[src_cursor + 3]]) return error.InvalidCharacter;
                dest[dest_cursor + 0] = decoder.char_to_index[source[src_cursor + 0]] << 2 | decoder.char_to_index[source[src_cursor + 1]] >> 4;
                dest[dest_cursor + 1] = decoder.char_to_index[source[src_cursor + 1]] << 4 | decoder.char_to_index[source[src_cursor + 2]] >> 2;
                dest[dest_cursor + 2] = decoder.char_to_index[source[src_cursor + 2]] << 6 | decoder.char_to_index[source[src_cursor + 3]];
                dest_cursor += 3;
            } else if (source[src_cursor + 2] != decoder.pad_char) {
                // one pad char
                if (!decoder.char_in_alphabet[source[src_cursor + 2]]) return error.InvalidCharacter;
                dest[dest_cursor + 0] = decoder.char_to_index[source[src_cursor + 0]] << 2 | decoder.char_to_index[source[src_cursor + 1]] >> 4;
                dest[dest_cursor + 1] = decoder.char_to_index[source[src_cursor + 1]] << 4 | decoder.char_to_index[source[src_cursor + 2]] >> 2;
                // The low 2 bits of the third character are not part of any
                // output byte and must be zero.
                if (decoder.char_to_index[source[src_cursor + 2]] << 6 != 0) return error.InvalidPadding;
                dest_cursor += 2;
            } else {
                // two pad chars
                dest[dest_cursor + 0] = decoder.char_to_index[source[src_cursor + 0]] << 2 | decoder.char_to_index[source[src_cursor + 1]] >> 4;
                // The low 4 bits of the second character are not part of any
                // output byte and must be zero.
                if (decoder.char_to_index[source[src_cursor + 1]] << 4 != 0) return error.InvalidPadding;
                dest_cursor += 1;
            }
        }

        assert(src_cursor == source.len);
        assert(dest_cursor == dest.len);
    }
};

/// Strict decoder that additionally skips a configurable set of characters
/// (e.g. whitespace) wherever they appear in the input, including inside the padding.
pub const Base64DecoderWithIgnore = struct {
    decoder: Base64Decoder,
    char_is_ignored: [256]bool,

    /// Builds the decoder tables and the ignore table.
    /// Asserts that no entry of `ignore_chars` is in the alphabet, is repeated,
    /// or equals `pad_char`. The alphabet itself is validated by Base64Decoder.init.
    pub fn init(alphabet_chars: []const u8, pad_char: u8, ignore_chars: []const u8) Base64DecoderWithIgnore {
        var result = Base64DecoderWithIgnore{
            .decoder = Base64Decoder.init(alphabet_chars, pad_char),
            .char_is_ignored = [_]bool{false} ** 256,
        };

        for (ignore_chars) |c| {
            assert(!result.decoder.char_in_alphabet[c]);
            assert(!result.char_is_ignored[c]);
            assert(result.decoder.pad_char != c);
            result.char_is_ignored[c] = true;
        }

        return result;
    }

    /// If no characters end up being ignored or padding, this will be the exact decoded size.
    pub fn calcSizeUpperBound(encoded_len: usize) usize {
        return @divTrunc(encoded_len, 4) * 3;
    }

    /// Invalid characters that are not ignored result in error.InvalidCharacter.
    /// Invalid padding results in error.InvalidPadding.
    /// Decoding more data than can fit in dest results in error.OutputTooSmall. See also calcSizeUpperBound.
    /// Returns the number of bytes written to dest.
    pub fn decode(decoder_with_ignore: *const Base64DecoderWithIgnore, dest: []u8, source: []const u8) !usize {
        const decoder = &decoder_with_ignore.decoder;

        var src_cursor: usize = 0;
        var dest_cursor: usize = 0;

        while (true) {
            // get the next 4 chars, if available
            var next_4_chars: [4]u8 = undefined;
            var available_chars: usize = 0;
            var pad_char_count: usize = 0;
            while (available_chars < 4 and src_cursor < source.len) {
                var c = source[src_cursor];
                src_cursor += 1;

                if (decoder.char_in_alphabet[c]) {
                    // normal char
                    next_4_chars[available_chars] = c;
                    available_chars += 1;
                } else if (decoder_with_ignore.char_is_ignored[c]) {
                    // we're told to skip this one
                    continue;
                } else if (c == decoder.pad_char) {
                    // the padding has begun. count the pad chars.
                    // Everything up to the end of source must now be pad or ignored chars.
                    pad_char_count += 1;
                    while (src_cursor < source.len) {
                        c = source[src_cursor];
                        src_cursor += 1;
                        if (c == decoder.pad_char) {
                            pad_char_count += 1;
                            if (pad_char_count > 2) return error.InvalidCharacter;
                        } else if (decoder_with_ignore.char_is_ignored[c]) {
                            // we can even ignore chars during the padding
                            continue;
                        } else
                            return error.InvalidCharacter;
                    }
                    break;
                } else
                    return error.InvalidCharacter;
            }

            switch (available_chars) {
                4 => {
                    // common case
                    if (dest_cursor + 3 > dest.len) return error.OutputTooSmall;
                    assert(pad_char_count == 0);
                    dest[dest_cursor + 0] = decoder.char_to_index[next_4_chars[0]] << 2 | decoder.char_to_index[next_4_chars[1]] >> 4;
                    dest[dest_cursor + 1] = decoder.char_to_index[next_4_chars[1]] << 4 | decoder.char_to_index[next_4_chars[2]] >> 2;
                    dest[dest_cursor + 2] = decoder.char_to_index[next_4_chars[2]] << 6 | decoder.char_to_index[next_4_chars[3]];
                    dest_cursor += 3;
                    continue;
                },
                3 => {
                    // 3 chars must be followed by exactly one pad char.
                    if (dest_cursor + 2 > dest.len) return error.OutputTooSmall;
                    if (pad_char_count != 1) return error.InvalidPadding;
                    dest[dest_cursor + 0] = decoder.char_to_index[next_4_chars[0]] << 2 | decoder.char_to_index[next_4_chars[1]] >> 4;
                    dest[dest_cursor + 1] = decoder.char_to_index[next_4_chars[1]] << 4 | decoder.char_to_index[next_4_chars[2]] >> 2;
                    // The low 2 bits of the third character must be zero.
                    if (decoder.char_to_index[next_4_chars[2]] << 6 != 0) return error.InvalidPadding;
                    dest_cursor += 2;
                    break;
                },
                2 => {
                    // 2 chars must be followed by exactly two pad chars.
                    if (dest_cursor + 1 > dest.len) return error.OutputTooSmall;
                    if (pad_char_count != 2) return error.InvalidPadding;
                    dest[dest_cursor + 0] = decoder.char_to_index[next_4_chars[0]] << 2 | decoder.char_to_index[next_4_chars[1]] >> 4;
                    // The low 4 bits of the second character must be zero.
                    if (decoder.char_to_index[next_4_chars[1]] << 4 != 0) return error.InvalidPadding;
                    dest_cursor += 1;
                    break;
                },
                1 => {
                    // A single leftover char never forms a whole output byte.
                    return error.InvalidPadding;
                },
                0 => {
                    // End of input on a group boundary: no padding is allowed.
                    if (pad_char_count != 0) return error.InvalidPadding;
                    break;
                },
                else => unreachable,
            }
        }

        assert(src_cursor == source.len);

        return dest_cursor;
    }
};

/// Non-validating decoder configured with the standard alphabet and pad character.
pub const standard_decoder_unsafe = Base64DecoderUnsafe.init(standard_alphabet_chars, standard_pad_char);

/// Decoder that performs no validation of its input.
pub const Base64DecoderUnsafe = struct {
    /// e.g. 'A' => 0.
    /// undefined for any value not in the 64 alphabet chars.
    char_to_index: [256]u8,
    pad_char: u8,

    /// Builds the lookup table for the given alphabet.
    /// Asserts that `alphabet_chars` has exactly 64 entries and that none of
    /// them equals `pad_char`. Unlike Base64Decoder.init, repeated alphabet
    /// characters are not detected here.
    pub fn init(alphabet_chars: []const u8, pad_char: u8) Base64DecoderUnsafe {
        assert(alphabet_chars.len == 64);
        var result = Base64DecoderUnsafe{
            .char_to_index = undefined,
            .pad_char = pad_char,
        };
        for (alphabet_chars) |c, i| {
            assert(c != pad_char);
            result.char_to_index[c] = @intCast(u8, i);
        }
        return result;
    }

    /// The source buffer must be valid.
    pub fn calcSize(decoder: *const Base64DecoderUnsafe, source: []const u8) usize {
        return calcDecodedSizeExactUnsafe(source, decoder.pad_char);
    }

    /// Asserts that dest.len equals what calcSize returns for `source`.
    /// invalid characters or padding will result in undefined values.
    pub fn decode(decoder: *const Base64DecoderUnsafe, dest: []u8, source: []const u8) void {
        assert(dest.len == decoder.calcSize(source));

        var src_index: usize = 0;
        var dest_index: usize = 0;
        var in_buf_len: usize = source.len;

        // Strip trailing pad chars; in_buf_len counts the remaining data chars.
        while (in_buf_len > 0 and source[in_buf_len - 1] == decoder.pad_char) {
            in_buf_len -= 1;
        }

        // Every group except the last one: 4 chars become 3 bytes.
        while (in_buf_len > 4) {
            dest[dest_index] = decoder.char_to_index[source[src_index + 0]] << 2 | decoder.char_to_index[source[src_index + 1]] >> 4;
            dest_index += 1;

            dest[dest_index] = decoder.char_to_index[source[src_index + 1]] << 4 | decoder.char_to_index[source[src_index + 2]] >> 2;
            dest_index += 1;

            dest[dest_index] = decoder.char_to_index[source[src_index + 2]] << 6 | decoder.char_to_index[source[src_index + 3]];
            dest_index += 1;

            src_index += 4;
            in_buf_len -= 4;
        }

        // Last group: 2, 3 or 4 remaining chars yield 1, 2 or 3 bytes.
        if (in_buf_len > 1) {
            dest[dest_index] = decoder.char_to_index[source[src_index + 0]] << 2 | decoder.char_to_index[source[src_index + 1]] >> 4;
            dest_index += 1;
        }
        if (in_buf_len > 2) {
            dest[dest_index] = decoder.char_to_index[source[src_index + 1]] << 4 | decoder.char_to_index[source[src_index + 2]] >> 2;
            dest_index += 1;
        }
        if (in_buf_len > 3) {
            dest[dest_index] = decoder.char_to_index[source[src_index + 2]] << 6 | decoder.char_to_index[source[src_index + 3]];
            dest_index += 1;
        }
    }
};

/// Returns 3 bytes per 4 source characters, minus one byte for each of up to
/// two trailing pad characters. Only the length and the last two characters of
/// `source` are inspected. source.len must be a multiple of 4 (@divExact).
fn calcDecodedSizeExactUnsafe(source: []const u8, pad_char: u8) usize {
    if (source.len == 0) return 0;
    var result = @divExact(source.len, 4) * 3;
    if (source[source.len - 1] == pad_char) {
        result -= 1;
        if (source[source.len - 2] == pad_char) {
            result -= 1;
        }
    }
    return result;
}

test "base64" {
    @setEvalBranchQuota(8000);
    // Run the suite at runtime (propagating the failing error instead of
    // hiding it behind `unreachable`) and again at compile time.
    try testBase64();
    comptime (testBase64() catch unreachable);
}

// Round-trip vectors, ignore-char decoding, and the documented error cases.
fn testBase64() !void {
    try testAllApis("", "");
    try testAllApis("f", "Zg==");
    try testAllApis("fo", "Zm8=");
    try testAllApis("foo", "Zm9v");
    try testAllApis("foob", "Zm9vYg==");
    try testAllApis("fooba", "Zm9vYmE=");
    try testAllApis("foobar", "Zm9vYmFy");

    try testDecodeIgnoreSpace("", " ");
    try testDecodeIgnoreSpace("f", "Z g= =");
    try testDecodeIgnoreSpace("fo", " Zm8=");
    try testDecodeIgnoreSpace("foo", "Zm9v ");
    try testDecodeIgnoreSpace("foob", "Zm9vYg = = ");
    try testDecodeIgnoreSpace("fooba", "Zm9v YmE=");
    try testDecodeIgnoreSpace("foobar", " Z m 9 v Y m F y ");

    // test getting some api errors
    try testError("A", error.InvalidPadding);
    try testError("AA", error.InvalidPadding);
    try testError("AAA", error.InvalidPadding);
    try testError("A..A", error.InvalidCharacter);
    try testError("AA=A", error.InvalidCharacter);
    try testError("AA/=", error.InvalidPadding);
    try testError("A/==", error.InvalidPadding);
    try testError("A===", error.InvalidCharacter);
    try testError("====", error.InvalidCharacter);

    try testOutputTooSmallError("AA==");
    try testOutputTooSmallError("AAA=");
    try testOutputTooSmallError("AAAA");
    try testOutputTooSmallError("AAAAAA==");
}

// Checks that the encoder and all three decoders agree on one decoded/encoded pair.
fn testAllApis(expected_decoded: []const u8, expected_encoded: []const u8) !void {
    // Base64Encoder
    {
        var buffer: [0x100]u8 = undefined;
        const encoded = buffer[0..Base64Encoder.calcSize(expected_decoded.len)];
        standard_encoder.encode(encoded, expected_decoded);
        testing.expectEqualSlices(u8, expected_encoded, encoded);
    }

    // Base64Decoder
    {
        var buffer: [0x100]u8 = undefined;
        const decoded = buffer[0..try standard_decoder.calcSize(expected_encoded)];
        try standard_decoder.decode(decoded, expected_encoded);
        testing.expectEqualSlices(u8, expected_decoded, decoded);
    }

    // Base64DecoderWithIgnore
    {
        const standard_decoder_ignore_nothing = Base64DecoderWithIgnore.init(standard_alphabet_chars, standard_pad_char, "");
        var buffer: [0x100]u8 = undefined;
        const decoded = buffer[0..Base64DecoderWithIgnore.calcSizeUpperBound(expected_encoded.len)];
        const written = try standard_decoder_ignore_nothing.decode(decoded, expected_encoded);
        testing.expect(written <= decoded.len);
        testing.expectEqualSlices(u8, expected_decoded, decoded[0..written]);
    }

    // Base64DecoderUnsafe
    {
        var buffer: [0x100]u8 = undefined;
        const decoded = buffer[0..standard_decoder_unsafe.calcSize(expected_encoded)];
        standard_decoder_unsafe.decode(decoded, expected_encoded);
        testing.expectEqualSlices(u8, expected_decoded, decoded);
    }
}

// Decodes `encoded` with a decoder that ignores spaces and checks the result.
fn testDecodeIgnoreSpace(expected_decoded: []const u8, encoded: []const u8) !void {
    const standard_decoder_ignore_space = Base64DecoderWithIgnore.init(standard_alphabet_chars, standard_pad_char, " ");
    var buffer: [0x100]u8 = undefined;
    const decoded = buffer[0..Base64DecoderWithIgnore.calcSizeUpperBound(encoded.len)];
    const written = try standard_decoder_ignore_space.decode(decoded, encoded);
    testing.expectEqualSlices(u8, expected_decoded, decoded[0..written]);
}

// Checks that both the strict decoder (calcSize or decode) and the
// space-ignoring decoder fail with `expected_err` on `encoded`.
fn testError(encoded: []const u8, expected_err: anyerror) !void {
    const standard_decoder_ignore_space = Base64DecoderWithIgnore.init(standard_alphabet_chars, standard_pad_char, " ");
    var buffer: [0x100]u8 = undefined;
    if (standard_decoder.calcSize(encoded)) |decoded_size| {
        const decoded = buffer[0..decoded_size];
        if (standard_decoder.decode(decoded, encoded)) |_| {
            return error.ExpectedError;
        } else |err| if (err != expected_err) return err;
    } else |err| if (err != expected_err) return err;

    if (standard_decoder_ignore_space.decode(buffer[0..], encoded)) |_| {
        return error.ExpectedError;
    } else |err| if (err != expected_err) return err;
}

// Checks that decoding into a buffer one byte too short yields error.OutputTooSmall.
fn testOutputTooSmallError(encoded: []const u8) !void {
    const standard_decoder_ignore_space = Base64DecoderWithIgnore.init(standard_alphabet_chars, standard_pad_char, " ");
    var buffer: [0x100]u8 = undefined;
    const decoded = buffer[0 .. calcDecodedSizeExactUnsafe(encoded, standard_pad_char) - 1];
    if (standard_decoder_ignore_space.decode(decoded, encoded)) |_| {
        return error.ExpectedError;
    } else |err| if (err != error.OutputTooSmall) return err;
}
