aboutsummaryrefslogtreecommitdiff
path: root/lib/std/base64.zig
diff options
context:
space:
mode:
author Andrew Kelley <andrew@ziglang.org> 2019-09-26 01:54:45 -0400
committer GitHub <noreply@github.com> 2019-09-26 01:54:45 -0400
commit 68bb3945708c43109c48bda3664176307d45b62c (patch)
tree afb9731e10cef9d192560b52cd9ae2cf179775c4 /lib/std/base64.zig
parent 6128bc728d1e1024a178c16c2149f5b1a167a013 (diff)
parent 4637e8f9699af9c3c6cf4df50ef5bb67c7a318a4 (diff)
download zig-68bb3945708c43109c48bda3664176307d45b62c.tar.gz
zig-68bb3945708c43109c48bda3664176307d45b62c.zip
Merge pull request #3315 from ziglang/mv-std-lib
Move std/ to lib/std/
Diffstat (limited to 'lib/std/base64.zig')
-rw-r--r-- lib/std/base64.zig 458
1 files changed, 458 insertions, 0 deletions
diff --git a/lib/std/base64.zig b/lib/std/base64.zig
new file mode 100644
index 0000000000..39ad811eb6
--- /dev/null
+++ b/lib/std/base64.zig
@@ -0,0 +1,458 @@
+const std = @import("std.zig");
+const assert = std.debug.assert;
+const testing = std.testing;
+const mem = std.mem;
+
+pub const standard_alphabet_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; // 64-character alphabet: A-Z, a-z, 0-9, '+', '/'
+pub const standard_pad_char = '='; // pad character paired with the alphabet above
+pub const standard_encoder = Base64Encoder.init(standard_alphabet_chars, standard_pad_char); // encoder built from the two constants above
+
+pub const Base64Encoder = struct {
+    /// The 64 output characters; `encode` emits `alphabet_chars[v]` for each 6-bit value `v`.
+    alphabet_chars: []const u8,
+    /// Character written in place of the missing output characters of a short final group.
+    pad_char: u8,
+
+    /// Checks the alphabet with assertions, then stores both arguments unchanged.
+    /// Asserts that `alphabet_chars` has exactly 64 entries, that no entry is repeated,
+    /// and that no entry equals `pad_char`.
+    pub fn init(alphabet_chars: []const u8, pad_char: u8) Base64Encoder {
+        assert(alphabet_chars.len == 64);
+        var seen = [_]bool{false} ** 256;
+        for (alphabet_chars) |c| {
+            assert(!seen[c]);
+            assert(c != pad_char);
+            seen[c] = true;
+        }
+
+        return Base64Encoder{
+            .alphabet_chars = alphabet_chars,
+            .pad_char = pad_char,
+        };
+    }
+
+    /// Returns the exact encoded length, padding included: ceil(source_len / 3) * 4.
+    /// Every started group of 3 input bytes produces 4 output characters,
+    /// so e.g. 1 input byte needs 4 characters (not ceil(4/3) = 2).
+    pub fn calcSize(source_len: usize) usize {
+        return @divTrunc(source_len + 2, 3) * 4;
+    }
+
+    /// Encodes `source` into `dest`.
+    /// dest.len must be what you get from ::calcSize (asserted).
+    pub fn encode(encoder: *const Base64Encoder, dest: []u8, source: []const u8) void {
+        assert(dest.len == Base64Encoder.calcSize(source.len));
+
+        const alphabet = encoder.alphabet_chars;
+        var in_pos: usize = 0;
+        var out_pos: usize = 0;
+
+        // Complete groups: 3 input bytes (24 bits) become 4 characters of 6 bits each.
+        while (in_pos + 3 <= source.len) : (in_pos += 3) {
+            const b0 = source[in_pos];
+            const b1 = source[in_pos + 1];
+            const b2 = source[in_pos + 2];
+
+            dest[out_pos + 0] = alphabet[(b0 >> 2) & 0x3f];
+            dest[out_pos + 1] = alphabet[((b0 & 0x3) << 4) | ((b1 & 0xf0) >> 4)];
+            dest[out_pos + 2] = alphabet[((b1 & 0xf) << 2) | ((b2 & 0xc0) >> 6)];
+            dest[out_pos + 3] = alphabet[b2 & 0x3f];
+            out_pos += 4;
+        }
+
+        // Final partial group: the unused low bits are zero and the group is
+        // filled up to 4 characters with the pad character.
+        switch (source.len - in_pos) {
+            0 => {},
+            1 => {
+                const b0 = source[in_pos];
+                dest[out_pos + 0] = alphabet[(b0 >> 2) & 0x3f];
+                dest[out_pos + 1] = alphabet[(b0 & 0x3) << 4];
+                dest[out_pos + 2] = encoder.pad_char;
+                dest[out_pos + 3] = encoder.pad_char;
+            },
+            2 => {
+                const b0 = source[in_pos];
+                const b1 = source[in_pos + 1];
+                dest[out_pos + 0] = alphabet[(b0 >> 2) & 0x3f];
+                dest[out_pos + 1] = alphabet[((b0 & 0x3) << 4) | ((b1 & 0xf0) >> 4)];
+                dest[out_pos + 2] = alphabet[(b1 & 0xf) << 2];
+                dest[out_pos + 3] = encoder.pad_char;
+            },
+            // The loop above only stops when fewer than 3 bytes remain.
+            else => unreachable,
+        }
+    }
+};
+
+pub const standard_decoder = Base64Decoder.init(standard_alphabet_chars, standard_pad_char); // validating decoder for the standard alphabet and pad character
+
+pub const Base64Decoder = struct {
+    /// Maps an alphabet character to its 6-bit value, e.g. 'A' => 0.
+    /// Entries for bytes that are not one of the 64 alphabet chars stay undefined.
+    char_to_index: [256]u8,
+
+    /// Set for exactly the 64 alphabet chars; the pad char is not included.
+    char_in_alphabet: [256]bool,
+    pad_char: u8,
+
+    /// Builds both lookup tables from `alphabet_chars`.
+    /// Asserts that the alphabet has 64 entries, none repeated and none equal to `pad_char`.
+    pub fn init(alphabet_chars: []const u8, pad_char: u8) Base64Decoder {
+        assert(alphabet_chars.len == 64);
+
+        var decoder = Base64Decoder{
+            .char_to_index = undefined,
+            .char_in_alphabet = [_]bool{false} ** 256,
+            .pad_char = pad_char,
+        };
+
+        for (alphabet_chars) |char, value| {
+            assert(!decoder.char_in_alphabet[char]);
+            assert(char != pad_char);
+
+            decoder.char_in_alphabet[char] = true;
+            decoder.char_to_index[char] = @intCast(u8, value);
+        }
+
+        return decoder;
+    }
+
+    /// Returns the decoded size of `source`.
+    /// A length that is not a multiple of 4 yields error.InvalidPadding.
+    pub fn calcSize(decoder: *const Base64Decoder, source: []const u8) !usize {
+        if (source.len % 4 == 0) {
+            return calcDecodedSizeExactUnsafe(source, decoder.pad_char);
+        }
+        return error.InvalidPadding;
+    }
+
+    /// Decodes `source` into `dest`.
+    /// dest.len must be what you get from ::calcSize (asserted).
+    /// Characters outside the alphabet result in error.InvalidCharacter.
+    /// Non-zero leftover bits in a padded final group result in error.InvalidPadding.
+    pub fn decode(decoder: *const Base64Decoder, dest: []u8, source: []const u8) !void {
+        assert(dest.len == (decoder.calcSize(source) catch unreachable));
+        assert(source.len % 4 == 0);
+
+        var in_pos: usize = 0;
+        var out_pos: usize = 0;
+
+        while (in_pos < source.len) : (in_pos += 4) {
+            const c0 = source[in_pos + 0];
+            const c1 = source[in_pos + 1];
+            const c2 = source[in_pos + 2];
+            const c3 = source[in_pos + 3];
+
+            // The first two characters of a group can never be padding.
+            if (!decoder.char_in_alphabet[c0]) return error.InvalidCharacter;
+            if (!decoder.char_in_alphabet[c1]) return error.InvalidCharacter;
+            const v0 = decoder.char_to_index[c0];
+            const v1 = decoder.char_to_index[c1];
+
+            // Padding is only interpreted in the last group of the input.
+            const is_final_group = in_pos + 4 == source.len;
+
+            if (!is_final_group or c3 != decoder.pad_char) {
+                // Full group: 4 characters -> 3 bytes.
+                if (!decoder.char_in_alphabet[c2]) return error.InvalidCharacter;
+                if (!decoder.char_in_alphabet[c3]) return error.InvalidCharacter;
+                const v2 = decoder.char_to_index[c2];
+                const v3 = decoder.char_to_index[c3];
+                dest[out_pos + 0] = v0 << 2 | v1 >> 4;
+                dest[out_pos + 1] = v1 << 4 | v2 >> 2;
+                dest[out_pos + 2] = v2 << 6 | v3;
+                out_pos += 3;
+            } else if (c2 != decoder.pad_char) {
+                // One pad char: 3 characters -> 2 bytes.
+                if (!decoder.char_in_alphabet[c2]) return error.InvalidCharacter;
+                const v2 = decoder.char_to_index[c2];
+                dest[out_pos + 0] = v0 << 2 | v1 >> 4;
+                dest[out_pos + 1] = v1 << 4 | v2 >> 2;
+                // The bits that would spill into a third byte must be zero.
+                if (v2 << 6 != 0) return error.InvalidPadding;
+                out_pos += 2;
+            } else {
+                // Two pad chars: 2 characters -> 1 byte.
+                dest[out_pos + 0] = v0 << 2 | v1 >> 4;
+                // The bits that would spill into a second byte must be zero.
+                if (v1 << 4 != 0) return error.InvalidPadding;
+                out_pos += 1;
+            }
+        }
+
+        assert(in_pos == source.len);
+        assert(out_pos == dest.len);
+    }
+};
+
+pub const Base64DecoderWithIgnore = struct { // validating decoder that additionally skips a caller-chosen set of characters
+ decoder: Base64Decoder, // alphabet lookup tables and pad character
+ char_is_ignored: [256]bool, // true for every byte passed in ignore_chars
+ pub fn init(alphabet_chars: []const u8, pad_char: u8, ignore_chars: []const u8) Base64DecoderWithIgnore { // asserts each ignore char is unique, not in the alphabet, and not the pad char
+ var result = Base64DecoderWithIgnore{
+ .decoder = Base64Decoder.init(alphabet_chars, pad_char),
+ .char_is_ignored = [_]bool{false} ** 256,
+ };
+
+ for (ignore_chars) |c| {
+ assert(!result.decoder.char_in_alphabet[c]);
+ assert(!result.char_is_ignored[c]);
+ assert(result.decoder.pad_char != c);
+ result.char_is_ignored[c] = true;
+ }
+
+ return result;
+ }
+
+ /// If no characters end up being ignored or padding, this will be the exact decoded size.
+ pub fn calcSizeUpperBound(encoded_len: usize) usize {
+ return @divTrunc(encoded_len, 4) * 3; // 3 bytes per complete group of 4 input chars
+ }
+
+ /// Invalid characters that are not ignored result in error.InvalidCharacter.
+ /// Invalid padding results in error.InvalidPadding.
+ /// Decoding more data than can fit in dest results in error.OutputTooSmall. See also ::calcSizeUpperBound.
+ /// Returns the number of bytes written to dest.
+ pub fn decode(decoder_with_ignore: *const Base64DecoderWithIgnore, dest: []u8, source: []const u8) !usize {
+ const decoder = &decoder_with_ignore.decoder;
+
+ var src_cursor: usize = 0;
+ var dest_cursor: usize = 0;
+
+ while (true) { // one iteration per group of up to 4 alphabet chars
+ // get the next 4 chars, if available
+ var next_4_chars: [4]u8 = undefined;
+ var available_chars: usize = 0;
+ var pad_char_count: usize = 0;
+ while (available_chars < 4 and src_cursor < source.len) {
+ var c = source[src_cursor];
+ src_cursor += 1;
+
+ if (decoder.char_in_alphabet[c]) {
+ // normal char
+ next_4_chars[available_chars] = c;
+ available_chars += 1;
+ } else if (decoder_with_ignore.char_is_ignored[c]) {
+ // we're told to skip this one
+ continue;
+ } else if (c == decoder.pad_char) {
+ // the padding has begun. count the pad chars.
+ pad_char_count += 1;
+ while (src_cursor < source.len) { // after the first pad char, only pad chars and ignored chars may follow
+ c = source[src_cursor];
+ src_cursor += 1;
+ if (c == decoder.pad_char) {
+ pad_char_count += 1;
+ if (pad_char_count > 2) return error.InvalidCharacter; // a third pad char is reported as an invalid character
+ } else if (decoder_with_ignore.char_is_ignored[c]) {
+ // we can even ignore chars during the padding
+ continue;
+ } else
+ return error.InvalidCharacter;
+ }
+ break; // the loop above consumed the rest of source
+ } else
+ return error.InvalidCharacter;
+ }
+
+ switch (available_chars) { // number of alphabet chars collected for this group
+ 4 => {
+ // common case
+ if (dest_cursor + 3 > dest.len) return error.OutputTooSmall;
+ assert(pad_char_count == 0);
+ dest[dest_cursor + 0] = decoder.char_to_index[next_4_chars[0]] << 2 | decoder.char_to_index[next_4_chars[1]] >> 4;
+ dest[dest_cursor + 1] = decoder.char_to_index[next_4_chars[1]] << 4 | decoder.char_to_index[next_4_chars[2]] >> 2;
+ dest[dest_cursor + 2] = decoder.char_to_index[next_4_chars[2]] << 6 | decoder.char_to_index[next_4_chars[3]];
+ dest_cursor += 3;
+ continue;
+ },
+ 3 => { // 3 chars must be followed by exactly one pad char; yields 2 bytes
+ if (dest_cursor + 2 > dest.len) return error.OutputTooSmall;
+ if (pad_char_count != 1) return error.InvalidPadding;
+ dest[dest_cursor + 0] = decoder.char_to_index[next_4_chars[0]] << 2 | decoder.char_to_index[next_4_chars[1]] >> 4;
+ dest[dest_cursor + 1] = decoder.char_to_index[next_4_chars[1]] << 4 | decoder.char_to_index[next_4_chars[2]] >> 2;
+ if (decoder.char_to_index[next_4_chars[2]] << 6 != 0) return error.InvalidPadding; // bits that would spill into a third byte must be zero
+ dest_cursor += 2;
+ break;
+ },
+ 2 => { // 2 chars must be followed by exactly two pad chars; yields 1 byte
+ if (dest_cursor + 1 > dest.len) return error.OutputTooSmall;
+ if (pad_char_count != 2) return error.InvalidPadding;
+ dest[dest_cursor + 0] = decoder.char_to_index[next_4_chars[0]] << 2 | decoder.char_to_index[next_4_chars[1]] >> 4;
+ if (decoder.char_to_index[next_4_chars[1]] << 4 != 0) return error.InvalidPadding; // bits that would spill into a second byte must be zero
+ dest_cursor += 1;
+ break;
+ },
+ 1 => { // a single leftover char is always rejected
+ return error.InvalidPadding;
+ },
+ 0 => { // end of input on a group boundary; pad chars here have nothing to pad
+ if (pad_char_count != 0) return error.InvalidPadding;
+ break;
+ },
+ else => unreachable,
+ }
+ }
+
+ assert(src_cursor == source.len);
+
+ return dest_cursor;
+ }
+};
+
+pub const standard_decoder_unsafe = Base64DecoderUnsafe.init(standard_alphabet_chars, standard_pad_char); // non-validating decoder for the standard alphabet and pad character
+
+pub const Base64DecoderUnsafe = struct {
+    /// e.g. 'A' => 0.
+    /// undefined for any value not in the 64 alphabet chars.
+    char_to_index: [256]u8,
+    pad_char: u8,
+
+    /// Builds the lookup table from `alphabet_chars`.
+    /// Asserts that the alphabet has 64 entries, none repeated and none equal to `pad_char`.
+    pub fn init(alphabet_chars: []const u8, pad_char: u8) Base64DecoderUnsafe {
+        assert(alphabet_chars.len == 64);
+        var result = Base64DecoderUnsafe{
+            .char_to_index = undefined,
+            .pad_char = pad_char,
+        };
+        // Only used to catch a repeated alphabet char, which would otherwise
+        // silently overwrite an earlier table entry.
+        var char_in_alphabet = [_]bool{false} ** 256;
+        for (alphabet_chars) |c, i| {
+            assert(!char_in_alphabet[c]);
+            assert(c != pad_char);
+            char_in_alphabet[c] = true;
+            result.char_to_index[c] = @intCast(u8, i);
+        }
+        return result;
+    }
+
+    /// Returns the decoded size of `source`.
+    /// The source buffer must be valid.
+    pub fn calcSize(decoder: *const Base64DecoderUnsafe, source: []const u8) usize {
+        return calcDecodedSizeExactUnsafe(source, decoder.pad_char);
+    }
+
+    /// Decodes `source` into `dest` without validating the input.
+    /// dest.len must be what you get from ::calcSize (asserted).
+    /// invalid characters or padding will result in undefined values.
+    pub fn decode(decoder: *const Base64DecoderUnsafe, dest: []u8, source: []const u8) void {
+        assert(dest.len == decoder.calcSize(source));
+
+        var src_index: usize = 0;
+        var dest_index: usize = 0;
+        var in_buf_len: usize = source.len;
+
+        // Drop trailing pad chars; only the remaining chars carry data.
+        while (in_buf_len > 0 and source[in_buf_len - 1] == decoder.pad_char) {
+            in_buf_len -= 1;
+        }
+
+        // Every group except the last one: 4 chars -> 3 bytes.
+        while (in_buf_len > 4) {
+            dest[dest_index] = decoder.char_to_index[source[src_index + 0]] << 2 | decoder.char_to_index[source[src_index + 1]] >> 4;
+            dest_index += 1;
+
+            dest[dest_index] = decoder.char_to_index[source[src_index + 1]] << 4 | decoder.char_to_index[source[src_index + 2]] >> 2;
+            dest_index += 1;
+
+            dest[dest_index] = decoder.char_to_index[source[src_index + 2]] << 6 | decoder.char_to_index[source[src_index + 3]];
+            dest_index += 1;
+
+            src_index += 4;
+            in_buf_len -= 4;
+        }
+
+        // Last group: 2, 3 or 4 remaining chars produce 1, 2 or 3 bytes.
+        if (in_buf_len > 1) {
+            dest[dest_index] = decoder.char_to_index[source[src_index + 0]] << 2 | decoder.char_to_index[source[src_index + 1]] >> 4;
+            dest_index += 1;
+        }
+        if (in_buf_len > 2) {
+            dest[dest_index] = decoder.char_to_index[source[src_index + 1]] << 4 | decoder.char_to_index[source[src_index + 2]] >> 2;
+            dest_index += 1;
+        }
+        if (in_buf_len > 3) {
+            dest[dest_index] = decoder.char_to_index[source[src_index + 2]] << 6 | decoder.char_to_index[source[src_index + 3]];
+            dest_index += 1;
+        }
+    }
+};
+
+/// Returns the decoded size of `source`: 3 bytes per group of 4 chars,
+/// minus one byte for each of up to two trailing pad chars.
+/// Performs no validation; a non-empty `source` must have a length that is a multiple of 4.
+fn calcDecodedSizeExactUnsafe(source: []const u8, pad_char: u8) usize {
+    if (source.len == 0) return 0;
+
+    const last = source.len - 1;
+    const unpadded_size = @divExact(source.len, 4) * 3;
+
+    if (source[last] != pad_char) return unpadded_size;
+    if (source[last - 1] != pad_char) return unpadded_size - 1;
+    return unpadded_size - 2;
+}
+
+test "base64" {
+ @setEvalBranchQuota(8000); // raise the compile-time branch limit for the comptime run below
+ testBase64() catch unreachable; // run-time pass; any returned error is treated as unreachable
+ comptime (testBase64() catch unreachable); // same checks evaluated at compile time
+}
+
+fn testBase64() !void { // runs every test vector below; returns the first unexpected error
+ try testAllApis("", ""); // (decoded, encoded) pairs checked against the encoder and all three decoders
+ try testAllApis("f", "Zg==");
+ try testAllApis("fo", "Zm8=");
+ try testAllApis("foo", "Zm9v");
+ try testAllApis("foob", "Zm9vYg==");
+ try testAllApis("fooba", "Zm9vYmE=");
+ try testAllApis("foobar", "Zm9vYmFy");
+
+ try testDecodeIgnoreSpace("", " "); // same data with spaces mixed in, decoded while ignoring ' '
+ try testDecodeIgnoreSpace("f", "Z g= =");
+ try testDecodeIgnoreSpace("fo", " Zm8=");
+ try testDecodeIgnoreSpace("foo", "Zm9v ");
+ try testDecodeIgnoreSpace("foob", "Zm9vYg = = ");
+ try testDecodeIgnoreSpace("fooba", "Zm9v YmE=");
+ try testDecodeIgnoreSpace("foobar", " Z m 9 v Y m F y ");
+
+ // test getting some api errors
+ try testError("A", error.InvalidPadding); // length is not a multiple of 4
+ try testError("AA", error.InvalidPadding);
+ try testError("AAA", error.InvalidPadding);
+ try testError("A..A", error.InvalidCharacter); // '.' is neither in the alphabet nor ignored
+ try testError("AA=A", error.InvalidCharacter); // alphabet char after a pad char
+ try testError("AA/=", error.InvalidPadding); // non-zero leftover bits before the padding
+ try testError("A/==", error.InvalidPadding);
+ try testError("A===", error.InvalidCharacter); // too many pad chars
+ try testError("====", error.InvalidCharacter);
+
+ try testOutputTooSmallError("AA=="); // each input is decoded into a buffer one byte too short
+ try testOutputTooSmallError("AAA=");
+ try testOutputTooSmallError("AAAA");
+ try testOutputTooSmallError("AAAAAA==");
+}
+
+/// Checks one (decoded, encoded) pair against the encoder and each of the three decoders.
+fn testAllApis(expected_decoded: []const u8, expected_encoded: []const u8) !void {
+    // Encoding with Base64Encoder must produce expected_encoded.
+    {
+        var scratch: [0x100]u8 = undefined;
+        const encoded_len = Base64Encoder.calcSize(expected_decoded.len);
+        const encoded = scratch[0..encoded_len];
+        standard_encoder.encode(encoded, expected_decoded);
+        testing.expectEqualSlices(u8, expected_encoded, encoded);
+    }
+
+    // Decoding with Base64Decoder must produce expected_decoded.
+    {
+        var scratch: [0x100]u8 = undefined;
+        const decoded_len = try standard_decoder.calcSize(expected_encoded);
+        const decoded = scratch[0..decoded_len];
+        try standard_decoder.decode(decoded, expected_encoded);
+        testing.expectEqualSlices(u8, expected_decoded, decoded);
+    }
+
+    // Base64DecoderWithIgnore with an empty ignore set must agree as well.
+    {
+        const ignore_nothing_decoder = Base64DecoderWithIgnore.init(standard_alphabet_chars, standard_pad_char, "");
+        var scratch: [0x100]u8 = undefined;
+        const upper_bound = Base64DecoderWithIgnore.calcSizeUpperBound(expected_encoded.len);
+        const decoded = scratch[0..upper_bound];
+        const written = try ignore_nothing_decoder.decode(decoded, expected_encoded);
+        testing.expect(written <= decoded.len);
+        testing.expectEqualSlices(u8, expected_decoded, decoded[0..written]);
+    }
+
+    // Decoding with Base64DecoderUnsafe must produce expected_decoded.
+    {
+        var scratch: [0x100]u8 = undefined;
+        const decoded_len = standard_decoder_unsafe.calcSize(expected_encoded);
+        const decoded = scratch[0..decoded_len];
+        standard_decoder_unsafe.decode(decoded, expected_encoded);
+        testing.expectEqualSlices(u8, expected_decoded, decoded);
+    }
+}
+
+/// Decodes `encoded` with a decoder that ignores ' ' and checks the result against `expected_decoded`.
+fn testDecodeIgnoreSpace(expected_decoded: []const u8, encoded: []const u8) !void {
+    const ignore_space_decoder = Base64DecoderWithIgnore.init(standard_alphabet_chars, standard_pad_char, " ");
+    var scratch: [0x100]u8 = undefined;
+    const upper_bound = Base64DecoderWithIgnore.calcSizeUpperBound(encoded.len);
+    const decoded = scratch[0..upper_bound];
+    const written = try ignore_space_decoder.decode(decoded, encoded);
+    testing.expectEqualSlices(u8, expected_decoded, decoded[0..written]);
+}
+
+/// Checks that both the strict decoder and the space-ignoring decoder reject `encoded`
+/// with `expected_err`. Returns error.ExpectedError if a decode succeeds, or the
+/// actual error if it differs from `expected_err`.
+fn testError(encoded: []const u8, expected_err: anyerror) !void {
+    const ignore_space_decoder = Base64DecoderWithIgnore.init(standard_alphabet_chars, standard_pad_char, " ");
+    var scratch: [0x100]u8 = undefined;
+
+    // Strict decoder: the failure may come from calcSize or from decode.
+    if (standard_decoder.calcSize(encoded)) |decoded_size| {
+        const decoded = scratch[0..decoded_size];
+        if (standard_decoder.decode(decoded, encoded)) |_| {
+            return error.ExpectedError;
+        } else |err| {
+            if (err != expected_err) return err;
+        }
+    } else |err| {
+        if (err != expected_err) return err;
+    }
+
+    // Space-ignoring decoder, given the whole scratch buffer as output.
+    if (ignore_space_decoder.decode(scratch[0..], encoded)) |_| {
+        return error.ExpectedError;
+    } else |err| {
+        if (err != expected_err) return err;
+    }
+}
+
+/// Decodes `encoded` into a buffer one byte shorter than its exact decoded size and
+/// expects error.OutputTooSmall. Returns error.ExpectedError if the decode succeeds,
+/// or the actual error if it is a different one.
+fn testOutputTooSmallError(encoded: []const u8) !void {
+    const ignore_space_decoder = Base64DecoderWithIgnore.init(standard_alphabet_chars, standard_pad_char, " ");
+    var scratch: [0x100]u8 = undefined;
+    const exact_size = calcDecodedSizeExactUnsafe(encoded, standard_pad_char);
+    const too_small = scratch[0 .. exact_size - 1];
+    if (ignore_space_decoder.decode(too_small, encoded)) |_| {
+        return error.ExpectedError;
+    } else |err| {
+        if (err != error.OutputTooSmall) return err;
+    }
+}