aboutsummaryrefslogtreecommitdiff
path: root/lib/std/tz.zig
diff options
context:
space:
mode:
author Jens Goldberg <jens.goldberg@gmail.com> 2021-12-31 17:17:49 +0000
committer Jens Goldberg <jens.goldberg@gmail.com> 2021-12-31 17:17:49 +0000
commit 9a564356661d31bc7fad5b670bad8ebeecc5dad4 (patch)
tree 31348593f1a1365e5a41316c62dfffb5d831c8f6 /lib/std/tz.zig
parent e4672c95f116eefd0a87a1f857062017e6a7fd97 (diff)
download zig-9a564356661d31bc7fad5b670bad8ebeecc5dad4.tar.gz
zig-9a564356661d31bc7fad5b670bad8ebeecc5dad4.zip
tz parsing reader interface, test thicc files, and exclude tzif
Diffstat (limited to 'lib/std/tz.zig')
-rw-r--r-- lib/std/tz.zig 208
1 files changed, 112 insertions, 96 deletions
diff --git a/lib/std/tz.zig b/lib/std/tz.zig
index 2680a8d48c..528150d1a2 100644
--- a/lib/std/tz.zig
+++ b/lib/std/tz.zig
@@ -40,160 +40,163 @@ pub const Tz = struct {
leapseconds: []const Leapsecond,
footer: []const u8,
- pub fn parse(allocator: std.mem.Allocator, data: []const u8) !Tz {
- const header_size = 4 + 1 + 15 + 6 * 4;
- if (data.len < header_size) return error.BadSize;
-
- const magic_l = data[0..4];
- const version_l = data[4];
- if (!std.mem.eql(u8, magic_l, "TZif")) return error.BadHeader;
- if (version_l != '2' and version_l != '3') return error.BadVersion;
-
- // Parse the legacy header and skip the entire thing
- const isutcnt_l = std.mem.readIntBig(u32, data[20..24]);
- const isstdcnt_l = std.mem.readIntBig(u32, data[24..28]);
- const leapcnt_l = std.mem.readIntBig(u32, data[28..32]);
- const timecnt_l = std.mem.readIntBig(u32, data[32..36]);
- const typecnt_l = std.mem.readIntBig(u32, data[36..40]);
- const charcnt_l = std.mem.readIntBig(u32, data[40..44]);
- const data_block_size_legacy = timecnt_l * 5 + typecnt_l * 6 + charcnt_l + leapcnt_l * 8 + isstdcnt_l + isutcnt_l;
- if (data.len < header_size + data_block_size_legacy) return error.BadSize;
-
- const data2 = data[header_size + data_block_size_legacy ..];
- if (data2.len < header_size) return error.BadSize;
-
- const magic = data2[0..4];
- const version = data2[4];
- if (!std.mem.eql(u8, magic, "TZif")) return error.BadHeader;
- if (version != '2' and version != '3') return error.BadVersion;
-
- const isutcnt = std.mem.readIntBig(u32, data2[20..24]);
- const isstdcnt = std.mem.readIntBig(u32, data2[24..28]);
- const leapcnt = std.mem.readIntBig(u32, data2[28..32]);
- const timecnt = std.mem.readIntBig(u32, data2[32..36]);
- const typecnt = std.mem.readIntBig(u32, data2[36..40]);
- const charcnt = std.mem.readIntBig(u32, data2[40..44]);
-
- if (isstdcnt != 0 and isstdcnt != typecnt) return error.Malformed; // rfc8536: isstdcnt [...] MUST either be zero or equal to "typecnt"
- if (isutcnt != 0 and isutcnt != typecnt) return error.Malformed; // rfc8536: isutcnt [...] MUST either be zero or equal to "typecnt"
- if (typecnt == 0) return error.Malformed; // rfc8536: typecnt [...] MUST NOT be zero
- if (charcnt == 0) return error.Malformed; // rfc8536: charcnt [...] MUST NOT be zero
-
- const data_block_size = timecnt * 9 + typecnt * 6 + charcnt + leapcnt * 12 + isstdcnt + isutcnt;
- if (data2.len < header_size + data_block_size) return error.BadSize;
-
- var leapseconds = try allocator.alloc(Leapsecond, leapcnt);
+ pub fn parse(allocator: std.mem.Allocator, reader: anytype) !Tz {
+ _ = allocator;
+ const Header = extern struct {
+ magic: [4]u8,
+ version: u8,
+ reserved: [15]u8,
+ };
+
+ const Counts = extern struct {
+ isutcnt: u32,
+ isstdcnt: u32,
+ leapcnt: u32,
+ timecnt: u32,
+ typecnt: u32,
+ charcnt: u32,
+ };
+
+ // Parse and skip the legacy header and data
+ {
+ const header = try reader.readStruct(Header);
+ if (!std.mem.eql(u8, &header.magic, "TZif")) return error.BadHeader;
+ if (header.version == 0) return error.UnsupportedLegacyFormat;
+ if (header.version != '2' and header.version != '3') return error.BadVersion;
+
+ var counts = try reader.readStruct(Counts);
+ if (builtin.target.cpu.arch.endian() != std.builtin.Endian.Big) {
+ std.mem.bswapAllFields(Counts, &counts);
+ }
+
+ const skipv = counts.timecnt * 5 + counts.typecnt * 6 + counts.charcnt + counts.leapcnt * 8 + counts.isstdcnt + counts.isutcnt;
+ try reader.skipBytes(skipv, .{});
+ }
+
+ const header = try reader.readStruct(Header);
+ if (!std.mem.eql(u8, &header.magic, "TZif")) return error.BadHeader;
+ if (header.version != '2' and header.version != '3') return error.BadVersion;
+
+ var counts = try reader.readStruct(Counts);
+ if (builtin.target.cpu.arch.endian() != std.builtin.Endian.Big) {
+ std.mem.bswapAllFields(Counts, &counts);
+ }
+
+ if (counts.isstdcnt != 0 and counts.isstdcnt != counts.typecnt) return error.Malformed; // rfc8536: isstdcnt [...] MUST either be zero or equal to "typecnt"
+ if (counts.isutcnt != 0 and counts.isutcnt != counts.typecnt) return error.Malformed; // rfc8536: isutcnt [...] MUST either be zero or equal to "typecnt"
+ if (counts.typecnt == 0) return error.Malformed; // rfc8536: typecnt [...] MUST NOT be zero
+ if (counts.charcnt == 0) return error.Malformed; // rfc8536: charcnt [...] MUST NOT be zero
+ if (counts.charcnt > 256 + 6) return error.Malformed; // Not explicitly banned by rfc8536 but nonsensical
+
+ var leapseconds = try allocator.alloc(Leapsecond, counts.leapcnt);
errdefer allocator.free(leapseconds);
- var transitions = try allocator.alloc(Transition, timecnt);
+ var transitions = try allocator.alloc(Transition, counts.timecnt);
errdefer allocator.free(transitions);
- var timetypes = try allocator.alloc(Timetype, typecnt);
+ var timetypes = try allocator.alloc(Timetype, counts.typecnt);
errdefer allocator.free(timetypes);
- var p: usize = header_size;
-
- // First, parse timezone designators ahead of time so that we can reject malformed files early
- const designators = data2[header_size + timecnt * 9 + typecnt * 6 .. header_size + timecnt * 9 + typecnt * 6 + charcnt];
- if (designators[designators.len - 1] != 0) return error.Malformed; // rfc8536: charcnt [...] includes the trailing NUL (0x00) octet
-
// Parse transition types
var i: usize = 0;
- while (i < timecnt) : (i += 1) {
- transitions[i].ts = std.mem.readIntSliceBig(i64, data2[p .. p + 8]);
- p += 8;
+ while (i < counts.timecnt) : (i += 1) {
+ transitions[i].ts = try reader.readIntBig(i64);
}
i = 0;
- while (i < timecnt) : (i += 1) {
- const tt = data2[p];
+ while (i < counts.timecnt) : (i += 1) {
+ const tt = try reader.readByte();
if (tt >= timetypes.len) return error.Malformed; // rfc8536: Each type index MUST be in the range [0, "typecnt" - 1]
transitions[i].timetype = &timetypes[tt];
- p += 1;
}
// Parse time types
i = 0;
- while (i < typecnt) : (i += 1) {
- const offset = std.mem.readIntSliceBig(i32, data2[p .. p + 4]);
+ while (i < counts.typecnt) : (i += 1) {
+ const offset = try reader.readIntBig(i32);
if (offset < -2147483648) return error.Malformed; // rfc8536: utoff [...] MUST NOT be -2**31
- const dst = data2[p + 4];
+ const dst = try reader.readByte();
if (dst != 0 and dst != 1) return error.Malformed; // rfc8536: (is)dst [...] The value MUST be 0 or 1.
- const idx = data2[p + 5];
- if (idx > designators.len - 1) return error.Malformed; // rfc8536: (desig)idx [...] Each index MUST be in the range [0, "charcnt" - 1]
-
- const name = std.mem.sliceTo(designators[idx..], 0);
-
- // We are mandating the "SHOULD" 6-character limit so we can pack the struct better, and to conform to POSIX.
- if (name.len > 6) return error.Malformed; // rfc8536: Time zone designations SHOULD consist of at least three (3) and no more than six (6) ASCII characters.
-
+ const idx = try reader.readByte();
+ if (idx > counts.charcnt - 1) return error.Malformed; // rfc8536: (desig)idx [...] Each index MUST be in the range [0, "charcnt" - 1]
timetypes[i] = .{
.offset = offset,
.flags = dst,
.name_data = undefined,
};
- std.mem.copy(u8, timetypes[i].name_data[0..], name);
- timetypes[i].name_data[name.len] = 0;
-
- p += 6;
+ // Temporarily cache idx in name_data to be processed after we've read the designator names below
+ timetypes[i].name_data[0] = idx;
}
- // Skip the designators we got earlier
- p += charcnt;
+ var designators_data: [256 + 6]u8 = undefined;
+ try reader.readNoEof(designators_data[0..counts.charcnt]);
+ const designators = designators_data[0..counts.charcnt];
+ if (designators[designators.len - 1] != 0) return error.Malformed; // rfc8536: charcnt [...] includes the trailing NUL (0x00) octet
+
+ // Iterate through the timetypes again, setting the designator names
+ for (timetypes) |*tt| {
+ const name = std.mem.sliceTo(designators[tt.name_data[0]..], 0);
+ // We are mandating the "SHOULD" 6-character limit so we can pack the struct better, and to conform to POSIX.
+ if (name.len > 6) return error.Malformed; // rfc8536: Time zone designations SHOULD consist of at least three (3) and no more than six (6) ASCII characters.
+ std.mem.copy(u8, tt.name_data[0..], name);
+ tt.name_data[name.len] = 0;
+ }
// Parse leap seconds
i = 0;
- while (i < leapcnt) : (i += 1) {
- const occur = std.mem.readIntSliceBig(i64, data2[p .. p + 8]);
+ while (i < counts.leapcnt) : (i += 1) {
+ const occur = try reader.readIntBig(i64);
if (occur < 0) return error.Malformed; // rfc8536: occur [...] MUST be nonnegative
if (i > 0 and leapseconds[i - 1].occurrence + 2419199 > occur) return error.Malformed; // rfc8536: occur [...] each later value MUST be at least 2419199 greater than the previous value
if (occur > std.math.maxInt(i48)) return error.Malformed; // Unreasonably far into the future
- const corr = std.mem.readIntSliceBig(i32, data2[p + 8 .. p + 12]);
+ const corr = try reader.readIntBig(i32);
if (i == 0 and corr != -1 and corr != 1) return error.Malformed; // rfc8536: The correction value in the first leap-second record, if present, MUST be either one (1) or minus one (-1)
- if (i > 0 and leapseconds[i - 1].correction != corr + 1 and leapseconds[i - 1].correction != corr - 1) return error.Malformed; // rfc8536: The correction values in adjacent leap- second records MUST differ by exactly one (1)
+ if (i > 0 and leapseconds[i - 1].correction != corr + 1 and leapseconds[i - 1].correction != corr - 1) return error.Malformed; // rfc8536: The correction values in adjacent leap-second records MUST differ by exactly one (1)
if (corr > std.math.maxInt(i16)) return error.Malformed; // Unreasonably large correction
leapseconds[i] = .{
.occurrence = @intCast(i48, occur),
.correction = @intCast(i16, corr),
};
- p += 12;
}
// Parse standard/wall indicators
i = 0;
- while (i < isstdcnt) : (i += 1) {
- const stdtime = data2[p];
+ while (i < counts.isstdcnt) : (i += 1) {
+ const stdtime = try reader.readByte();
if (stdtime == 1) {
timetypes[i].flags |= 0x02;
}
- p += 1;
}
// Parse UT/local indicators
i = 0;
- while (i < isutcnt) : (i += 1) {
- const ut = data2[p];
+ while (i < counts.isutcnt) : (i += 1) {
+ const ut = try reader.readByte();
if (ut == 1) {
timetypes[i].flags |= 0x04;
if (!timetypes[i].standardTimeIndicator()) return error.Malformed; // rfc8536: standard/wall value MUST be one (1) if the UT/local value is one (1)
}
- p += 1;
}
+ if ((try reader.readByte()) != '\n') return error.Malformed; // An rfc8536 footer must start with a newline
+
// Footer
- if (data2[p..].len < 2) return error.Malformed; // rfc8536 requires at least 2 newlines
- if (data2[p] != '\n') return error.Malformed; // Not a rfc8536 footer
- const footer_end = std.mem.indexOfScalar(u8, data2[p + 1 ..], '\n') orelse return error.Malformed; // No 2nd rfc8536 newline
- const footer = try allocator.dupe(u8, data2[p + 1 .. p + 1 + footer_end]);
- errdefer allocator.free(footer);
+ var footerdata_buf: [128]u8 = undefined;
+ const footer = reader.readUntilDelimiter(&footerdata_buf, '\n') catch |err| switch (err) {
+ error.StreamTooLong => return error.OverlargeFooter, // Read more than 128 bytes, much larger than any reasonable POSIX TZ string
+ else => return err,
+ };
+
+ const footer_dup = try allocator.dupe(u8, footer);
+ errdefer allocator.free(footer_dup);
return Tz{
.allocator = allocator,
.transitions = transitions,
.timetypes = timetypes,
.leapseconds = leapseconds,
- .footer = footer,
+ .footer = footer_dup,
};
}
@@ -205,14 +208,27 @@ pub const Tz = struct {
}
};
-test "parse" {
- // Asia/Tokyo is good for embedding, as Japan only had DST for a short while during the US occupation
+test "slim" {
const data = @embedFile("tz/asia_tokyo.tzif");
- var tz = try Tz.parse(std.testing.allocator, data);
+ var in_stream = std.io.fixedBufferStream(data);
+
+ var tz = try std.Tz.parse(std.testing.allocator, in_stream.reader());
defer tz.deinit();
try std.testing.expectEqual(tz.transitions.len, 9);
try std.testing.expect(std.mem.eql(u8, tz.transitions[3].timetype.name(), "JDT"));
- try std.testing.expectEqual(tz.transitions[5].ts, -620298000); // 1950-05-06 15:00:00 (UTC)
- try std.testing.expectEqual(tz.leapseconds[13].occurrence, 567993613); // 1988-01-01 00:00:13 (IAT)
+ try std.testing.expectEqual(tz.transitions[5].ts, -620298000); // 1950-05-06 15:00:00 UTC
+ try std.testing.expectEqual(tz.leapseconds[13].occurrence, 567993613); // 1988-01-01 00:00:00 UTC (+23s in TAI, and +13 in the data since it doesn't store the initial 10 second offset)
+}
+
+test "fat" {
+ const data = @embedFile("tz/antarctica_davis.tzif");
+ var in_stream = std.io.fixedBufferStream(data);
+
+ var tz = try std.Tz.parse(std.testing.allocator, in_stream.reader());
+ defer tz.deinit();
+
+ try std.testing.expectEqual(tz.transitions.len, 8);
+ try std.testing.expect(std.mem.eql(u8, tz.transitions[3].timetype.name(), "+05"));
+ try std.testing.expectEqual(tz.transitions[4].ts, 1268251224); // 2010-03-10 20:00:00 UTC
}