diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2022-01-02 15:15:04 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-01-02 15:15:04 -0500 |
| commit | 36b069910ef71278ad3aaf059f12144d8b86e856 (patch) | |
| tree | 72b5e2a540d94035dd70a0ddc10d442502c330a3 /lib/std | |
| parent | ae8d6fd7b78a1d41651fc1ab68a973a068f5bd6f (diff) | |
| parent | a54788ba7af64e59d3fbaf8951e3164f1347a2f4 (diff) | |
| download | zig-36b069910ef71278ad3aaf059f12144d8b86e856.tar.gz zig-36b069910ef71278ad3aaf059f12144d8b86e856.zip | |
Merge pull request #10456 from Aransentin/master
Support parsing tz timezone data
Diffstat (limited to 'lib/std')
| -rw-r--r-- | lib/std/std.zig | 1 | ||||
| -rw-r--r-- | lib/std/tz.zig | 252 | ||||
| -rw-r--r-- | lib/std/tz/antarctica_davis.tzif | bin | 0 -> 837 bytes | |||
| -rw-r--r-- | lib/std/tz/asia_tokyo.tzif | bin | 0 -> 537 bytes | |||
| -rw-r--r-- | lib/std/tz/europe_vatican.tzif | bin | 0 -> 951 bytes |
5 files changed, 253 insertions, 0 deletions
diff --git a/lib/std/std.zig b/lib/std/std.zig
index f94185f2fb..5ae09a7f5b 100644
--- a/lib/std/std.zig
+++ b/lib/std/std.zig
@@ -39,6 +39,7 @@ pub const StringArrayHashMapUnmanaged = array_hash_map.StringArrayHashMapUnmanag
 pub const TailQueue = @import("linked_list.zig").TailQueue;
 pub const Target = @import("target.zig").Target;
 pub const Thread = @import("Thread.zig");
+pub const Tz = @import("tz.zig").Tz;
 
 pub const array_hash_map = @import("array_hash_map.zig");
 pub const atomic = @import("atomic.zig");
diff --git a/lib/std/tz.zig b/lib/std/tz.zig
new file mode 100644
index 0000000000..d505a790b7
--- /dev/null
+++ b/lib/std/tz.zig
@@ -0,0 +1,302 @@
+//! Parser for TZif (Time Zone Information Format) data as described by RFC 8536.
+
+const std = @import("std.zig");
+const builtin = @import("builtin");
+
+/// A point in time at which the local time type changes.
+pub const Transition = struct {
+    /// Transition time, in seconds relative to the Unix epoch.
+    ts: i64,
+    /// Local time type in effect from `ts` onwards; points into `Tz.timetypes`.
+    timetype: *Timetype,
+};
+
+/// A local time type: UT offset, flags and abbreviated designation.
+pub const Timetype = struct {
+    /// Offset from UT in seconds (rfc8536 "utoff").
+    offset: i32,
+    /// Bit 0x01: daylight saving time, 0x02: standard time indicator, 0x04: UT indicator.
+    flags: u8,
+    /// NUL-terminated designation of at most 6 characters.
+    name_data: [6:0]u8,
+
+    /// Returns the time zone designation, e.g. "JDT".
+    /// `self` is taken by pointer because the result points into `name_data`;
+    /// taking it by value would return a slice into a temporary copy.
+    pub fn name(self: *const Timetype) [:0]const u8 {
+        return std.mem.sliceTo(self.name_data[0..], 0);
+    }
+
+    /// Whether this local time type is daylight saving time.
+    pub fn isDst(self: Timetype) bool {
+        return (self.flags & 0x01) != 0;
+    }
+
+    /// Whether the file's standard/wall indicator is set for this type.
+    pub fn standardTimeIndicator(self: Timetype) bool {
+        return (self.flags & 0x02) != 0;
+    }
+
+    /// Whether the file's UT/local indicator is set for this type.
+    pub fn utIndicator(self: Timetype) bool {
+        return (self.flags & 0x04) != 0;
+    }
+};
+
+/// A leap-second record.
+pub const Leapsecond = struct {
+    /// Time at which the leap second occurs (rfc8536 "occur"); validated to be nonnegative.
+    occurrence: i48,
+    /// Correction value of this record (rfc8536 "corr").
+    correction: i16,
+};
+
+/// Parsed contents of a TZif file. All slices are owned by `allocator` and released by `deinit`.
+pub const Tz = struct {
+    allocator: std.mem.Allocator,
+    transitions: []const Transition,
+    timetypes: []const Timetype,
+    leapseconds: []const Leapsecond,
+    /// Footer of a version 2+ file without the enclosing newlines;
+    /// null for legacy data or when the footer is empty.
+    footer: ?[]const u8,
+
+    /// On-disk header layout; `counts` holds big-endian values until byte-swapped.
+    const Header = extern struct {
+        magic: [4]u8,
+        version: u8,
+        reserved: [15]u8,
+        counts: extern struct {
+            isutcnt: u32,
+            isstdcnt: u32,
+            leapcnt: u32,
+            timecnt: u32,
+            typecnt: u32,
+            charcnt: u32,
+        },
+    };
+
+    /// Parses TZif data from `reader`.
+    /// Version 0 files are parsed from their only (32-bit) data block; for versions
+    /// '2' and '3' the legacy block is skipped and the 64-bit block is parsed.
+    /// Returns `error.BadHeader`, `error.BadVersion`, `error.Malformed` or
+    /// `error.OverlargeFooter` for invalid data, besides reader and allocator errors.
+    /// The caller owns the result and must call `deinit`.
+    pub fn parse(allocator: std.mem.Allocator, reader: anytype) !Tz {
+        var legacy_header = try reader.readStruct(Header);
+        if (!std.mem.eql(u8, &legacy_header.magic, "TZif")) return error.BadHeader;
+        if (legacy_header.version != 0 and legacy_header.version != '2' and legacy_header.version != '3') return error.BadVersion;
+
+        // The counts are stored big-endian
+        if (builtin.target.cpu.arch.endian() != std.builtin.Endian.Big) {
+            std.mem.bswapAllFields(@TypeOf(legacy_header.counts), &legacy_header.counts);
+        }
+
+        if (legacy_header.version == 0) {
+            return parseBlock(allocator, reader, legacy_header, true);
+        }
+
+        // If the format is modern, just skip over the legacy data.
+        // The counts are untrusted u32 values, so sum in u64 to rule out overflow.
+        const counts = legacy_header.counts;
+        const skipv = @as(u64, counts.timecnt) * 5 +
+            @as(u64, counts.typecnt) * 6 +
+            @as(u64, counts.charcnt) +
+            @as(u64, counts.leapcnt) * 8 +
+            @as(u64, counts.isstdcnt) +
+            @as(u64, counts.isutcnt);
+        try reader.skipBytes(skipv, .{});
+
+        var header = try reader.readStruct(Header);
+        if (!std.mem.eql(u8, &header.magic, "TZif")) return error.BadHeader;
+        if (header.version != '2' and header.version != '3') return error.BadVersion;
+        if (builtin.target.cpu.arch.endian() != std.builtin.Endian.Big) {
+            std.mem.bswapAllFields(@TypeOf(header.counts), &header.counts);
+        }
+
+        return parseBlock(allocator, reader, header, false);
+    }
+
+    /// Parses one data block described by `header`; `legacy` selects 32-bit instead of
+    /// 64-bit transition and leap-second times and disables footer parsing.
+    fn parseBlock(allocator: std.mem.Allocator, reader: anytype, header: Header, legacy: bool) !Tz {
+        const counts = header.counts;
+        if (counts.isstdcnt != 0 and counts.isstdcnt != counts.typecnt) return error.Malformed; // rfc8536: isstdcnt [...] MUST either be zero or equal to "typecnt"
+        if (counts.isutcnt != 0 and counts.isutcnt != counts.typecnt) return error.Malformed; // rfc8536: isutcnt [...] MUST either be zero or equal to "typecnt"
+        if (counts.typecnt == 0) return error.Malformed; // rfc8536: typecnt [...] MUST NOT be zero
+        if (counts.charcnt == 0) return error.Malformed; // rfc8536: charcnt [...] MUST NOT be zero
+        if (counts.charcnt > 256 + 6) return error.Malformed; // Not explicitly banned by rfc8536 but nonsensical
+
+        const leapseconds = try allocator.alloc(Leapsecond, counts.leapcnt);
+        errdefer allocator.free(leapseconds);
+        const transitions = try allocator.alloc(Transition, counts.timecnt);
+        errdefer allocator.free(transitions);
+        const timetypes = try allocator.alloc(Timetype, counts.typecnt);
+        errdefer allocator.free(timetypes);
+
+        // Parse transition times
+        for (transitions) |*transition| {
+            transition.ts = if (legacy) try reader.readIntBig(i32) else try reader.readIntBig(i64);
+        }
+
+        // Parse transition types
+        for (transitions) |*transition| {
+            const tt = try reader.readByte();
+            if (tt >= timetypes.len) return error.Malformed; // rfc8536: Each type index MUST be in the range [0, "typecnt" - 1]
+            transition.timetype = &timetypes[tt];
+        }
+
+        // Parse time types
+        for (timetypes) |*timetype| {
+            const offset = try reader.readIntBig(i32);
+            if (offset == std.math.minInt(i32)) return error.Malformed; // rfc8536: utoff [...] MUST NOT be -2**31
+            const dst = try reader.readByte();
+            if (dst != 0 and dst != 1) return error.Malformed; // rfc8536: (is)dst [...] The value MUST be 0 or 1.
+            const idx = try reader.readByte();
+            if (idx >= counts.charcnt) return error.Malformed; // rfc8536: (desig)idx [...] Each index MUST be in the range [0, "charcnt" - 1]
+            timetype.* = .{
+                .offset = offset,
+                .flags = dst,
+                .name_data = undefined,
+            };
+
+            // Temporarily cache idx in name_data to be processed after we've read the designator names below
+            timetype.name_data[0] = idx;
+        }
+
+        var designators_data: [256 + 6]u8 = undefined;
+        try reader.readNoEof(designators_data[0..counts.charcnt]);
+        const designators = designators_data[0..counts.charcnt];
+        if (designators[designators.len - 1] != 0) return error.Malformed; // rfc8536: charcnt [...] includes the trailing NUL (0x00) octet
+
+        // Iterate through the timetypes again, setting the designator names
+        for (timetypes) |*tt| {
+            const name = std.mem.sliceTo(designators[tt.name_data[0]..], 0);
+            // We are mandating the "SHOULD" 6-character limit so we can pack the struct better, and to conform to POSIX.
+            if (name.len > 6) return error.Malformed; // rfc8536: Time zone designations SHOULD consist of at least three (3) and no more than six (6) ASCII characters.
+            std.mem.copy(u8, tt.name_data[0..], name);
+            tt.name_data[name.len] = 0;
+        }
+
+        // Parse leap seconds
+        var i: usize = 0;
+        while (i < leapseconds.len) : (i += 1) {
+            const occur: i64 = if (legacy) try reader.readIntBig(i32) else try reader.readIntBig(i64);
+            if (occur < 0) return error.Malformed; // rfc8536: occur [...] MUST be nonnegative
+            if (occur > std.math.maxInt(i48)) return error.Malformed; // Unreasonably far into the future
+            // Widen to i64 first: adding to an i48 close to its maximum would overflow
+            if (i > 0 and @as(i64, leapseconds[i - 1].occurrence) + 2419199 > occur) return error.Malformed; // rfc8536: occur [...] each later value MUST be at least 2419199 greater than the previous value
+
+            const corr = try reader.readIntBig(i32);
+            // Range-check before doing arithmetic on or narrowing the untrusted value
+            if (corr < std.math.minInt(i16) or corr > std.math.maxInt(i16)) return error.Malformed; // Unreasonably large correction
+            if (i == 0 and corr != -1 and corr != 1) return error.Malformed; // rfc8536: The correction value in the first leap-second record, if present, MUST be either one (1) or minus one (-1)
+            if (i > 0 and leapseconds[i - 1].correction != corr + 1 and leapseconds[i - 1].correction != corr - 1) return error.Malformed; // rfc8536: The correction values in adjacent leap-second records MUST differ by exactly one (1)
+
+            leapseconds[i] = .{
+                .occurrence = @intCast(i48, occur),
+                .correction = @intCast(i16, corr),
+            };
+        }
+
+        // Parse standard/wall indicators
+        i = 0;
+        while (i < counts.isstdcnt) : (i += 1) {
+            const stdtime = try reader.readByte();
+            if (stdtime == 1) {
+                timetypes[i].flags |= 0x02;
+            }
+        }
+
+        // Parse UT/local indicators
+        i = 0;
+        while (i < counts.isutcnt) : (i += 1) {
+            const ut = try reader.readByte();
+            if (ut == 1) {
+                timetypes[i].flags |= 0x04;
+                if (!timetypes[i].standardTimeIndicator()) return error.Malformed; // rfc8536: standard/wall value MUST be one (1) if the UT/local value is one (1)
+            }
+        }
+
+        // Footer
+        var footer: ?[]u8 = null;
+        if (!legacy) {
+            if ((try reader.readByte()) != '\n') return error.Malformed; // An rfc8536 footer must start with a newline
+            var footerdata_buf: [128]u8 = undefined;
+            const footer_mem = reader.readUntilDelimiter(&footerdata_buf, '\n') catch |err| switch (err) {
+                error.StreamTooLong => return error.OverlargeFooter, // Read more than 128 bytes, much larger than any reasonable POSIX TZ string
+                else => return err,
+            };
+            if (footer_mem.len != 0) {
+                footer = try allocator.dupe(u8, footer_mem);
+            }
+        }
+        errdefer if (footer) |ft| allocator.free(ft);
+
+        return Tz{
+            .allocator = allocator,
+            .transitions = transitions,
+            .timetypes = timetypes,
+            .leapseconds = leapseconds,
+            .footer = footer,
+        };
+    }
+
+    /// Frees all memory owned by `self`; `self` must not be used afterwards.
+    pub fn deinit(self: *Tz) void {
+        if (self.footer) |footer| {
+            self.allocator.free(footer);
+        }
+        self.allocator.free(self.leapseconds);
+        self.allocator.free(self.transitions);
+        self.allocator.free(self.timetypes);
+        self.* = undefined;
+    }
+};
+
+test "slim" {
+    const data = @embedFile("tz/asia_tokyo.tzif");
+    var in_stream = std.io.fixedBufferStream(data);
+
+    var tz = try std.Tz.parse(std.testing.allocator, in_stream.reader());
+    defer tz.deinit();
+
+    try std.testing.expectEqual(@as(usize, 9), tz.transitions.len);
+    try std.testing.expectEqualStrings("JDT", tz.transitions[3].timetype.name());
+    try std.testing.expectEqual(@as(i64, -620298000), tz.transitions[5].ts); // 1950-05-06 15:00:00 UTC
+    try std.testing.expectEqual(@as(i48, 567993613), tz.leapseconds[13].occurrence); // 1988-01-01 00:00:00 UTC (+23s in TAI, and +13 in the data since it doesn't store the initial 10 second offset)
+}
+
+test "fat" {
+    const data = @embedFile("tz/antarctica_davis.tzif");
+    var in_stream = std.io.fixedBufferStream(data);
+
+    var tz = try std.Tz.parse(std.testing.allocator, in_stream.reader());
+    defer tz.deinit();
+
+    try std.testing.expectEqual(@as(usize, 8), tz.transitions.len);
+    try std.testing.expectEqualStrings("+05", tz.transitions[3].timetype.name());
+    try std.testing.expectEqual(@as(i64, 1268251224), tz.transitions[4].ts); // 2010-03-10 20:00:00 UTC
+}
+
+test "legacy" {
+    // Taken from Slackware 8.0, from 2001
+    const data = @embedFile("tz/europe_vatican.tzif");
+    var in_stream = std.io.fixedBufferStream(data);
+
+    var tz = try std.Tz.parse(std.testing.allocator, in_stream.reader());
+    defer tz.deinit();
+
+    try std.testing.expectEqual(@as(usize, 170), tz.transitions.len);
+    try std.testing.expectEqualStrings("CET", tz.transitions[69].timetype.name());
+    try std.testing.expectEqual(@as(i64, 1414285200), tz.transitions[123].ts); // 2014-10-26 01:00:00 UTC
+}
+
+test "utoff of -2**31 is rejected" {
+    // Minimal version 0 file: 44-byte header (typecnt = 1, charcnt = 4), one time type, "UTC\0"
+    const data = "TZif" ++ ("\x00" ** 32) ++ "\x00\x00\x00\x01" ++ "\x00\x00\x00\x04" ++
+        "\x80\x00\x00\x00\x00\x00" ++ "UTC\x00";
+    var in_stream = std.io.fixedBufferStream(data);
+
+    try std.testing.expectError(error.Malformed, Tz.parse(std.testing.allocator, in_stream.reader()));
+}
diff --git a/lib/std/tz/antarctica_davis.tzif b/lib/std/tz/antarctica_davis.tzif
Binary files differ
new file mode 100644
index 0000000000..662aa0098a
--- /dev/null
+++ b/lib/std/tz/antarctica_davis.tzif
diff --git a/lib/std/tz/asia_tokyo.tzif b/lib/std/tz/asia_tokyo.tzif
Binary files differ
new file mode 100644
index 0000000000..15d780f7a8
--- /dev/null
+++ b/lib/std/tz/asia_tokyo.tzif
diff --git a/lib/std/tz/europe_vatican.tzif b/lib/std/tz/europe_vatican.tzif
Binary files differ
new file mode 100644
index 0000000000..fe55064d1d
--- /dev/null
+++ b/lib/std/tz/europe_vatican.tzif
