aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorErsikan <julien.philippon@epitech.eu>2021-03-14 18:07:09 +0100
committerErsikan <julien.philippon@epitech.eu>2021-03-17 10:27:26 +0100
commit8942243f7a825e42c16c8d210f5f9dc3baa76b2f (patch)
treeae67b24aeacd992e1a382e349d9b51d474bf3307
parenta3540000909bdc6a59ba07c85d21afeb3a7e54e2 (diff)
downloadzig-8942243f7a825e42c16c8d210f5f9dc3baa76b2f.tar.gz
zig-8942243f7a825e42c16c8d210f5f9dc3baa76b2f.zip
zig fmt: factorize source file reading and decoding
Reading a source file and decoding it from UTF-16LE to UTF-8 is now done in a single function. Error messages are improved, and an error is emitted when the source file has an unsupported BOM (UTF-16BE, UTF-32). Please note that the BOM of UTF-32 is composed of the same bytes as the BOM of UTF-16 followed by a null character. Therefore a UTF-16LE source file whose first character after the BOM is a null character will be interpreted as UTF-32 and rejected because of an unsupported encoding. In practice this is not a problem, as the code would have been rejected later anyway because of the null character.
-rw-r--r--src/main.zig102
1 files changed, 55 insertions, 47 deletions
diff --git a/src/main.zig b/src/main.zig
index 1625b7213f..be227a2895 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -2637,6 +2637,50 @@ fn argvCmd(allocator: *Allocator, argv: []const []const u8) ![]u8 {
return cmd.toOwnedSlice();
}
+fn readSourceFileToEndAlloc(allocator: *mem.Allocator, input: *const fs.File, size_hint: ?usize) ![]const u8 {
+ const source_code = input.readToEndAllocOptions(
+ allocator,
+ max_src_size,
+ size_hint,
+ @alignOf(u16),
+ null,
+ ) catch |err| switch (err) {
+ error.ConnectionResetByPeer => unreachable,
+ error.ConnectionTimedOut => unreachable,
+ error.NotOpenForReading => unreachable,
+ else => |e| return e,
+ };
+ errdefer allocator.free(source_code);
+
+ // Detect unsupported file types with their Byte Order Mark
+ const unsupported_boms = [_][]const u8{
+ "\xff\xfe\x00\x00", // UTF-32 little endian
+ "\xfe\xff\x00\x00", // UTF-32 big endian
+ "\xfe\xff", // UTF-16 big endian
+ };
+ for (unsupported_boms) |bom| {
+ if (mem.startsWith(u8, source_code, bom)) {
+ return error.UnsupportedEncoding;
+ }
+ }
+
+ // If the file starts with a UTF-16 little endian BOM, translate it to UTF-8
+ if (mem.startsWith(u8, source_code, "\xff\xfe")) {
+ const source_code_utf16_le = mem.bytesAsSlice(u16, source_code);
+ const source_code_utf8 = std.unicode.utf16leToUtf8Alloc(allocator, source_code_utf16_le) catch |err| switch (err) {
+ error.DanglingSurrogateHalf => error.UnsupportedEncoding,
+ error.ExpectedSecondSurrogateHalf => error.UnsupportedEncoding,
+ error.UnexpectedSecondSurrogateHalf => error.UnsupportedEncoding,
+ else => |e| return e,
+ };
+
+ allocator.free(source_code);
+ return source_code_utf8;
+ }
+
+ return source_code;
+}
+
pub const usage_fmt =
\\Usage: zig fmt [file]...
\\
@@ -2709,20 +2753,8 @@ pub fn cmdFmt(gpa: *Allocator, args: []const []const u8) !void {
}
const stdin = io.getStdIn();
-
- const source_code = blk: {
- const source_code = try stdin.readToEndAllocOptions(gpa, max_src_size, null, @alignOf(u16), null);
- errdefer gpa.free(source_code);
-
- // If the file starts with a UTF-16 BOM, translate it to UTF-8
- if (mem.startsWith(u8, source_code, "\xff\xfe")) {
- const source_code_utf16_le = mem.bytesAsSlice(u16, source_code);
- const source_code_utf8 = try std.unicode.utf16leToUtf8Alloc(gpa, source_code_utf16_le);
- gpa.free(source_code);
- break :blk source_code_utf8;
- } else {
- break :blk source_code;
- }
+ const source_code = readSourceFileToEndAlloc(gpa, &stdin, null) catch |err| {
+ fatal("unable to read stdin: {s}", .{err});
};
defer gpa.free(source_code);
@@ -2798,7 +2830,7 @@ const FmtError = error{
EndOfStream,
Unseekable,
NotOpenForWriting,
- UnknownTextFormat,
+ UnsupportedEncoding,
} || fs.File.OpenError;
fn fmtPath(fmt: *Fmt, file_path: []const u8, check_mode: bool, dir: fs.Dir, sub_path: []const u8) FmtError!void {
@@ -2864,40 +2896,16 @@ fn fmtPathFile(
if (stat.kind == .Directory)
return error.IsDir;
- const source_code = blk: {
- const source_code = source_file.readToEndAllocOptions(
- fmt.gpa,
- max_src_size,
- std.math.cast(usize, stat.size) catch return error.FileTooBig,
- @alignOf(u16),
- null,
- ) catch |err| switch (err) {
- error.ConnectionResetByPeer => unreachable,
- error.ConnectionTimedOut => unreachable,
- error.NotOpenForReading => unreachable,
- else => |e| return e,
- };
- source_file.close();
- file_closed = true;
- errdefer fmt.gpa.free(source_code);
-
- // If the file starts with a UTF-16 BOM, translate it to UTF-8
- if (mem.eql(u8, source_code[0..2], "\xff\xfe")) {
- const source_code_utf16_le = mem.bytesAsSlice(u16, source_code);
- const source_code_utf8 = std.unicode.utf16leToUtf8Alloc(fmt.gpa, source_code_utf16_le) catch |err| return switch (err) {
- error.DanglingSurrogateHalf => FmtError.UnknownTextFormat,
- error.ExpectedSecondSurrogateHalf => FmtError.UnknownTextFormat,
- error.UnexpectedSecondSurrogateHalf => FmtError.UnknownTextFormat,
- else => |e| e,
- };
- fmt.gpa.free(source_code);
- break :blk source_code_utf8;
- } else {
- break :blk source_code;
- }
- };
+ const source_code = try readSourceFileToEndAlloc(
+ fmt.gpa,
+ &source_file,
+ std.math.cast(usize, stat.size) catch return error.FileTooBig,
+ );
defer fmt.gpa.free(source_code);
+ source_file.close();
+ file_closed = true;
+
// Add to set after no longer possible to get error.IsDir.
if (try fmt.seen.fetchPut(stat.inode, {})) |_| return;