diff options
Diffstat (limited to 'libc/process_headers.zig')
| -rw-r--r-- | libc/process_headers.zig | 627 |
1 files changed, 627 insertions, 0 deletions
// To get started, run this tool with no args and read the help message.
//
// The build systems of musl-libc and glibc require specifying a single target
// architecture. Meanwhile, Zig supports out-of-the-box cross compilation for
// every target. So the process to create libc headers that Zig ships is to use
// this tool.
// First, use the musl/glibc build systems to create installations of all the
// targets in the `libc_targets` variable.
// Next, run this tool to create a new directory which puts .h files into
// <arch> subdirectories, with `generic` being files that apply to all architectures.
// You'll then have to manually update Zig source repo with these new files.

const std = @import("std");
const builtin = @import("builtin");
const Arch = builtin.Arch;
const Abi = builtin.Abi;
const Os = builtin.Os;
const assert = std.debug.assert;

// One libc installation directory, as named by the libc build system.
// `name` is the subdirectory looked up under every --search-path.
// An entry is only processed by `main` when BOTH `zig_arch` and `zig_abi` are
// non-null; setting either one to null makes the tool skip that installation.
const LibCTarget = struct {
    name: []const u8,
    zig_arch: ?@TagType(Arch),
    zig_abi: ?Abi,
};

// Every entry with both fields set must map to a distinct (arch, abi) pair,
// because `main` asserts that no two installations provide the same header
// path for the same destination target.
const libc_targets = []LibCTarget{
    LibCTarget{
        .name = "aarch64_be-linux-gnu",
        .zig_arch = Arch.aarch64_be,
        .zig_abi = Abi.gnu,
    },
    LibCTarget{
        .name = "aarch64-linux-gnu",
        .zig_arch = Arch.aarch64,
        .zig_abi = Abi.gnu,
    },
    LibCTarget{
        .name = "aarch64-linux-gnu-disable-multi-arch",
        .zig_arch = Arch.aarch64,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "alpha-linux-gnu",
        .zig_arch = null,
        .zig_abi = Abi.gnu,
    },
    LibCTarget{
        .name = "armeb-linux-gnueabi",
        .zig_arch = Arch.armeb,
        .zig_abi = Abi.gnueabi,
    },
    LibCTarget{
        .name = "armeb-linux-gnueabi-be8",
        .zig_arch = Arch.armeb,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "armeb-linux-gnueabihf",
        .zig_arch = Arch.armeb,
        .zig_abi = Abi.gnueabihf,
    },
    LibCTarget{
        .name = "armeb-linux-gnueabihf-be8",
        .zig_arch = Arch.armeb,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "arm-linux-gnueabi",
        .zig_arch = Arch.arm,
        .zig_abi = Abi.gnueabi,
    },
    LibCTarget{
        .name = "arm-linux-gnueabihf",
        .zig_arch = Arch.arm,
        .zig_abi = Abi.gnueabihf,
    },
    LibCTarget{
        .name = "arm-linux-gnueabihf-v7a",
        .zig_arch = Arch.arm,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "arm-linux-gnueabihf-v7a-disable-multi-arch",
        .zig_arch = null,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "hppa-linux-gnu",
        .zig_arch = null,
        .zig_abi = Abi.gnu,
    },
    LibCTarget{
        .name = "i486-linux-gnu",
        .zig_arch = null,
        .zig_abi = Abi.gnu,
    },
    LibCTarget{
        .name = "i586-linux-gnu",
        .zig_arch = null,
        .zig_abi = Abi.gnu,
    },
    LibCTarget{
        // This is the GNU/Hurd configuration, not a Linux one. `main` hard-codes
        // the destination OS to linux, so mapping this to (i386, gnu) would
        // collide with "i686-linux-gnu" below and trip the duplicate-target
        // assertion. Leave the ABI unset so the installation is skipped.
        .name = "i686-gnu",
        .zig_arch = Arch.i386,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "i686-linux-gnu",
        .zig_arch = Arch.i386,
        .zig_abi = Abi.gnu,
    },
    LibCTarget{
        .name = "i686-linux-gnu-disable-multi-arch",
        .zig_arch = null,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "i686-linux-gnu-enable-obsolete",
        .zig_arch = Arch.i386,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "i686-linux-gnu-static-pie",
        .zig_arch = Arch.i386,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "ia64-linux-gnu",
        .zig_arch = null,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "m68k-linux-gnu",
        .zig_arch = null,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "m68k-linux-gnu-coldfire",
        .zig_arch = null,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "m68k-linux-gnu-coldfire-soft",
        .zig_arch = null,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "microblazeel-linux-gnu",
        .zig_arch = null,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "microblaze-linux-gnu",
        .zig_arch = null,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "mips64el-linux-gnu-n32",
        .zig_arch = Arch.mips64el,
        .zig_abi = Abi.gnuabin32,
    },
    LibCTarget{
        .name = "mips64el-linux-gnu-n32-nan2008",
        .zig_arch = Arch.mips64el,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "mips64el-linux-gnu-n32-nan2008-soft",
        .zig_arch = Arch.mips64el,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "mips64el-linux-gnu-n32-soft",
        .zig_arch = Arch.mips64el,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "mips64el-linux-gnu-n64",
        .zig_arch = Arch.mips64el,
        .zig_abi = Abi.gnuabi64,
    },
    LibCTarget{
        .name = "mips64el-linux-gnu-n64-nan2008",
        .zig_arch = Arch.mips64el,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "mips64el-linux-gnu-n64-nan2008-soft",
        .zig_arch = Arch.mips64el,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "mips64el-linux-gnu-n64-soft",
        .zig_arch = Arch.mips64el,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "mips64-linux-gnu-n32",
        .zig_arch = Arch.mips64,
        .zig_abi = Abi.gnuabin32,
    },
    LibCTarget{
        .name = "mips64-linux-gnu-n32-nan2008",
        .zig_arch = Arch.mips64,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "mips64-linux-gnu-n32-nan2008-soft",
        .zig_arch = Arch.mips64,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "mips64-linux-gnu-n32-soft",
        .zig_arch = Arch.mips64,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "mips64-linux-gnu-n64",
        .zig_arch = Arch.mips64,
        .zig_abi = Abi.gnuabi64,
    },
    LibCTarget{
        .name = "mips64-linux-gnu-n64-nan2008",
        .zig_arch = Arch.mips64,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "mips64-linux-gnu-n64-nan2008-soft",
        .zig_arch = Arch.mips64,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "mips64-linux-gnu-n64-soft",
        .zig_arch = Arch.mips64,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "mipsel-linux-gnu",
        .zig_arch = Arch.mipsel,
        .zig_abi = Abi.gnu,
    },
    LibCTarget{
        .name = "mipsel-linux-gnu-nan2008",
        .zig_arch = Arch.mipsel,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "mipsel-linux-gnu-nan2008-soft",
        .zig_arch = Arch.mipsel,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "mipsel-linux-gnu-soft",
        .zig_arch = Arch.mipsel,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "mips-linux-gnu",
        .zig_arch = Arch.mips,
        .zig_abi = Abi.gnu,
    },
    LibCTarget{
        .name = "mips-linux-gnu-nan2008",
        .zig_arch = Arch.mips,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "mips-linux-gnu-nan2008-soft",
        .zig_arch = Arch.mips,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "mips-linux-gnu-soft",
        .zig_arch = Arch.mips,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "nios2-linux-gnu",
        .zig_arch = Arch.nios2,
        .zig_abi = Abi.gnu,
    },
    LibCTarget{
        .name = "powerpc64le-linux-gnu",
        .zig_arch = Arch.powerpc64le,
        .zig_abi = Abi.gnu,
    },
    LibCTarget{
        .name = "powerpc64-linux-gnu",
        .zig_arch = Arch.powerpc64,
        .zig_abi = Abi.gnu,
    },
    LibCTarget{
        .name = "powerpc-linux-gnu",
        .zig_arch = Arch.powerpc,
        .zig_abi = Abi.gnu,
    },
    LibCTarget{
        .name = "powerpc-linux-gnu-power4",
        .zig_arch = Arch.powerpc,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "powerpc-linux-gnu-soft",
        .zig_arch = Arch.powerpc,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "powerpc-linux-gnuspe",
        .zig_arch = Arch.powerpc,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "powerpc-linux-gnuspe-e500v1",
        .zig_arch = Arch.powerpc,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "riscv64-linux-gnu-rv64imac-lp64",
        .zig_arch = Arch.riscv64,
        .zig_abi = Abi.gnu,
    },
    LibCTarget{
        .name = "riscv64-linux-gnu-rv64imafdc-lp64",
        .zig_arch = Arch.riscv64,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "riscv64-linux-gnu-rv64imafdc-lp64d",
        .zig_arch = Arch.riscv64,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "s390-linux-gnu",
        .zig_arch = null,
        .zig_abi = Abi.gnu,
    },
    LibCTarget{
        .name = "s390x-linux-gnu",
        .zig_arch = Arch.s390x,
        .zig_abi = Abi.gnu,
    },
    LibCTarget{
        .name = "sh3eb-linux-gnu",
        .zig_arch = null,
        .zig_abi = Abi.gnu,
    },
    LibCTarget{
        .name = "sh3-linux-gnu",
        .zig_arch = null,
        .zig_abi = Abi.gnu,
    },
    LibCTarget{
        .name = "sh4eb-linux-gnu",
        .zig_arch = null,
        .zig_abi = Abi.gnu,
    },
    LibCTarget{
        .name = "sh4eb-linux-gnu-soft",
        .zig_arch = null,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "sh4-linux-gnu",
        .zig_arch = null,
        .zig_abi = Abi.gnu,
    },
    LibCTarget{
        .name = "sh4-linux-gnu-soft",
        .zig_arch = null,
        .zig_abi = null,
    },
    // NOTE(review): glibc's "sparc64" appears to be the 64-bit port and its
    // "sparcv9" the 32-bit one, whereas Zig/LLVM presumably use `sparc` for
    // 32-bit and `sparcv9` for 64-bit. The next four mappings may be swapped;
    // confirm before relying on the sparc output directories.
    LibCTarget{
        .name = "sparc64-linux-gnu",
        .zig_arch = Arch.sparc,
        .zig_abi = Abi.gnu,
    },
    LibCTarget{
        .name = "sparc64-linux-gnu-disable-multi-arch",
        .zig_arch = Arch.sparc,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "sparcv9-linux-gnu",
        .zig_arch = Arch.sparcv9,
        .zig_abi = Abi.gnu,
    },
    LibCTarget{
        .name = "sparcv9-linux-gnu-disable-multi-arch",
        .zig_arch = Arch.sparcv9,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "x86_64-linux-gnu",
        .zig_arch = Arch.x86_64,
        .zig_abi = Abi.gnu,
    },
    LibCTarget{
        .name = "x86_64-linux-gnu-disable-multi-arch",
        .zig_arch = Arch.x86_64,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "x86_64-linux-gnu-enable-obsolete",
        .zig_arch = Arch.x86_64,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "x86_64-linux-gnu-static-pie",
        .zig_arch = Arch.x86_64,
        .zig_abi = null,
    },
    LibCTarget{
        .name = "x86_64-linux-gnu-x32",
        .zig_arch = Arch.x86_64,
        .zig_abi = Abi.gnux32,
    },
    LibCTarget{
        .name = "x86_64-linux-gnu-x32-static-pie",
        .zig_arch = Arch.x86_64,
        .zig_abi = null,
    },
};

// The (arch, os, abi) triple that names an output subdirectory.
// Used as the key of `TargetToHash`, hence the custom hash/eql pair.
const DestTarget = struct {
    arch: @TagType(Arch),
    os: Os,
    abi: Abi,

    // Mixes the three enum values with wrapping arithmetic so it cannot overflow-trap.
    fn hash(a: DestTarget) u32 {
        return @enumToInt(a.arch) +%
            (@enumToInt(a.os) *% u32(4202347608)) +%
            (@enumToInt(a.abi) *% u32(4082223418));
    }

    fn eql(a: DestTarget, b: DestTarget) bool {
        return a.arch == b.arch and
            a.os == b.os and
            a.abi == b.abi;
    }
};

// One distinct (relative path, trimmed file contents) combination.
const Contents = struct {
    // File contents with leading/trailing whitespace trimmed.
    bytes: []const u8,
    // Number of processed files that produced this same hash.
    hit_count: usize,
    // SHA-256 digest over the relative path followed by `bytes`.
    hash: []const u8,
    // Set once these contents have been written to the `generic` directory.
    is_generic: bool,

    // Sort predicate: ascending by hit_count, so the most shared contents end up last.
    fn hitCountLessThan(lhs: *const Contents, rhs: *const Contents) bool {
        return lhs.hit_count < rhs.hit_count;
    }
};

// digest -> contents
const HashToContents = std.AutoHashMap([]const u8, Contents);
// destination target -> digest of the file that target has at a given path
const TargetToHash = std.HashMap(DestTarget, []const u8, DestTarget.hash, DestTarget.eql);
// relative header path -> per-target digests
const PathTable = std.AutoHashMap([]const u8, *TargetToHash);

// Returns true when `needle` (compared by address) is already present in `list`.
fn containsContents(list: []const *Contents, needle: *Contents) bool {
    for (list) |item| {
        if (item == needle) return true;
    }
    return false;
}

// Entry point.
// 1. Parses `--search-path <dir>` (repeatable) and `--out <dir>` (required, once).
// 2. For every entry of `libc_targets` that has both a Zig arch and ABI, walks
//    `<search-path>/<name>/usr/include` in the first search path that has it and
//    records a digest of every regular file, keyed by relative path and target.
// 3. For every relative path, writes the most widely shared contents to
//    `<out>/generic/<path>` (only if shared by more than one target) and every
//    other variant to `<out>/<arch>-<os>-<abi>/<path>`.
// Progress, duplicates and warnings are reported on stderr via std.debug.warn.
pub fn main() !void {
    // Everything is allocated from one arena that is never freed here.
    var direct_allocator = std.heap.DirectAllocator.init();
    var arena = std.heap.ArenaAllocator.init(&direct_allocator.allocator);
    const allocator = &arena.allocator;
    const args = try std.os.argsAlloc(allocator);
    var search_paths = std.ArrayList([]const u8).init(allocator);
    var opt_out_dir: ?[]const u8 = null;

    // Every recognized option takes exactly one value, so each iteration
    // consumes two argv entries (one here, one in the continue expression).
    var arg_i: usize = 1;
    while (arg_i < args.len) : (arg_i += 1) {
        if (std.mem.eql(u8, args[arg_i], "--help"))
            usageAndExit(args[0]);
        if (arg_i + 1 >= args.len) {
            std.debug.warn("expected argument after '{}'\n", args[arg_i]);
            usageAndExit(args[0]);
        }

        if (std.mem.eql(u8, args[arg_i], "--search-path")) {
            try search_paths.append(args[arg_i + 1]);
        } else if (std.mem.eql(u8, args[arg_i], "--out")) {
            // Bad user input must be a usage error, not an assertion failure.
            if (opt_out_dir != null) {
                std.debug.warn("--out may only be specified once\n");
                usageAndExit(args[0]);
            }
            opt_out_dir = args[arg_i + 1];
        } else {
            std.debug.warn("unrecognized argument: {}\n", args[arg_i]);
            usageAndExit(args[0]);
        }

        arg_i += 1;
    }

    const out_dir = opt_out_dir orelse usageAndExit(args[0]);

    var path_table = PathTable.init(allocator);
    var hash_to_contents = HashToContents.init(allocator);
    var max_bytes_saved: usize = 0;
    var total_bytes: usize = 0;

    var hasher = std.crypto.Sha256.init();

    for (libc_targets) |libc_target| {
        // Installations without a Zig arch or ABI are skipped entirely.
        const dest_target = DestTarget{
            .arch = libc_target.zig_arch orelse continue,
            .abi = libc_target.zig_abi orelse continue,
            .os = builtin.Os.linux,
        };
        search: for (search_paths.toSliceConst()) |search_path| {
            const target_include_dir = try std.os.path.join(
                allocator,
                [][]const u8{ search_path, libc_target.name, "usr", "include" },
            );
            // Explicit stack instead of recursion for the directory walk.
            var dir_stack = std.ArrayList([]const u8).init(allocator);
            try dir_stack.append(target_include_dir);

            while (dir_stack.popOrNull()) |full_dir_name| {
                // A directory that is missing or unreadable makes us give up on
                // this search path and try the next one.
                var dir = std.os.Dir.open(allocator, full_dir_name) catch |err| switch (err) {
                    error.FileNotFound => continue :search,
                    error.AccessDenied => continue :search,
                    else => return err,
                };
                defer dir.close();

                while (try dir.next()) |entry| {
                    const full_path = try std.os.path.join(allocator, [][]const u8{ full_dir_name, entry.name });
                    switch (entry.kind) {
                        std.os.Dir.Entry.Kind.Directory => try dir_stack.append(full_path),
                        std.os.Dir.Entry.Kind.File => {
                            const rel_path = try std.os.path.relative(allocator, target_include_dir, full_path);
                            const raw_bytes = try std.io.readFileAlloc(allocator, full_path);
                            const trimmed = std.mem.trim(u8, raw_bytes, " \r\n\t");
                            total_bytes += raw_bytes.len;
                            // The digest covers the relative path too, so identical
                            // contents at different paths are never merged.
                            const hash = try allocator.alloc(u8, 32);
                            hasher.reset();
                            hasher.update(rel_path);
                            hasher.update(trimmed);
                            hasher.final(hash);
                            const gop = try hash_to_contents.getOrPut(hash);
                            if (gop.found_existing) {
                                max_bytes_saved += raw_bytes.len;
                                gop.kv.value.hit_count += 1;
                                std.debug.warn(
                                    "duplicate: {} {} ({Bi2})\n",
                                    libc_target.name,
                                    rel_path,
                                    raw_bytes.len,
                                );
                            } else {
                                gop.kv.value = Contents{
                                    .bytes = trimmed,
                                    .hit_count = 1,
                                    .hash = hash,
                                    .is_generic = false,
                                };
                            }
                            const path_gop = try path_table.getOrPut(rel_path);
                            const target_to_hash = if (path_gop.found_existing) path_gop.kv.value else blk: {
                                const ptr = try allocator.create(TargetToHash);
                                ptr.* = TargetToHash.init(allocator);
                                path_gop.kv.value = ptr;
                                break :blk ptr;
                            };
                            // Each (path, target) pair must be seen exactly once.
                            assert((try target_to_hash.put(dest_target, hash)) == null);
                        },
                        else => std.debug.warn("warning: weird file: {}\n", full_path),
                    }
                }
            }
            // The walk finished for this search path; do not look at the others.
            break;
        } else {
            std.debug.warn("warning: libc target not found: {}\n", libc_target.name);
        }
    }
    std.debug.warn("summary: {Bi2} could be reduced to {Bi2}\n", total_bytes, total_bytes - max_bytes_saved);
    try std.os.makePath(allocator, out_dir);

    var missed_opportunity_bytes: usize = 0;
    // iterate path_table. for each path, put all the hashes into a list. sort by hit_count.
    // the hash with the highest hit_count gets to be the "generic" one. everybody else
    // gets their header in a separate arch directory.
    var path_it = path_table.iterator();
    while (path_it.next()) |path_kv| {
        // One entry per target, so contents shared by k targets appear k times.
        var contents_list = std.ArrayList(*Contents).init(allocator);
        {
            var hash_it = path_kv.value.iterator();
            while (hash_it.next()) |hash_kv| {
                const contents = &hash_to_contents.get(hash_kv.value).?.value;
                try contents_list.append(contents);
            }
        }
        std.sort.sort(*Contents, contents_list.toSlice(), Contents.hitCountLessThan);
        const best_contents = contents_list.popOrNull().?;
        if (best_contents.hit_count > 1) {
            // worth it to make it generic
            const full_path = try std.os.path.join(allocator, [][]const u8{ out_dir, "generic", path_kv.key });
            try std.os.makePath(allocator, std.os.path.dirname(full_path).?);
            try std.io.writeFile(full_path, best_contents.bytes);
            best_contents.is_generic = true;

            // Report every OTHER set of contents that is also shared by several
            // targets. The list still holds the remaining per-target entries of
            // the generic contents and repeats each shared contender, so skip
            // the former and report each of the latter only once.
            var reported = std.ArrayList(*Contents).init(allocator);
            while (contents_list.popOrNull()) |contender| {
                // Sorted ascending and popped from the back: once we reach a
                // unique file nothing further can be shared.
                if (contender.hit_count <= 1) break;
                if (contender == best_contents) continue;
                if (containsContents(reported.toSliceConst(), contender)) continue;
                try reported.append(contender);

                const this_missed_bytes = contender.hit_count * contender.bytes.len;
                missed_opportunity_bytes += this_missed_bytes;
                std.debug.warn("Missed opportunity ({Bi2}): {}\n", this_missed_bytes, path_kv.key);
            }
        }
        // Everything that did not become generic is written per target.
        var hash_it = path_kv.value.iterator();
        while (hash_it.next()) |hash_kv| {
            const contents = &hash_to_contents.get(hash_kv.value).?.value;
            if (contents.is_generic) continue;

            const dest_target = hash_kv.key;
            const out_subpath = try std.fmt.allocPrint(
                allocator,
                "{}-{}-{}",
                @tagName(dest_target.arch),
                @tagName(dest_target.os),
                @tagName(dest_target.abi),
            );
            const full_path = try std.os.path.join(allocator, [][]const u8{ out_dir, out_subpath, path_kv.key });
            try std.os.makePath(allocator, std.os.path.dirname(full_path).?);
            try std.io.writeFile(full_path, contents.bytes);
        }
    }
    std.debug.warn("total missed opportunity: {Bi2}\n", missed_opportunity_bytes);
}

// Prints the command line help to stderr and terminates the process with exit code 1.
fn usageAndExit(arg0: []const u8) noreturn {
    std.debug.warn("Usage: {} [--search-path <dir>] --out <dir>\n", arg0);
    std.debug.warn("--search-path can be used any number of times.\n");
    std.debug.warn(" subdirectories of search paths look like, e.g. x86_64-linux-gnu\n");
    std.debug.warn("--out is a dir that will be created, and populated with the results\n");
    std.os.exit(1);
}
