Merge remote-tracking branch 'origin/master' into llvm6

author: Andrew Kelley <superjoe30@gmail.com> 2017-12-23 12:00:25 -0500
committer: Andrew Kelley <superjoe30@gmail.com> 2017-12-23 12:00:25 -0500
commit: fe660462837231353b846bf398637ca84f67bfc9 (patch)
tree: 37336cfcf44810d73d111a57207b843a01fd205f /std
parent: fe39ca01bcbee0077b21d5ddc2776df974e8c6d3 (diff)
parent: 39c7bd24e4f768b23074b8634ac637b175b7639f (diff)
download: zig-fe660462837231353b846bf398637ca84f67bfc9.tar.gz
zig-fe660462837231353b846bf398637ca84f67bfc9.zip
95 files changed, 2754 insertions, 1436 deletions
diff --git a/std/array_list.zig b/std/array_list.zig
index 65b7e023e2..04db4dd280 100644
--- a/std/array_list.zig
+++ b/std/array_list.zig
@@ -3,42 +3,46 @@ const assert = debug.assert;
 const mem = @import("mem.zig");
 const Allocator = mem.Allocator;
 
-pub fn ArrayList(comptime T: type) -> type{
-    struct {
+pub fn ArrayList(comptime T: type) -> type {
+    return AlignedArrayList(T, @alignOf(T));
+}
+
+pub fn AlignedArrayList(comptime T: type, comptime A: u29) -> type{
+    return struct {
         const Self = this;
 
         /// Use toSlice instead of slicing this directly, because if you don't
         /// specify the end position of the slice, this will potentially give
         /// you uninitialized memory.
-        items: []T,
+        items: []align(A) T,
         len: usize,
         allocator: &Allocator,
 
         /// Deinitialize with `deinit` or use `toOwnedSlice`.
         pub fn init(allocator: &Allocator) -> Self {
-            Self {
-                .items = []T{},
+            return Self {
+                .items = []align(A) T{},
                 .len = 0,
                 .allocator = allocator,
-            }
+            };
         }
 
         pub fn deinit(l: &Self) {
             l.allocator.free(l.items);
         }
 
-        pub fn toSlice(l: &Self) -> []T {
+        pub fn toSlice(l: &Self) -> []align(A) T {
             return l.items[0..l.len];
         }
 
-        pub fn toSliceConst(l: &const Self) -> []const T {
+        pub fn toSliceConst(l: &const Self) -> []align(A) const T {
             return l.items[0..l.len];
         }
 
         /// ArrayList takes ownership of the passed in slice. The slice must have been
         /// allocated with `allocator`.
         /// Deinitialize with `deinit` or use `toOwnedSlice`.
-        pub fn fromOwnedSlice(allocator: &Allocator, slice: []T) -> Self {
+        pub fn fromOwnedSlice(allocator: &Allocator, slice: []align(A) T) -> Self {
             return Self {
                 .items = slice,
                 .len = slice.len,
@@ -47,9 +51,9 @@ pub fn ArrayList(comptime T: type) -> type{
         }
 
         /// The caller owns the returned memory. ArrayList becomes empty.
-        pub fn toOwnedSlice(self: &Self) -> []T {
+        pub fn toOwnedSlice(self: &Self) -> []align(A) T {
             const allocator = self.allocator;
-            const result = allocator.shrink(T, self.items, self.len);
+            const result = allocator.alignedShrink(T, A, self.items, self.len);
             *self = init(allocator);
             return result;
         }
@@ -59,7 +63,7 @@ pub fn ArrayList(comptime T: type) -> type{
             *new_item_ptr = *item;
         }
 
-        pub fn appendSlice(l: &Self, items: []const T) -> %void {
+        pub fn appendSlice(l: &Self, items: []align(A) const T) -> %void {
             %return l.ensureCapacity(l.len + items.len);
             mem.copy(T, l.items[l.len..], items);
             l.len += items.len;
@@ -82,7 +86,7 @@ pub fn ArrayList(comptime T: type) -> type{
                 better_capacity += better_capacity / 2 + 8;
                 if (better_capacity >= new_capacity) break;
             }
-            l.items = %return l.allocator.realloc(T, l.items, better_capacity);
+            l.items = %return l.allocator.alignedRealloc(T, A, l.items, better_capacity);
         }
 
         pub fn addOne(l: &Self) -> %&T {
@@ -97,7 +101,13 @@ pub fn ArrayList(comptime T: type) -> type{
             self.len -= 1;
             return self.items[self.len];
         }
-    }
+
+        pub fn popOrNull(self: &Self) -> ?T {
+            if (self.len == 0)
+                return null;
+            return self.pop();
+        }
+    };
 }
 
 test "basic ArrayList test" {
diff --git a/std/base64.zig b/std/base64.zig
index 25e438c4fb..840643b565 100644
--- a/std/base64.zig
+++ b/std/base64.zig
@@ -193,7 +193,7 @@ pub const Base64DecoderWithIgnore = struct {
     /// Decoding more data than can fit in dest results in error.OutputTooSmall. See also ::calcSizeUpperBound.
     /// Returns the number of bytes writen to dest.
     pub fn decode(decoder_with_ignore: &const Base64DecoderWithIgnore, dest: []u8, source: []const u8) -> %usize {
-        const decoder = &const decoder_with_ignore.decoder;
+        const decoder = &decoder_with_ignore.decoder;
 
         var src_cursor: usize = 0;
         var dest_cursor: usize = 0;
diff --git a/std/buf_map.zig b/std/buf_map.zig
index e913b8765d..2a9bd77660 100644
--- a/std/buf_map.zig
+++ b/std/buf_map.zig
@@ -42,6 +42,11 @@ pub const BufMap = struct {
         }
     }
 
+    pub fn get(self: &BufMap, key: []const u8) -> ?[]const u8 {
+        const entry = self.hash_map.get(key) ?? return null;
+        return entry.value;
+    }
+
     pub fn delete(self: &BufMap, key: []const u8) {
         const entry = self.hash_map.remove(key) ?? return;
         self.free(entry.key);
diff --git a/std/buffer.zig b/std/buffer.zig
index 96abaeb762..4f5d281f48 100644
--- a/std/buffer.zig
+++ b/std/buffer.zig
@@ -30,9 +30,9 @@ pub const Buffer = struct {
     /// * ::replaceContentsBuffer
     /// * ::resize
     pub fn initNull(allocator: &Allocator) -> Buffer {
-        Buffer {
+        return Buffer {
             .list = ArrayList(u8).init(allocator),
-        }
+        };
     }
 
     /// Must deinitialize with deinit.
@@ -98,14 +98,17 @@ pub const Buffer = struct {
         mem.copy(u8, self.list.toSlice()[old_len..], m);
     }
 
+    // TODO: remove, use OutStream for this
     pub fn appendFormat(self: &Buffer, comptime format: []const u8, args: ...) -> %void {
         return fmt.format(self, append, format, args);
     }
 
+    // TODO: remove, use OutStream for this
     pub fn appendByte(self: &Buffer, byte: u8) -> %void {
         return self.appendByteNTimes(byte, 1);
     }
 
+    // TODO: remove, use OutStream for this
     pub fn appendByteNTimes(self: &Buffer, byte: u8, count: usize) -> %void {
         var prev_size: usize = self.len();
         %return self.resize(prev_size + count);
@@ -117,7 +120,7 @@ pub const Buffer = struct {
     }
 
     pub fn eql(self: &const Buffer, m: []const u8) -> bool {
-        mem.eql(u8, self.toSliceConst(), m)
+        return mem.eql(u8, self.toSliceConst(), m);
     }
 
     pub fn startsWith(self: &const Buffer, m: []const u8) -> bool {
@@ -136,6 +139,11 @@ pub const Buffer = struct {
         %return self.resize(m.len);
         mem.copy(u8, self.list.toSlice(), m);
     }
+
+    /// For passing to C functions.
+    pub fn ptr(self: &const Buffer) -> &u8 {
+        return self.list.items.ptr;
+    }
 };
 
 test "simple Buffer" {
diff --git a/std/build.zig b/std/build.zig
index 9bdc4b3076..0a23a77f80 100644
--- a/std/build.zig
+++ b/std/build.zig
@@ -221,11 +221,11 @@ pub const Builder = struct {
     }
 
     pub fn version(self: &const Builder, major: u32, minor: u32, patch: u32) -> Version {
-        Version {
+        return Version {
             .major = major,
             .minor = minor,
             .patch = patch,
-        }
+        };
     }
 
     pub fn addCIncludePath(self: &Builder, path: []const u8) {
@@ -432,16 +432,16 @@ pub const Builder = struct {
         const release_safe = self.option(bool, "release-safe", "optimizations on and safety on") ?? false;
         const release_fast = self.option(bool, "release-fast", "optimizations on and safety off") ?? false;
 
-        const mode = if (release_safe and !release_fast) {
+        const mode = if (release_safe and !release_fast)
             builtin.Mode.ReleaseSafe
-        } else if (release_fast and !release_safe) {
+        else if (release_fast and !release_safe)
             builtin.Mode.ReleaseFast
-        } else if (!release_fast and !release_safe) {
+        else if (!release_fast and !release_safe)
             builtin.Mode.Debug
-        } else {
+        else x: {
             warn("Both -Drelease-safe and -Drelease-fast specified");
             self.markInvalidUserInput();
-            builtin.Mode.Debug
+            break :x builtin.Mode.Debug;
         };
         self.release_mode = mode;
         return mode;
@@ -506,7 +506,7 @@ pub const Builder = struct {
     }
 
     fn typeToEnum(comptime T: type) -> TypeId {
-        switch (@typeId(T)) {
+        return switch (@typeId(T)) {
             builtin.TypeId.Int => TypeId.Int,
             builtin.TypeId.Float => TypeId.Float,
             builtin.TypeId.Bool => TypeId.Bool,
@@ -515,7 +515,7 @@ pub const Builder = struct {
                 []const []const u8 => TypeId.List,
                 else => @compileError("Unsupported type: " ++ @typeName(T)),
             },
-        }
+        };
     }
 
     fn markInvalidUserInput(self: &Builder) {
@@ -590,8 +590,7 @@ pub const Builder = struct {
 
                 return error.UncleanExit;
             },
-        };
-
+        }
     }
 
     pub fn makePath(self: &Builder, path: []const u8) -> %void {
@@ -662,13 +661,70 @@ pub const Builder = struct {
         if (builtin.environ == builtin.Environ.msvc) {
             return "cl.exe";
         } else {
-            return os.getEnvVarOwned(self.allocator, "CC") %% |err| {
-                if (err == error.EnvironmentVariableNotFound) {
+            return os.getEnvVarOwned(self.allocator, "CC") %% |err| 
+                if (err == error.EnvironmentVariableNotFound)
                     ([]const u8)("cc")
-                } else {
-                    debug.panic("Unable to get environment variable: {}", err);
+                else
+                    debug.panic("Unable to get environment variable: {}", err)
+            ;
+        }
+    }
+
+    pub fn findProgram(self: &Builder, names: []const []const u8, paths: []const []const u8) -> %[]const u8 {
+        const exe_extension = (Target { .Native = {}}).exeFileExt();
+        if (self.env_map.get("PATH")) |PATH| {
+            for (names) |name| {
+                if (os.path.isAbsolute(name)) {
+                    return name;
                 }
-            };
+                var it = mem.split(PATH, []u8{os.path.delimiter});
+                while (it.next()) |path| {
+                    const full_path = %return os.path.join(self.allocator, path, self.fmt("{}{}", name, exe_extension));
+                    if (os.path.real(self.allocator, full_path)) |real_path| {
+                        return real_path;
+                    } else |_| {
+                        continue;
+                    }
+                }
+            }
+        }
+        for (names) |name| {
+            if (os.path.isAbsolute(name)) {
+                return name;
+            }
+            for (paths) |path| {
+                const full_path = %return os.path.join(self.allocator, path, self.fmt("{}{}", name, exe_extension));
+                if (os.path.real(self.allocator, full_path)) |real_path| {
+                    return real_path;
+                } else |_| {
+                    continue;
+                }
+            }
+        }
+        return error.FileNotFound;
+    }
+
+    pub fn exec(self: &Builder, argv: []const []const u8) -> []u8 {
+        const max_output_size = 100 * 1024;
+        const result = os.ChildProcess.exec(self.allocator, argv, null, null, max_output_size) %% |err| {
+            std.debug.panic("Unable to spawn {}: {}", argv[0], @errorName(err));
+        };
+        switch (result.term) {
+            os.ChildProcess.Term.Exited => |code| {
+                if (code != 0) {
+                    warn("The following command exited with error code {}:\n", code);
+                    printCmd(null, argv);
+                    warn("stderr:{}\n", result.stderr);
+                    std.debug.panic("command failed");
+                }
+                return result.stdout;
+            },
+            else => {
+                warn("The following command terminated unexpectedly:\n");
+                printCmd(null, argv);
+                warn("stderr:{}\n", result.stderr);
+                std.debug.panic("command failed");
+            },
         }
     }
 };
@@ -755,6 +811,7 @@ pub const LibExeObjStep = struct {
     is_zig: bool,
     cflags: ArrayList([]const u8),
     include_dirs: ArrayList([]const u8),
+    lib_paths: ArrayList([]const u8),
     disable_libc: bool,
     frameworks: BufSet,
 
@@ -844,7 +901,7 @@ pub const LibExeObjStep = struct {
             .kind = kind,
             .root_src = root_src,
             .name = name,
-            .target = Target { .Native = {} },
+            .target = Target.Native,
             .linker_script = null,
             .link_libs = BufSet.init(builder.allocator),
             .frameworks = BufSet.init(builder.allocator),
@@ -865,6 +922,7 @@ pub const LibExeObjStep = struct {
             .cflags = ArrayList([]const u8).init(builder.allocator),
             .source_files = undefined,
             .include_dirs = ArrayList([]const u8).init(builder.allocator),
+            .lib_paths = ArrayList([]const u8).init(builder.allocator),
             .object_src = undefined,
             .disable_libc = true,
         };
@@ -879,7 +937,7 @@ pub const LibExeObjStep = struct {
             .kind = kind,
             .version = *version,
             .static = static,
-            .target = Target { .Native = {} },
+            .target = Target.Native,
             .cflags = ArrayList([]const u8).init(builder.allocator),
             .source_files = ArrayList([]const u8).init(builder.allocator),
             .object_files = ArrayList([]const u8).init(builder.allocator),
@@ -888,6 +946,7 @@ pub const LibExeObjStep = struct {
             .frameworks = BufSet.init(builder.allocator),
             .full_path_libs = ArrayList([]const u8).init(builder.allocator),
             .include_dirs = ArrayList([]const u8).init(builder.allocator),
+            .lib_paths = ArrayList([]const u8).init(builder.allocator),
             .output_path = null,
             .out_filename = undefined,
             .major_only_filename = undefined,
@@ -1018,11 +1077,10 @@ pub const LibExeObjStep = struct {
     }
 
     pub fn getOutputPath(self: &LibExeObjStep) -> []const u8 {
-        if (self.output_path) |output_path| {
+        return if (self.output_path) |output_path|
             output_path
-        } else {
-            %%os.path.join(self.builder.allocator, self.builder.cache_root, self.out_filename)
-        }
+        else
+            %%os.path.join(self.builder.allocator, self.builder.cache_root, self.out_filename);
     }
 
     pub fn setOutputHPath(self: &LibExeObjStep, file_path: []const u8) {
@@ -1035,11 +1093,10 @@ pub const LibExeObjStep = struct {
     }
 
     pub fn getOutputHPath(self: &LibExeObjStep) -> []const u8 {
-        if (self.output_h_path) |output_h_path| {
+        return if (self.output_h_path) |output_h_path|
             output_h_path
-        } else {
-            %%os.path.join(self.builder.allocator, self.builder.cache_root, self.out_h_filename)
-        }
+        else
+            %%os.path.join(self.builder.allocator, self.builder.cache_root, self.out_h_filename);
     }
 
     pub fn addAssemblyFile(self: &LibExeObjStep, path: []const u8) {
@@ -1069,11 +1126,14 @@ pub const LibExeObjStep = struct {
         %%self.include_dirs.append(self.builder.cache_root);
     }
 
-    // TODO put include_dirs in zig command line
     pub fn addIncludeDir(self: &LibExeObjStep, path: []const u8) {
         %%self.include_dirs.append(path);
     }
 
+    pub fn addLibPath(self: &LibExeObjStep, path: []const u8) {
+        %%self.lib_paths.append(path);
+    }
+
     pub fn addPackagePath(self: &LibExeObjStep, name: []const u8, pkg_index_path: []const u8) {
         assert(self.is_zig);
 
@@ -1222,6 +1282,11 @@ pub const LibExeObjStep = struct {
             %%zig_args.append("--pkg-end");
         }
 
+        for (self.include_dirs.toSliceConst()) |include_path| {
+            %%zig_args.append("-isystem");
+            %%zig_args.append(self.builder.pathFromRoot(include_path));
+        }
+
         for (builder.include_paths.toSliceConst()) |include_path| {
             %%zig_args.append("-isystem");
             %%zig_args.append(builder.pathFromRoot(include_path));
@@ -1232,6 +1297,11 @@ pub const LibExeObjStep = struct {
             %%zig_args.append(rpath);
         }
 
+        for (self.lib_paths.toSliceConst()) |lib_path| {
+            %%zig_args.append("--library-path");
+            %%zig_args.append(lib_path);
+        }
+
         for (builder.lib_paths.toSliceConst()) |lib_path| {
             %%zig_args.append("--library-path");
             %%zig_args.append(lib_path);
@@ -1544,7 +1614,7 @@ pub const TestStep = struct {
 
     pub fn init(builder: &Builder, root_src: []const u8) -> TestStep {
         const step_name = builder.fmt("test {}", root_src);
-        TestStep {
+        return TestStep {
             .step = Step.init(step_name, builder.allocator, make),
             .builder = builder,
             .root_src = root_src,
@@ -1555,7 +1625,7 @@ pub const TestStep = struct {
             .link_libs = BufSet.init(builder.allocator),
             .target = Target { .Native = {} },
             .exec_cmd_args = null,
-        }
+        };
     }
 
     pub fn setVerbose(self: &TestStep, value: bool) {
@@ -1862,16 +1932,16 @@ pub const Step = struct {
     done_flag: bool,
 
     pub fn init(name: []const u8, allocator: &Allocator, makeFn: fn (&Step)->%void) -> Step {
-        Step {
+        return Step {
             .name = name,
             .makeFn = makeFn,
             .dependencies = ArrayList(&Step).init(allocator),
             .loop_flag = false,
             .done_flag = false,
-        }
+        };
     }
     pub fn initNoOp(name: []const u8, allocator: &Allocator) -> Step {
-        init(name, allocator, makeNoOp)
+        return init(name, allocator, makeNoOp);
     }
 
     pub fn make(self: &Step) -> %void {
diff --git a/std/c/index.zig b/std/c/index.zig
index 2ac867ee71..e04b990633 100644
--- a/std/c/index.zig
+++ b/std/c/index.zig
@@ -48,3 +48,4 @@ pub extern "c" fn setregid(rgid: c_uint, egid: c_uint) -> c_int;
 pub extern "c" fn malloc(usize) -> ?&c_void;
 pub extern "c" fn realloc(&c_void, usize) -> ?&c_void;
 pub extern "c" fn free(&c_void);
+pub extern "c" fn posix_memalign(memptr: &&c_void, alignment: usize, size: usize) -> c_int;
diff --git a/std/cstr.zig b/std/cstr.zig
index e29f90fc01..445f7ab892 100644
--- a/std/cstr.zig
+++ b/std/cstr.zig
@@ -17,7 +17,7 @@ pub fn cmp(a: &const u8, b: &const u8) -> i8 {
         return -1;
     } else {
         return 0;
-    };
+    }
 }
 
 pub fn toSliceConst(str: &const u8) -> []const u8 {
diff --git a/std/debug.zig b/std/debug.zig
index a2bea9eddd..a683b5ae15 100644
--- a/std/debug.zig
+++ b/std/debug.zig
@@ -32,7 +32,7 @@ fn getStderrStream() -> %&io.OutStream {
         const st = &stderr_file_out_stream.stream;
         stderr_stream = st;
         return st;
-    };
+    }
 }
 
 /// Tries to print a stack trace to stderr, unbuffered, and ignores any error returned.
@@ -52,9 +52,9 @@ pub fn assert(ok: bool) {
         // we insert an explicit call to @panic instead of unreachable.
         // TODO we should use `assertOrPanic` in tests and remove this logic.
         if (builtin.is_test) {
-            @panic("assertion failure")
+            @panic("assertion failure");
         } else {
-            unreachable // assertion failure
+            unreachable; // assertion failure
         }
     }
 }
@@ -96,8 +96,6 @@ const WHITE = "\x1b[37;1m";
 const DIM = "\x1b[2m";
 const RESET = "\x1b[0m";
 
-pub var user_main_fn: ?fn() -> %void = null;
-
 error PathNotFound;
 error InvalidDebugInfo;
 
@@ -113,6 +111,7 @@ pub fn writeStackTrace(out_stream: &io.OutStream, allocator: &mem.Allocator, tty
                 .debug_abbrev = undefined,
                 .debug_str = undefined,
                 .debug_line = undefined,
+                .debug_ranges = null,
                 .abbrev_table_list = ArrayList(AbbrevTableHeader).init(allocator),
                 .compile_unit_list = ArrayList(CompileUnit).init(allocator),
             };
@@ -127,6 +126,7 @@ pub fn writeStackTrace(out_stream: &io.OutStream, allocator: &mem.Allocator, tty
             st.debug_abbrev = (%return st.elf.findSection(".debug_abbrev")) ?? return error.MissingDebugInfo;
             st.debug_str = (%return st.elf.findSection(".debug_str")) ?? return error.MissingDebugInfo;
             st.debug_line = (%return st.elf.findSection(".debug_line")) ?? return error.MissingDebugInfo;
+            st.debug_ranges = (%return st.elf.findSection(".debug_ranges"));
             %return scanAllCompileUnits(st);
 
             var ignored_count: usize = 0;
@@ -144,7 +144,7 @@ pub fn writeStackTrace(out_stream: &io.OutStream, allocator: &mem.Allocator, tty
                 // at compile time. I'll call it issue #313
                 const ptr_hex = if (@sizeOf(usize) == 4) "0x{x8}" else "0x{x16}";
 
-                const compile_unit = findCompileUnit(st, return_address) ?? {
+                const compile_unit = findCompileUnit(st, return_address) %% {
                     %return out_stream.print("???:?:?: " ++ DIM ++ ptr_hex ++ " in ??? (???)" ++ RESET ++ "\n    ???\n\n",
                         return_address);
                     continue;
@@ -175,7 +175,7 @@ pub fn writeStackTrace(out_stream: &io.OutStream, allocator: &mem.Allocator, tty
                             return_address, compile_unit_name);
                     },
                     else => return err,
-                };
+                }
             }
         },
         builtin.ObjectFormat.coff => {
@@ -233,6 +233,7 @@ const ElfStackTrace = struct {
     debug_abbrev: &elf.SectionHeader,
     debug_str: &elf.SectionHeader,
     debug_line: &elf.SectionHeader,
+    debug_ranges: ?&elf.SectionHeader,
     abbrev_table_list: ArrayList(AbbrevTableHeader),
     compile_unit_list: ArrayList(CompileUnit),
 
@@ -333,6 +334,15 @@ const Die = struct {
         };
     }
 
+    fn getAttrSecOffset(self: &const Die, id: u64) -> %u64 {
+        const form_value = self.getAttr(id) ?? return error.MissingDebugInfo;
+        return switch (*form_value) {
+            FormValue.Const => |value| value.asUnsignedLe(),
+            FormValue.SecOffset => |value| value,
+            else => error.InvalidDebugInfo,
+        };
+    }
+
     fn getAttrUnsignedLe(self: &const Die, id: u64) -> %u64 {
         const form_value = self.getAttr(id) ?? return error.MissingDebugInfo;
         return switch (*form_value) {
@@ -347,7 +357,7 @@ const Die = struct {
             FormValue.String => |value| value,
             FormValue.StrPtr => |offset| getString(st, offset),
             else => error.InvalidDebugInfo,
-        }
+        };
     }
 };
 
@@ -393,7 +403,7 @@ const LineNumberProgram = struct {
     pub fn init(is_stmt: bool, include_dirs: []const []const u8,
         file_entries: &ArrayList(FileEntry), target_address: usize) -> LineNumberProgram
     {
-        LineNumberProgram {
+        return LineNumberProgram {
             .address = 0,
             .file = 1,
             .line = 1,
@@ -411,7 +421,7 @@ const LineNumberProgram = struct {
             .prev_is_stmt = undefined,
             .prev_basic_block = undefined,
             .prev_end_sequence = undefined,
-        }
+        };
     }
 
     pub fn checkLineMatch(self: &LineNumberProgram) -> %?LineInfo {
@@ -420,14 +430,11 @@ const LineNumberProgram = struct {
                 return error.MissingDebugInfo;
             } else if (self.prev_file - 1 >= self.file_entries.len) {
                 return error.InvalidDebugInfo;
-            } else {
-                &self.file_entries.items[self.prev_file - 1]
-            };
+            } else &self.file_entries.items[self.prev_file - 1];
+
             const dir_name = if (file_entry.dir_index >= self.include_dirs.len) {
                 return error.InvalidDebugInfo;
-            } else {
-                self.include_dirs[file_entry.dir_index]
-            };
+            } else self.include_dirs[file_entry.dir_index];
             const file_name = %return os.path.join(self.file_entries.allocator, dir_name, file_entry.file_name);
             %defer self.file_entries.allocator.free(file_name);
             return LineInfo {
@@ -484,28 +491,21 @@ fn parseFormValueBlock(allocator: &mem.Allocator, in_stream: &io.InStream, size:
 }
 
 fn parseFormValueConstant(allocator: &mem.Allocator, in_stream: &io.InStream, signed: bool, size: usize) -> %FormValue {
-    FormValue { .Const = Constant {
+    return FormValue { .Const = Constant {
         .signed = signed,
         .payload = %return readAllocBytes(allocator, in_stream, size),
-    }}
+    }};
 }
 
 fn parseFormValueDwarfOffsetSize(in_stream: &io.InStream, is_64: bool) -> %u64 {
-    return if (is_64) {
-        %return in_stream.readIntLe(u64)
-    } else {
-        u64(%return in_stream.readIntLe(u32))
-    };
+    return if (is_64) %return in_stream.readIntLe(u64)
+    else u64(%return in_stream.readIntLe(u32)) ;
 }
 
 fn parseFormValueTargetAddrSize(in_stream: &io.InStream) -> %u64 {
-    return if (@sizeOf(usize) == 4) {
-        u64(%return in_stream.readIntLe(u32))
-    } else if (@sizeOf(usize) == 8) {
-        %return in_stream.readIntLe(u64)
-    } else {
-        unreachable;
-    };
+    return if (@sizeOf(usize) == 4) u64(%return in_stream.readIntLe(u32))
+    else if (@sizeOf(usize) == 8) %return in_stream.readIntLe(u64)
+    else unreachable;
 }
 
 fn parseFormValueRefLen(allocator: &mem.Allocator, in_stream: &io.InStream, size: usize) -> %FormValue {
@@ -524,9 +524,9 @@ fn parseFormValue(allocator: &mem.Allocator, in_stream: &io.InStream, form_id: u
         DW.FORM_block1 => parseFormValueBlock(allocator, in_stream, 1),
         DW.FORM_block2 => parseFormValueBlock(allocator, in_stream, 2),
         DW.FORM_block4 => parseFormValueBlock(allocator, in_stream, 4),
-        DW.FORM_block => {
+        DW.FORM_block => x: {
             const block_len = %return readULeb128(in_stream);
-            parseFormValueBlockLen(allocator, in_stream, block_len)
+            return parseFormValueBlockLen(allocator, in_stream, block_len);
         },
         DW.FORM_data1 => parseFormValueConstant(allocator, in_stream, false, 1),
         DW.FORM_data2 => parseFormValueConstant(allocator, in_stream, false, 2),
@@ -535,7 +535,7 @@ fn parseFormValue(allocator: &mem.Allocator, in_stream: &io.InStream, form_id: u
         DW.FORM_udata, DW.FORM_sdata => {
             const block_len = %return readULeb128(in_stream);
             const signed = form_id == DW.FORM_sdata;
-            parseFormValueConstant(allocator, in_stream, signed, block_len)
+            return parseFormValueConstant(allocator, in_stream, signed, block_len);
         },
         DW.FORM_exprloc => {
             const size = %return readULeb128(in_stream);
@@ -552,7 +552,7 @@ fn parseFormValue(allocator: &mem.Allocator, in_stream: &io.InStream, form_id: u
         DW.FORM_ref8 => parseFormValueRef(allocator, in_stream, u64),
         DW.FORM_ref_udata => {
             const ref_len = %return readULeb128(in_stream);
-            parseFormValueRefLen(allocator, in_stream, ref_len)
+            return parseFormValueRefLen(allocator, in_stream, ref_len);
         },
 
         DW.FORM_ref_addr => FormValue { .RefAddr = %return parseFormValueDwarfOffsetSize(in_stream, is_64) },
@@ -562,10 +562,10 @@ fn parseFormValue(allocator: &mem.Allocator, in_stream: &io.InStream, form_id: u
         DW.FORM_strp => FormValue { .StrPtr = %return parseFormValueDwarfOffsetSize(in_stream, is_64) },
         DW.FORM_indirect => {
             const child_form_id = %return readULeb128(in_stream);
-            parseFormValue(allocator, in_stream, child_form_id, is_64)
+            return parseFormValue(allocator, in_stream, child_form_id, is_64);
         },
         else => error.InvalidDebugInfo,
-    }
+    };
 }
 
 fn parseAbbrevTable(st: &ElfStackTrace) -> %AbbrevTable {
@@ -842,11 +842,9 @@ fn scanAllCompileUnits(st: &ElfStackTrace) -> %void {
         const version = %return in_stream.readInt(st.elf.endian, u16);
         if (version < 2 or version > 5) return error.InvalidDebugInfo;
 
-        const debug_abbrev_offset = if (is_64) {
-            %return in_stream.readInt(st.elf.endian, u64)
-        } else {
-            %return in_stream.readInt(st.elf.endian, u32)
-        };
+        const debug_abbrev_offset =
+            if (is_64) %return in_stream.readInt(st.elf.endian, u64)
+            else %return in_stream.readInt(st.elf.endian, u32);
 
         const address_size = %return in_stream.readByte();
         if (address_size != @sizeOf(usize)) return error.InvalidDebugInfo;
@@ -862,28 +860,28 @@ fn scanAllCompileUnits(st: &ElfStackTrace) -> %void {
         if (compile_unit_die.tag_id != DW.TAG_compile_unit)
             return error.InvalidDebugInfo;
 
-        const pc_range = {
+        const pc_range = x: {
             if (compile_unit_die.getAttrAddr(DW.AT_low_pc)) |low_pc| {
                 if (compile_unit_die.getAttr(DW.AT_high_pc)) |high_pc_value| {
                     const pc_end = switch (*high_pc_value) {
                         FormValue.Address => |value| value,
-                        FormValue.Const => |value| {
+                        FormValue.Const => |value| b: {
                             const offset = %return value.asUnsignedLe();
-                            low_pc + offset
+                            break :b (low_pc + offset);
                         },
                         else => return error.InvalidDebugInfo,
                     };
-                    PcRange {
+                    break :x PcRange {
                         .start = low_pc,
                         .end = pc_end,
-                    }
+                    };
                 } else {
-                    null
+                    break :x null;
                 }
             } else |err| {
                 if (err != error.MissingDebugInfo)
                     return err;
-                null
+                break :x null;
             }
         };
 
@@ -900,25 +898,51 @@ fn scanAllCompileUnits(st: &ElfStackTrace) -> %void {
     }
 }
 
-fn findCompileUnit(st: &ElfStackTrace, target_address: u64) -> ?&const CompileUnit {
+fn findCompileUnit(st: &ElfStackTrace, target_address: u64) -> %&const CompileUnit {
+    var in_file_stream = io.FileInStream.init(&st.self_exe_file);
+    const in_stream = &in_file_stream.stream;
     for (st.compile_unit_list.toSlice()) |*compile_unit| {
         if (compile_unit.pc_range) |range| {
             if (target_address >= range.start and target_address < range.end)
                 return compile_unit;
         }
+        if (compile_unit.die.getAttrSecOffset(DW.AT_ranges)) |ranges_offset| {
+            var base_address: usize = 0;
+            if (st.debug_ranges) |debug_ranges| {
+                %return st.self_exe_file.seekTo(debug_ranges.offset + ranges_offset);
+                while (true) {
+                    const begin_addr = %return in_stream.readIntLe(usize);
+                    const end_addr = %return in_stream.readIntLe(usize);
+                    if (begin_addr == 0 and end_addr == 0) {
+                        break;
+                    }
+                    if (begin_addr == @maxValue(usize)) {
+                        base_address = begin_addr;
+                        continue;
+                    }
+                    if (target_address >= begin_addr and target_address < end_addr) {
+                        return compile_unit;
+                    }
+                }
+            }
+        } else |err| {
+            if (err != error.MissingDebugInfo)
+                return err;
+            continue;
+        }
     }
-    return null;
+    return error.MissingDebugInfo;
 }
 
 fn readInitialLength(in_stream: &io.InStream, is_64: &bool) -> %u64 {
     const first_32_bits = %return in_stream.readIntLe(u32);
     *is_64 = (first_32_bits == 0xffffffff);
-    return if (*is_64) {
-        %return in_stream.readIntLe(u64)
+    if (*is_64) {
+        return in_stream.readIntLe(u64);
     } else {
         if (first_32_bits >= 0xfffffff0) return error.InvalidDebugInfo;
-        u64(first_32_bits)
-    };
+        return u64(first_32_bits);
+    }
 }
 
 fn readULeb128(in_stream: &io.InStream) -> %u64 {
@@ -965,40 +989,62 @@ fn readILeb128(in_stream: &io.InStream) -> %i64 {
     }
 }
 
-pub const global_allocator = &global_allocator_state;
-var global_allocator_state = mem.Allocator {
-    .allocFn = globalAlloc,
-    .reallocFn = globalRealloc,
-    .freeFn = globalFree,
-};
+pub const global_allocator = &global_fixed_allocator.allocator;
+var global_fixed_allocator = mem.FixedBufferAllocator.init(global_allocator_mem[0..]);
+var global_allocator_mem: [100 * 1024]u8 = undefined;
 
-var some_mem: [100 * 1024]u8 = undefined;
-var some_mem_index: usize = 0;
-
-error OutOfMemory;
+/// Allocator that fails after N allocations, useful for making sure out of
+/// memory conditions are handled correctly.
+pub const FailingAllocator = struct {
+    allocator: mem.Allocator,
+    index: usize,
+    fail_index: usize,
+    internal_allocator: &mem.Allocator,
+    allocated_bytes: usize,
+
+    pub fn init(allocator: &mem.Allocator, fail_index: usize) -> FailingAllocator {
+        return FailingAllocator {
+            .internal_allocator = allocator,
+            .fail_index = fail_index,
+            .index = 0,
+            .allocated_bytes = 0,
+            .allocator = mem.Allocator {
+                .allocFn = alloc,
+                .reallocFn = realloc,
+                .freeFn = free,
+            },
+        };
+    }
 
-fn globalAlloc(self: &mem.Allocator, n: usize, alignment: usize) -> %[]u8 {
-    const addr = @ptrToInt(&some_mem[some_mem_index]);
-    const rem = @rem(addr, alignment);
-    const march_forward_bytes = if (rem == 0) 0 else (alignment - rem);
-    const adjusted_index = some_mem_index + march_forward_bytes;
-    const end_index = adjusted_index + n;
-    if (end_index > some_mem.len) {
-        return error.OutOfMemory;
+    fn alloc(allocator: &mem.Allocator, n: usize, alignment: u29) -> %[]u8 {
+        const self = @fieldParentPtr(FailingAllocator, "allocator", allocator);
+        if (self.index == self.fail_index) {
+            return error.OutOfMemory;
+        }
+        self.index += 1;
+        const result = %return self.internal_allocator.allocFn(self.internal_allocator, n, alignment);
+        self.allocated_bytes += result.len;
+        return result;
     }
-    const result = some_mem[adjusted_index .. end_index];
-    some_mem_index = end_index;
-    return result;
-}
 
-fn globalRealloc(self: &mem.Allocator, old_mem: []u8, new_size: usize, alignment: usize) -> %[]u8 {
-    if (new_size <= old_mem.len) {
-        return old_mem[0..new_size];
-    } else {
-        const result = %return globalAlloc(self, new_size, alignment);
-        @memcpy(result.ptr, old_mem.ptr, old_mem.len);
+    fn realloc(allocator: &mem.Allocator, old_mem: []u8, new_size: usize, alignment: u29) -> %[]u8 {
+        const self = @fieldParentPtr(FailingAllocator, "allocator", allocator);
+        if (new_size <= old_mem.len) {
+            self.allocated_bytes -= old_mem.len - new_size;
+            return self.internal_allocator.reallocFn(self.internal_allocator, old_mem, new_size, alignment);
+        }
+        if (self.index == self.fail_index) {
+            return error.OutOfMemory;
+        }
+        self.index += 1;
+        const result = %return self.internal_allocator.reallocFn(self.internal_allocator, old_mem, new_size, alignment);
+        self.allocated_bytes += new_size - old_mem.len;
         return result;
     }
-}
 
-fn globalFree(self: &mem.Allocator, memory: []u8) { }
+    fn free(allocator: &mem.Allocator, bytes: []u8) {
+        const self = @fieldParentPtr(FailingAllocator, "allocator", allocator);
+        self.allocated_bytes -= bytes.len;
+        return self.internal_allocator.freeFn(self.internal_allocator, bytes);
+    }
+};
diff --git a/std/elf.zig b/std/elf.zig
index f7be236d15..60b0119894 100644
--- a/std/elf.zig
+++ b/std/elf.zig
@@ -188,39 +188,39 @@ pub const Elf = struct {
         if (elf.is_64) {
             if (sh_entry_size != 64) return error.InvalidFormat;
 
-            for (elf.section_headers) |*section| {
-                section.name         = %return in.readInt(elf.endian, u32);
-                section.sh_type      = %return in.readInt(elf.endian, u32);
-                section.flags        = %return in.readInt(elf.endian, u64);
-                section.addr         = %return in.readInt(elf.endian, u64);
-                section.offset       = %return in.readInt(elf.endian, u64);
-                section.size         = %return in.readInt(elf.endian, u64);
-                section.link         = %return in.readInt(elf.endian, u32);
-                section.info         = %return in.readInt(elf.endian, u32);
-                section.addr_align   = %return in.readInt(elf.endian, u64);
-                section.ent_size     = %return in.readInt(elf.endian, u64);
+            for (elf.section_headers) |*elf_section| {
+                elf_section.name         = %return in.readInt(elf.endian, u32);
+                elf_section.sh_type      = %return in.readInt(elf.endian, u32);
+                elf_section.flags        = %return in.readInt(elf.endian, u64);
+                elf_section.addr         = %return in.readInt(elf.endian, u64);
+                elf_section.offset       = %return in.readInt(elf.endian, u64);
+                elf_section.size         = %return in.readInt(elf.endian, u64);
+                elf_section.link         = %return in.readInt(elf.endian, u32);
+                elf_section.info         = %return in.readInt(elf.endian, u32);
+                elf_section.addr_align   = %return in.readInt(elf.endian, u64);
+                elf_section.ent_size     = %return in.readInt(elf.endian, u64);
             }
         } else {
             if (sh_entry_size != 40) return error.InvalidFormat;
 
-            for (elf.section_headers) |*section| {
+            for (elf.section_headers) |*elf_section| {
                 // TODO (multiple occurences) allow implicit cast from %u32 -> %u64 ?
-                section.name = %return in.readInt(elf.endian, u32);
-                section.sh_type = %return in.readInt(elf.endian, u32);
-                section.flags = u64(%return in.readInt(elf.endian, u32));
-                section.addr = u64(%return in.readInt(elf.endian, u32));
-                section.offset = u64(%return in.readInt(elf.endian, u32));
-                section.size = u64(%return in.readInt(elf.endian, u32));
-                section.link = %return in.readInt(elf.endian, u32);
-                section.info = %return in.readInt(elf.endian, u32);
-                section.addr_align = u64(%return in.readInt(elf.endian, u32));
-                section.ent_size = u64(%return in.readInt(elf.endian, u32));
+                elf_section.name = %return in.readInt(elf.endian, u32);
+                elf_section.sh_type = %return in.readInt(elf.endian, u32);
+                elf_section.flags = u64(%return in.readInt(elf.endian, u32));
+                elf_section.addr = u64(%return in.readInt(elf.endian, u32));
+                elf_section.offset = u64(%return in.readInt(elf.endian, u32));
+                elf_section.size = u64(%return in.readInt(elf.endian, u32));
+                elf_section.link = %return in.readInt(elf.endian, u32);
+                elf_section.info = %return in.readInt(elf.endian, u32);
+                elf_section.addr_align = u64(%return in.readInt(elf.endian, u32));
+                elf_section.ent_size = u64(%return in.readInt(elf.endian, u32));
             }
         }
 
-        for (elf.section_headers) |*section| {
-            if (section.sh_type != SHT_NOBITS) {
-                const file_end_offset = %return math.add(u64, section.offset, section.size);
+        for (elf.section_headers) |*elf_section| {
+            if (elf_section.sh_type != SHT_NOBITS) {
+                const file_end_offset = %return math.add(u64, elf_section.offset, elf_section.size);
                 if (stream_end < file_end_offset) return error.InvalidFormat;
             }
         }
@@ -243,29 +243,27 @@ pub const Elf = struct {
         var file_stream = io.FileInStream.init(elf.in_file);
         const in = &file_stream.stream;
 
-        for (elf.section_headers) |*section| {
-            if (section.sh_type == SHT_NULL) continue;
+        section_loop: for (elf.section_headers) |*elf_section| {
+            if (elf_section.sh_type == SHT_NULL) continue;
 
-            const name_offset = elf.string_section.offset + section.name;
+            const name_offset = elf.string_section.offset + elf_section.name;
             %return elf.in_file.seekTo(name_offset);
 
             for (name) |expected_c| {
                 const target_c = %return in.readByte();
-                if (target_c == 0 or expected_c != target_c) goto next_section;
+                if (target_c == 0 or expected_c != target_c) continue :section_loop;
             }
 
             {
                 const null_byte = %return in.readByte();
-                if (null_byte == 0) return section;
+                if (null_byte == 0) return elf_section;
             }
-
-            next_section:
         }
 
         return null;
     }
 
-    pub fn seekToSection(elf: &Elf, section: &SectionHeader) -> %void {
-        %return elf.in_file.seekTo(section.offset);
+    pub fn seekToSection(elf: &Elf, elf_section: &SectionHeader) -> %void {
+        %return elf.in_file.seekTo(elf_section.offset);
     }
 };
diff --git a/std/endian.zig b/std/endian.zig
index 2dc6b8d34e..9f2d2c8dcd 100644
--- a/std/endian.zig
+++ b/std/endian.zig
@@ -2,15 +2,15 @@ const mem = @import("mem.zig");
 const builtin = @import("builtin");
 
 pub fn swapIfLe(comptime T: type, x: T) -> T {
-    swapIf(false, T, x)
+    return swapIf(false, T, x);
 }
 
 pub fn swapIfBe(comptime T: type, x: T) -> T {
-    swapIf(true, T, x)
+    return swapIf(true, T, x);
 }
 
 pub fn swapIf(endian: builtin.Endian, comptime T: type, x: T) -> T {
-    if (builtin.endian == endian) swap(T, x) else x
+    return if (builtin.endian == endian) swap(T, x) else x;
 }
 
 pub fn swap(comptime T: type, x: T) -> T {
diff --git a/std/fmt/errol/enum3.zig b/std/fmt/errol/enum3.zig
index 93861cce74..feb84da9a4 100644
--- a/std/fmt/errol/enum3.zig
+++ b/std/fmt/errol/enum3.zig
@@ -439,10 +439,10 @@ const Slab = struct {
 };
 
 fn slab(str: []const u8, exp: i32) -> Slab {
-    Slab {
+    return Slab {
         .str = str,
         .exp = exp,
-    }
+    };
 }
 
 pub const enum3_data = []Slab {
diff --git a/std/fmt/index.zig b/std/fmt/index.zig
index 59376e0458..fef968a1d5 100644
--- a/std/fmt/index.zig
+++ b/std/fmt/index.zig
@@ -251,11 +251,10 @@ pub fn formatFloat(value: var, context: var, output: fn(@typeOf(context), []cons
     %return output(context, float_decimal.digits[0..1]);
     %return output(context, ".");
     if (float_decimal.digits.len > 1) {
-        const num_digits = if (@typeOf(value) == f32) {
+        const num_digits = if (@typeOf(value) == f32)
             math.min(usize(9), float_decimal.digits.len)
-        } else {
-            float_decimal.digits.len
-        };
+        else
+            float_decimal.digits.len;
         %return output(context, float_decimal.digits[1 .. num_digits]);
     } else {
         %return output(context, "0");
@@ -372,6 +371,10 @@ test "fmt.parseInt" {
     assert(%%parseInt(i32, "-10", 10) == -10);
     assert(%%parseInt(i32, "+10", 10) == 10);
     assert(if (parseInt(i32, " 10", 10)) |_| false else |err| err == error.InvalidChar);
+    assert(if (parseInt(i32, "10 ", 10)) |_| false else |err| err == error.InvalidChar);
+    assert(if (parseInt(u32, "-10", 10)) |_| false else |err| err == error.InvalidChar);
+    assert(%%parseInt(u8, "255", 10) == 255);
+    assert(if (parseInt(u8, "256", 10)) |_| false else |err| err == error.Overflow);
 }
 
 pub fn parseUnsigned(comptime T: type, buf: []const u8, radix: u8) -> %T {
@@ -413,14 +416,16 @@ const BufPrintContext = struct {
     remaining: []u8,
 };
 
+error BufferTooSmall;
 fn bufPrintWrite(context: &BufPrintContext, bytes: []const u8) -> %void {
+    if (context.remaining.len < bytes.len) return error.BufferTooSmall;
     mem.copy(u8, context.remaining, bytes);
     context.remaining = context.remaining[bytes.len..];
 }
 
-pub fn bufPrint(buf: []u8, comptime fmt: []const u8, args: ...) -> []u8 {
+pub fn bufPrint(buf: []u8, comptime fmt: []const u8, args: ...) -> %[]u8 {
     var context = BufPrintContext { .remaining = buf, };
-    %%format(&context, bufPrintWrite, fmt, args);
+    %return format(&context, bufPrintWrite, fmt, args);
     return buf[0..buf.len - context.remaining.len];
 }
 
@@ -476,31 +481,31 @@ test "fmt.format" {
     {
         var buf1: [32]u8 = undefined;
         const value: ?i32 = 1234;
-        const result = bufPrint(buf1[0..], "nullable: {}\n", value);
+        const result = %%bufPrint(buf1[0..], "nullable: {}\n", value);
         assert(mem.eql(u8, result, "nullable: 1234\n"));
     }
     {
         var buf1: [32]u8 = undefined;
         const value: ?i32 = null;
-        const result = bufPrint(buf1[0..], "nullable: {}\n", value);
+        const result = %%bufPrint(buf1[0..], "nullable: {}\n", value);
         assert(mem.eql(u8, result, "nullable: null\n"));
     }
     {
         var buf1: [32]u8 = undefined;
         const value: %i32 = 1234;
-        const result = bufPrint(buf1[0..], "error union: {}\n", value);
+        const result = %%bufPrint(buf1[0..], "error union: {}\n", value);
         assert(mem.eql(u8, result, "error union: 1234\n"));
     }
     {
         var buf1: [32]u8 = undefined;
         const value: %i32 = error.InvalidChar;
-        const result = bufPrint(buf1[0..], "error union: {}\n", value);
+        const result = %%bufPrint(buf1[0..], "error union: {}\n", value);
         assert(mem.eql(u8, result, "error union: error.InvalidChar\n"));
     }
     {
         var buf1: [32]u8 = undefined;
         const value: u3 = 0b101;
-        const result = bufPrint(buf1[0..], "u3: {}\n", value);
+        const result = %%bufPrint(buf1[0..], "u3: {}\n", value);
         assert(mem.eql(u8, result, "u3: 5\n"));
     }
 
@@ -510,28 +515,28 @@ test "fmt.format" {
         {
             var buf1: [32]u8 = undefined;
             const value: f32 = 12.34;
-            const result = bufPrint(buf1[0..], "f32: {}\n", value);
+            const result = %%bufPrint(buf1[0..], "f32: {}\n", value);
             assert(mem.eql(u8, result, "f32: 1.23400001e1\n"));
         }
         {
             var buf1: [32]u8 = undefined;
             const value: f64 = -12.34e10;
-            const result = bufPrint(buf1[0..], "f64: {}\n", value);
+            const result = %%bufPrint(buf1[0..], "f64: {}\n", value);
             assert(mem.eql(u8, result, "f64: -1.234e11\n"));
         }
         {
             var buf1: [32]u8 = undefined;
-            const result = bufPrint(buf1[0..], "f64: {}\n", math.nan_f64);
+            const result = %%bufPrint(buf1[0..], "f64: {}\n", math.nan_f64);
             assert(mem.eql(u8, result, "f64: NaN\n"));
         }
         {
             var buf1: [32]u8 = undefined;
-            const result = bufPrint(buf1[0..], "f64: {}\n", math.inf_f64);
+            const result = %%bufPrint(buf1[0..], "f64: {}\n", math.inf_f64);
             assert(mem.eql(u8, result, "f64: Infinity\n"));
         }
         {
             var buf1: [32]u8 = undefined;
-            const result = bufPrint(buf1[0..], "f64: {}\n", -math.inf_f64);
+            const result = %%bufPrint(buf1[0..], "f64: {}\n", -math.inf_f64);
             assert(mem.eql(u8, result, "f64: -Infinity\n"));
         }
     }
diff --git a/std/hash_map.zig b/std/hash_map.zig
index d124d7b573..837ee07423 100644
--- a/std/hash_map.zig
+++ b/std/hash_map.zig
@@ -12,7 +12,7 @@ pub fn HashMap(comptime K: type, comptime V: type,
     comptime hash: fn(key: K)->u32,
     comptime eql: fn(a: K, b: K)->bool) -> type
 {
-    struct {
+    return struct {
         entries: []Entry,
         size: usize,
         max_distance_from_start_index: usize,
@@ -51,19 +51,19 @@ pub fn HashMap(comptime K: type, comptime V: type,
                         return entry;
                     }
                 }
-                unreachable // no next item
+                unreachable; // no next item
             }
         };
 
         pub fn init(allocator: &Allocator) -> Self {
-            Self {
+            return Self {
                 .entries = []Entry{},
                 .allocator = allocator,
                 .size = 0,
                 .max_distance_from_start_index = 0,
                 // it doesn't actually matter what we set this to since we use wrapping integer arithmetic
                 .modification_count = undefined,
-            }
+            };
         }
 
         pub fn deinit(hm: &Self) {
@@ -133,7 +133,7 @@ pub fn HashMap(comptime K: type, comptime V: type,
                     entry.distance_from_start_index -= 1;
                     entry = next_entry;
                 }
-                unreachable // shifting everything in the table
+                unreachable; // shifting everything in the table
             }}
             return null;
         }
@@ -169,7 +169,7 @@ pub fn HashMap(comptime K: type, comptime V: type,
             const start_index = hm.keyToIndex(key);
             var roll_over: usize = 0;
             var distance_from_start_index: usize = 0;
-            while (roll_over < hm.entries.len) : ({roll_over += 1; distance_from_start_index += 1}) {
+            while (roll_over < hm.entries.len) : ({roll_over += 1; distance_from_start_index += 1;}) {
                 const index = (start_index + roll_over) % hm.entries.len;
                 const entry = &hm.entries[index];
 
@@ -210,7 +210,7 @@ pub fn HashMap(comptime K: type, comptime V: type,
                 };
                 return result;
             }
-            unreachable // put into a full map
+            unreachable; // put into a full map
         }
 
         fn internalGet(hm: &Self, key: K) -> ?&Entry {
@@ -228,7 +228,7 @@ pub fn HashMap(comptime K: type, comptime V: type,
         fn keyToIndex(hm: &Self, key: K) -> usize {
             return usize(hash(key)) % hm.entries.len;
         }
-    }
+    };
 }
 
 test "basicHashMapTest" {
@@ -251,9 +251,9 @@ test "basicHashMapTest" {
 }
 
 fn hash_i32(x: i32) -> u32 {
-    @bitCast(u32, x)
+    return @bitCast(u32, x);
 }
 
 fn eql_i32(a: i32, b: i32) -> bool {
-    a == b
+    return a == b;
 }
diff --git a/std/heap.zig b/std/heap.zig
index d0bf8ab871..ec447c1aa8 100644
--- a/std/heap.zig
+++ b/std/heap.zig
@@ -10,30 +10,28 @@ const Allocator = mem.Allocator;
 
 error OutOfMemory;
 
-pub var c_allocator = Allocator {
+pub const c_allocator = &c_allocator_state;
+var c_allocator_state = Allocator {
     .allocFn = cAlloc,
     .reallocFn = cRealloc,
     .freeFn = cFree,
 };
 
-fn cAlloc(self: &Allocator, n: usize, alignment: usize) -> %[]u8 {
-    if (c.malloc(usize(n))) |buf| {
+fn cAlloc(self: &Allocator, n: usize, alignment: u29) -> %[]u8 {
+    return if (c.malloc(usize(n))) |buf|
         @ptrCast(&u8, buf)[0..n]
-    } else {
-        error.OutOfMemory
-    }
+    else
+        error.OutOfMemory;
 }
 
-fn cRealloc(self: &Allocator, old_mem: []u8, new_size: usize, alignment: usize) -> %[]u8 {
-    if (new_size <= old_mem.len) {
-        old_mem[0..new_size]
+fn cRealloc(self: &Allocator, old_mem: []u8, new_size: usize, alignment: u29) -> %[]u8 {
+    const old_ptr = @ptrCast(&c_void, old_mem.ptr);
+    if (c.realloc(old_ptr, new_size)) |buf| {
+        return @ptrCast(&u8, buf)[0..new_size];
+    } else if (new_size <= old_mem.len) {
+        return old_mem[0..new_size];
     } else {
-        const old_ptr = @ptrCast(&c_void, old_mem.ptr);
-        if (c.realloc(old_ptr, usize(new_size))) |buf| {
-            @ptrCast(&u8, buf)[0..new_size]
-        } else {
-            error.OutOfMemory
-        }
+        return error.OutOfMemory;
     }
 }
 
@@ -106,7 +104,7 @@ pub const IncrementingAllocator = struct {
         return self.bytes.len - self.end_index;
     }
 
-    fn alloc(allocator: &Allocator, n: usize, alignment: usize) -> %[]u8 {
+    fn alloc(allocator: &Allocator, n: usize, alignment: u29) -> %[]u8 {
         const self = @fieldParentPtr(IncrementingAllocator, "allocator", allocator);
         const addr = @ptrToInt(&self.bytes[self.end_index]);
         const rem = @rem(addr, alignment);
@@ -121,7 +119,7 @@ pub const IncrementingAllocator = struct {
         return result;
     }
 
-    fn realloc(allocator: &Allocator, old_mem: []u8, new_size: usize, alignment: usize) -> %[]u8 {
+    fn realloc(allocator: &Allocator, old_mem: []u8, new_size: usize, alignment: u29) -> %[]u8 {
         if (new_size <= old_mem.len) {
             return old_mem[0..new_size];
         } else {
diff --git a/std/index.zig b/std/index.zig
index 8033eee142..323eee203e 100644
--- a/std/index.zig
+++ b/std/index.zig
@@ -1,7 +1,9 @@
 pub const ArrayList = @import("array_list.zig").ArrayList;
+pub const AlignedArrayList = @import("array_list.zig").AlignedArrayList;
 pub const BufMap = @import("buf_map.zig").BufMap;
 pub const BufSet = @import("buf_set.zig").BufSet;
 pub const Buffer = @import("buffer.zig").Buffer;
+pub const BufferOutStream = @import("buffer.zig").BufferOutStream;
 pub const HashMap = @import("hash_map.zig").HashMap;
 pub const LinkedList = @import("linked_list.zig").LinkedList;
 
@@ -26,12 +28,12 @@ pub const sort = @import("sort.zig");
 
 test "std" {
     // run tests from these
-    _ = @import("array_list.zig").ArrayList;
-    _ = @import("buf_map.zig").BufMap;
-    _ = @import("buf_set.zig").BufSet;
-    _ = @import("buffer.zig").Buffer;
-    _ = @import("hash_map.zig").HashMap;
-    _ = @import("linked_list.zig").LinkedList;
+    _ = @import("array_list.zig");
+    _ = @import("buf_map.zig");
+    _ = @import("buf_set.zig");
+    _ = @import("buffer.zig");
+    _ = @import("hash_map.zig");
+    _ = @import("linked_list.zig");
 
     _ = @import("base64.zig");
     _ = @import("build.zig");
diff --git a/std/io.zig b/std/io.zig
index c86ebed326..cbf2e0c216 100644
--- a/std/io.zig
+++ b/std/io.zig
@@ -50,35 +50,32 @@ error Unseekable;
 error EndOfFile;
 
 pub fn getStdErr() -> %File {
-    const handle = if (is_windows) {
+    const handle = if (is_windows)
         %return os.windowsGetStdHandle(system.STD_ERROR_HANDLE)
-    } else if (is_posix) {
+    else if (is_posix)
         system.STDERR_FILENO
-    } else {
-        unreachable
-    };
+    else
+        unreachable;
     return File.openHandle(handle);
 }
 
 pub fn getStdOut() -> %File {
-    const handle = if (is_windows) {
+    const handle = if (is_windows)
         %return os.windowsGetStdHandle(system.STD_OUTPUT_HANDLE)
-    } else if (is_posix) {
+    else if (is_posix)
         system.STDOUT_FILENO
-    } else {
-        unreachable
-    };
+    else
+        unreachable;
     return File.openHandle(handle);
 }
 
 pub fn getStdIn() -> %File {
-    const handle = if (is_windows) {
+    const handle = if (is_windows)
         %return os.windowsGetStdHandle(system.STD_INPUT_HANDLE)
-    } else if (is_posix) {
+    else if (is_posix)
         system.STDIN_FILENO
-    } else {
-        unreachable
-    };
+    else
+        unreachable;
     return File.openHandle(handle);
 }
 
@@ -261,7 +258,7 @@ pub const File = struct {
                     system.EBADF => error.BadFd,
                     system.ENOMEM => error.SystemResources,
                     else => os.unexpectedErrorPosix(err),
-                }
+                };
             }
 
             return usize(stat.size);
@@ -481,6 +478,14 @@ pub const OutStream = struct {
         const slice = (&byte)[0..1];
         return self.writeFn(self, slice);
     }
+
+    pub fn writeByteNTimes(self: &OutStream, byte: u8, n: usize) -> %void {
+        const slice = (&byte)[0..1];
+        var i: usize = 0;
+        while (i < n) : (i += 1) {
+            %return self.writeFn(self, slice);
+        }
+    }
 };
 
 /// `path` may need to be copied in memory to add a null terminating byte. In this case
@@ -493,6 +498,20 @@ pub fn writeFile(path: []const u8, data: []const u8, allocator: ?&mem.Allocator)
     %return file.write(data);
 }
 
+/// On success, caller owns returned buffer.
+pub fn readFileAlloc(path: []const u8, allocator: &mem.Allocator) -> %[]u8 {
+    var file = %return File.openRead(path, allocator);
+    defer file.close();
+
+    const size = %return file.getEndPos();
+    const buf = %return allocator.alloc(u8, size);
+    %defer allocator.free(buf);
+
+    var adapter = FileInStream.init(&file);
+    %return adapter.stream.readNoEof(buf);
+    return buf;
+}
+
 pub const BufferedInStream = BufferedInStreamCustom(os.page_size);
 
 pub fn BufferedInStreamCustom(comptime buffer_size: usize) -> type {
@@ -619,3 +638,24 @@ pub fn BufferedOutStreamCustom(comptime buffer_size: usize) -> type {
         }
     };
 }
+
+/// Implementation of OutStream trait for Buffer
+pub const BufferOutStream = struct {
+    buffer: &Buffer,
+    stream: OutStream,
+
+    pub fn init(buffer: &Buffer) -> BufferOutStream {
+        return BufferOutStream {
+            .buffer = buffer,
+            .stream = OutStream {
+                .writeFn = writeFn,
+            },
+        };
+    }
+
+    fn writeFn(out_stream: &OutStream, bytes: []const u8) -> %void {
+        const self = @fieldParentPtr(BufferOutStream, "stream", out_stream);
+        return self.buffer.append(bytes);
+    }
+};
+
diff --git a/std/linked_list.zig b/std/linked_list.zig
index cbcef793de..f4b6d274c9 100644
--- a/std/linked_list.zig
+++ b/std/linked_list.zig
@@ -5,7 +5,7 @@ const Allocator = mem.Allocator;
 
 /// Generic doubly linked list.
 pub fn LinkedList(comptime T: type) -> type {
-    struct {
+    return struct {
         const Self = this;
 
         /// Node inside the linked list wrapping the actual data.
@@ -15,11 +15,11 @@ pub fn LinkedList(comptime T: type) -> type {
             data: T,
 
             pub fn init(data: &const T) -> Node {
-                Node {
+                return Node {
                     .prev = null,
                     .next = null,
                     .data = *data,
-                }
+                };
             }
         };
 
@@ -32,11 +32,11 @@ pub fn LinkedList(comptime T: type) -> type {
         /// Returns:
         ///     An empty linked list.
         pub fn init() -> Self {
-            Self {
+            return Self {
                 .first = null,
                 .last  = null,
                 .len   = 0,
-            }
+            };
         }
 
         /// Insert a new node after an existing one.
@@ -166,7 +166,7 @@ pub fn LinkedList(comptime T: type) -> type {
         /// Returns:
         ///     A pointer to the new node.
         pub fn allocateNode(list: &Self, allocator: &Allocator) -> %&Node {
-            allocator.create(Node)
+            return allocator.create(Node);
         }
 
         /// Deallocate a node.
@@ -191,7 +191,7 @@ pub fn LinkedList(comptime T: type) -> type {
             *node = Node.init(data);
             return node;
         }
-    }
+    };
 }
 
 test "basic linked list test" {
diff --git a/std/math/acos.zig b/std/math/acos.zig
index 941643db82..7690497da5 100644
--- a/std/math/acos.zig
+++ b/std/math/acos.zig
@@ -7,11 +7,11 @@ const assert = @import("../debug.zig").assert;
 
 pub fn acos(x: var) -> @typeOf(x) {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(acos32, x),
-        f64 => @inlineCall(acos64, x),
+    return switch (T) {
+        f32 => acos32(x),
+        f64 => acos64(x),
         else => @compileError("acos not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 fn r32(z: f32) -> f32 {
@@ -22,7 +22,7 @@ fn r32(z: f32) -> f32 {
 
     const p = z * (pS0 + z * (pS1 + z * pS2));
     const q = 1.0 + z * qS1;
-    p / q
+    return p / q;
 }
 
 fn acos32(x: f32) -> f32 {
@@ -69,7 +69,7 @@ fn acos32(x: f32) -> f32 {
     const df = @bitCast(f32, jx & 0xFFFFF000);
     const c = (z - df * df) / (s + df);
     const w = r32(z) * s + c;
-    2 * (df + w)
+    return 2 * (df + w);
 }
 
 fn r64(z: f64) -> f64 {
@@ -86,7 +86,7 @@ fn r64(z: f64) -> f64 {
 
     const p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
     const q = 1.0 + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));
-    p / q
+    return p / q;
 }
 
 fn acos64(x: f64) -> f64 {
@@ -138,7 +138,7 @@ fn acos64(x: f64) -> f64 {
     const df = @bitCast(f64, jx & 0xFFFFFFFF00000000);
     const c = (z - df * df) / (s + df);
     const w = r64(z) * s + c;
-    2 * (df + w)
+    return 2 * (df + w);
 }
 
 test "math.acos" {
diff --git a/std/math/acosh.zig b/std/math/acosh.zig
index 9f43edeb5d..3b2c2c0f31 100644
--- a/std/math/acosh.zig
+++ b/std/math/acosh.zig
@@ -9,11 +9,11 @@ const assert = @import("../debug.zig").assert;
 
 pub fn acosh(x: var) -> @typeOf(x) {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(acosh32, x),
-        f64 => @inlineCall(acosh64, x),
+    return switch (T) {
+        f32 => acosh32(x),
+        f64 => acosh64(x),
         else => @compileError("acosh not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 // acosh(x) = log(x + sqrt(x * x - 1))
@@ -23,15 +23,15 @@ fn acosh32(x: f32) -> f32 {
 
     // |x| < 2, invalid if x < 1 or nan
     if (i < 0x3F800000 + (1 << 23)) {
-        math.log1p(x - 1 + math.sqrt((x - 1) * (x - 1) + 2 * (x - 1)))
+        return math.log1p(x - 1 + math.sqrt((x - 1) * (x - 1) + 2 * (x - 1)));
     }
     // |x| < 0x1p12
     else if (i < 0x3F800000 + (12 << 23)) {
-        math.ln(2 * x - 1 / (x + math.sqrt(x * x - 1)))
+        return math.ln(2 * x - 1 / (x + math.sqrt(x * x - 1)));
     }
     // |x| >= 0x1p12
     else {
-        math.ln(x) + 0.693147180559945309417232121458176568
+        return math.ln(x) + 0.693147180559945309417232121458176568;
     }
 }
 
@@ -41,15 +41,15 @@ fn acosh64(x: f64) -> f64 {
 
     // |x| < 2, invalid if x < 1 or nan
     if (e < 0x3FF + 1) {
-        math.log1p(x - 1 + math.sqrt((x - 1) * (x - 1) + 2 * (x - 1)))
+        return math.log1p(x - 1 + math.sqrt((x - 1) * (x - 1) + 2 * (x - 1)));
     }
     // |x| < 0x1p26
     else if (e < 0x3FF + 26) {
-        math.ln(2 * x - 1 / (x + math.sqrt(x * x - 1)))
+        return math.ln(2 * x - 1 / (x + math.sqrt(x * x - 1)));
     }
     // |x| >= 0x1p26 or nan
     else {
-        math.ln(x) + 0.693147180559945309417232121458176568
+        return math.ln(x) + 0.693147180559945309417232121458176568;
     }
 }
 
diff --git a/std/math/asin.zig b/std/math/asin.zig
index b0368b5d66..c5d24c35e5 100644
--- a/std/math/asin.zig
+++ b/std/math/asin.zig
@@ -8,11 +8,11 @@ const assert = @import("../debug.zig").assert;
 
 pub fn asin(x: var) -> @typeOf(x) {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(asin32, x),
-        f64 => @inlineCall(asin64, x),
+    return switch (T) {
+        f32 => asin32(x),
+        f64 => asin64(x),
         else => @compileError("asin not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 fn r32(z: f32) -> f32 {
@@ -23,7 +23,7 @@ fn r32(z: f32) -> f32 {
 
     const p = z * (pS0 + z * (pS1 + z * pS2));
     const q = 1.0 + z * qS1;
-    p / q
+    return p / q;
 }
 
 fn asin32(x: f32) -> f32 {
@@ -58,9 +58,9 @@ fn asin32(x: f32) -> f32 {
     const fx = pio2 - 2 * (s + s * r32(z));
 
     if (hx >> 31 != 0) {
-        -fx
+        return -fx;
     } else {
-        fx
+        return fx;
     }
 }
 
@@ -78,7 +78,7 @@ fn r64(z: f64) -> f64 {
 
     const p = z * (pS0 + z * (pS1 + z * (pS2 + z * (pS3 + z * (pS4 + z * pS5)))));
     const q = 1.0 + z * (qS1 + z * (qS2 + z * (qS3 + z * qS4)));
-    p / q
+    return p / q;
 }
 
 fn asin64(x: f64) -> f64 {
@@ -119,7 +119,7 @@ fn asin64(x: f64) -> f64 {
 
     // |x| > 0.975
     if (ix >= 0x3FEF3333) {
-        fx = pio2_hi - 2 * (s + s * r)
+        fx = pio2_hi - 2 * (s + s * r);
     } else {
         const jx = @bitCast(u64, s);
         const df = @bitCast(f64, jx & 0xFFFFFFFF00000000);
@@ -128,9 +128,9 @@ fn asin64(x: f64) -> f64 {
     }
 
     if (hx >> 31 != 0) {
-        -fx
+        return -fx;
     } else {
-        fx
+        return fx;
     }
 }
 
diff --git a/std/math/asinh.zig b/std/math/asinh.zig
index dab047036a..f963dae77b 100644
--- a/std/math/asinh.zig
+++ b/std/math/asinh.zig
@@ -9,11 +9,11 @@ const assert = @import("../debug.zig").assert;
 
 pub fn asinh(x: var) -> @typeOf(x) {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(asinh32, x),
-        f64 => @inlineCall(asinh64, x),
+    return switch (T) {
+        f32 => asinh32(x),
+        f64 => asinh64(x),
         else => @compileError("asinh not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 // asinh(x) = sign(x) * log(|x| + sqrt(x * x + 1)) ~= x - x^3/6 + o(x^5)
@@ -46,7 +46,7 @@ fn asinh32(x: f32) -> f32 {
         math.forceEval(x + 0x1.0p120);
     }
 
-    if (s != 0) -rx else rx
+    return if (s != 0) -rx else rx;
 }
 
 fn asinh64(x: f64) -> f64 {
@@ -77,7 +77,7 @@ fn asinh64(x: f64) -> f64 {
         math.forceEval(x + 0x1.0p120);
     }
 
-    if (s != 0) -rx else rx
+    return if (s != 0) -rx else rx;
 }
 
 test "math.asinh" {
diff --git a/std/math/atan.zig b/std/math/atan.zig
index 5f3e207960..cf244eb762 100644
--- a/std/math/atan.zig
+++ b/std/math/atan.zig
@@ -8,11 +8,11 @@ const assert = @import("../debug.zig").assert;
 
 pub fn atan(x: var) -> @typeOf(x) {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(atan32, x),
-        f64 => @inlineCall(atan64, x),
+    return switch (T) {
+        f32 => atan32(x),
+        f64 => atan64(x),
         else => @compileError("atan not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 fn atan32(x_: f32) -> f32 {
@@ -99,11 +99,11 @@ fn atan32(x_: f32) -> f32 {
     const s1 = z * (aT[0] + w * (aT[2] + w * aT[4]));
     const s2 = w * (aT[1] + w * aT[3]);
 
-    if (id == null) {
-        x - x * (s1 + s2)
+    if (id) |id_value| {
+        const zz = atanhi[id_value] - ((x * (s1 + s2) - atanlo[id_value]) - x);
+        return if (sign != 0) -zz else zz;
     } else {
-        const zz = atanhi[??id] - ((x * (s1 + s2) - atanlo[??id]) - x);
-        if (sign != 0) -zz else zz
+        return x - x * (s1 + s2);
     }
 }
 
@@ -198,16 +198,16 @@ fn atan64(x_: f64) -> f64 {
     const s1 = z * (aT[0] + w * (aT[2] + w * (aT[4] + w * (aT[6] + w * (aT[8] + w * aT[10])))));
     const s2 = w * (aT[1] + w * (aT[3] + w * (aT[5] + w * (aT[7] + w * aT[9]))));
 
-    if (id == null) {
-        x - x * (s1 + s2)
+    if (id) |id_value| {
+        const zz = atanhi[id_value] - ((x * (s1 + s2) - atanlo[id_value]) - x);
+        return if (sign != 0) -zz else zz;
     } else {
-        const zz = atanhi[??id] - ((x * (s1 + s2) - atanlo[??id]) - x);
-        if (sign != 0) -zz else zz
+        return x - x * (s1 + s2);
     }
 }
 
 test "math.atan" {
-    assert(atan(f32(0.2)) == atan32(0.2));
+    assert(@bitCast(u32, atan(f32(0.2))) == @bitCast(u32, atan32(0.2)));
     assert(atan(f64(0.2)) == atan64(0.2));
 }
 
diff --git a/std/math/atan2.zig b/std/math/atan2.zig
index 0e566837ce..b3af18cd9e 100644
--- a/std/math/atan2.zig
+++ b/std/math/atan2.zig
@@ -22,11 +22,11 @@ const math = @import("index.zig");
 const assert = @import("../debug.zig").assert;
 
 fn atan2(comptime T: type, x: T, y: T) -> T {
-    switch (T) {
-        f32 => @inlineCall(atan2_32, x, y),
-        f64 => @inlineCall(atan2_64, x, y),
+    return switch (T) {
+        f32 => atan2_32(x, y),
+        f64 => atan2_64(x, y),
         else => @compileError("atan2 not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 fn atan2_32(y: f32, x: f32) -> f32 {
@@ -97,11 +97,11 @@ fn atan2_32(y: f32, x: f32) -> f32 {
     }
 
     // z = atan(|y / x|) with correct underflow
-    var z = {
+    var z = z: {
         if ((m & 2) != 0 and iy + (26 << 23) < ix) {
-            0.0
+            break :z 0.0;
         } else {
-            math.atan(math.fabs(y / x))
+            break :z math.atan(math.fabs(y / x));
         }
     };
 
@@ -187,11 +187,11 @@ fn atan2_64(y: f64, x: f64) -> f64 {
     }
 
     // z = atan(|y / x|) with correct underflow
-    var z = {
+    var z = z: {
         if ((m & 2) != 0 and iy +% (64 << 20) < ix) {
-            0.0
+            break :z 0.0;
         } else {
-            math.atan(math.fabs(y / x))
+            break :z math.atan(math.fabs(y / x));
         }
     };
 
diff --git a/std/math/atanh.zig b/std/math/atanh.zig
index 8fe5ab55a7..13de90279b 100644
--- a/std/math/atanh.zig
+++ b/std/math/atanh.zig
@@ -9,11 +9,11 @@ const assert = @import("../debug.zig").assert;
 
 pub fn atanh(x: var) -> @typeOf(x) {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(atanh_32, x),
-        f64 => @inlineCall(atanh_64, x),
+    return switch (T) {
+        f32 => atanh_32(x),
+        f64 => atanh_64(x),
         else => @compileError("atanh not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 // atanh(x) = log((1 + x) / (1 - x)) / 2 = log1p(2x / (1 - x)) / 2 ~= x + x^3 / 3 + o(x^5)
@@ -32,7 +32,7 @@ fn atanh_32(x: f32) -> f32 {
         if (u < 0x3F800000 - (32 << 23)) {
             // underflow
             if (u < (1 << 23)) {
-                math.forceEval(y * y)
+                math.forceEval(y * y);
             }
         }
         // |x| < 0.5
@@ -43,7 +43,7 @@ fn atanh_32(x: f32) -> f32 {
         y = 0.5 * math.log1p(2 * (y / (1 - y)));
     }
 
-    if (s != 0) -y else y
+    return if (s != 0) -y else y;
 }
 
 fn atanh_64(x: f64) -> f64 {
@@ -72,7 +72,7 @@ fn atanh_64(x: f64) -> f64 {
         y = 0.5 * math.log1p(2 * (y / (1 - y)));
     }
 
-    if (s != 0) -y else y
+    return if (s != 0) -y else y;
 }
 
 test "math.atanh" {
diff --git a/std/math/cbrt.zig b/std/math/cbrt.zig
index 273939c6b6..a8df75dff2 100644
--- a/std/math/cbrt.zig
+++ b/std/math/cbrt.zig
@@ -9,11 +9,11 @@ const assert = @import("../debug.zig").assert;
 
 pub fn cbrt(x: var) -> @typeOf(x) {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(cbrt32, x),
-        f64 => @inlineCall(cbrt64, x),
+    return switch (T) {
+        f32 => cbrt32(x),
+        f64 => cbrt64(x),
         else => @compileError("cbrt not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 fn cbrt32(x: f32) -> f32 {
@@ -53,7 +53,7 @@ fn cbrt32(x: f32) -> f32 {
     r = t * t * t;
     t = t * (f64(x) + x + r) / (x + r + r);
 
-    f32(t)
+    return f32(t);
 }
 
 fn cbrt64(x: f64) -> f64 {
@@ -109,7 +109,7 @@ fn cbrt64(x: f64) -> f64 {
     var w = t + t;
     q = (q - t) / (w + q);
 
-    t + t * q
+    return t + t * q;
 }
 
 test "math.cbrt" {
diff --git a/std/math/ceil.zig b/std/math/ceil.zig
index 2c7b28cc93..8e27132e25 100644
--- a/std/math/ceil.zig
+++ b/std/math/ceil.zig
@@ -10,11 +10,11 @@ const assert = @import("../debug.zig").assert;
 
 pub fn ceil(x: var) -> @typeOf(x) {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(ceil32, x),
-        f64 => @inlineCall(ceil64, x),
+    return switch (T) {
+        f32 => ceil32(x),
+        f64 => ceil64(x),
         else => @compileError("ceil not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 fn ceil32(x: f32) -> f32 {
@@ -39,13 +39,13 @@ fn ceil32(x: f32) -> f32 {
             u += m;
         }
         u &= ~m;
-        @bitCast(f32, u)
+        return @bitCast(f32, u);
     } else {
         math.forceEval(x + 0x1.0p120);
         if (u >> 31 != 0) {
             return -0.0;
         } else {
-            1.0
+            return 1.0;
         }
     }
 }
@@ -70,14 +70,14 @@ fn ceil64(x: f64) -> f64 {
     if (e <= 0x3FF-1) {
         math.forceEval(y);
         if (u >> 63 != 0) {
-            return -0.0;    // Compiler requires return.
+            return -0.0;
         } else {
-            1.0
+            return 1.0;
         }
     } else if (y < 0) {
-        x + y + 1
+        return x + y + 1;
     } else {
-        x + y
+        return x + y;
     }
 }
 
diff --git a/std/math/copysign.zig b/std/math/copysign.zig
index 109ee8c632..05205e1a0e 100644
--- a/std/math/copysign.zig
+++ b/std/math/copysign.zig
@@ -2,11 +2,11 @@ const math = @import("index.zig");
 const assert = @import("../debug.zig").assert;
 
 pub fn copysign(comptime T: type, x: T, y: T) -> T {
-    switch (T) {
-        f32 => @inlineCall(copysign32, x, y),
-        f64 => @inlineCall(copysign64, x, y),
+    return switch (T) {
+        f32 => copysign32(x, y),
+        f64 => copysign64(x, y),
         else => @compileError("copysign not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 fn copysign32(x: f32, y: f32) -> f32 {
@@ -15,7 +15,7 @@ fn copysign32(x: f32, y: f32) -> f32 {
 
     const h1 = ux & (@maxValue(u32) / 2);
     const h2 = uy & (u32(1) << 31);
-    @bitCast(f32, h1 | h2)
+    return @bitCast(f32, h1 | h2);
 }
 
 fn copysign64(x: f64, y: f64) -> f64 {
@@ -24,7 +24,7 @@ fn copysign64(x: f64, y: f64) -> f64 {
 
     const h1 = ux & (@maxValue(u64) / 2);
     const h2 = uy & (u64(1) << 63);
-    @bitCast(f64, h1 | h2)
+    return @bitCast(f64, h1 | h2);
 }
 
 test "math.copysign" {
diff --git a/std/math/cos.zig b/std/math/cos.zig
index 4b5c9193c3..4c3b8e1282 100644
--- a/std/math/cos.zig
+++ b/std/math/cos.zig
@@ -9,11 +9,11 @@ const assert = @import("../debug.zig").assert;
 
 pub fn cos(x: var) -> @typeOf(x) {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(cos32, x),
-        f64 => @inlineCall(cos64, x),
+    return switch (T) {
+        f32 => cos32(x),
+        f64 => cos64(x),
         else => @compileError("cos not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 // sin polynomial coefficients
@@ -73,18 +73,18 @@ fn cos32(x_: f32) -> f32 {
     const z = ((x - y * pi4a) - y * pi4b) - y * pi4c;
     const w = z * z;
 
-    const r = {
+    const r = r: {
         if (j == 1 or j == 2) {
-            z + z * w * (S5 + w * (S4 + w * (S3 + w * (S2 + w * (S1 + w * S0)))))
+            break :r z + z * w * (S5 + w * (S4 + w * (S3 + w * (S2 + w * (S1 + w * S0)))));
         } else {
-            1.0 - 0.5 * w + w * w * (C5 + w * (C4 + w * (C3 + w * (C2 + w * (C1 + w * C0)))))
+            break :r 1.0 - 0.5 * w + w * w * (C5 + w * (C4 + w * (C3 + w * (C2 + w * (C1 + w * C0)))));
         }
     };
 
     if (sign) {
-        -r
+        return -r;
     } else {
-        r
+        return r;
     }
 }
 
@@ -124,18 +124,18 @@ fn cos64(x_: f64) -> f64 {
     const z = ((x - y * pi4a) - y * pi4b) - y * pi4c;
     const w = z * z;
 
-    const r = {
+    const r = r: {
         if (j == 1 or j == 2) {
-            z + z * w * (S5 + w * (S4 + w * (S3 + w * (S2 + w * (S1 + w * S0)))))
+            break :r z + z * w * (S5 + w * (S4 + w * (S3 + w * (S2 + w * (S1 + w * S0)))));
         } else {
-            1.0 - 0.5 * w + w * w * (C5 + w * (C4 + w * (C3 + w * (C2 + w * (C1 + w * C0)))))
+            break :r 1.0 - 0.5 * w + w * w * (C5 + w * (C4 + w * (C3 + w * (C2 + w * (C1 + w * C0)))));
         }
     };
 
     if (sign) {
-        -r
+        return -r;
     } else {
-        r
+        return r;
     }
 }
 
diff --git a/std/math/cosh.zig b/std/math/cosh.zig
index 34c647eaad..6d40d71b8d 100644
--- a/std/math/cosh.zig
+++ b/std/math/cosh.zig
@@ -11,11 +11,11 @@ const assert = @import("../debug.zig").assert;
 
 pub fn cosh(x: var) -> @typeOf(x) {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(cosh32, x),
-        f64 => @inlineCall(cosh64, x),
+    return switch (T) {
+        f32 => cosh32(x),
+        f64 => cosh64(x),
         else => @compileError("cosh not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 // cosh(x) = (exp(x) + 1 / exp(x)) / 2
@@ -43,7 +43,7 @@ fn cosh32(x: f32) -> f32 {
     }
 
     // |x| > log(FLT_MAX) or nan
-    expo2(ax)
+    return expo2(ax);
 }
 
 fn cosh64(x: f64) -> f64 {
@@ -76,7 +76,7 @@ fn cosh64(x: f64) -> f64 {
     }
 
     // |x| > log(CBL_MAX) or nan
-    expo2(ax)
+    return expo2(ax);
 }
 
 test "math.cosh" {
diff --git a/std/math/exp.zig b/std/math/exp.zig
index 87c0a1882e..6e591daea3 100644
--- a/std/math/exp.zig
+++ b/std/math/exp.zig
@@ -8,11 +8,11 @@ const assert = @import("../debug.zig").assert;
 
 pub fn exp(x: var) -> @typeOf(x) {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(exp32, x),
-        f64 => @inlineCall(exp64, x),
+    return switch (T) {
+        f32 => exp32(x),
+        f64 => exp64(x),
         else => @compileError("exp not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 fn exp32(x_: f32) -> f32 {
@@ -86,9 +86,9 @@ fn exp32(x_: f32) -> f32 {
     const y = 1 + (x * c / (2 - c) - lo + hi);
 
     if (k == 0) {
-        y
+        return y;
     } else {
-        math.scalbn(y, k)
+        return math.scalbn(y, k);
     }
 }
 
@@ -172,9 +172,9 @@ fn exp64(x_: f64) -> f64 {
     const y = 1 + (x * c / (2 - c) - lo + hi);
 
     if (k == 0) {
-        y
+        return y;
     } else {
-        math.scalbn(y, k)
+        return math.scalbn(y, k);
     }
 }
 
diff --git a/std/math/exp2.zig b/std/math/exp2.zig
index 111c1dad64..6061f32391 100644
--- a/std/math/exp2.zig
+++ b/std/math/exp2.zig
@@ -8,11 +8,11 @@ const assert = @import("../debug.zig").assert;
 
 pub fn exp2(x: var) -> @typeOf(x) {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(exp2_32, x),
-        f64 => @inlineCall(exp2_64, x),
+    return switch (T) {
+        f32 => exp2_32(x),
+        f64 => exp2_64(x),
         else => @compileError("exp2 not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 const exp2ft = []const f64 {
@@ -88,7 +88,7 @@ fn exp2_32(x: f32) -> f32 {
     var r: f64 = exp2ft[i0];
     const t: f64 = r * z;
     r = r + t * (P1 + z * P2) + t * (z * z) * (P3 + z * P4);
-    f32(r * uk)
+    return f32(r * uk);
 }
 
 const exp2dt = []f64 {
@@ -414,7 +414,7 @@ fn exp2_64(x: f64) -> f64 {
     z -= exp2dt[2 * i0 + 1];
     const r = t + t * z * (P1 + z * (P2 + z * (P3 + z * (P4 + z * P5))));
 
-    math.scalbn(r, ik)
+    return math.scalbn(r, ik);
 }
 
 test "math.exp2" {
diff --git a/std/math/expm1.zig b/std/math/expm1.zig
index ef0b2766b1..fbe1841030 100644
--- a/std/math/expm1.zig
+++ b/std/math/expm1.zig
@@ -9,11 +9,11 @@ const assert = @import("../debug.zig").assert;
 
 pub fn expm1(x: var) -> @typeOf(x) {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(expm1_32, x),
-        f64 => @inlineCall(expm1_64, x),
+    return switch (T) {
+        f32 => expm1_32(x),
+        f64 => expm1_64(x),
         else => @compileError("exp1m not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 fn expm1_32(x_: f32) -> f32 {
diff --git a/std/math/expo2.zig b/std/math/expo2.zig
index e0d8a8130d..93111391a2 100644
--- a/std/math/expo2.zig
+++ b/std/math/expo2.zig
@@ -2,11 +2,11 @@ const math = @import("index.zig");
 
 pub fn expo2(x: var) -> @typeOf(x) {
     const T = @typeOf(x);
-    switch (T) {
+    return switch (T) {
         f32 => expo2f(x),
         f64 => expo2d(x),
         else => @compileError("expo2 not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 fn expo2f(x: f32) -> f32 {
@@ -15,7 +15,7 @@ fn expo2f(x: f32) -> f32 {
 
     const u = (0x7F + k / 2) << 23;
     const scale = @bitCast(f32, u);
-    math.exp(x - kln2) * scale * scale
+    return math.exp(x - kln2) * scale * scale;
 }
 
 fn expo2d(x: f64) -> f64 {
@@ -24,5 +24,5 @@ fn expo2d(x: f64) -> f64 {
 
     const u = (0x3FF + k / 2) << 20;
     const scale = @bitCast(f64, u64(u) << 32);
-    math.exp(x - kln2) * scale * scale
+    return math.exp(x - kln2) * scale * scale;
 }
diff --git a/std/math/fabs.zig b/std/math/fabs.zig
index e9bd3eb689..cf1b8f1f14 100644
--- a/std/math/fabs.zig
+++ b/std/math/fabs.zig
@@ -8,23 +8,23 @@ const assert = @import("../debug.zig").assert;
 
 pub fn fabs(x: var) -> @typeOf(x) {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(fabs32, x),
-        f64 => @inlineCall(fabs64, x),
+    return switch (T) {
+        f32 => fabs32(x),
+        f64 => fabs64(x),
         else => @compileError("fabs not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 fn fabs32(x: f32) -> f32 {
     var u = @bitCast(u32, x);
     u &= 0x7FFFFFFF;
-    @bitCast(f32, u)
+    return @bitCast(f32, u);
 }
 
 fn fabs64(x: f64) -> f64 {
     var u = @bitCast(u64, x);
     u &= @maxValue(u64) >> 1;
-    @bitCast(f64, u)
+    return @bitCast(f64, u);
 }
 
 test "math.fabs" {
diff --git a/std/math/floor.zig b/std/math/floor.zig
index 85ee2b4923..d7de45e9d0 100644
--- a/std/math/floor.zig
+++ b/std/math/floor.zig
@@ -10,11 +10,11 @@ const math = @import("index.zig");
 
 pub fn floor(x: var) -> @typeOf(x) {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(floor32, x),
-        f64 => @inlineCall(floor64, x),
+    return switch (T) {
+        f32 => floor32(x),
+        f64 => floor64(x),
         else => @compileError("floor not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 fn floor32(x: f32) -> f32 {
@@ -40,13 +40,13 @@ fn floor32(x: f32) -> f32 {
         if (u >> 31 != 0) {
             u += m;
         }
-        @bitCast(f32, u & ~m)
+        return @bitCast(f32, u & ~m);
     } else {
         math.forceEval(x + 0x1.0p120);
         if (u >> 31 == 0) {
-            return 0.0; // Compiler requires return
+            return 0.0;
         } else {
-            -1.0
+            return -1.0;
         }
     }
 }
@@ -71,14 +71,14 @@ fn floor64(x: f64) -> f64 {
     if (e <= 0x3FF-1) {
         math.forceEval(y);
         if (u >> 63 != 0) {
-            return -1.0;    // Compiler requires return.
+            return -1.0;
         } else {
-            0.0
+            return 0.0;
         }
     } else if (y > 0) {
-        x + y - 1
+        return x + y - 1;
     } else {
-        x + y
+        return x + y;
     }
 }
 
diff --git a/std/math/fma.zig b/std/math/fma.zig
index 8dbbc39ed0..8e5adc80b2 100644
--- a/std/math/fma.zig
+++ b/std/math/fma.zig
@@ -2,11 +2,11 @@ const math = @import("index.zig");
 const assert = @import("../debug.zig").assert;
 
 pub fn fma(comptime T: type, x: T, y: T, z: T) -> T {
-    switch (T) {
-        f32 => @inlineCall(fma32, x, y, z),
-        f64 => @inlineCall(fma64, x, y ,z),
+    return switch (T) {
+        f32 => fma32(x, y, z),
+        f64 => fma64(x, y ,z),
         else => @compileError("fma not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 fn fma32(x: f32, y: f32, z: f32) -> f32 {
@@ -16,10 +16,10 @@ fn fma32(x: f32, y: f32, z: f32) -> f32 {
     const e = (u >> 52) & 0x7FF;
 
     if ((u & 0x1FFFFFFF) != 0x10000000 or e == 0x7FF or xy_z - xy == z) {
-        f32(xy_z)
+        return f32(xy_z);
     } else {
         // TODO: Handle inexact case with double-rounding
-        f32(xy_z)
+        return f32(xy_z);
     }
 }
 
@@ -64,9 +64,9 @@ fn fma64(x: f64, y: f64, z: f64) -> f64 {
 
     const adj = add_adjusted(r.lo, xy.lo);
     if (spread + math.ilogb(r.hi) > -1023) {
-        math.scalbn(r.hi + adj, spread)
+        return math.scalbn(r.hi + adj, spread);
     } else {
-        add_and_denorm(r.hi, adj, spread)
+        return add_and_denorm(r.hi, adj, spread);
     }
 }
 
@@ -77,7 +77,7 @@ fn dd_add(a: f64, b: f64) -> dd {
     ret.hi = a + b;
     const s = ret.hi - a;
     ret.lo = (a - (ret.hi - s)) + (b - s);
-    ret
+    return ret;
 }
 
 fn dd_mul(a: f64, b: f64) -> dd {
@@ -99,7 +99,7 @@ fn dd_mul(a: f64, b: f64) -> dd {
 
     ret.hi = p + q;
     ret.lo = p - ret.hi + q + la * lb;
-    ret
+    return ret;
 }
 
 fn add_adjusted(a: f64, b: f64) -> f64 {
@@ -113,7 +113,7 @@ fn add_adjusted(a: f64, b: f64) -> f64 {
             sum.hi = @bitCast(f64, uhii);
         }
     }
-    sum.hi
+    return sum.hi;
 }
 
 fn add_and_denorm(a: f64, b: f64, scale: i32) -> f64 {
@@ -127,7 +127,7 @@ fn add_and_denorm(a: f64, b: f64, scale: i32) -> f64 {
             sum.hi = @bitCast(f64, uhii);
         }
     }
-    math.scalbn(sum.hi, scale)
+    return math.scalbn(sum.hi, scale);
 }
 
 test "math.fma" {
diff --git a/std/math/frexp.zig b/std/math/frexp.zig
index a40a6dcc39..1a317a2c80 100644
--- a/std/math/frexp.zig
+++ b/std/math/frexp.zig
@@ -8,21 +8,21 @@ const math = @import("index.zig");
 const assert = @import("../debug.zig").assert;
 
 fn frexp_result(comptime T: type) -> type {
-    struct {
+    return struct {
         significand: T,
         exponent: i32,
-    }
+    };
 }
 pub const frexp32_result = frexp_result(f32);
 pub const frexp64_result = frexp_result(f64);
 
 pub fn frexp(x: var) -> frexp_result(@typeOf(x)) {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(frexp32, x),
-        f64 => @inlineCall(frexp64, x),
+    return switch (T) {
+        f32 => frexp32(x),
+        f64 => frexp64(x),
         else => @compileError("frexp not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 fn frexp32(x: f32) -> frexp32_result {
@@ -59,7 +59,7 @@ fn frexp32(x: f32) -> frexp32_result {
     y &= 0x807FFFFF;
     y |= 0x3F000000;
     result.significand = @bitCast(f32, y);
-    result
+    return result;
 }
 
 fn frexp64(x: f64) -> frexp64_result {
@@ -96,7 +96,7 @@ fn frexp64(x: f64) -> frexp64_result {
     y &= 0x800FFFFFFFFFFFFF;
     y |= 0x3FE0000000000000;
     result.significand = @bitCast(f64, y);
-    result
+    return result;
 }
 
 test "math.frexp" {
diff --git a/std/math/hypot.zig b/std/math/hypot.zig
index ee8ce0f518..9b09ed53a4 100644
--- a/std/math/hypot.zig
+++ b/std/math/hypot.zig
@@ -9,11 +9,11 @@ const math = @import("index.zig");
 const assert = @import("../debug.zig").assert;
 
 pub fn hypot(comptime T: type, x: T, y: T) -> T {
-    switch (T) {
-        f32 => @inlineCall(hypot32, x, y),
-        f64 => @inlineCall(hypot64, x, y),
+    return switch (T) {
+        f32 => hypot32(x, y),
+        f64 => hypot64(x, y),
         else => @compileError("hypot not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 fn hypot32(x: f32, y: f32) -> f32 {
@@ -48,7 +48,7 @@ fn hypot32(x: f32, y: f32) -> f32 {
         yy *= 0x1.0p-90;
     }
 
-    z * math.sqrt(f32(f64(x) * x + f64(y) * y))
+    return z * math.sqrt(f32(f64(x) * x + f64(y) * y));
 }
 
 fn sq(hi: &f64, lo: &f64, x: f64) {
@@ -109,7 +109,7 @@ fn hypot64(x: f64, y: f64) -> f64 {
     sq(&hx, &lx, x);
     sq(&hy, &ly, y);
 
-    z * math.sqrt(ly + lx + hy + hx)
+    return z * math.sqrt(ly + lx + hy + hx);
 }
 
 test "math.hypot" {
diff --git a/std/math/ilogb.zig b/std/math/ilogb.zig
index 2a0ea7fc5f..41a1e2d83f 100644
--- a/std/math/ilogb.zig
+++ b/std/math/ilogb.zig
@@ -9,11 +9,11 @@ const assert = @import("../debug.zig").assert;
 
 pub fn ilogb(x: var) -> i32 {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(ilogb32, x),
-        f64 => @inlineCall(ilogb64, x),
+    return switch (T) {
+        f32 => ilogb32(x),
+        f64 => ilogb64(x),
         else => @compileError("ilogb not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 // NOTE: Should these be exposed publically?
@@ -53,7 +53,7 @@ fn ilogb32(x: f32) -> i32 {
         }
     }
 
-    e - 0x7F
+    return e - 0x7F;
 }
 
 fn ilogb64(x: f64) -> i32 {
@@ -88,7 +88,7 @@ fn ilogb64(x: f64) -> i32 {
         }
     }
 
-    e - 0x3FF
+    return e - 0x3FF;
 }
 
 test "math.ilogb" {
diff --git a/std/math/index.zig b/std/math/index.zig
index 8a583fb8e9..1991864f69 100644
--- a/std/math/index.zig
+++ b/std/math/index.zig
@@ -36,7 +36,7 @@ pub const inf = @import("inf.zig").inf;
 
 pub fn approxEq(comptime T: type, x: T, y: T, epsilon: T) -> bool {
     assert(@typeId(T) == TypeId.Float);
-    fabs(x - y) < epsilon
+    return fabs(x - y) < epsilon;
 }
 
 // TODO: Hide the following in an internal module.
@@ -174,14 +174,8 @@ test "math" {
 }
 
 
-pub const Cmp = enum {
-    Less,
-    Equal,
-    Greater,
-};
-
 pub fn min(x: var, y: var) -> @typeOf(x + y) {
-    if (x < y) x else y
+    return if (x < y) x else y;
 }
 
 test "math.min" {
@@ -189,7 +183,7 @@ test "math.min" {
 }
 
 pub fn max(x: var, y: var) -> @typeOf(x + y) {
-    if (x > y) x else y
+    return if (x > y) x else y;
 }
 
 test "math.max" {
@@ -199,19 +193,19 @@ test "math.max" {
 error Overflow;
 pub fn mul(comptime T: type, a: T, b: T) -> %T {
     var answer: T = undefined;
-    if (@mulWithOverflow(T, a, b, &answer)) error.Overflow else answer
+    return if (@mulWithOverflow(T, a, b, &answer)) error.Overflow else answer;
 }
 
 error Overflow;
 pub fn add(comptime T: type, a: T, b: T) -> %T {
     var answer: T = undefined;
-    if (@addWithOverflow(T, a, b, &answer)) error.Overflow else answer
+    return if (@addWithOverflow(T, a, b, &answer)) error.Overflow else answer;
 }
 
 error Overflow;
 pub fn sub(comptime T: type, a: T, b: T) -> %T {
     var answer: T = undefined;
-    if (@subWithOverflow(T, a, b, &answer)) error.Overflow else answer
+    return if (@subWithOverflow(T, a, b, &answer)) error.Overflow else answer;
 }
 
 pub fn negate(x: var) -> %@typeOf(x) {
@@ -221,7 +215,7 @@ pub fn negate(x: var) -> %@typeOf(x) {
 error Overflow;
 pub fn shlExact(comptime T: type, a: T, shift_amt: Log2Int(T)) -> %T {
     var answer: T = undefined;
-    if (@shlWithOverflow(T, a, shift_amt, &answer)) error.Overflow else answer
+    return if (@shlWithOverflow(T, a, shift_amt, &answer)) error.Overflow else answer;
 }
 
 /// Shifts left. Overflowed bits are truncated.
@@ -273,7 +267,7 @@ test "math.shr" {
 }
 
 pub fn Log2Int(comptime T: type) -> type {
-    @IntType(false, log2(T.bit_count))
+    return @IntType(false, log2(T.bit_count));
 }
 
 test "math overflow functions" {
@@ -522,3 +516,28 @@ pub fn cast(comptime T: type, x: var) -> %T {
         return T(x);
     }
 }
+
+pub fn floorPowerOfTwo(comptime T: type, value: T) -> T {
+    var x = value;
+
+    comptime var i = 1;
+    inline while(T.bit_count > i) : (i *= 2) {
+        x |= (x >> i);
+    }
+
+    return x - (x >> 1);
+}
+
+test "math.floorPowerOfTwo" {
+    testFloorPowerOfTwo();
+    comptime testFloorPowerOfTwo();
+}
+
+fn testFloorPowerOfTwo() {
+    assert(floorPowerOfTwo(u32, 63) == 32);
+    assert(floorPowerOfTwo(u32, 64) == 64);
+    assert(floorPowerOfTwo(u32, 65) == 64);
+    assert(floorPowerOfTwo(u4, 7) == 4);
+    assert(floorPowerOfTwo(u4, 8) == 8);
+    assert(floorPowerOfTwo(u4, 9) == 8);
+}
diff --git a/std/math/inf.zig b/std/math/inf.zig
index ed559c313e..546e6f3af8 100644
--- a/std/math/inf.zig
+++ b/std/math/inf.zig
@@ -2,9 +2,9 @@ const math = @import("index.zig");
 const assert = @import("../debug.zig").assert;
 
 pub fn inf(comptime T: type) -> T {
-    switch (T) {
+    return switch (T) {
         f32 => @bitCast(f32, math.inf_u32),
         f64 => @bitCast(f64, math.inf_u64),
         else => @compileError("inf not implemented for " ++ @typeName(T)),
-    }
+    };
 }
diff --git a/std/math/isfinite.zig b/std/math/isfinite.zig
index 6dbf984721..d44d373cf1 100644
--- a/std/math/isfinite.zig
+++ b/std/math/isfinite.zig
@@ -6,11 +6,11 @@ pub fn isFinite(x: var) -> bool {
     switch (T) {
         f32 => {
             const bits = @bitCast(u32, x);
-            bits & 0x7FFFFFFF < 0x7F800000
+            return bits & 0x7FFFFFFF < 0x7F800000;
         },
         f64 => {
             const bits = @bitCast(u64, x);
-            bits & (@maxValue(u64) >> 1) < (0x7FF << 52)
+            return bits & (@maxValue(u64) >> 1) < (0x7FF << 52);
         },
         else => {
             @compileError("isFinite not implemented for " ++ @typeName(T));
diff --git a/std/math/isinf.zig b/std/math/isinf.zig
index b388fabf10..98c90e72a9 100644
--- a/std/math/isinf.zig
+++ b/std/math/isinf.zig
@@ -6,11 +6,11 @@ pub fn isInf(x: var) -> bool {
     switch (T) {
         f32 => {
             const bits = @bitCast(u32, x);
-            bits & 0x7FFFFFFF == 0x7F800000
+            return bits & 0x7FFFFFFF == 0x7F800000;
         },
         f64 => {
             const bits = @bitCast(u64, x);
-            bits & (@maxValue(u64) >> 1) == (0x7FF << 52)
+            return bits & (@maxValue(u64) >> 1) == (0x7FF << 52);
         },
         else => {
             @compileError("isInf not implemented for " ++ @typeName(T));
@@ -22,10 +22,10 @@ pub fn isPositiveInf(x: var) -> bool {
     const T = @typeOf(x);
     switch (T) {
         f32 => {
-            @bitCast(u32, x) == 0x7F800000
+            return @bitCast(u32, x) == 0x7F800000;
         },
         f64 => {
-            @bitCast(u64, x) == 0x7FF << 52
+            return @bitCast(u64, x) == 0x7FF << 52;
         },
         else => {
             @compileError("isPositiveInf not implemented for " ++ @typeName(T));
@@ -37,10 +37,10 @@ pub fn isNegativeInf(x: var) -> bool {
     const T = @typeOf(x);
     switch (T) {
         f32 => {
-            @bitCast(u32, x) == 0xFF800000
+            return @bitCast(u32, x) == 0xFF800000;
         },
         f64 => {
-            @bitCast(u64, x) == 0xFFF << 52
+            return @bitCast(u64, x) == 0xFFF << 52;
         },
         else => {
             @compileError("isNegativeInf not implemented for " ++ @typeName(T));
diff --git a/std/math/isnan.zig b/std/math/isnan.zig
index 8bcb200a6a..e996a72693 100644
--- a/std/math/isnan.zig
+++ b/std/math/isnan.zig
@@ -6,11 +6,11 @@ pub fn isNan(x: var) -> bool {
     switch (T) {
         f32 => {
             const bits = @bitCast(u32, x);
-            bits & 0x7FFFFFFF > 0x7F800000
+            return bits & 0x7FFFFFFF > 0x7F800000;
         },
         f64 => {
             const bits = @bitCast(u64, x);
-            (bits & (@maxValue(u64) >> 1)) > (u64(0x7FF) << 52)
+            return (bits & (@maxValue(u64) >> 1)) > (u64(0x7FF) << 52);
         },
         else => {
             @compileError("isNan not implemented for " ++ @typeName(T));
@@ -21,7 +21,7 @@ pub fn isNan(x: var) -> bool {
 // Note: A signalling nan is identical to a standard right now by may have a different bit
 // representation in the future when required.
 pub fn isSignalNan(x: var) -> bool {
-    isNan(x)
+    return isNan(x);
 }
 
 test "math.isNan" {
diff --git a/std/math/isnormal.zig b/std/math/isnormal.zig
index b212d204de..f815c2680b 100644
--- a/std/math/isnormal.zig
+++ b/std/math/isnormal.zig
@@ -6,11 +6,11 @@ pub fn isNormal(x: var) -> bool {
     switch (T) {
         f32 => {
             const bits = @bitCast(u32, x);
-            (bits + 0x00800000) & 0x7FFFFFFF >= 0x01000000
+            return (bits + 0x00800000) & 0x7FFFFFFF >= 0x01000000;
         },
         f64 => {
             const bits = @bitCast(u64, x);
-            (bits + (1 << 52)) & (@maxValue(u64) >> 1) >= (1 << 53)
+            return (bits + (1 << 52)) & (@maxValue(u64) >> 1) >= (1 << 53);
         },
         else => {
             @compileError("isNormal not implemented for " ++ @typeName(T));
diff --git a/std/math/ln.zig b/std/math/ln.zig
index 7ced6238ef..4ded89d328 100644
--- a/std/math/ln.zig
+++ b/std/math/ln.zig
@@ -14,7 +14,7 @@ pub fn ln(x: var) -> @typeOf(x) {
     const T = @typeOf(x);
     switch (@typeId(T)) {
         TypeId.FloatLiteral => {
-            return @typeOf(1.0)(ln_64(x))
+            return @typeOf(1.0)(ln_64(x));
         },
         TypeId.Float => {
             return switch (T) {
@@ -84,7 +84,7 @@ pub fn ln_32(x_: f32) -> f32 {
     const hfsq = 0.5 * f * f;
     const dk = f32(k);
 
-    s * (hfsq + R) + dk * ln2_lo - hfsq + f + dk * ln2_hi
+    return s * (hfsq + R) + dk * ln2_lo - hfsq + f + dk * ln2_hi;
 }
 
 pub fn ln_64(x_: f64) -> f64 {
@@ -116,7 +116,7 @@ pub fn ln_64(x_: f64) -> f64 {
         // subnormal, scale x
         k -= 54;
         x *= 0x1.0p54;
-        hx = u32(@bitCast(u64, ix) >> 32)
+        hx = u32(@bitCast(u64, ix) >> 32);
     }
     else if (hx >= 0x7FF00000) {
         return x;
@@ -142,7 +142,7 @@ pub fn ln_64(x_: f64) -> f64 {
     const R = t2 + t1;
     const dk = f64(k);
 
-    s * (hfsq + R) + dk * ln2_lo - hfsq + f + dk * ln2_hi
+    return s * (hfsq + R) + dk * ln2_lo - hfsq + f + dk * ln2_hi;
 }
 
 test "math.ln" {
diff --git a/std/math/log.zig b/std/math/log.zig
index 075cecc890..1ff226f7e5 100644
--- a/std/math/log.zig
+++ b/std/math/log.zig
@@ -29,7 +29,7 @@ pub fn log(comptime T: type, base: T, x: T) -> T {
                 f32 => return f32(math.ln(f64(x)) / math.ln(f64(base))),
                 f64 => return math.ln(x) / math.ln(f64(base)),
                 else => @compileError("log not implemented for " ++ @typeName(T)),
-            };
+            }
         },
 
         else => {
diff --git a/std/math/log10.zig b/std/math/log10.zig
index 75e1203ad4..73168dec8d 100644
--- a/std/math/log10.zig
+++ b/std/math/log10.zig
@@ -14,7 +14,7 @@ pub fn log10(x: var) -> @typeOf(x) {
     const T = @typeOf(x);
     switch (@typeId(T)) {
         TypeId.FloatLiteral => {
-            return @typeOf(1.0)(log10_64(x))
+            return @typeOf(1.0)(log10_64(x));
         },
         TypeId.Float => {
             return switch (T) {
@@ -90,7 +90,7 @@ pub fn log10_32(x_: f32) -> f32 {
     const lo = f - hi - hfsq + s * (hfsq + R);
     const dk = f32(k);
 
-    dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi + hi * ivln10hi + dk * log10_2hi
+    return dk * log10_2lo + (lo + hi) * ivln10lo + lo * ivln10hi + hi * ivln10hi + dk * log10_2hi;
 }
 
 pub fn log10_64(x_: f64) -> f64 {
@@ -124,7 +124,7 @@ pub fn log10_64(x_: f64) -> f64 {
         // subnormal, scale x
         k -= 54;
         x *= 0x1.0p54;
-        hx = u32(@bitCast(u64, x) >> 32)
+        hx = u32(@bitCast(u64, x) >> 32);
     }
     else if (hx >= 0x7FF00000) {
         return x;
@@ -167,7 +167,7 @@ pub fn log10_64(x_: f64) -> f64 {
     val_lo += (y - ww) + val_hi;
     val_hi = ww;
 
-    val_lo + val_hi
+    return val_lo + val_hi;
 }
 
 test "math.log10" {
diff --git a/std/math/log1p.zig b/std/math/log1p.zig
index 803726fdcc..b369385038 100644
--- a/std/math/log1p.zig
+++ b/std/math/log1p.zig
@@ -11,11 +11,11 @@ const assert = @import("../debug.zig").assert;
 
 pub fn log1p(x: var) -> @typeOf(x) {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(log1p_32, x),
-        f64 => @inlineCall(log1p_64, x),
+    return switch (T) {
+        f32 => log1p_32(x),
+        f64 => log1p_64(x),
         else => @compileError("log1p not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 fn log1p_32(x: f32) -> f32 {
@@ -91,7 +91,7 @@ fn log1p_32(x: f32) -> f32 {
     const hfsq = 0.5 * f * f;
     const dk = f32(k);
 
-    s * (hfsq + R) + (dk * ln2_lo + c) - hfsq + f + dk * ln2_hi
+    return s * (hfsq + R) + (dk * ln2_lo + c) - hfsq + f + dk * ln2_hi;
 }
 
 fn log1p_64(x: f64) -> f64 {
@@ -172,7 +172,7 @@ fn log1p_64(x: f64) -> f64 {
     const R = t2 + t1;
     const dk = f64(k);
 
-    s * (hfsq + R) + (dk * ln2_lo + c) - hfsq + f + dk * ln2_hi
+    return s * (hfsq + R) + (dk * ln2_lo + c) - hfsq + f + dk * ln2_hi;
 }
 
 test "math.log1p" {
diff --git a/std/math/log2.zig b/std/math/log2.zig
index 2199d6bfa1..1b38a9ecee 100644
--- a/std/math/log2.zig
+++ b/std/math/log2.zig
@@ -14,7 +14,7 @@ pub fn log2(x: var) -> @typeOf(x) {
     const T = @typeOf(x);
     switch (@typeId(T)) {
         TypeId.FloatLiteral => {
-            return @typeOf(1.0)(log2_64(x))
+            return @typeOf(1.0)(log2_64(x));
         },
         TypeId.Float => {
             return switch (T) {
@@ -26,7 +26,7 @@ pub fn log2(x: var) -> @typeOf(x) {
         TypeId.IntLiteral => comptime {
             var result = 0;
             var x_shifted = x;
-            while ({x_shifted >>= 1; x_shifted != 0}) : (result += 1) {}
+            while (b: {x_shifted >>= 1; break :b x_shifted != 0;}) : (result += 1) {}
             return result;
         },
         TypeId.Int => {
@@ -94,7 +94,7 @@ pub fn log2_32(x_: f32) -> f32 {
     u &= 0xFFFFF000;
     hi = @bitCast(f32, u);
     const lo = f - hi - hfsq + s * (hfsq + R);
-    (lo + hi) * ivln2lo + lo * ivln2hi + hi * ivln2hi + f32(k)
+    return (lo + hi) * ivln2lo + lo * ivln2hi + hi * ivln2hi + f32(k);
 }
 
 pub fn log2_64(x_: f64) -> f64 {
@@ -165,7 +165,7 @@ pub fn log2_64(x_: f64) -> f64 {
     val_lo += (y - ww) + val_hi;
     val_hi = ww;
 
-    val_lo + val_hi
+    return val_lo + val_hi;
 }
 
 test "math.log2" {
diff --git a/std/math/modf.zig b/std/math/modf.zig
index 25eab7f99d..72730b67d7 100644
--- a/std/math/modf.zig
+++ b/std/math/modf.zig
@@ -7,21 +7,21 @@ const math = @import("index.zig");
 const assert = @import("../debug.zig").assert;
 
 fn modf_result(comptime T: type) -> type {
-    struct {
+    return struct {
         fpart: T,
         ipart: T,
-    }
+    };
 }
 pub const modf32_result = modf_result(f32);
 pub const modf64_result = modf_result(f64);
 
 pub fn modf(x: var) -> modf_result(@typeOf(x)) {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(modf32, x),
-        f64 => @inlineCall(modf64, x),
+    return switch (T) {
+        f32 => modf32(x),
+        f64 => modf64(x),
         else => @compileError("modf not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 fn modf32(x: f32) -> modf32_result {
@@ -66,7 +66,7 @@ fn modf32(x: f32) -> modf32_result {
     const uf = @bitCast(f32, u & ~mask);
     result.ipart = uf;
     result.fpart = x - uf;
-    result
+    return result;
 }
 
 fn modf64(x: f64) -> modf64_result {
@@ -110,7 +110,7 @@ fn modf64(x: f64) -> modf64_result {
     const uf = @bitCast(f64, u & ~mask);
     result.ipart = uf;
     result.fpart = x - uf;
-    result
+    return result;
 }
 
 test "math.modf" {
diff --git a/std/math/nan.zig b/std/math/nan.zig
index a4899d6b82..e92bb04cb9 100644
--- a/std/math/nan.zig
+++ b/std/math/nan.zig
@@ -1,19 +1,19 @@
 const math = @import("index.zig");
 
 pub fn nan(comptime T: type) -> T {
-    switch (T) {
+    return switch (T) {
         f32 => @bitCast(f32, math.nan_u32),
         f64 => @bitCast(f64, math.nan_u64),
         else => @compileError("nan not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 // Note: A signalling nan is identical to a standard right now by may have a different bit
 // representation in the future when required.
 pub fn snan(comptime T: type) -> T {
-    switch (T) {
+    return switch (T) {
         f32 => @bitCast(f32, math.nan_u32),
         f64 => @bitCast(f64, math.nan_u64),
         else => @compileError("snan not implemented for " ++ @typeName(T)),
-    }
+    };
 }
diff --git a/std/math/pow.zig b/std/math/pow.zig
index 85c3a71d56..55a2cd8c3e 100644
--- a/std/math/pow.zig
+++ b/std/math/pow.zig
@@ -166,12 +166,12 @@ pub fn pow(comptime T: type, x: T, y: T) -> T {
         ae = -ae;
     }
 
-    math.scalbn(a1, ae)
+    return math.scalbn(a1, ae);
 }
 
 fn isOddInteger(x: f64) -> bool {
     const r = math.modf(x);
-    r.fpart == 0.0 and i64(r.ipart) & 1 == 1
+    return r.fpart == 0.0 and i64(r.ipart) & 1 == 1;
 }
 
 test "math.pow" {
diff --git a/std/math/round.zig b/std/math/round.zig
index a16bedc3f5..1e193fb1d7 100644
--- a/std/math/round.zig
+++ b/std/math/round.zig
@@ -10,11 +10,11 @@ const math = @import("index.zig");
 
 pub fn round(x: var) -> @typeOf(x) {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(round32, x),
-        f64 => @inlineCall(round64, x),
+    return switch (T) {
+        f32 => round32(x),
+        f64 => round64(x),
         else => @compileError("round not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 fn round32(x_: f32) -> f32 {
@@ -48,9 +48,9 @@ fn round32(x_: f32) -> f32 {
     }
 
     if (u >> 31 != 0) {
-        -y
+        return -y;
     } else {
-        y
+        return y;
     }
 }
 
@@ -85,9 +85,9 @@ fn round64(x_: f64) -> f64 {
     }
 
     if (u >> 63 != 0) {
-        -y
+        return -y;
     } else {
-        y
+        return y;
     }
 }
 
diff --git a/std/math/scalbn.zig b/std/math/scalbn.zig
index 6e82194494..0be6a3d47e 100644
--- a/std/math/scalbn.zig
+++ b/std/math/scalbn.zig
@@ -3,11 +3,11 @@ const assert = @import("../debug.zig").assert;
 
 pub fn scalbn(x: var, n: i32) -> @typeOf(x) {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(scalbn32, x, n),
-        f64 => @inlineCall(scalbn64, x, n),
+    return switch (T) {
+        f32 => scalbn32(x, n),
+        f64 => scalbn64(x, n),
         else => @compileError("scalbn not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 fn scalbn32(x: f32, n_: i32) -> f32 {
@@ -37,7 +37,7 @@ fn scalbn32(x: f32, n_: i32) -> f32 {
     }
 
     const u = u32(n +% 0x7F) << 23;
-    y * @bitCast(f32, u)
+    return y * @bitCast(f32, u);
 }
 
 fn scalbn64(x: f64, n_: i32) -> f64 {
@@ -67,7 +67,7 @@ fn scalbn64(x: f64, n_: i32) -> f64 {
     }
 
     const u = u64(n +% 0x3FF) << 52;
-    y * @bitCast(f64, u)
+    return y * @bitCast(f64, u);
 }
 
 test "math.scalbn" {
diff --git a/std/math/signbit.zig b/std/math/signbit.zig
index 75b087f539..b8ccecfa1b 100644
--- a/std/math/signbit.zig
+++ b/std/math/signbit.zig
@@ -3,21 +3,21 @@ const assert = @import("../debug.zig").assert;
 
 pub fn signbit(x: var) -> bool {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(signbit32, x),
-        f64 => @inlineCall(signbit64, x),
+    return switch (T) {
+        f32 => signbit32(x),
+        f64 => signbit64(x),
         else => @compileError("signbit not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 fn signbit32(x: f32) -> bool {
     const bits = @bitCast(u32, x);
-    bits >> 31 != 0
+    return bits >> 31 != 0;
 }
 
 fn signbit64(x: f64) -> bool {
     const bits = @bitCast(u64, x);
-    bits >> 63 != 0
+    return bits >> 63 != 0;
 }
 
 test "math.signbit" {
diff --git a/std/math/sin.zig b/std/math/sin.zig
index 508bf5fae4..392bef1bc0 100644
--- a/std/math/sin.zig
+++ b/std/math/sin.zig
@@ -10,11 +10,11 @@ const assert = @import("../debug.zig").assert;
 
 pub fn sin(x: var) -> @typeOf(x) {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(sin32, x),
-        f64 => @inlineCall(sin64, x),
+    return switch (T) {
+        f32 => sin32(x),
+        f64 => sin64(x),
         else => @compileError("sin not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 // sin polynomial coefficients
@@ -75,18 +75,18 @@ fn sin32(x_: f32) -> f32 {
     const z = ((x - y * pi4a) - y * pi4b) - y * pi4c;
     const w = z * z;
 
-    const r = {
+    const r = r: {
         if (j == 1 or j == 2) {
-            1.0 - 0.5 * w + w * w * (C5 + w * (C4 + w * (C3 + w * (C2 + w * (C1 + w * C0)))))
+            break :r 1.0 - 0.5 * w + w * w * (C5 + w * (C4 + w * (C3 + w * (C2 + w * (C1 + w * C0)))));
         } else {
-            z + z * w * (S5 + w * (S4 + w * (S3 + w * (S2 + w * (S1 + w * S0)))))
+            break :r z + z * w * (S5 + w * (S4 + w * (S3 + w * (S2 + w * (S1 + w * S0)))));
         }
     };
 
     if (sign) {
-        -r
+        return -r;
     } else {
-        r
+        return r;
     }
 }
 
@@ -127,25 +127,25 @@ fn sin64(x_: f64) -> f64 {
     const z = ((x - y * pi4a) - y * pi4b) - y * pi4c;
     const w = z * z;
 
-    const r = {
+    const r = r: {
         if (j == 1 or j == 2) {
-            1.0 - 0.5 * w + w * w * (C5 + w * (C4 + w * (C3 + w * (C2 + w * (C1 + w * C0)))))
+            break :r 1.0 - 0.5 * w + w * w * (C5 + w * (C4 + w * (C3 + w * (C2 + w * (C1 + w * C0)))));
         } else {
-            z + z * w * (S5 + w * (S4 + w * (S3 + w * (S2 + w * (S1 + w * S0)))))
+            break :r z + z * w * (S5 + w * (S4 + w * (S3 + w * (S2 + w * (S1 + w * S0)))));
         }
     };
 
     if (sign) {
-        -r
+        return -r;
     } else {
-        r
+        return r;
     }
 }
 
 test "math.sin" {
     assert(sin(f32(0.0)) == sin32(0.0));
     assert(sin(f64(0.0)) == sin64(0.0));
-    assert(comptime {math.sin(f64(2))} == math.sin(f64(2)));
+    assert(comptime (math.sin(f64(2))) == math.sin(f64(2)));
 }
 
 test "math.sin32" {
diff --git a/std/math/sinh.zig b/std/math/sinh.zig
index 32f67a49a8..4c575f10ec 100644
--- a/std/math/sinh.zig
+++ b/std/math/sinh.zig
@@ -11,11 +11,11 @@ const expo2 = @import("expo2.zig").expo2;
 
 pub fn sinh(x: var) -> @typeOf(x) {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(sinh32, x),
-        f64 => @inlineCall(sinh64, x),
+    return switch (T) {
+        f32 => sinh32(x),
+        f64 => sinh64(x),
         else => @compileError("sinh not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 // sinh(x) = (exp(x) - 1 / exp(x)) / 2
@@ -49,7 +49,7 @@ fn sinh32(x: f32) -> f32 {
     }
 
     // |x| > log(FLT_MAX) or nan
-    2 * h * expo2(ax)
+    return 2 * h * expo2(ax);
 }
 
 fn sinh64(x: f64) -> f64 {
@@ -83,7 +83,7 @@ fn sinh64(x: f64) -> f64 {
     }
 
     // |x| > log(DBL_MAX) or nan
-    2 * h * expo2(ax)
+    return 2 * h * expo2(ax);
 }
 
 test "math.sinh" {
diff --git a/std/math/sqrt.zig b/std/math/sqrt.zig
index bd1bb1fca4..263e616617 100644
--- a/std/math/sqrt.zig
+++ b/std/math/sqrt.zig
@@ -7,12 +7,34 @@
 
 const math = @import("index.zig");
 const assert = @import("../debug.zig").assert;
+const builtin = @import("builtin");
+const TypeId = builtin.TypeId;
 
-pub fn sqrt(x: var) -> @typeOf(x) {
+pub fn sqrt(x: var) -> (if (@typeId(@typeOf(x)) == TypeId.Int) @IntType(false, @typeOf(x).bit_count / 2) else @typeOf(x)) {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(sqrt32, x),
-        f64 => @inlineCall(sqrt64, x),
+    switch (@typeId(T)) {
+        TypeId.FloatLiteral => {
+            return T(sqrt64(x));
+        },
+        TypeId.Float => {
+            return switch (T) {
+                f32 => sqrt32(x),
+                f64 => sqrt64(x),
+                else => @compileError("sqrt not implemented for " ++ @typeName(T)),
+            };
+        },
+        TypeId.IntLiteral => comptime {
+            if (x > @maxValue(u128)) {
+                @compileError("sqrt not implemented for comptime_int greater than 128 bits");
+            }
+            if (x < 0) {
+                @compileError("sqrt on negative number");
+            }
+            return T(sqrt_int(u128, x));
+        },
+        TypeId.Int => {
+            return sqrt_int(T, x);
+        },
         else => @compileError("sqrt not implemented for " ++ @typeName(T)),
     }
 }
@@ -42,7 +64,7 @@ fn sqrt32(x: f32) -> f32 {
         // subnormal
         var i: i32 = 0;
         while (ix & 0x00800000 == 0) : (i += 1) {
-            ix <<= 1
+            ix <<= 1;
         }
         m -= i - 1;
     }
@@ -90,7 +112,7 @@ fn sqrt32(x: f32) -> f32 {
 
     ix = (q >> 1) + 0x3f000000;
     ix += m << 23;
-    @bitCast(f32, ix)
+    return @bitCast(f32, ix);
 }
 
 // NOTE: The original code is full of implicit signed -> unsigned assumptions and u32 wraparound
@@ -131,7 +153,7 @@ fn sqrt64(x: f64) -> f64 {
         // subnormal
         var i: u32 = 0;
         while (ix0 & 0x00100000 == 0) : (i += 1) {
-            ix0 <<= 1
+            ix0 <<= 1;
         }
         m -= i32(i) - 1;
         ix0 |= ix1 >> u5(32 - i);
@@ -223,7 +245,7 @@ fn sqrt64(x: f64) -> f64 {
     iix0 = iix0 +% (m << 20);
 
     const uz = (u64(iix0) << 32) | ix1;
-    @bitCast(f64, uz)
+    return @bitCast(f64, uz);
 }
 
 test "math.sqrt" {
@@ -274,3 +296,35 @@ test "math.sqrt64.special" {
     assert(math.isNan(sqrt64(-1.0)));
     assert(math.isNan(sqrt64(math.nan(f64))));
 }
+
+fn sqrt_int(comptime T: type, value: T) -> @IntType(false, T.bit_count / 2) {
+    var op = value;
+    var res: T = 0;
+    var one: T = 1 << (T.bit_count - 2);
+
+    // "one" starts at the highest power of four <= than the argument.
+    while (one > op) {
+        one >>= 2;
+    }
+
+    while (one != 0) {
+        if (op >= res + one) {
+            op -= res + one;
+            res += 2 * one;
+        }
+        res >>= 1;
+        one >>= 2;
+    }
+
+    const ResultType = @IntType(false, T.bit_count / 2);
+    return ResultType(res);
+}
+
+test "math.sqrt_int" {
+    assert(sqrt_int(u32, 3) == 1);
+    assert(sqrt_int(u32, 4) == 2);
+    assert(sqrt_int(u32, 5) == 2);
+    assert(sqrt_int(u32, 8) == 2);
+    assert(sqrt_int(u32, 9) == 3);
+    assert(sqrt_int(u32, 10) == 3);
+}
diff --git a/std/math/tan.zig b/std/math/tan.zig
index 6ac30fa667..ff53a758b4 100644
--- a/std/math/tan.zig
+++ b/std/math/tan.zig
@@ -10,11 +10,11 @@ const assert = @import("../debug.zig").assert;
 
 pub fn tan(x: var) -> @typeOf(x) {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(tan32, x),
-        f64 => @inlineCall(tan64, x),
+    return switch (T) {
+        f32 => tan32(x),
+        f64 => tan64(x),
         else => @compileError("tan not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 const Tp0 = -1.30936939181383777646E4;
@@ -62,11 +62,11 @@ fn tan32(x_: f32) -> f32 {
     const z = ((x - y * pi4a) - y * pi4b) - y * pi4c;
     const w = z * z;
 
-    var r = {
+    var r = r: {
         if (w > 1e-14) {
-            z + z * (w * ((Tp0 * w + Tp1) * w + Tp2) / ((((w + Tq1) * w + Tq2) * w + Tq3) * w + Tq4))
+            break :r z + z * (w * ((Tp0 * w + Tp1) * w + Tp2) / ((((w + Tq1) * w + Tq2) * w + Tq3) * w + Tq4));
         } else {
-            z
+            break :r z;
         }
     };
 
@@ -77,7 +77,7 @@ fn tan32(x_: f32) -> f32 {
         r = -r;
     }
 
-    r
+    return r;
 }
 
 fn tan64(x_: f64) -> f64 {
@@ -111,11 +111,11 @@ fn tan64(x_: f64) -> f64 {
     const z = ((x - y * pi4a) - y * pi4b) - y * pi4c;
     const w = z * z;
 
-    var r = {
+    var r = r: {
         if (w > 1e-14) {
-            z + z * (w * ((Tp0 * w + Tp1) * w + Tp2) / ((((w + Tq1) * w + Tq2) * w + Tq3) * w + Tq4))
+            break :r z + z * (w * ((Tp0 * w + Tp1) * w + Tp2) / ((((w + Tq1) * w + Tq2) * w + Tq3) * w + Tq4));
         } else {
-            z
+            break :r z;
         }
     };
 
@@ -126,7 +126,7 @@ fn tan64(x_: f64) -> f64 {
         r = -r;
     }
 
-    r
+    return r;
 }
 
 test "math.tan" {
diff --git a/std/math/tanh.zig b/std/math/tanh.zig
index d9704f458a..7715029361 100644
--- a/std/math/tanh.zig
+++ b/std/math/tanh.zig
@@ -11,11 +11,11 @@ const expo2 = @import("expo2.zig").expo2;
 
 pub fn tanh(x: var) -> @typeOf(x) {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(tanh32, x),
-        f64 => @inlineCall(tanh64, x),
+    return switch (T) {
+        f32 => tanh32(x),
+        f64 => tanh64(x),
         else => @compileError("tanh not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 // tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
@@ -59,9 +59,9 @@ fn tanh32(x: f32) -> f32 {
     }
 
     if (u >> 31 != 0) {
-        -t
+        return -t;
     } else {
-        t
+        return t;
     }
 }
 
@@ -104,9 +104,9 @@ fn tanh64(x: f64) -> f64 {
     }
 
     if (u >> 63 != 0) {
-        -t
+        return -t;
     } else {
-        t
+        return t;
     }
 }
 
diff --git a/std/math/trunc.zig b/std/math/trunc.zig
index 937a8155e6..81eacb30ba 100644
--- a/std/math/trunc.zig
+++ b/std/math/trunc.zig
@@ -9,11 +9,11 @@ const assert = @import("../debug.zig").assert;
 
 pub fn trunc(x: var) -> @typeOf(x) {
     const T = @typeOf(x);
-    switch (T) {
-        f32 => @inlineCall(trunc32, x),
-        f64 => @inlineCall(trunc64, x),
+    return switch (T) {
+        f32 => trunc32(x),
+        f64 => trunc64(x),
         else => @compileError("trunc not implemented for " ++ @typeName(T)),
-    }
+    };
 }
 
 fn trunc32(x: f32) -> f32 {
@@ -30,10 +30,10 @@ fn trunc32(x: f32) -> f32 {
 
     m = u32(@maxValue(u32)) >> u5(e);
     if (u & m == 0) {
-        x
+        return x;
     } else {
         math.forceEval(x + 0x1p120);
-        @bitCast(f32, u & ~m)
+        return @bitCast(f32, u & ~m);
     }
 }
 
@@ -51,10 +51,10 @@ fn trunc64(x: f64) -> f64 {
 
     m = u64(@maxValue(u64)) >> u6(e);
     if (u & m == 0) {
-        x
+        return x;
     } else {
         math.forceEval(x + 0x1p120);
-        @bitCast(f64, u & ~m)
+        return @bitCast(f64, u & ~m);
     }
 }
 
diff --git a/std/mem.zig b/std/mem.zig
index 815e122812..7438eba70a 100644
--- a/std/mem.zig
+++ b/std/mem.zig
@@ -3,26 +3,31 @@ const assert = debug.assert;
 const math = @import("math/index.zig");
 const builtin = @import("builtin");
 
-pub const Cmp = math.Cmp;
+error OutOfMemory;
 
 pub const Allocator = struct {
     /// Allocate byte_count bytes and return them in a slice, with the
-    /// slicer's pointer aligned at least to alignment bytes.
-    allocFn: fn (self: &Allocator, byte_count: usize, alignment: usize) -> %[]u8,
+    /// slice's pointer aligned at least to alignment bytes.
+    /// The returned newly allocated memory is undefined.
+    allocFn: fn (self: &Allocator, byte_count: usize, alignment: u29) -> %[]u8,
 
-    /// Guaranteed: `old_mem.len` is the same as what was returned from allocFn or reallocFn.
-    /// Guaranteed: alignment >= alignment of old_mem.ptr
+    /// If `new_byte_count > old_mem.len`:
+    /// * `old_mem.len` is the same as what was returned from allocFn or reallocFn.
+    /// * alignment >= alignment of old_mem.ptr
     ///
-    /// If `new_byte_count` is less than or equal to `old_mem.len` this function must
-    /// return successfully.
-    reallocFn: fn (self: &Allocator, old_mem: []u8, new_byte_count: usize, alignment: usize) -> %[]u8,
+    /// If `new_byte_count <= old_mem.len`:
+    /// * this function must return successfully. 
+    /// * alignment <= alignment of old_mem.ptr
+    ///
+    /// The returned newly allocated memory is undefined.
+    reallocFn: fn (self: &Allocator, old_mem: []u8, new_byte_count: usize, alignment: u29) -> %[]u8,
 
     /// Guaranteed: `old_mem.len` is the same as what was returned from `allocFn` or `reallocFn`
     freeFn: fn (self: &Allocator, old_mem: []u8),
 
     fn create(self: &Allocator, comptime T: type) -> %&T {
         const slice = %return self.alloc(T, 1);
-        &slice[0]
+        return &slice[0];
     }
 
     fn destroy(self: &Allocator, ptr: var) {
@@ -30,28 +35,52 @@ pub const Allocator = struct {
     }
 
     fn alloc(self: &Allocator, comptime T: type, n: usize) -> %[]T {
+        return self.alignedAlloc(T, @alignOf(T), n);
+    }
+
+    fn alignedAlloc(self: &Allocator, comptime T: type, comptime alignment: u29,
+        n: usize) -> %[]align(alignment) T
+    {
         const byte_count = %return math.mul(usize, @sizeOf(T), n);
-        const byte_slice = %return self.allocFn(self, byte_count, @alignOf(T));
-        ([]T)(@alignCast(@alignOf(T), byte_slice))
+        const byte_slice = %return self.allocFn(self, byte_count, alignment);
+        // This loop should get optimized out in ReleaseFast mode
+        for (byte_slice) |*byte| {
+            *byte = undefined;
+        }
+        return ([]align(alignment) T)(@alignCast(alignment, byte_slice));
     }
 
     fn realloc(self: &Allocator, comptime T: type, old_mem: []T, n: usize) -> %[]T {
+        return self.alignedRealloc(T, @alignOf(T), @alignCast(@alignOf(T), old_mem), n);
+    }
+
+    fn alignedRealloc(self: &Allocator, comptime T: type, comptime alignment: u29,
+        old_mem: []align(alignment) T, n: usize) -> %[]align(alignment) T
+    {
         if (old_mem.len == 0) {
             return self.alloc(T, n);
         }
 
-        // Assert that old_mem.ptr is properly aligned.
-        const aligned_old_mem = @alignCast(@alignOf(T), old_mem);
-
+        const old_byte_slice = ([]u8)(old_mem);
         const byte_count = %return math.mul(usize, @sizeOf(T), n);
-        const byte_slice = %return self.reallocFn(self, ([]u8)(aligned_old_mem), byte_count, @alignOf(T));
-        return ([]T)(@alignCast(@alignOf(T), byte_slice));
+        const byte_slice = %return self.reallocFn(self, old_byte_slice, byte_count, alignment);
+        // This loop should get optimized out in ReleaseFast mode
+        for (byte_slice[old_byte_slice.len..]) |*byte| {
+            *byte = undefined;
+        }
+        return ([]T)(@alignCast(alignment, byte_slice));
     }
 
     /// Reallocate, but `n` must be less than or equal to `old_mem.len`.
     /// Unlike `realloc`, this function cannot fail.
     /// Shrinking to 0 is the same as calling `free`.
     fn shrink(self: &Allocator, comptime T: type, old_mem: []T, n: usize) -> []T {
+        return self.alignedShrink(T, @alignOf(T), @alignCast(@alignOf(T), old_mem), n);
+    }
+
+    fn alignedShrink(self: &Allocator, comptime T: type, comptime alignment: u29,
+        old_mem: []align(alignment) T, n: usize) -> []align(alignment) T
+    {
         if (n == 0) {
             self.free(old_mem);
             return old_mem[0..0];
@@ -59,15 +88,12 @@ pub const Allocator = struct {
 
         assert(n <= old_mem.len);
 
-        // Assert that old_mem.ptr is properly aligned.
-        const aligned_old_mem = @alignCast(@alignOf(T), old_mem);
-
         // Here we skip the overflow checking on the multiplication because
         // n <= old_mem.len and the multiplication didn't overflow for that operation.
         const byte_count = @sizeOf(T) * n;
 
-        const byte_slice = %%self.reallocFn(self, ([]u8)(aligned_old_mem), byte_count, @alignOf(T));
-        return ([]T)(@alignCast(@alignOf(T), byte_slice));
+        const byte_slice = %%self.reallocFn(self, ([]u8)(old_mem), byte_count, alignment);
+        return ([]align(alignment) T)(@alignCast(alignment, byte_slice));
     }
 
     fn free(self: &Allocator, memory: var) {
@@ -79,6 +105,51 @@ pub const Allocator = struct {
     }
 };
 
+pub const FixedBufferAllocator = struct {
+    allocator: Allocator,
+    end_index: usize,
+    buffer: []u8,
+
+    pub fn init(buffer: []u8) -> FixedBufferAllocator {
+        return FixedBufferAllocator {
+            .allocator = Allocator {
+                .allocFn = alloc,
+                .reallocFn = realloc,
+                .freeFn = free,
+            },
+            .buffer = buffer,
+            .end_index = 0,
+        };
+    }
+
+    fn alloc(allocator: &Allocator, n: usize, alignment: u29) -> %[]u8 {
+        const self = @fieldParentPtr(FixedBufferAllocator, "allocator", allocator);
+        const addr = @ptrToInt(&self.buffer[self.end_index]);
+        const rem = @rem(addr, alignment);
+        const march_forward_bytes = if (rem == 0) 0 else (alignment - rem);
+        const adjusted_index = self.end_index + march_forward_bytes;
+        const new_end_index = adjusted_index + n;
+        if (new_end_index > self.buffer.len) {
+            return error.OutOfMemory;
+        }
+        const result = self.buffer[adjusted_index .. new_end_index];
+        self.end_index = new_end_index;
+        return result;
+    }
+
+    fn realloc(allocator: &Allocator, old_mem: []u8, new_size: usize, alignment: u29) -> %[]u8 {
+        if (new_size <= old_mem.len) {
+            return old_mem[0..new_size];
+        } else {
+            const result = %return alloc(allocator, new_size, alignment);
+            copy(u8, result, old_mem);
+            return result;
+        }
+    }
+
+    fn free(allocator: &Allocator, bytes: []u8) { }
+};
+
 
 /// Copy all of source into dest at position 0.
 /// dest.len must be >= source.len.
@@ -95,17 +166,24 @@ pub fn set(comptime T: type, dest: []T, value: T) {
     for (dest) |*d| *d = value;
 }
 
-/// Return < 0, == 0, or > 0 if memory a is less than, equal to, or greater than,
-/// memory b, respectively.
-pub fn cmp(comptime T: type, a: []const T, b: []const T) -> Cmp {
-    const n = math.min(a.len, b.len);
+/// Returns true if lhs < rhs, false otherwise
+pub fn lessThan(comptime T: type, lhs: []const T, rhs: []const T) -> bool {
+    const n = math.min(lhs.len, rhs.len);
     var i: usize = 0;
     while (i < n) : (i += 1) {
-        if (a[i] == b[i]) continue;
-        return if (a[i] > b[i]) Cmp.Greater else if (a[i] < b[i]) Cmp.Less else Cmp.Equal;
+        if (lhs[i] == rhs[i]) continue;
+        return lhs[i] < rhs[i];
     }
 
-    return if (a.len > b.len) Cmp.Greater else if (a.len < b.len) Cmp.Less else Cmp.Equal;
+    return lhs.len < rhs.len;
+}
+
+test "mem.lessThan" {
+    assert(lessThan(u8, "abcd", "bee"));
+    assert(!lessThan(u8, "abc", "abc"));
+    assert(lessThan(u8, "abc", "abc0"));
+    assert(!lessThan(u8, "", ""));
+    assert(lessThan(u8, "", "a"));
 }
 
 /// Compares two slices and returns whether they are equal.
@@ -276,11 +354,11 @@ pub fn eql_slice_u8(a: []const u8, b: []const u8) -> bool {
 /// split("   abc def    ghi  ", " ")
 /// Will return slices for "abc", "def", "ghi", null, in that order.
 pub fn split(buffer: []const u8, split_bytes: []const u8) -> SplitIterator {
-    SplitIterator {
+    return SplitIterator {
         .index = 0,
         .buffer = buffer,
         .split_bytes = split_bytes,
-    }
+    };
 }
 
 test "mem.split" {
@@ -433,9 +511,8 @@ fn testWriteIntImpl() {
 
 pub fn min(comptime T: type, slice: []const T) -> T {
     var best = slice[0];
-    var i: usize = 1;
-    while (i < slice.len) : (i += 1) {
-        best = math.min(best, slice[i]);
+    for (slice[1..]) |item| {
+        best = math.min(best, item);
     }
     return best;
 }
@@ -446,9 +523,8 @@ test "mem.min" {
 
 pub fn max(comptime T: type, slice: []const T) -> T {
     var best = slice[0];
-    var i: usize = 1;
-    while (i < slice.len) : (i += 1) {
-        best = math.max(best, slice[i]);
+    for (slice[1..]) |item| {
+        best = math.max(best, item);
     }
     return best;
 }
@@ -456,3 +532,40 @@ pub fn max(comptime T: type, slice: []const T) -> T {
 test "mem.max" {
     assert(max(u8, "abcdefg") == 'g');
 }
+
+pub fn swap(comptime T: type, a: &T, b: &T) {
+    const tmp = *a;
+    *a = *b;
+    *b = tmp;
+}
+
+/// In-place order reversal of a slice
+pub fn reverse(comptime T: type, items: []T) {
+    var i: usize = 0;
+    const end = items.len / 2;
+    while (i < end) : (i += 1) {
+        swap(T, &items[i], &items[items.len - i - 1]);
+    }
+}
+
+test "std.mem.reverse" {
+    var arr = []i32{ 5, 3, 1, 2, 4 };
+    reverse(i32, arr[0..]);
+
+    assert(eql(i32, arr, []i32{ 4, 2, 1, 3, 5 }));
+}
+
+/// In-place rotation of the values in an array ([0 1 2 3] becomes [1 2 3 0] if we rotate by 1)
+/// Assumes 0 <= amount <= items.len
+pub fn rotate(comptime T: type, items: []T, amount: usize) {
+    reverse(T, items[0..amount]);
+    reverse(T, items[amount..]);
+    reverse(T, items);
+}
+
+test "std.mem.rotate" {
+    var arr = []i32{ 5, 3, 1, 2, 4 };
+    rotate(i32, arr[0..], 2);
+
+    assert(eql(i32, arr, []i32{ 1, 2, 4, 5, 3 }));
+}
diff --git a/std/net.zig b/std/net.zig
index 3551499c6b..a5fd4d6036 100644
--- a/std/net.zig
+++ b/std/net.zig
@@ -72,7 +72,7 @@ pub fn lookup(hostname: []const u8, out_addrs: []Address) -> %[]Address {
 //		if (family != AF_INET)
 //			buf[cnt++] = (struct address){ .family = AF_INET6, .addr = { [15] = 1 } };
 //
-        unreachable // TODO
+        unreachable; // TODO
     }
 
     // TODO
@@ -84,7 +84,7 @@ pub fn lookup(hostname: []const u8, out_addrs: []Address) -> %[]Address {
     //    else => {},
     //};
 
-    unreachable // TODO
+    unreachable; // TODO
 }
 
 pub fn connectAddr(addr: &Address, port: u16) -> %Connection {
@@ -96,23 +96,23 @@ pub fn connectAddr(addr: &Address, port: u16) -> %Connection {
     }
     const socket_fd = i32(socket_ret);
 
-    const connect_ret = if (addr.family == linux.AF_INET) {
+    const connect_ret = if (addr.family == linux.AF_INET) x: {
         var os_addr: linux.sockaddr_in = undefined;
         os_addr.family = addr.family;
         os_addr.port = endian.swapIfLe(u16, port);
         @memcpy((&u8)(&os_addr.addr), &addr.addr[0], 4);
         @memset(&os_addr.zero[0], 0, @sizeOf(@typeOf(os_addr.zero)));
-        linux.connect(socket_fd, (&linux.sockaddr)(&os_addr), @sizeOf(linux.sockaddr_in))
-    } else if (addr.family == linux.AF_INET6) {
+        break :x linux.connect(socket_fd, (&linux.sockaddr)(&os_addr), @sizeOf(linux.sockaddr_in));
+    } else if (addr.family == linux.AF_INET6) x: {
         var os_addr: linux.sockaddr_in6 = undefined;
         os_addr.family = addr.family;
         os_addr.port = endian.swapIfLe(u16, port);
         os_addr.flowinfo = 0;
         os_addr.scope_id = addr.scope_id;
         @memcpy(&os_addr.addr[0], &addr.addr[0], 16);
-        linux.connect(socket_fd, (&linux.sockaddr)(&os_addr), @sizeOf(linux.sockaddr_in6))
+        break :x linux.connect(socket_fd, (&linux.sockaddr)(&os_addr), @sizeOf(linux.sockaddr_in6));
     } else {
-        unreachable
+        unreachable;
     };
     const connect_err = linux.getErrno(connect_ret);
     if (connect_err > 0) {
@@ -165,13 +165,13 @@ pub fn parseIpLiteral(buf: []const u8) -> %Address {
 fn hexDigit(c: u8) -> u8 {
     // TODO use switch with range
     if ('0' <= c and c <= '9') {
-        c - '0'
+        return c - '0';
     } else if ('A' <= c and c <= 'Z') {
-        c - 'A' + 10
+        return c - 'A' + 10;
     } else if ('a' <= c and c <= 'z') {
-        c - 'a' + 10
+        return c - 'a' + 10;
     } else {
-        @maxValue(u8)
+        return @maxValue(u8);
     }
 }
 
diff --git a/std/os/child_process.zig b/std/os/child_process.zig
index 75a2dcf24d..6e86c99056 100644
--- a/std/os/child_process.zig
+++ b/std/os/child_process.zig
@@ -5,7 +5,6 @@ const os = std.os;
 const posix = os.posix;
 const windows = os.windows;
 const mem = std.mem;
-const Allocator = mem.Allocator;
 const debug = std.debug;
 const assert = debug.assert;
 const BufMap = std.BufMap;
@@ -74,7 +73,7 @@ pub const ChildProcess = struct {
 
     /// First argument in argv is the executable.
     /// On success must call deinit.
-    pub fn init(argv: []const []const u8, allocator: &Allocator) -> %&ChildProcess {
+    pub fn init(argv: []const []const u8, allocator: &mem.Allocator) -> %&ChildProcess {
         const child = %return allocator.create(ChildProcess);
         %defer allocator.destroy(child);
 
@@ -116,7 +115,7 @@ pub const ChildProcess = struct {
             return self.spawnWindows();
         } else {
             return self.spawnPosix();
-        };
+        }
     }
 
     pub fn spawnAndWait(self: &ChildProcess) -> %Term {
@@ -180,6 +179,46 @@ pub const ChildProcess = struct {
         }
     }
 
+    pub const ExecResult = struct {
+        term: os.ChildProcess.Term,
+        stdout: []u8,
+        stderr: []u8,
+    };
+
+    /// Spawns a child process, waits for it, collecting stdout and stderr, and then returns.
+    /// If it succeeds, the caller owns result.stdout and result.stderr memory.
+    pub fn exec(allocator: &mem.Allocator, argv: []const []const u8, cwd: ?[]const u8,
+        env_map: ?&const BufMap, max_output_size: usize) -> %ExecResult
+    {
+        const child = %%ChildProcess.init(argv, allocator);
+        defer child.deinit();
+
+        child.stdin_behavior = ChildProcess.StdIo.Ignore;
+        child.stdout_behavior = ChildProcess.StdIo.Pipe;
+        child.stderr_behavior = ChildProcess.StdIo.Pipe;
+        child.cwd = cwd;
+        child.env_map = env_map;
+
+        %return child.spawn();
+
+        var stdout = Buffer.initNull(allocator);
+        var stderr = Buffer.initNull(allocator);
+        defer Buffer.deinit(&stdout);
+        defer Buffer.deinit(&stderr);
+
+        var stdout_file_in_stream = io.FileInStream.init(&??child.stdout);
+        var stderr_file_in_stream = io.FileInStream.init(&??child.stderr);
+
+        %return stdout_file_in_stream.stream.readAllBuffer(&stdout, max_output_size);
+        %return stderr_file_in_stream.stream.readAllBuffer(&stderr, max_output_size);
+
+        return ExecResult {
+            .term = %return child.wait(),
+            .stdout = stdout.toOwnedSlice(),
+            .stderr = stderr.toOwnedSlice(),
+        };
+    }
+
     fn waitWindows(self: &ChildProcess) -> %Term {
         if (self.term) |term| {
             self.cleanupStreams();
@@ -210,12 +249,12 @@ pub const ChildProcess = struct {
     fn waitUnwrappedWindows(self: &ChildProcess) -> %void {
         const result = os.windowsWaitSingle(self.handle, windows.INFINITE);
 
-        self.term = (%Term)({
+        self.term = (%Term)(x: {
             var exit_code: windows.DWORD = undefined;
             if (windows.GetExitCodeProcess(self.handle, &exit_code) == 0) {
-                Term { .Unknown = 0 }
+                break :x Term { .Unknown = 0 };
             } else {
-                Term { .Exited = @bitCast(i32, exit_code)}
+                break :x Term { .Exited = @bitCast(i32, exit_code)};
             }
         });
 
@@ -261,7 +300,7 @@ pub const ChildProcess = struct {
         defer {
             os.close(self.err_pipe[0]);
             os.close(self.err_pipe[1]);
-        };
+        }
 
         // Write @maxValue(ErrInt) to the write end of the err_pipe. This is after
         // waitpid, so this write is guaranteed to be after the child
@@ -280,15 +319,15 @@ pub const ChildProcess = struct {
     }
 
     fn statusToTerm(status: i32) -> Term {
-        return if (posix.WIFEXITED(status)) {
+        return if (posix.WIFEXITED(status))
             Term { .Exited = posix.WEXITSTATUS(status) }
-        } else if (posix.WIFSIGNALED(status)) {
+        else if (posix.WIFSIGNALED(status))
             Term { .Signal = posix.WTERMSIG(status) }
-        } else if (posix.WIFSTOPPED(status)) {
+        else if (posix.WIFSTOPPED(status))
             Term { .Stopped = posix.WSTOPSIG(status) }
-        } else {
+        else
             Term { .Unknown = status }
-        };
+        ;
     }
 
     fn spawnPosix(self: &ChildProcess) -> %void {
@@ -305,22 +344,22 @@ pub const ChildProcess = struct {
         %defer if (self.stderr_behavior == StdIo.Pipe) { destroyPipe(stderr_pipe); };
 
         const any_ignore = (self.stdin_behavior == StdIo.Ignore or self.stdout_behavior == StdIo.Ignore or self.stderr_behavior == StdIo.Ignore);
-        const dev_null_fd = if (any_ignore) {
+        const dev_null_fd = if (any_ignore)
             %return os.posixOpen("/dev/null", posix.O_RDWR, 0, null)
-        } else {
+        else
             undefined
-        };
-        defer { if (any_ignore) os.close(dev_null_fd); };
+        ;
+        defer { if (any_ignore) os.close(dev_null_fd); }
 
         var env_map_owned: BufMap = undefined;
         var we_own_env_map: bool = undefined;
-        const env_map = if (self.env_map) |env_map| {
+        const env_map = if (self.env_map) |env_map| x: {
             we_own_env_map = false;
-            env_map
-        } else {
+            break :x env_map;
+        } else x: {
             we_own_env_map = true;
             env_map_owned = %return os.getEnvMap(self.allocator);
-            &env_map_owned
+            break :x &env_map_owned;
         };
         defer { if (we_own_env_map) env_map_owned.deinit(); }
 
@@ -411,13 +450,13 @@ pub const ChildProcess = struct {
             self.stdout_behavior == StdIo.Ignore or
             self.stderr_behavior == StdIo.Ignore);
 
-        const nul_handle = if (any_ignore) {
+        const nul_handle = if (any_ignore)
             %return os.windowsOpen("NUL", windows.GENERIC_READ, windows.FILE_SHARE_READ,
                 windows.OPEN_EXISTING, windows.FILE_ATTRIBUTE_NORMAL, null)
-        } else {
+        else
             undefined
-        };
-        defer { if (any_ignore) os.close(nul_handle); };
+        ;
+        defer { if (any_ignore) os.close(nul_handle); }
         if (any_ignore) {
             %return windowsSetHandleInfo(nul_handle, windows.HANDLE_FLAG_INHERIT, 0);
         }
@@ -503,30 +542,32 @@ pub const ChildProcess = struct {
         };
         var piProcInfo: windows.PROCESS_INFORMATION = undefined;
 
-        const cwd_slice = if (self.cwd) |cwd| {
+        const cwd_slice = if (self.cwd) |cwd|
             %return cstr.addNullByte(self.allocator, cwd)
-        } else {
+        else
             null
-        };
+        ;
         defer if (cwd_slice) |cwd| self.allocator.free(cwd);
         const cwd_ptr = if (cwd_slice) |cwd| cwd.ptr else null;
 
-        const maybe_envp_buf = if (self.env_map) |env_map| {
+        const maybe_envp_buf = if (self.env_map) |env_map|
             %return os.createWindowsEnvBlock(self.allocator, env_map)
-        } else {
+        else
             null
-        };
+        ;
         defer if (maybe_envp_buf) |envp_buf| self.allocator.free(envp_buf);
         const envp_ptr = if (maybe_envp_buf) |envp_buf| envp_buf.ptr else null;
 
         // the cwd set in ChildProcess is in effect when choosing the executable path
         // to match posix semantics
-        const app_name = if (self.cwd) |cwd| {
-            const resolved = %return os.path.resolve(self.allocator, cwd, self.argv[0]);
-            defer self.allocator.free(resolved);
-            %return cstr.addNullByte(self.allocator, resolved)
-        } else {
-            %return cstr.addNullByte(self.allocator, self.argv[0])
+        const app_name = x: {
+            if (self.cwd) |cwd| {
+                const resolved = %return os.path.resolve(self.allocator, cwd, self.argv[0]);
+                defer self.allocator.free(resolved);
+                break :x %return cstr.addNullByte(self.allocator, resolved);
+            } else {
+                break :x %return cstr.addNullByte(self.allocator, self.argv[0]);
+            }
         };
         defer self.allocator.free(app_name);
 
@@ -589,6 +630,7 @@ pub const ChildProcess = struct {
             StdIo.Ignore => %return os.posixDup2(dev_null_fd, std_fileno),
         }
     }
+
 };
 
 fn windowsCreateProcess(app_name: &u8, cmd_line: &u8, envp_ptr: ?&u8, cwd_ptr: ?&u8,
@@ -611,7 +653,7 @@ fn windowsCreateProcess(app_name: &u8, cmd_line: &u8, envp_ptr: ?&u8, cwd_ptr: ?
 
 /// Caller must dealloc.
 /// Guarantees a null byte at result[result.len].
-fn windowsCreateCommandLine(allocator: &Allocator, argv: []const []const u8) -> %[]u8 {
+fn windowsCreateCommandLine(allocator: &mem.Allocator, argv: []const []const u8) -> %[]u8 {
     var buf = %return Buffer.initSize(allocator, 0);
     defer buf.deinit();
 
@@ -701,7 +743,7 @@ fn makePipe() -> %[2]i32 {
         return switch (err) {
             posix.EMFILE, posix.ENFILE => error.SystemResources,
             else => os.unexpectedErrorPosix(err),
-        }
+        };
     }
     return fds;
 }
@@ -760,10 +802,10 @@ fn handleTerm(pid: i32, status: i32) {
     }
 }
 
-const sigchld_set = {
+const sigchld_set = x: {
     var signal_set = posix.empty_sigset;
     posix.sigaddset(&signal_set, posix.SIGCHLD);
-    signal_set
+    break :x signal_set;
 };
 
 fn block_SIGCHLD() {
diff --git a/std/os/darwin.zig b/std/os/darwin.zig
index 9d80c64006..e230826b7e 100644
--- a/std/os/darwin.zig
+++ b/std/os/darwin.zig
@@ -97,63 +97,63 @@ pub const SIGINFO   = 29; /// information request
 pub const SIGUSR1   = 30; /// user defined signal 1
 pub const SIGUSR2   = 31; /// user defined signal 2
 
-fn wstatus(x: i32) -> i32 { x & 0o177 }
+fn wstatus(x: i32) -> i32 { return x & 0o177; }
 const wstopped = 0o177;
-pub fn WEXITSTATUS(x: i32) -> i32 { x >> 8 }
-pub fn WTERMSIG(x: i32) -> i32 { wstatus(x) }
-pub fn WSTOPSIG(x: i32) -> i32 { x >> 8 }
-pub fn WIFEXITED(x: i32) -> bool { wstatus(x) == 0 }
-pub fn WIFSTOPPED(x: i32) -> bool { wstatus(x) == wstopped and WSTOPSIG(x) != 0x13 }
-pub fn WIFSIGNALED(x: i32) -> bool { wstatus(x) != wstopped and wstatus(x) != 0 }
+pub fn WEXITSTATUS(x: i32) -> i32 { return x >> 8; }
+pub fn WTERMSIG(x: i32) -> i32 { return wstatus(x); }
+pub fn WSTOPSIG(x: i32) -> i32 { return x >> 8; }
+pub fn WIFEXITED(x: i32) -> bool { return wstatus(x) == 0; }
+pub fn WIFSTOPPED(x: i32) -> bool { return wstatus(x) == wstopped and WSTOPSIG(x) != 0x13; }
+pub fn WIFSIGNALED(x: i32) -> bool { return wstatus(x) != wstopped and wstatus(x) != 0; }
 
 /// Get the errno from a syscall return value, or 0 for no error.
 pub fn getErrno(r: usize) -> usize {
     const signed_r = @bitCast(isize, r);
-    if (signed_r > -4096 and signed_r < 0) usize(-signed_r) else 0
+    return if (signed_r > -4096 and signed_r < 0) usize(-signed_r) else 0;
 }
 
 pub fn close(fd: i32) -> usize {
-    errnoWrap(c.close(fd))
+    return errnoWrap(c.close(fd));
 }
 
 pub fn abort() -> noreturn {
-    c.abort()
+    c.abort();
 }
 
 pub fn exit(code: i32) -> noreturn {
-    c.exit(code)
+    c.exit(code);
 }
 
 pub fn isatty(fd: i32) -> bool {
-    c.isatty(fd) != 0
+    return c.isatty(fd) != 0;
 }
 
 pub fn fstat(fd: i32, buf: &c.Stat) -> usize {
-    errnoWrap(c.@"fstat$INODE64"(fd, buf))
+    return errnoWrap(c.@"fstat$INODE64"(fd, buf));
 }
 
 pub fn lseek(fd: i32, offset: isize, whence: c_int) -> usize {
-    errnoWrap(c.lseek(fd, offset, whence))
+    return errnoWrap(c.lseek(fd, offset, whence));
 }
 
 pub fn open(path: &const u8, flags: u32, mode: usize) -> usize {
-    errnoWrap(c.open(path, @bitCast(c_int, flags), mode))
+    return errnoWrap(c.open(path, @bitCast(c_int, flags), mode));
 }
 
 pub fn raise(sig: i32) -> usize {
-    errnoWrap(c.raise(sig))
+    return errnoWrap(c.raise(sig));
 }
 
 pub fn read(fd: i32, buf: &u8, nbyte: usize) -> usize {
-    errnoWrap(c.read(fd, @ptrCast(&c_void, buf), nbyte))
+    return errnoWrap(c.read(fd, @ptrCast(&c_void, buf), nbyte));
 }
 
 pub fn stat(noalias path: &const u8, noalias buf: &stat) -> usize {
-    errnoWrap(c.stat(path, buf))
+    return errnoWrap(c.stat(path, buf));
 }
 
 pub fn write(fd: i32, buf: &const u8, nbyte: usize) -> usize {
-    errnoWrap(c.write(fd, @ptrCast(&const c_void, buf), nbyte))
+    return errnoWrap(c.write(fd, @ptrCast(&const c_void, buf), nbyte));
 }
 
 pub fn mmap(address: ?&u8, length: usize, prot: usize, flags: usize, fd: i32,
@@ -166,79 +166,79 @@ pub fn mmap(address: ?&u8, length: usize, prot: usize, flags: usize, fd: i32,
 }
 
 pub fn munmap(address: &u8, length: usize) -> usize {
-    errnoWrap(c.munmap(@ptrCast(&c_void, address), length))
+    return errnoWrap(c.munmap(@ptrCast(&c_void, address), length));
 }
 
 pub fn unlink(path: &const u8) -> usize {
-    errnoWrap(c.unlink(path))
+    return errnoWrap(c.unlink(path));
 }
 
 pub fn getcwd(buf: &u8, size: usize) -> usize {
-    if (c.getcwd(buf, size) == null) @bitCast(usize, -isize(*c._errno())) else 0
+    return if (c.getcwd(buf, size) == null) @bitCast(usize, -isize(*c._errno())) else 0;
 }
 
 pub fn waitpid(pid: i32, status: &i32, options: u32) -> usize {
     comptime assert(i32.bit_count == c_int.bit_count);
-    errnoWrap(c.waitpid(pid, @ptrCast(&c_int, status), @bitCast(c_int, options)))
+    return errnoWrap(c.waitpid(pid, @ptrCast(&c_int, status), @bitCast(c_int, options)));
 }
 
 pub fn fork() -> usize {
-    errnoWrap(c.fork())
+    return errnoWrap(c.fork());
 }
 
 pub fn pipe(fds: &[2]i32) -> usize {
     comptime assert(i32.bit_count == c_int.bit_count);
-    errnoWrap(c.pipe(@ptrCast(&c_int, fds)))
+    return errnoWrap(c.pipe(@ptrCast(&c_int, fds)));
 }
 
 pub fn mkdir(path: &const u8, mode: u32) -> usize {
-    errnoWrap(c.mkdir(path, mode))
+    return errnoWrap(c.mkdir(path, mode));
 }
 
 pub fn symlink(existing: &const u8, new: &const u8) -> usize {
-    errnoWrap(c.symlink(existing, new))
+    return errnoWrap(c.symlink(existing, new));
 }
 
 pub fn rename(old: &const u8, new: &const u8) -> usize {
-    errnoWrap(c.rename(old, new))
+    return errnoWrap(c.rename(old, new));
 }
 
 pub fn chdir(path: &const u8) -> usize {
-    errnoWrap(c.chdir(path))
+    return errnoWrap(c.chdir(path));
 }
 
 pub fn execve(path: &const u8, argv: &const ?&const u8, envp: &const ?&const u8)
     -> usize
 {
-    errnoWrap(c.execve(path, argv, envp))
+    return errnoWrap(c.execve(path, argv, envp));
 }
 
 pub fn dup2(old: i32, new: i32) -> usize {
-    errnoWrap(c.dup2(old, new))
+    return errnoWrap(c.dup2(old, new));
 }
 
 pub fn readlink(noalias path: &const u8, noalias buf_ptr: &u8, buf_len: usize) -> usize {
-    errnoWrap(c.readlink(path, buf_ptr, buf_len))
+    return errnoWrap(c.readlink(path, buf_ptr, buf_len));
 }
 
 pub fn nanosleep(req: &const timespec, rem: ?&timespec) -> usize {
-    errnoWrap(c.nanosleep(req, rem))
+    return errnoWrap(c.nanosleep(req, rem));
 }
 
 pub fn realpath(noalias filename: &const u8, noalias resolved_name: &u8) -> usize {
-    if (c.realpath(filename, resolved_name) == null) @bitCast(usize, -isize(*c._errno())) else 0
+    return if (c.realpath(filename, resolved_name) == null) @bitCast(usize, -isize(*c._errno())) else 0;
 }
 
 pub fn setreuid(ruid: u32, euid: u32) -> usize {
-    errnoWrap(c.setreuid(ruid, euid))
+    return errnoWrap(c.setreuid(ruid, euid));
 }
 
 pub fn setregid(rgid: u32, egid: u32) -> usize {
-    errnoWrap(c.setregid(rgid, egid))
+    return errnoWrap(c.setregid(rgid, egid));
 }
 
 pub fn sigprocmask(flags: u32, noalias set: &const sigset_t, noalias oldset: ?&sigset_t) -> usize {
-    errnoWrap(c.sigprocmask(@bitCast(c_int, flags), set, oldset))
+    return errnoWrap(c.sigprocmask(@bitCast(c_int, flags), set, oldset));
 }
 
 pub fn sigaction(sig: u5, noalias act: &const Sigaction, noalias oact: ?&Sigaction) -> usize {
@@ -285,9 +285,5 @@ pub fn sigaddset(set: &sigset_t, signo: u5) {
 /// that the kernel represents it to libc. Errno was a mistake, let's make
 /// it go away forever.
 fn errnoWrap(value: isize) -> usize {
-    @bitCast(usize, if (value == -1) {
-        -isize(*c._errno())
-    } else {
-        value
-    })
+    return @bitCast(usize, if (value == -1) -isize(*c._errno()) else value);
 }
diff --git a/std/os/index.zig b/std/os/index.zig
index 361750aedc..8e79eda40b 100644
--- a/std/os/index.zig
+++ b/std/os/index.zig
@@ -84,7 +84,7 @@ pub fn getRandomBytes(buf: []u8) -> %void {
                     posix.EFAULT => unreachable,
                     posix.EINTR  => continue,
                     else         => unexpectedErrorPosix(err),
-                }
+                };
             }
             return;
         },
@@ -151,18 +151,17 @@ pub coldcc fn exit(status: i32) -> noreturn {
     }
     switch (builtin.os) {
         Os.linux, Os.darwin, Os.macosx, Os.ios => {
-            posix.exit(status)
+            posix.exit(status);
         },
         Os.windows => {
             // Map a possibly negative status code to a non-negative status for the systems default
             // integer width.
-            const p_status = if (@sizeOf(c_uint) < @sizeOf(u32)) {
+            const p_status = if (@sizeOf(c_uint) < @sizeOf(u32))
                 @truncate(c_uint, @bitCast(u32, status))
-            } else {
-                c_uint(@bitCast(u32, status))
-            };
+            else
+                c_uint(@bitCast(u32, status));
 
-            windows.ExitProcess(p_status)
+            windows.ExitProcess(p_status);
         },
         else => @compileError("Unsupported OS"),
     }
@@ -289,7 +288,7 @@ pub fn posixOpen(file_path: []const u8, flags: u32, perm: usize, allocator: ?&Al
                 posix.EPERM => error.AccessDenied,
                 posix.EEXIST => error.PathAlreadyExists,
                 else => unexpectedErrorPosix(err),
-            }
+            };
         }
         return i32(result);
     }
@@ -680,7 +679,7 @@ pub fn deleteFileWindows(allocator: &Allocator, file_path: []const u8) -> %void
             windows.ERROR.ACCESS_DENIED => error.AccessDenied,
             windows.ERROR.FILENAME_EXCED_RANGE, windows.ERROR.INVALID_PARAMETER => error.NameTooLong,
             else => unexpectedErrorWindows(err),
-        }
+        };
     }
 }
 
@@ -902,40 +901,41 @@ pub fn deleteDir(allocator: &Allocator, dir_path: []const u8) -> %void {
 /// this function recursively removes its entries and then tries again.
 // TODO non-recursive implementation
 pub fn deleteTree(allocator: &Allocator, full_path: []const u8) -> %void {
-start_over:
-    // First, try deleting the item as a file. This way we don't follow sym links.
-    if (deleteFile(allocator, full_path)) {
-        return;
-    } else |err| {
-        if (err == error.FileNotFound)
+    start_over: while (true) {
+        // First, try deleting the item as a file. This way we don't follow sym links.
+        if (deleteFile(allocator, full_path)) {
             return;
-        if (err != error.IsDir)
-            return err;
-    }
-    {
-        var dir = Dir.open(allocator, full_path) %% |err| {
+        } else |err| {
             if (err == error.FileNotFound)
                 return;
-            if (err == error.NotDir)
-                goto start_over;
-            return err;
-        };
-        defer dir.close();
+            if (err != error.IsDir)
+                return err;
+        }
+        {
+            var dir = Dir.open(allocator, full_path) %% |err| {
+                if (err == error.FileNotFound)
+                    return;
+                if (err == error.NotDir)
+                    continue :start_over;
+                return err;
+            };
+            defer dir.close();
 
-        var full_entry_buf = ArrayList(u8).init(allocator);
-        defer full_entry_buf.deinit();
+            var full_entry_buf = ArrayList(u8).init(allocator);
+            defer full_entry_buf.deinit();
 
-        while (%return dir.next()) |entry| {
-            %return full_entry_buf.resize(full_path.len + entry.name.len + 1);
-            const full_entry_path = full_entry_buf.toSlice();
-            mem.copy(u8, full_entry_path, full_path);
-            full_entry_path[full_path.len] = '/';
-            mem.copy(u8, full_entry_path[full_path.len + 1..], entry.name);
+            while (%return dir.next()) |entry| {
+                %return full_entry_buf.resize(full_path.len + entry.name.len + 1);
+                const full_entry_path = full_entry_buf.toSlice();
+                mem.copy(u8, full_entry_path, full_path);
+                full_entry_path[full_path.len] = '/';
+                mem.copy(u8, full_entry_path[full_path.len + 1..], entry.name);
 
-            %return deleteTree(allocator, full_entry_path);
+                %return deleteTree(allocator, full_entry_path);
+            }
         }
+        return deleteDir(allocator, full_path);
     }
-    return deleteDir(allocator, full_path);
 }
 
 pub const Dir = struct {
@@ -988,58 +988,59 @@ pub const Dir = struct {
     /// Memory such as file names referenced in this returned entry becomes invalid
     /// with subsequent calls to next, as well as when this ::Dir is deinitialized.
     pub fn next(self: &Dir) -> %?Entry {
-    start_over:
-        if (self.index >= self.end_index) {
-            if (self.buf.len == 0) {
-                self.buf = %return self.allocator.alloc(u8, page_size);
-            }
+        start_over: while (true) {
+            if (self.index >= self.end_index) {
+                if (self.buf.len == 0) {
+                    self.buf = %return self.allocator.alloc(u8, page_size);
+                }
 
-            while (true) {
-                const result = posix.getdents(self.fd, self.buf.ptr, self.buf.len);
-                const err = linux.getErrno(result);
-                if (err > 0) {
-                    switch (err) {
-                        posix.EBADF, posix.EFAULT, posix.ENOTDIR => unreachable,
-                        posix.EINVAL => {
-                            self.buf = %return self.allocator.realloc(u8, self.buf, self.buf.len * 2);
-                            continue;
-                        },
-                        else => return unexpectedErrorPosix(err),
-                    };
+                while (true) {
+                    const result = posix.getdents(self.fd, self.buf.ptr, self.buf.len);
+                    const err = linux.getErrno(result);
+                    if (err > 0) {
+                        switch (err) {
+                            posix.EBADF, posix.EFAULT, posix.ENOTDIR => unreachable,
+                            posix.EINVAL => {
+                                self.buf = %return self.allocator.realloc(u8, self.buf, self.buf.len * 2);
+                                continue;
+                            },
+                            else => return unexpectedErrorPosix(err),
+                        }
+                    }
+                    if (result == 0)
+                        return null;
+                    self.index = 0;
+                    self.end_index = result;
+                    break;
                 }
-                if (result == 0)
-                    return null;
-                self.index = 0;
-                self.end_index = result;
-                break;
             }
-        }
-        const linux_entry = @ptrCast(& align(1) LinuxEntry, &self.buf[self.index]);
-        const next_index = self.index + linux_entry.d_reclen;
-        self.index = next_index;
+            const linux_entry = @ptrCast(& align(1) LinuxEntry, &self.buf[self.index]);
+            const next_index = self.index + linux_entry.d_reclen;
+            self.index = next_index;
 
-        const name = cstr.toSlice(&linux_entry.d_name);
+            const name = cstr.toSlice(&linux_entry.d_name);
 
-        // skip . and .. entries
-        if (mem.eql(u8, name, ".") or mem.eql(u8, name, "..")) {
-            goto start_over;
-        }
+            // skip . and .. entries
+            if (mem.eql(u8, name, ".") or mem.eql(u8, name, "..")) {
+                continue :start_over;
+            }
 
-        const type_char = self.buf[next_index - 1];
-        const entry_kind = switch (type_char) {
-            posix.DT_BLK => Entry.Kind.BlockDevice,
-            posix.DT_CHR => Entry.Kind.CharacterDevice,
-            posix.DT_DIR => Entry.Kind.Directory,
-            posix.DT_FIFO => Entry.Kind.NamedPipe,
-            posix.DT_LNK => Entry.Kind.SymLink,
-            posix.DT_REG => Entry.Kind.File,
-            posix.DT_SOCK => Entry.Kind.UnixDomainSocket,
-            else => Entry.Kind.Unknown,
-        };
-        return Entry {
-            .name = name,
-            .kind = entry_kind,
-        };
+            const type_char = self.buf[next_index - 1];
+            const entry_kind = switch (type_char) {
+                posix.DT_BLK => Entry.Kind.BlockDevice,
+                posix.DT_CHR => Entry.Kind.CharacterDevice,
+                posix.DT_DIR => Entry.Kind.Directory,
+                posix.DT_FIFO => Entry.Kind.NamedPipe,
+                posix.DT_LNK => Entry.Kind.SymLink,
+                posix.DT_REG => Entry.Kind.File,
+                posix.DT_SOCK => Entry.Kind.UnixDomainSocket,
+                else => Entry.Kind.Unknown,
+            };
+            return Entry {
+                .name = name,
+                .kind = entry_kind,
+            };
+        }
     }
 };
 
@@ -1422,6 +1423,54 @@ pub fn args() -> ArgIterator {
     return ArgIterator.init();
 }
 
+/// Caller must call freeArgs on result.
+pub fn argsAlloc(allocator: &mem.Allocator) -> %[]const []u8 {
+    // TODO refactor to only make 1 allocation.
+    var it = args();
+    var contents = %return Buffer.initSize(allocator, 0);
+    defer contents.deinit();
+
+    var slice_list = ArrayList(usize).init(allocator);
+    defer slice_list.deinit();
+
+    while (it.next(allocator)) |arg_or_err| {
+        const arg = %return arg_or_err;
+        defer allocator.free(arg);
+        %return contents.append(arg);
+        %return slice_list.append(arg.len);
+    }
+
+    const contents_slice = contents.toSliceConst();
+    const slice_sizes = slice_list.toSliceConst();
+    const slice_list_bytes = %return math.mul(usize, @sizeOf([]u8), slice_sizes.len);
+    const total_bytes = %return math.add(usize, slice_list_bytes, contents_slice.len);
+    const buf = %return allocator.alignedAlloc(u8, @alignOf([]u8), total_bytes);
+    %defer allocator.free(buf);
+
+    const result_slice_list = ([][]u8)(buf[0..slice_list_bytes]);
+    const result_contents = buf[slice_list_bytes..];
+    mem.copy(u8, result_contents, contents_slice);
+
+    var contents_index: usize = 0;
+    for (slice_sizes) |len, i| {
+        const new_index = contents_index + len;
+        result_slice_list[i] = result_contents[contents_index..new_index];
+        contents_index = new_index;
+    }
+
+    return result_slice_list;
+}
+
+pub fn argsFree(allocator: &mem.Allocator, args_alloc: []const []u8) {
+    var total_bytes: usize = 0;
+    for (args_alloc) |arg| {
+        total_bytes += @sizeOf([]u8) + arg.len;
+    }
+    const unaligned_allocated_buf = @ptrCast(&u8, args_alloc.ptr)[0..total_bytes];
+    const aligned_allocated_buf = @alignCast(@alignOf([]u8), unaligned_allocated_buf);
+    return allocator.free(aligned_allocated_buf);
+}
+
 test "windows arg parsing" {
     testWindowsCmdLine(c"a   b\tc d", [][]const u8{"a", "b", "c", "d"});
     testWindowsCmdLine(c"\"abc\" d e", [][]const u8{"abc", "d", "e"});
@@ -1494,6 +1543,39 @@ pub fn openSelfExe() -> %io.File {
     }
 }
 
+/// Get the directory path that contains the current executable.
+/// Caller owns returned memory.
+pub fn selfExeDirPath(allocator: &mem.Allocator) -> %[]u8 {
+    switch (builtin.os) {
+        Os.linux => {
+            // If the currently executing binary has been deleted,
+            // the file path looks something like `/a/b/c/exe (deleted)`
+            // This path cannot be opened, but it's valid for determining the directory
+            // the executable was in when it was run.
+            const full_exe_path = %return readLink(allocator, "/proc/self/exe");
+            %defer allocator.free(full_exe_path);
+            const dir = path.dirname(full_exe_path);
+            return allocator.shrink(u8, full_exe_path, dir.len);
+        },
+        Os.windows => {
+            @panic("TODO windows std.os.selfExeDirPath");
+            //buf_resize(out_path, 256);
+            //for (;;) {
+            //    DWORD copied_amt = GetModuleFileName(nullptr, buf_ptr(out_path), buf_len(out_path));
+            //    if (copied_amt <= 0) {
+            //        return ErrorFileNotFound;
+            //    }
+            //    if (copied_amt < buf_len(out_path)) {
+            //        buf_resize(out_path, copied_amt);
+            //        return 0;
+            //    }
+            //    buf_resize(out_path, buf_len(out_path) * 2);
+            //}
+        },
+        else => @compileError("unimplemented: std.os.selfExeDirPath for " ++ @tagName(builtin.os)),
+    }
+}
+
 pub fn isTty(handle: FileHandle) -> bool {
     if (is_windows) {
         return windows_util.windowsIsTty(handle);
diff --git a/std/os/linux.zig b/std/os/linux.zig
index 5951f9d7bc..f9baa43098 100644
--- a/std/os/linux.zig
+++ b/std/os/linux.zig
@@ -367,14 +367,14 @@ pub const TFD_CLOEXEC = O_CLOEXEC;
 pub const TFD_TIMER_ABSTIME = 1;
 pub const TFD_TIMER_CANCEL_ON_SET = (1 << 1);
 
-fn unsigned(s: i32) -> u32 { @bitCast(u32, s) }
-fn signed(s: u32) -> i32 { @bitCast(i32, s) }
-pub fn WEXITSTATUS(s: i32) -> i32 { signed((unsigned(s) & 0xff00) >> 8) }
-pub fn WTERMSIG(s: i32) -> i32 { signed(unsigned(s) & 0x7f) }
-pub fn WSTOPSIG(s: i32) -> i32 { WEXITSTATUS(s) }
-pub fn WIFEXITED(s: i32) -> bool { WTERMSIG(s) == 0 }
-pub fn WIFSTOPPED(s: i32) -> bool { (u16)(((unsigned(s)&0xffff)*%0x10001)>>8) > 0x7f00 }
-pub fn WIFSIGNALED(s: i32) -> bool { (unsigned(s)&0xffff)-%1 < 0xff }
+fn unsigned(s: i32) -> u32 { return @bitCast(u32, s); }
+fn signed(s: u32) -> i32 { return @bitCast(i32, s); }
+pub fn WEXITSTATUS(s: i32) -> i32 { return signed((unsigned(s) & 0xff00) >> 8); }
+pub fn WTERMSIG(s: i32) -> i32 { return signed(unsigned(s) & 0x7f); }
+pub fn WSTOPSIG(s: i32) -> i32 { return WEXITSTATUS(s); }
+pub fn WIFEXITED(s: i32) -> bool { return WTERMSIG(s) == 0; }
+pub fn WIFSTOPPED(s: i32) -> bool { return (u16)(((unsigned(s)&0xffff)*%0x10001)>>8) > 0x7f00; }
+pub fn WIFSIGNALED(s: i32) -> bool { return (unsigned(s)&0xffff)-%1 < 0xff; }
 
 
 pub const winsize = extern struct {
@@ -387,31 +387,31 @@ pub const winsize = extern struct {
 /// Get the errno from a syscall return value, or 0 for no error.
 pub fn getErrno(r: usize) -> usize {
     const signed_r = @bitCast(isize, r);
-    if (signed_r > -4096 and signed_r < 0) usize(-signed_r) else 0
+    return if (signed_r > -4096 and signed_r < 0) usize(-signed_r) else 0;
 }
 
 pub fn dup2(old: i32, new: i32) -> usize {
-    arch.syscall2(arch.SYS_dup2, usize(old), usize(new))
+    return arch.syscall2(arch.SYS_dup2, usize(old), usize(new));
 }
 
 pub fn chdir(path: &const u8) -> usize {
-    arch.syscall1(arch.SYS_chdir, @ptrToInt(path))
+    return arch.syscall1(arch.SYS_chdir, @ptrToInt(path));
 }
 
 pub fn execve(path: &const u8, argv: &const ?&const u8, envp: &const ?&const u8) -> usize {
-    arch.syscall3(arch.SYS_execve, @ptrToInt(path), @ptrToInt(argv), @ptrToInt(envp))
+    return arch.syscall3(arch.SYS_execve, @ptrToInt(path), @ptrToInt(argv), @ptrToInt(envp));
 }
 
 pub fn fork() -> usize {
-    arch.syscall0(arch.SYS_fork)
+    return arch.syscall0(arch.SYS_fork);
 }
 
 pub fn getcwd(buf: &u8, size: usize) -> usize {
-    arch.syscall2(arch.SYS_getcwd, @ptrToInt(buf), size)
+    return arch.syscall2(arch.SYS_getcwd, @ptrToInt(buf), size);
 }
 
 pub fn getdents(fd: i32, dirp: &u8, count: usize) -> usize {
-    arch.syscall3(arch.SYS_getdents, usize(fd), @ptrToInt(dirp), count)
+    return arch.syscall3(arch.SYS_getdents, usize(fd), @ptrToInt(dirp), count);
 }
 
 pub fn isatty(fd: i32) -> bool {
@@ -420,123 +420,123 @@ pub fn isatty(fd: i32) -> bool {
 }
 
 pub fn readlink(noalias path: &const u8, noalias buf_ptr: &u8, buf_len: usize) -> usize {
-    arch.syscall3(arch.SYS_readlink, @ptrToInt(path), @ptrToInt(buf_ptr), buf_len)
+    return arch.syscall3(arch.SYS_readlink, @ptrToInt(path), @ptrToInt(buf_ptr), buf_len);
 }
 
 pub fn mkdir(path: &const u8, mode: u32) -> usize {
-    arch.syscall2(arch.SYS_mkdir, @ptrToInt(path), mode)
+    return arch.syscall2(arch.SYS_mkdir, @ptrToInt(path), mode);
 }
 
 pub fn mmap(address: ?&u8, length: usize, prot: usize, flags: usize, fd: i32, offset: isize)
     -> usize
 {
-    arch.syscall6(arch.SYS_mmap, @ptrToInt(address), length, prot, flags, usize(fd),
-        @bitCast(usize, offset))
+    return arch.syscall6(arch.SYS_mmap, @ptrToInt(address), length, prot, flags, usize(fd),
+        @bitCast(usize, offset));
 }
 
 pub fn munmap(address: &u8, length: usize) -> usize {
-    arch.syscall2(arch.SYS_munmap, @ptrToInt(address), length)
+    return arch.syscall2(arch.SYS_munmap, @ptrToInt(address), length);
 }
 
 pub fn read(fd: i32, buf: &u8, count: usize) -> usize {
-    arch.syscall3(arch.SYS_read, usize(fd), @ptrToInt(buf), count)
+    return arch.syscall3(arch.SYS_read, usize(fd), @ptrToInt(buf), count);
 }
 
 pub fn rmdir(path: &const u8) -> usize {
-    arch.syscall1(arch.SYS_rmdir, @ptrToInt(path))
+    return arch.syscall1(arch.SYS_rmdir, @ptrToInt(path));
 }
 
 pub fn symlink(existing: &const u8, new: &const u8) -> usize {
-    arch.syscall2(arch.SYS_symlink, @ptrToInt(existing), @ptrToInt(new))
+    return arch.syscall2(arch.SYS_symlink, @ptrToInt(existing), @ptrToInt(new));
 }
 
 pub fn pread(fd: i32, buf: &u8, count: usize, offset: usize) -> usize {
-    arch.syscall4(arch.SYS_pread, usize(fd), @ptrToInt(buf), count, offset)
+    return arch.syscall4(arch.SYS_pread, usize(fd), @ptrToInt(buf), count, offset);
 }
 
 pub fn pipe(fd: &[2]i32) -> usize {
-    pipe2(fd, 0)
+    return pipe2(fd, 0);
 }
 
 pub fn pipe2(fd: &[2]i32, flags: usize) -> usize {
-    arch.syscall2(arch.SYS_pipe2, @ptrToInt(fd), flags)
+    return arch.syscall2(arch.SYS_pipe2, @ptrToInt(fd), flags);
 }
 
 pub fn write(fd: i32, buf: &const u8, count: usize) -> usize {
-    arch.syscall3(arch.SYS_write, usize(fd), @ptrToInt(buf), count)
+    return arch.syscall3(arch.SYS_write, usize(fd), @ptrToInt(buf), count);
 }
 
 pub fn pwrite(fd: i32, buf: &const u8, count: usize, offset: usize) -> usize {
-    arch.syscall4(arch.SYS_pwrite, usize(fd), @ptrToInt(buf), count, offset)
+    return arch.syscall4(arch.SYS_pwrite, usize(fd), @ptrToInt(buf), count, offset);
 }
 
 pub fn rename(old: &const u8, new: &const u8) -> usize {
-    arch.syscall2(arch.SYS_rename, @ptrToInt(old), @ptrToInt(new))
+    return arch.syscall2(arch.SYS_rename, @ptrToInt(old), @ptrToInt(new));
 }
 
 pub fn open(path: &const u8, flags: u32, perm: usize) -> usize {
-    arch.syscall3(arch.SYS_open, @ptrToInt(path), flags, perm)
+    return arch.syscall3(arch.SYS_open, @ptrToInt(path), flags, perm);
 }
 
 pub fn create(path: &const u8, perm: usize) -> usize {
-    arch.syscall2(arch.SYS_creat, @ptrToInt(path), perm)
+    return arch.syscall2(arch.SYS_creat, @ptrToInt(path), perm);
 }
 
 pub fn openat(dirfd: i32, path: &const u8, flags: usize, mode: usize) -> usize {
-    arch.syscall4(arch.SYS_openat, usize(dirfd), @ptrToInt(path), flags, mode)
+    return arch.syscall4(arch.SYS_openat, usize(dirfd), @ptrToInt(path), flags, mode);
 }
 
 pub fn close(fd: i32) -> usize {
-    arch.syscall1(arch.SYS_close, usize(fd))
+    return arch.syscall1(arch.SYS_close, usize(fd));
 }
 
 pub fn lseek(fd: i32, offset: isize, ref_pos: usize) -> usize {
-    arch.syscall3(arch.SYS_lseek, usize(fd), @bitCast(usize, offset), ref_pos)
+    return arch.syscall3(arch.SYS_lseek, usize(fd), @bitCast(usize, offset), ref_pos);
 }
 
 pub fn exit(status: i32) -> noreturn {
     _ = arch.syscall1(arch.SYS_exit, @bitCast(usize, isize(status)));
-    unreachable
+    unreachable;
 }
 
 pub fn getrandom(buf: &u8, count: usize, flags: u32) -> usize {
-    arch.syscall3(arch.SYS_getrandom, @ptrToInt(buf), count, usize(flags))
+    return arch.syscall3(arch.SYS_getrandom, @ptrToInt(buf), count, usize(flags));
 }
 
 pub fn kill(pid: i32, sig: i32) -> usize {
-    arch.syscall2(arch.SYS_kill, @bitCast(usize, isize(pid)), usize(sig))
+    return arch.syscall2(arch.SYS_kill, @bitCast(usize, isize(pid)), usize(sig));
 }
 
 pub fn unlink(path: &const u8) -> usize {
-    arch.syscall1(arch.SYS_unlink, @ptrToInt(path))
+    return arch.syscall1(arch.SYS_unlink, @ptrToInt(path));
 }
 
 pub fn waitpid(pid: i32, status: &i32, options: i32) -> usize {
-    arch.syscall4(arch.SYS_wait4, @bitCast(usize, isize(pid)), @ptrToInt(status), @bitCast(usize, isize(options)), 0)
+    return arch.syscall4(arch.SYS_wait4, @bitCast(usize, isize(pid)), @ptrToInt(status), @bitCast(usize, isize(options)), 0);
 }
 
 pub fn nanosleep(req: &const timespec, rem: ?&timespec) -> usize {
-    arch.syscall2(arch.SYS_nanosleep, @ptrToInt(req), @ptrToInt(rem))
+    return arch.syscall2(arch.SYS_nanosleep, @ptrToInt(req), @ptrToInt(rem));
 }
 
 pub fn setuid(uid: u32) -> usize {
-    arch.syscall1(arch.SYS_setuid, uid)
+    return arch.syscall1(arch.SYS_setuid, uid);
 }
 
 pub fn setgid(gid: u32) -> usize {
-    arch.syscall1(arch.SYS_setgid, gid)
+    return arch.syscall1(arch.SYS_setgid, gid);
 }
 
 pub fn setreuid(ruid: u32, euid: u32) -> usize {
-    arch.syscall2(arch.SYS_setreuid, ruid, euid)
+    return arch.syscall2(arch.SYS_setreuid, ruid, euid);
 }
 
 pub fn setregid(rgid: u32, egid: u32) -> usize {
-    arch.syscall2(arch.SYS_setregid, rgid, egid)
+    return arch.syscall2(arch.SYS_setregid, rgid, egid);
 }
 
 pub fn sigprocmask(flags: u32, noalias set: &const sigset_t, noalias oldset: ?&sigset_t) -> usize {
-    arch.syscall4(arch.SYS_rt_sigprocmask, flags, @ptrToInt(set), @ptrToInt(oldset), NSIG/8)
+    return arch.syscall4(arch.SYS_rt_sigprocmask, flags, @ptrToInt(set), @ptrToInt(oldset), NSIG/8);
 }
 
 pub fn sigaction(sig: u6, noalias act: &const Sigaction, noalias oact: ?&Sigaction) -> usize {
@@ -651,92 +651,70 @@ pub const iovec = extern struct {
     iov_len: usize,
 };
 
-//
-//const IF_NAMESIZE = 16;
-//
-//export struct ifreq {
-//    ifrn_name: [IF_NAMESIZE]u8,
-//    union {
-//        ifru_addr: sockaddr,
-//        ifru_dstaddr: sockaddr,
-//        ifru_broadaddr: sockaddr,
-//        ifru_netmask: sockaddr,
-//        ifru_hwaddr: sockaddr,
-//        ifru_flags: i16,
-//        ifru_ivalue: i32,
-//        ifru_mtu: i32,
-//        ifru_map: ifmap,
-//        ifru_slave: [IF_NAMESIZE]u8,
-//        ifru_newname: [IF_NAMESIZE]u8,
-//        ifru_data: &u8,
-//    } ifr_ifru;
-//}
-//
-
 pub fn getsockname(fd: i32, noalias addr: &sockaddr, noalias len: &socklen_t) -> usize {
-    arch.syscall3(arch.SYS_getsockname, usize(fd), @ptrToInt(addr), @ptrToInt(len))
+    return arch.syscall3(arch.SYS_getsockname, usize(fd), @ptrToInt(addr), @ptrToInt(len));
 }
 
 pub fn getpeername(fd: i32, noalias addr: &sockaddr, noalias len: &socklen_t) -> usize {
-    arch.syscall3(arch.SYS_getpeername, usize(fd), @ptrToInt(addr), @ptrToInt(len))
+    return arch.syscall3(arch.SYS_getpeername, usize(fd), @ptrToInt(addr), @ptrToInt(len));
 }
 
 pub fn socket(domain: i32, socket_type: i32, protocol: i32) -> usize {
-    arch.syscall3(arch.SYS_socket, usize(domain), usize(socket_type), usize(protocol))
+    return arch.syscall3(arch.SYS_socket, usize(domain), usize(socket_type), usize(protocol));
 }
 
 pub fn setsockopt(fd: i32, level: i32, optname: i32, optval: &const u8, optlen: socklen_t) -> usize {
-    arch.syscall5(arch.SYS_setsockopt, usize(fd), usize(level), usize(optname), usize(optval), @ptrToInt(optlen))
+    return arch.syscall5(arch.SYS_setsockopt, usize(fd), usize(level), usize(optname), usize(optval), @ptrToInt(optlen));
 }
 
 pub fn getsockopt(fd: i32, level: i32, optname: i32, noalias optval: &u8, noalias optlen: &socklen_t) -> usize {
-    arch.syscall5(arch.SYS_getsockopt, usize(fd), usize(level), usize(optname), @ptrToInt(optval), @ptrToInt(optlen))
+    return arch.syscall5(arch.SYS_getsockopt, usize(fd), usize(level), usize(optname), @ptrToInt(optval), @ptrToInt(optlen));
 }
 
 pub fn sendmsg(fd: i32, msg: &const arch.msghdr, flags: u32) -> usize {
-    arch.syscall3(arch.SYS_sendmsg, usize(fd), @ptrToInt(msg), flags)
+    return arch.syscall3(arch.SYS_sendmsg, usize(fd), @ptrToInt(msg), flags);
 }
 
 pub fn connect(fd: i32, addr: &const sockaddr, len: socklen_t) -> usize {
-    arch.syscall3(arch.SYS_connect, usize(fd), @ptrToInt(addr), usize(len))
+    return arch.syscall3(arch.SYS_connect, usize(fd), @ptrToInt(addr), usize(len));
 }
 
 pub fn recvmsg(fd: i32, msg: &arch.msghdr, flags: u32) -> usize {
-    arch.syscall3(arch.SYS_recvmsg, usize(fd), @ptrToInt(msg), flags)
+    return arch.syscall3(arch.SYS_recvmsg, usize(fd), @ptrToInt(msg), flags);
 }
 
 pub fn recvfrom(fd: i32, noalias buf: &u8, len: usize, flags: u32,
     noalias addr: ?&sockaddr, noalias alen: ?&socklen_t) -> usize
 {
-    arch.syscall6(arch.SYS_recvfrom, usize(fd), @ptrToInt(buf), len, flags, @ptrToInt(addr), @ptrToInt(alen))
+    return arch.syscall6(arch.SYS_recvfrom, usize(fd), @ptrToInt(buf), len, flags, @ptrToInt(addr), @ptrToInt(alen));
 }
 
 pub fn shutdown(fd: i32, how: i32) -> usize {
-    arch.syscall2(arch.SYS_shutdown, usize(fd), usize(how))
+    return arch.syscall2(arch.SYS_shutdown, usize(fd), usize(how));
 }
 
 pub fn bind(fd: i32, addr: &const sockaddr, len: socklen_t) -> usize {
-    arch.syscall3(arch.SYS_bind, usize(fd), @ptrToInt(addr), usize(len))
+    return arch.syscall3(arch.SYS_bind, usize(fd), @ptrToInt(addr), usize(len));
 }
 
 pub fn listen(fd: i32, backlog: i32) -> usize {
-    arch.syscall2(arch.SYS_listen, usize(fd), usize(backlog))
+    return arch.syscall2(arch.SYS_listen, usize(fd), usize(backlog));
 }
 
 pub fn sendto(fd: i32, buf: &const u8, len: usize, flags: u32, addr: ?&const sockaddr, alen: socklen_t) -> usize {
-    arch.syscall6(arch.SYS_sendto, usize(fd), @ptrToInt(buf), len, flags, @ptrToInt(addr), usize(alen))
+    return arch.syscall6(arch.SYS_sendto, usize(fd), @ptrToInt(buf), len, flags, @ptrToInt(addr), usize(alen));
 }
 
 pub fn socketpair(domain: i32, socket_type: i32, protocol: i32, fd: [2]i32) -> usize {
-    arch.syscall4(arch.SYS_socketpair, usize(domain), usize(socket_type), usize(protocol), @ptrToInt(&fd[0]))
+    return arch.syscall4(arch.SYS_socketpair, usize(domain), usize(socket_type), usize(protocol), @ptrToInt(&fd[0]));
 }
 
 pub fn accept(fd: i32, noalias addr: &sockaddr, noalias len: &socklen_t) -> usize {
-    accept4(fd, addr, len, 0)
+    return accept4(fd, addr, len, 0);
 }
 
 pub fn accept4(fd: i32, noalias addr: &sockaddr, noalias len: &socklen_t, flags: u32) -> usize {
-    arch.syscall4(arch.SYS_accept4, usize(fd), @ptrToInt(addr), @ptrToInt(len), flags)
+    return arch.syscall4(arch.SYS_accept4, usize(fd), @ptrToInt(addr), @ptrToInt(len), flags);
 }
 
 // error NameTooLong;
@@ -771,7 +749,7 @@ pub const Stat = arch.Stat;
 pub const timespec = arch.timespec;
 
 pub fn fstat(fd: i32, stat_buf: &Stat) -> usize {
-    arch.syscall2(arch.SYS_fstat, usize(fd), @ptrToInt(stat_buf))
+    return arch.syscall2(arch.SYS_fstat, usize(fd), @ptrToInt(stat_buf));
 }
 
 pub const epoll_data = u64;
@@ -782,19 +760,19 @@ pub const epoll_event = extern struct {
 };
 
 pub fn epoll_create() -> usize {
-    arch.syscall1(arch.SYS_epoll_create, usize(1))
+    return arch.syscall1(arch.SYS_epoll_create, usize(1));
 }
 
 pub fn epoll_ctl(epoll_fd: i32, op: i32, fd: i32, ev: &epoll_event) -> usize {
-    arch.syscall4(arch.SYS_epoll_ctl, usize(epoll_fd), usize(op), usize(fd), @ptrToInt(ev))
+    return arch.syscall4(arch.SYS_epoll_ctl, usize(epoll_fd), usize(op), usize(fd), @ptrToInt(ev));
 }
 
 pub fn epoll_wait(epoll_fd: i32, events: &epoll_event, maxevents: i32, timeout: i32) -> usize {
-    arch.syscall4(arch.SYS_epoll_wait, usize(epoll_fd), @ptrToInt(events), usize(maxevents), usize(timeout))
+    return arch.syscall4(arch.SYS_epoll_wait, usize(epoll_fd), @ptrToInt(events), usize(maxevents), usize(timeout));
 }
 
 pub fn timerfd_create(clockid: i32, flags: u32) -> usize {
-    arch.syscall2(arch.SYS_timerfd_create, usize(clockid), usize(flags))
+    return arch.syscall2(arch.SYS_timerfd_create, usize(clockid), usize(flags));
 }
 
 pub const itimerspec = extern struct {
@@ -803,11 +781,11 @@ pub const itimerspec = extern struct {
 };
 
 pub fn timerfd_gettime(fd: i32, curr_value: &itimerspec) -> usize {
-    arch.syscall2(arch.SYS_timerfd_gettime, usize(fd), @ptrToInt(curr_value))
+    return arch.syscall2(arch.SYS_timerfd_gettime, usize(fd), @ptrToInt(curr_value));
 }
 
 pub fn timerfd_settime(fd: i32, flags: u32, new_value: &const itimerspec, old_value: ?&itimerspec) -> usize {
-    arch.syscall4(arch.SYS_timerfd_settime, usize(fd), usize(flags), @ptrToInt(new_value), @ptrToInt(old_value))
+    return arch.syscall4(arch.SYS_timerfd_settime, usize(fd), usize(flags), @ptrToInt(new_value), @ptrToInt(old_value));
 }
 
 test "import linux_test" {
diff --git a/std/os/linux_i386.zig b/std/os/linux_i386.zig
index 215670e3a9..ed49e33c2b 100644
--- a/std/os/linux_i386.zig
+++ b/std/os/linux_i386.zig
@@ -502,13 +502,3 @@ pub nakedcc fn restore_rt() {
         : [number] "{eax}" (usize(SYS_rt_sigreturn))
         : "rcx", "r11")
 }
-
-export struct msghdr {
-    msg_name: &u8,
-    msg_namelen: socklen_t,
-    msg_iov: &iovec,
-    msg_iovlen: i32,
-    msg_control: &u8,
-    msg_controllen: socklen_t,
-    msg_flags: i32,
-}
diff --git a/std/os/linux_x86_64.zig b/std/os/linux_x86_64.zig
index 6c94528df0..db78decde2 100644
--- a/std/os/linux_x86_64.zig
+++ b/std/os/linux_x86_64.zig
@@ -371,52 +371,52 @@ pub const F_GETOWN_EX = 16;
 pub const F_GETOWNER_UIDS = 17;
 
 pub fn syscall0(number: usize) -> usize {
-    asm volatile ("syscall"
+    return asm volatile ("syscall"
         : [ret] "={rax}" (-> usize)
         : [number] "{rax}" (number)
-        : "rcx", "r11")
+        : "rcx", "r11");
 }
 
 pub fn syscall1(number: usize, arg1: usize) -> usize {
-    asm volatile ("syscall"
+    return asm volatile ("syscall"
         : [ret] "={rax}" (-> usize)
         : [number] "{rax}" (number),
             [arg1] "{rdi}" (arg1)
-        : "rcx", "r11")
+        : "rcx", "r11");
 }
 
 pub fn syscall2(number: usize, arg1: usize, arg2: usize) -> usize {
-    asm volatile ("syscall"
+    return asm volatile ("syscall"
         : [ret] "={rax}" (-> usize)
         : [number] "{rax}" (number),
             [arg1] "{rdi}" (arg1),
             [arg2] "{rsi}" (arg2)
-        : "rcx", "r11")
+        : "rcx", "r11");
 }
 
 pub fn syscall3(number: usize, arg1: usize, arg2: usize, arg3: usize) -> usize {
-    asm volatile ("syscall"
+    return asm volatile ("syscall"
         : [ret] "={rax}" (-> usize)
         : [number] "{rax}" (number),
             [arg1] "{rdi}" (arg1),
             [arg2] "{rsi}" (arg2),
             [arg3] "{rdx}" (arg3)
-        : "rcx", "r11")
+        : "rcx", "r11");
 }
 
 pub fn syscall4(number: usize, arg1: usize, arg2: usize, arg3: usize, arg4: usize) -> usize {
-    asm volatile ("syscall"
+    return asm volatile ("syscall"
         : [ret] "={rax}" (-> usize)
         : [number] "{rax}" (number),
             [arg1] "{rdi}" (arg1),
             [arg2] "{rsi}" (arg2),
             [arg3] "{rdx}" (arg3),
             [arg4] "{r10}" (arg4)
-        : "rcx", "r11")
+        : "rcx", "r11");
 }
 
 pub fn syscall5(number: usize, arg1: usize, arg2: usize, arg3: usize, arg4: usize, arg5: usize) -> usize {
-    asm volatile ("syscall"
+    return asm volatile ("syscall"
         : [ret] "={rax}" (-> usize)
         : [number] "{rax}" (number),
             [arg1] "{rdi}" (arg1),
@@ -424,13 +424,13 @@ pub fn syscall5(number: usize, arg1: usize, arg2: usize, arg3: usize, arg4: usiz
             [arg3] "{rdx}" (arg3),
             [arg4] "{r10}" (arg4),
             [arg5] "{r8}" (arg5)
-        : "rcx", "r11")
+        : "rcx", "r11");
 }
 
 pub fn syscall6(number: usize, arg1: usize, arg2: usize, arg3: usize, arg4: usize,
     arg5: usize, arg6: usize) -> usize
 {
-    asm volatile ("syscall"
+    return asm volatile ("syscall"
         : [ret] "={rax}" (-> usize)
         : [number] "{rax}" (number),
             [arg1] "{rdi}" (arg1),
@@ -439,14 +439,14 @@ pub fn syscall6(number: usize, arg1: usize, arg2: usize, arg3: usize, arg4: usiz
             [arg4] "{r10}" (arg4),
             [arg5] "{r8}" (arg5),
             [arg6] "{r9}" (arg6)
-        : "rcx", "r11")
+        : "rcx", "r11");
 }
 
 pub nakedcc fn restore_rt() {
-    asm volatile ("syscall"
+    return asm volatile ("syscall"
         :
         : [number] "{rax}" (usize(SYS_rt_sigreturn))
-        : "rcx", "r11")
+        : "rcx", "r11");
 }
 
 
diff --git a/std/os/path.zig b/std/os/path.zig
index 3fd7b5e2db..59e9a53027 100644
--- a/std/os/path.zig
+++ b/std/os/path.zig
@@ -749,21 +749,19 @@ pub fn relativeWindows(allocator: &Allocator, from: []const u8, to: []const u8)
     const resolved_to = %return resolveWindows(allocator, [][]const u8{to});
     defer if (clean_up_resolved_to) allocator.free(resolved_to);
 
-    const result_is_to = if (drive(resolved_to)) |to_drive| {
-        if (drive(resolved_from)) |from_drive| {
+    const result_is_to = if (drive(resolved_to)) |to_drive|
+        if (drive(resolved_from)) |from_drive|
             asciiUpper(from_drive[0]) != asciiUpper(to_drive[0])
-        } else {
+        else
             true
-        }
-    } else if (networkShare(resolved_to)) |to_ns| {
-        if (networkShare(resolved_from)) |from_ns| {
+    else if (networkShare(resolved_to)) |to_ns|
+        if (networkShare(resolved_from)) |from_ns|
             !networkShareServersEql(to_ns, from_ns)
-        } else {
+        else
             true
-        }
-    } else {
-        unreachable
-    };
+    else
+        unreachable;
+
     if (result_is_to) {
         clean_up_resolved_to = false;
         return resolved_to;
@@ -964,14 +962,16 @@ pub fn real(allocator: &Allocator, pathname: []const u8) -> %[]u8 {
 
                 // windows returns \\?\ prepended to the path
                 // we strip it because nobody wants \\?\ prepended to their path
-                const final_len = if (result > 4 and mem.startsWith(u8, buf, "\\\\?\\")) {
-                    var i: usize = 4;
-                    while (i < result) : (i += 1) {
-                        buf[i - 4] = buf[i];
+                const final_len = x: {
+                    if (result > 4 and mem.startsWith(u8, buf, "\\\\?\\")) {
+                        var i: usize = 4;
+                        while (i < result) : (i += 1) {
+                            buf[i - 4] = buf[i];
+                        }
+                        break :x result - 4;
+                    } else {
+                        break :x result;
                     }
-                    result - 4
-                } else {
-                    result
                 };
 
                 return allocator.shrink(u8, buf, final_len);
@@ -1012,7 +1012,7 @@ pub fn real(allocator: &Allocator, pathname: []const u8) -> %[]u8 {
             defer os.close(fd);
 
             var buf: ["/proc/self/fd/-2147483648".len]u8 = undefined;
-            const proc_path = fmt.bufPrint(buf[0..], "/proc/self/fd/{}", fd);
+            const proc_path = %%fmt.bufPrint(buf[0..], "/proc/self/fd/{}", fd);
 
             return os.readLink(allocator, proc_path);
         },
diff --git a/std/os/windows/util.zig b/std/os/windows/util.zig
index b3fc095d43..5b2dc1efb8 100644
--- a/std/os/windows/util.zig
+++ b/std/os/windows/util.zig
@@ -16,11 +16,11 @@ pub fn windowsWaitSingle(handle: windows.HANDLE, milliseconds: windows.DWORD) ->
         windows.WAIT_ABANDONED => error.WaitAbandoned,
         windows.WAIT_OBJECT_0 => {},
         windows.WAIT_TIMEOUT => error.WaitTimeOut,
-        windows.WAIT_FAILED => {
+        windows.WAIT_FAILED => x: {
             const err = windows.GetLastError();
-            switch (err) {
+            break :x switch (err) {
                 else => os.unexpectedErrorWindows(err),
-            }
+            };
         },
         else => error.Unexpected,
     };
@@ -122,7 +122,7 @@ pub fn windowsOpen(file_path: []const u8, desired_access: windows.DWORD, share_m
 /// Caller must free result.
 pub fn createWindowsEnvBlock(allocator: &mem.Allocator, env_map: &const BufMap) -> %[]u8 {
     // count bytes needed
-    const bytes_needed = {
+    const bytes_needed = x: {
         var bytes_needed: usize = 1; // 1 for the final null byte
         var it = env_map.iterator();
         while (it.next()) |pair| {
@@ -130,7 +130,7 @@ pub fn createWindowsEnvBlock(allocator: &mem.Allocator, env_map: &const BufMap)
             // +1 for null byte
             bytes_needed += pair.key.len + pair.value.len + 2;
         }
-        bytes_needed
+        break :x bytes_needed;
     };
     const result = %return allocator.alloc(u8, bytes_needed);
     %defer allocator.free(result);
diff --git a/std/rand.zig b/std/rand.zig
index 09e0c8ac78..73801a078f 100644
--- a/std/rand.zig
+++ b/std/rand.zig
@@ -28,9 +28,9 @@ pub const Rand = struct {
 
     /// Initialize random state with the given seed.
     pub fn init(seed: usize) -> Rand {
-        Rand {
+        return Rand {
             .rng = Rng.init(seed),
-        }
+        };
     }
 
     /// Get an integer or boolean with random bits.
@@ -78,13 +78,13 @@ pub const Rand = struct {
                 const end_uint = uint(end);
                 const total_range = math.absCast(start) + end_uint;
                 const value = r.range(uint, 0, total_range);
-                const result = if (value < end_uint) {
-                    T(value)
-                } else if (value == end_uint) {
-                    start
-                } else {
+                const result = if (value < end_uint) x: {
+                    break :x T(value);
+                } else if (value == end_uint) x: {
+                    break :x start;
+                } else x: {
                     // Can't overflow because the range is over signed ints
-                    %%math.negateCast(value - end_uint)
+                    break :x %%math.negateCast(value - end_uint);
                 };
                 return result;
             } else {
@@ -114,13 +114,13 @@ pub const Rand = struct {
         // const rand_bits = r.rng.scalar(int) & mask;
         // return @float_compose(T, false, 0, rand_bits) - 1.0
         const int_type = @IntType(false, @sizeOf(T) * 8);
-        const precision = if (T == f32) {
+        const precision = if (T == f32)
             16777216
-        } else if (T == f64) {
+        else if (T == f64)
             9007199254740992
-        } else {
+        else
             @compileError("unknown floating point type")
-        };
+        ;
         return T(r.range(int_type, 0, precision)) / T(precision);
     }
 };
@@ -133,7 +133,7 @@ fn MersenneTwister(
     comptime t: math.Log2Int(int), comptime c: int,
     comptime l: math.Log2Int(int), comptime f: int) -> type
 {
-    struct {
+    return struct {
         const Self = this;
 
         array: [n]int,
@@ -189,7 +189,7 @@ fn MersenneTwister(
 
             return x;
         }
-    }
+    };
 }
 
 test "rand float 32" {
diff --git a/std/sort.zig b/std/sort.zig
index d02d685e07..a36a5e1747 100644
--- a/std/sort.zig
+++ b/std/sort.zig
@@ -1,75 +1,966 @@
-const assert = @import("debug.zig").assert;
-const mem = @import("mem.zig");
-const math = @import("math/index.zig");
+const std = @import("index.zig");
+const assert = std.debug.assert;
+const mem = std.mem;
+const math = std.math;
+const builtin = @import("builtin");
 
-pub const Cmp = math.Cmp;
-
-/// Stable sort using O(1) space. Currently implemented as insertion sort.
-pub fn sort_stable(comptime T: type, array: []T, comptime cmp: fn(a: &const T, b: &const T)->Cmp) {
-    {var i: usize = 1; while (i < array.len) : (i += 1) {
-        const x = array[i];
+/// Stable in-place sort. O(n) best case, O(pow(n, 2)) worst case. O(1) memory (no allocator required).
+pub fn insertionSort(comptime T: type, items: []T, lessThan: fn(lhs: &const T, rhs: &const T)->bool) {
+    {var i: usize = 1; while (i < items.len) : (i += 1) {
+        const x = items[i];
         var j: usize = i;
-        while (j > 0 and cmp(array[j - 1], x) == Cmp.Greater) : (j -= 1) {
-            array[j] = array[j - 1];
+        while (j > 0 and lessThan(x, items[j - 1])) : (j -= 1) {
+            items[j] = items[j - 1];
         }
-        array[j] = x;
+        items[j] = x;
     }}
 }
 
-/// Unstable sort using O(n) stack space. Currently implemented as quicksort.
-pub fn sort(comptime T: type, array: []T, comptime cmp: fn(a: &const T, b: &const T)->Cmp) {
-    if (array.len > 0) {
-        quicksort(T, array, 0, array.len - 1, cmp);
+const Range = struct {
+    start: usize,
+    end: usize,
+
+    fn init(start: usize, end: usize) -> Range {
+        return Range { .start = start, .end = end };
+    }
+
+    fn length(self: &const Range) -> usize {
+        return self.end - self.start;
+    }
+};
+
+
+const Iterator = struct {
+    size: usize,
+    power_of_two: usize,
+    numerator: usize,
+    decimal: usize,
+    denominator: usize,
+    decimal_step: usize,
+    numerator_step: usize,
+
+    fn init(size2: usize, min_level: usize) -> Iterator {
+        const power_of_two = math.floorPowerOfTwo(usize, size2);
+        const denominator = power_of_two / min_level;
+        return Iterator {
+            .numerator = 0,
+            .decimal = 0,
+            .size = size2,
+            .power_of_two = power_of_two,
+            .denominator = denominator,
+            .decimal_step = size2 / denominator,
+            .numerator_step = size2 % denominator,
+        };
+    }
+
+    fn begin(self: &Iterator) {
+        self.numerator = 0;
+        self.decimal = 0;
+    }
+
+    fn nextRange(self: &Iterator) -> Range {
+        const start = self.decimal;
+
+        self.decimal += self.decimal_step;
+        self.numerator += self.numerator_step;
+        if (self.numerator >= self.denominator) {
+            self.numerator -= self.denominator;
+            self.decimal += 1;
+        }
+        
+        return Range {.start = start, .end = self.decimal};
+    }
+
+    fn finished(self: &Iterator) -> bool {
+        return self.decimal >= self.size;
+    }
+
+    fn nextLevel(self: &Iterator) -> bool {
+        self.decimal_step += self.decimal_step;
+        self.numerator_step += self.numerator_step;
+        if (self.numerator_step >= self.denominator) {
+            self.numerator_step -= self.denominator;
+            self.decimal_step += 1;
+        }
+        
+        return (self.decimal_step < self.size);
+    }
+
+    fn length(self: &Iterator) -> usize {
+        return self.decimal_step;
+    }
+};
+
+const Pull = struct {
+    from: usize,
+    to: usize,
+    count: usize,
+    range: Range,
+};
+
+/// Stable in-place sort. O(n) best case, O(n*log(n)) worst case and average case. O(1) memory (no allocator required).
+/// Currently implemented as block sort.
+pub fn sort(comptime T: type, items: []T, lessThan: fn(lhs: &const T, rhs: &const T)->bool) {
+    // Implementation ported from https://github.com/BonzaiThePenguin/WikiSort/blob/master/WikiSort.c
+    var cache: [512]T = undefined;
+
+    if (items.len < 4) {
+        if (items.len == 3) {
+            // hard coded insertion sort
+            if (lessThan(items[1], items[0])) mem.swap(T, &items[0], &items[1]);
+            if (lessThan(items[2], items[1])) {
+                mem.swap(T, &items[1], &items[2]);
+                if (lessThan(items[1], items[0])) mem.swap(T, &items[0], &items[1]);
+            }
+        } else if (items.len == 2) {
+            if (lessThan(items[1], items[0])) mem.swap(T, &items[0], &items[1]);
+        }
+        return;
+    }
+
+    // sort groups of 4-8 items at a time using an unstable sorting network,
+    // but keep track of the original item orders to force it to be stable
+    // http://pages.ripco.net/~jgamble/nw.html
+    var iterator = Iterator.init(items.len, 4);
+    while (!iterator.finished()) {
+        var order = []u8{0, 1, 2, 3, 4, 5, 6, 7};
+        const range = iterator.nextRange();
+
+        const sliced_items = items[range.start..];
+        switch (range.length()) {
+            8 => {
+                swap(T, sliced_items, lessThan, &order, 0, 1);
+                swap(T, sliced_items, lessThan, &order, 2, 3);
+                swap(T, sliced_items, lessThan, &order, 4, 5);
+                swap(T, sliced_items, lessThan, &order, 6, 7);
+                swap(T, sliced_items, lessThan, &order, 0, 2);
+                swap(T, sliced_items, lessThan, &order, 1, 3);
+                swap(T, sliced_items, lessThan, &order, 4, 6);
+                swap(T, sliced_items, lessThan, &order, 5, 7);
+                swap(T, sliced_items, lessThan, &order, 1, 2);
+                swap(T, sliced_items, lessThan, &order, 5, 6);
+                swap(T, sliced_items, lessThan, &order, 0, 4);
+                swap(T, sliced_items, lessThan, &order, 3, 7);
+                swap(T, sliced_items, lessThan, &order, 1, 5);
+                swap(T, sliced_items, lessThan, &order, 2, 6);
+                swap(T, sliced_items, lessThan, &order, 1, 4);
+                swap(T, sliced_items, lessThan, &order, 3, 6);
+                swap(T, sliced_items, lessThan, &order, 2, 4);
+                swap(T, sliced_items, lessThan, &order, 3, 5);
+                swap(T, sliced_items, lessThan, &order, 3, 4);
+            },
+              7 => {
+                  swap(T, sliced_items, lessThan, &order, 1, 2);
+                  swap(T, sliced_items, lessThan, &order, 3, 4);
+                  swap(T, sliced_items, lessThan, &order, 5, 6);
+                  swap(T, sliced_items, lessThan, &order, 0, 2);
+                  swap(T, sliced_items, lessThan, &order, 3, 5);
+                  swap(T, sliced_items, lessThan, &order, 4, 6);
+                  swap(T, sliced_items, lessThan, &order, 0, 1);
+                  swap(T, sliced_items, lessThan, &order, 4, 5);
+                  swap(T, sliced_items, lessThan, &order, 2, 6);
+                  swap(T, sliced_items, lessThan, &order, 0, 4);
+                  swap(T, sliced_items, lessThan, &order, 1, 5);
+                  swap(T, sliced_items, lessThan, &order, 0, 3);
+                  swap(T, sliced_items, lessThan, &order, 2, 5);
+                  swap(T, sliced_items, lessThan, &order, 1, 3);
+                  swap(T, sliced_items, lessThan, &order, 2, 4);
+                  swap(T, sliced_items, lessThan, &order, 2, 3);
+              },
+              6 => {
+                  swap(T, sliced_items, lessThan, &order, 1, 2);
+                  swap(T, sliced_items, lessThan, &order, 4, 5);
+                  swap(T, sliced_items, lessThan, &order, 0, 2);
+                  swap(T, sliced_items, lessThan, &order, 3, 5);
+                  swap(T, sliced_items, lessThan, &order, 0, 1);
+                  swap(T, sliced_items, lessThan, &order, 3, 4);
+                  swap(T, sliced_items, lessThan, &order, 2, 5);
+                  swap(T, sliced_items, lessThan, &order, 0, 3);
+                  swap(T, sliced_items, lessThan, &order, 1, 4);
+                  swap(T, sliced_items, lessThan, &order, 2, 4);
+                  swap(T, sliced_items, lessThan, &order, 1, 3);
+                  swap(T, sliced_items, lessThan, &order, 2, 3);
+              },
+              5 => {
+                  swap(T, sliced_items, lessThan, &order, 0, 1);
+                  swap(T, sliced_items, lessThan, &order, 3, 4);
+                  swap(T, sliced_items, lessThan, &order, 2, 4);
+                  swap(T, sliced_items, lessThan, &order, 2, 3);
+                  swap(T, sliced_items, lessThan, &order, 1, 4);
+                  swap(T, sliced_items, lessThan, &order, 0, 3);
+                  swap(T, sliced_items, lessThan, &order, 0, 2);
+                  swap(T, sliced_items, lessThan, &order, 1, 3);
+                  swap(T, sliced_items, lessThan, &order, 1, 2);
+              },
+              4 => {
+                  swap(T, sliced_items, lessThan, &order, 0, 1);
+                  swap(T, sliced_items, lessThan, &order, 2, 3);
+                  swap(T, sliced_items, lessThan, &order, 0, 2);
+                  swap(T, sliced_items, lessThan, &order, 1, 3);
+                  swap(T, sliced_items, lessThan, &order, 1, 2);
+              },
+            else => {},
+        }
+    }
+    if (items.len < 8) return;
+
+    // then merge sort the higher levels, which can be 8-15, 16-31, 32-63, 64-127, etc.
+    while (true) {
+        // if every A and B block will fit into the cache, use a special branch specifically for merging with the cache
+        // (we use < rather than <= since the block size might be one more than iterator.length())
+        if (iterator.length() < cache.len) {
+            // if four subarrays fit into the cache, it's faster to merge both pairs of subarrays into the cache,
+            // then merge the two merged subarrays from the cache back into the original array
+            if ((iterator.length() + 1) * 4 <= cache.len and iterator.length() * 4 <= items.len) {
+                iterator.begin();
+                while (!iterator.finished()) {
+                    // merge A1 and B1 into the cache
+                    var A1 = iterator.nextRange();
+                    var B1 = iterator.nextRange();
+                    var A2 = iterator.nextRange();
+                    var B2 = iterator.nextRange();
+                    
+                    if (lessThan(items[B1.end - 1], items[A1.start])) {
+                        // the two ranges are in reverse order, so copy them in reverse order into the cache
+                        mem.copy(T, cache[B1.length()..], items[A1.start..A1.end]);
+                        mem.copy(T, cache[0..], items[B1.start..B1.end]);
+                    } else if (lessThan(items[B1.start], items[A1.end - 1])) {
+                        // these two ranges weren't already in order, so merge them into the cache
+                        mergeInto(T, items, A1, B1, lessThan, cache[0..]);
+                    } else {
+                        // if A1, B1, A2, and B2 are all in order, skip doing anything else
+                        if (!lessThan(items[B2.start], items[A2.end - 1]) and !lessThan(items[A2.start], items[B1.end - 1])) continue;
+                        
+                        // copy A1 and B1 into the cache in the same order
+                        mem.copy(T, cache[0..], items[A1.start..A1.end]);
+                        mem.copy(T, cache[A1.length()..], items[B1.start..B1.end]);
+                    }
+                    A1 = Range.init(A1.start, B1.end);
+                    
+                    // merge A2 and B2 into the cache
+                    if (lessThan(items[B2.end - 1], items[A2.start])) {
+                        // the two ranges are in reverse order, so copy them in reverse order into the cache
+                        mem.copy(T, cache[A1.length() + B2.length()..], items[A2.start..A2.end]);
+                        mem.copy(T, cache[A1.length()..], items[B2.start..B2.end]);
+                    } else if (lessThan(items[B2.start], items[A2.end - 1])) {
+                        // these two ranges weren't already in order, so merge them into the cache
+                        mergeInto(T, items, A2, B2, lessThan, cache[A1.length()..]);
+                    } else {
+                        // copy A2 and B2 into the cache in the same order
+                        mem.copy(T, cache[A1.length()..], items[A2.start..A2.end]);
+                        mem.copy(T, cache[A1.length() + A2.length()..], items[B2.start..B2.end]);
+                    }
+                    A2 = Range.init(A2.start, B2.end);
+                    
+                    // merge A1 and A2 from the cache into the items
+                    const A3 = Range.init(0, A1.length());
+                    const B3 = Range.init(A1.length(), A1.length() + A2.length());
+                    
+                    if (lessThan(cache[B3.end - 1], cache[A3.start])) {
+                        // the two ranges are in reverse order, so copy them in reverse order into the items
+                        mem.copy(T, items[A1.start + A2.length()..], cache[A3.start..A3.end]);
+                        mem.copy(T, items[A1.start..], cache[B3.start..B3.end]);
+                    } else if (lessThan(cache[B3.start], cache[A3.end - 1])) {
+                        // these two ranges weren't already in order, so merge them back into the items
+                        mergeInto(T, cache[0..], A3, B3, lessThan, items[A1.start..]);
+                    } else {
+                        // copy A3 and B3 into the items in the same order
+                        mem.copy(T, items[A1.start..], cache[A3.start..A3.end]);
+                        mem.copy(T, items[A1.start + A1.length()..], cache[B3.start..B3.end]);
+                    }
+                }
+                
+                // we merged two levels at the same time, so we're done with this level already
+                // (iterator.nextLevel() is called again at the bottom of this outer merge loop)
+                _ = iterator.nextLevel();
+                
+            } else {
+                iterator.begin();
+                while (!iterator.finished()) {
+                    var A = iterator.nextRange();
+                    var B = iterator.nextRange();
+                    
+                    if (lessThan(items[B.end - 1], items[A.start])) {
+                        // the two ranges are in reverse order, so a simple rotation should fix it
+                        mem.rotate(T, items[A.start..B.end], A.length());
+                    } else if (lessThan(items[B.start], items[A.end - 1])) {
+                        // these two ranges weren't already in order, so we'll need to merge them!
+                        mem.copy(T, cache[0..], items[A.start..A.end]);
+                        mergeExternal(T, items, A, B, lessThan, cache[0..]);
+                    }
+                }
+            }
+        } else {
+            // this is where the in-place merge logic starts!
+            // 1. pull out two internal buffers each containing √A unique values
+            //    1a. adjust block_size and buffer_size if we couldn't find enough unique values
+            // 2. loop over the A and B subarrays within this level of the merge sort
+            // 3. break A and B into blocks of size 'block_size'
+            // 4. "tag" each of the A blocks with values from the first internal buffer
+            // 5. roll the A blocks through the B blocks and drop/rotate them where they belong
+            // 6. merge each A block with any B values that follow, using the cache or the second internal buffer
+            // 7. sort the second internal buffer if it exists
+            // 8. redistribute the two internal buffers back into the items
+            
+            var block_size: usize = math.sqrt(iterator.length());
+            var buffer_size = iterator.length()/block_size + 1;
+            
+            // as an optimization, we really only need to pull out the internal buffers once for each level of merges
+            // after that we can reuse the same buffers over and over, then redistribute it when we're finished with this level
+            var A: Range = undefined;
+            var B: Range = undefined;
+            var index: usize = 0;
+            var last: usize = 0;
+            var count: usize = 0;
+            var find: usize = 0;
+            var start: usize = 0;
+            var pull_index: usize = 0;
+            var pull = []Pull{
+                Pull {.from = 0, .to = 0, .count = 0, .range = Range.init(0, 0),},
+                Pull {.from = 0, .to = 0, .count = 0, .range = Range.init(0, 0),},
+            };
+
+            var buffer1 = Range.init(0, 0);
+            var buffer2 = Range.init(0, 0);
+            
+            // find two internal buffers of size 'buffer_size' each
+            find = buffer_size + buffer_size;
+            var find_separately = false;
+            
+            if (block_size <= cache.len) {
+                // if every A block fits into the cache then we won't need the second internal buffer,
+                // so we really only need to find 'buffer_size' unique values
+                find = buffer_size;
+            } else if (find > iterator.length()) {
+                // we can't fit both buffers into the same A or B subarray, so find two buffers separately
+                find = buffer_size;
+                find_separately = true;
+            }
+            
+            // we need to find either a single contiguous space containing 2√A unique values (which will be split up into two buffers of size √A each),
+            // or we need to find one buffer of < 2√A unique values, and a second buffer of √A unique values,
+            // OR if we couldn't find that many unique values, we need the largest possible buffer we can get
+            
+            // in the case where it couldn't find a single buffer of at least √A unique values,
+            // all of the Merge steps must be replaced by a different merge algorithm (MergeInPlace)
+            iterator.begin();
+            while (!iterator.finished()) {
+                A = iterator.nextRange();
+                B = iterator.nextRange();
+                
+                // just store information about where the values will be pulled from and to,
+                // as well as how many values there are, to create the two internal buffers
+                
+                // check A for the number of unique values we need to fill an internal buffer
+                // these values will be pulled out to the start of A
+                last = A.start;
+                count = 1;
+                while (count < find) : ({last = index; count += 1;}) {
+                    index = findLastForward(T, items, items[last], Range.init(last + 1, A.end), lessThan, find - count);
+                    if (index == A.end) break;
+                }
+                index = last;
+                
+                if (count >= buffer_size) {
+                    // keep track of the range within the items where we'll need to "pull out" these values to create the internal buffer
+                    pull[pull_index] = Pull {
+                        .range = Range.init(A.start, B.end),
+                        .count = count,
+                        .from = index,
+                        .to = A.start,
+                    };
+                    pull_index = 1;
+                    
+                    if (count == buffer_size + buffer_size) {
+                        // we were able to find a single contiguous section containing 2√A unique values,
+                        // so this section can be used to contain both of the internal buffers we'll need
+                        buffer1 = Range.init(A.start, A.start + buffer_size);
+                        buffer2 = Range.init(A.start + buffer_size, A.start + count);
+                        break;
+                    } else if (find == buffer_size + buffer_size) {
+                        // we found a buffer that contains at least √A unique values, but did not contain the full 2√A unique values,
+                        // so we still need to find a second separate buffer of at least √A unique values
+                        buffer1 = Range.init(A.start, A.start + count);
+                        find = buffer_size;
+                    } else if (block_size <= cache.len) {
+                        // we found the first and only internal buffer that we need, so we're done!
+                        buffer1 = Range.init(A.start, A.start + count);
+                        break;
+                    } else if (find_separately) {
+                        // found one buffer, but now find the other one
+                        buffer1 = Range.init(A.start, A.start + count);
+                        find_separately = false;
+                    } else {
+                        // we found a second buffer in an 'A' subarray containing √A unique values, so we're done!
+                        buffer2 = Range.init(A.start, A.start + count);
+                        break;
+                    }
+                } else if (pull_index == 0 and count > buffer1.length()) {
+                    // keep track of the largest buffer we were able to find
+                    buffer1 = Range.init(A.start, A.start + count);
+                    pull[pull_index] = Pull {
+                        .range = Range.init(A.start, B.end),
+                        .count = count,
+                        .from = index,
+                        .to = A.start,
+                    };
+                }
+                
+                // check B for the number of unique values we need to fill an internal buffer
+                // these values will be pulled out to the end of B
+                last = B.end - 1;
+                count = 1;
+                while (count < find) : ({last = index - 1; count += 1;}) {
+                    index = findFirstBackward(T, items, items[last], Range.init(B.start, last), lessThan, find - count);
+                    if (index == B.start) break;
+                }
+                index = last;
+                
+                if (count >= buffer_size) {
+                    // keep track of the range within the items where we'll need to "pull out" these values to create the internal buffe
+                    pull[pull_index] = Pull {
+                        .range = Range.init(A.start, B.end),
+                        .count = count,
+                        .from = index,
+                        .to = B.end,
+                    };
+                    pull_index = 1;
+                    
+                    if (count == buffer_size + buffer_size) {
+                        // we were able to find a single contiguous section containing 2√A unique values,
+                        // so this section can be used to contain both of the internal buffers we'll need
+                        buffer1 = Range.init(B.end - count, B.end - buffer_size);
+                        buffer2 = Range.init(B.end - buffer_size, B.end);
+                        break;
+                    } else if (find == buffer_size + buffer_size) {
+                        // we found a buffer that contains at least √A unique values, but did not contain the full 2√A unique values,
+                        // so we still need to find a second separate buffer of at least √A unique values
+                        buffer1 = Range.init(B.end - count, B.end);
+                        find = buffer_size;
+                    } else if (block_size <= cache.len) {
+                        // we found the first and only internal buffer that we need, so we're done!
+                        buffer1 = Range.init(B.end - count, B.end);
+                        break;
+                    } else if (find_separately) {
+                        // found one buffer, but now find the other one
+                        buffer1 = Range.init(B.end - count, B.end);
+                        find_separately = false;
+                    } else {
+                        // buffer2 will be pulled out from a 'B' subarray, so if the first buffer was pulled out from the corresponding 'A' subarray,
+                        // we need to adjust the end point for that A subarray so it knows to stop redistributing its values before reaching buffer2
+                        if (pull[0].range.start == A.start) pull[0].range.end -= pull[1].count;
+                        
+                        // we found a second buffer in an 'B' subarray containing √A unique values, so we're done!
+                        buffer2 = Range.init(B.end - count, B.end);
+                        break;
+                    }
+                } else if (pull_index == 0 and count > buffer1.length()) {
+                    // keep track of the largest buffer we were able to find
+                    buffer1 = Range.init(B.end - count, B.end);
+                    pull[pull_index] = Pull {
+                        .range = Range.init(A.start, B.end),
+                        .count = count,
+                        .from = index,
+                        .to = B.end,
+                    };
+                }
+            }
+            
+            // pull out the two ranges so we can use them as internal buffers
+            pull_index = 0;
+            while (pull_index < 2) : (pull_index += 1) {
+                const length = pull[pull_index].count;
+                
+                if (pull[pull_index].to < pull[pull_index].from) {
+                    // we're pulling the values out to the left, which means the start of an A subarray
+                    index = pull[pull_index].from;
+                    count = 1;
+                    while (count < length) : (count += 1) {
+                        index = findFirstBackward(T, items, items[index - 1], Range.init(pull[pull_index].to, pull[pull_index].from - (count - 1)), lessThan, length - count);
+                        const range = Range.init(index + 1, pull[pull_index].from + 1);
+                        mem.rotate(T, items[range.start..range.end], range.length() - count);
+                        pull[pull_index].from = index + count;
+                    }
+                } else if (pull[pull_index].to > pull[pull_index].from) {
+                    // we're pulling values out to the right, which means the end of a B subarray
+                    index = pull[pull_index].from + 1;
+                    count = 1;
+                    while (count < length) : (count += 1) {
+                        index = findLastForward(T, items, items[index], Range.init(index, pull[pull_index].to), lessThan, length - count);
+                        const range = Range.init(pull[pull_index].from, index - 1);
+                        mem.rotate(T, items[range.start..range.end], count);
+                        pull[pull_index].from = index - 1 - count;
+                    }
+                }
+            }
+            
+            // adjust block_size and buffer_size based on the values we were able to pull out
+            buffer_size = buffer1.length();
+            block_size = iterator.length()/buffer_size + 1;
+            
+            // the first buffer NEEDS to be large enough to tag each of the evenly sized A blocks,
+            // so this was originally here to test the math for adjusting block_size above
+            // assert((iterator.length() + 1)/block_size <= buffer_size);
+            
+            // now that the two internal buffers have been created, it's time to merge each A+B combination at this level of the merge sort!
+            iterator.begin();
+            while (!iterator.finished()) {
+                A = iterator.nextRange();
+                B = iterator.nextRange();
+                
+                // remove any parts of A or B that are being used by the internal buffers
+                start = A.start;
+                if (start == pull[0].range.start) {
+                    if (pull[0].from > pull[0].to) {
+                        A.start += pull[0].count;
+                        
+                        // if the internal buffer takes up the entire A or B subarray, then there's nothing to merge
+                        // this only happens for very small subarrays, like √4 = 2, 2 * (2 internal buffers) = 4,
+                        // which also only happens when cache.len is small or 0 since it'd otherwise use MergeExternal
+                        if (A.length() == 0) continue;
+                    } else if (pull[0].from < pull[0].to) {
+                        B.end -= pull[0].count;
+                        if (B.length() == 0) continue;
+                    }
+                }
+                if (start == pull[1].range.start) {
+                    if (pull[1].from > pull[1].to) {
+                        A.start += pull[1].count;
+                        if (A.length() == 0) continue;
+                    } else if (pull[1].from < pull[1].to) {
+                        B.end -= pull[1].count;
+                        if (B.length() == 0) continue;
+                    }
+                }
+                
+                if (lessThan(items[B.end - 1], items[A.start])) {
+                    // the two ranges are in reverse order, so a simple rotation should fix it
+                    mem.rotate(T, items[A.start..B.end], A.length());
+                } else if (lessThan(items[A.end], items[A.end - 1])) {
+                    // these two ranges weren't already in order, so we'll need to merge them!
+                    var findA: usize = undefined;
+                    
+                    // break the remainder of A into blocks. firstA is the uneven-sized first A block
+                    var blockA = Range.init(A.start, A.end);
+                    var firstA = Range.init(A.start, A.start + blockA.length() % block_size);
+                    
+                    // swap the first value of each A block with the value in buffer1
+                    var indexA = buffer1.start;
+                    index = firstA.end;
+                    while (index < blockA.end) : ({indexA += 1; index += block_size;}) {
+                        mem.swap(T, &items[indexA], &items[index]);
+                    }
+                    
+                    // start rolling the A blocks through the B blocks!
+                    // whenever we leave an A block behind, we'll need to merge the previous A block with any B blocks that follow it, so track that information as well
+                    var lastA = firstA;
+                    var lastB = Range.init(0, 0);
+                    var blockB = Range.init(B.start, B.start + math.min(block_size, B.length()));
+                    blockA.start += firstA.length();
+                    indexA = buffer1.start;
+                    
+                    // if the first unevenly sized A block fits into the cache, copy it there for when we go to Merge it
+                    // otherwise, if the second buffer is available, block swap the contents into that
+                    if (lastA.length() <= cache.len) {
+                        mem.copy(T, cache[0..], items[lastA.start..lastA.end]);
+                    } else if (buffer2.length() > 0) {
+                        blockSwap(T, items, lastA.start, buffer2.start, lastA.length());
+                    }
+                    
+                    if (blockA.length() > 0) {
+                        while (true) {
+                            // if there's a previous B block and the first value of the minimum A block is <= the last value of the previous B block,
+                            // then drop that minimum A block behind. or if there are no B blocks left then keep dropping the remaining A blocks.
+                            if ((lastB.length() > 0 and !lessThan(items[lastB.end - 1], items[indexA])) or blockB.length() == 0) {
+                                // figure out where to split the previous B block, and rotate it at the split
+                                const B_split = binaryFirst(T, items, items[indexA], lastB, lessThan);
+                                const B_remaining = lastB.end - B_split;
+                                
+                                // swap the minimum A block to the beginning of the rolling A blocks
+                                var minA = blockA.start;
+                                findA = minA + block_size;
+                                while (findA < blockA.end) : (findA += block_size) {
+                                    if (lessThan(items[findA], items[minA])) {
+                                        minA = findA;
+                                    }
+                                }
+                                blockSwap(T, items, blockA.start, minA, block_size);
+                                
+                                // swap the first item of the previous A block back with its original value, which is stored in buffer1
+                                mem.swap(T, &items[blockA.start], &items[indexA]);
+                                indexA += 1;
+                                
+                                // locally merge the previous A block with the B values that follow it
+                                // if lastA fits into the external cache we'll use that (with MergeExternal),
+                                // or if the second internal buffer exists we'll use that (with MergeInternal),
+                                // or failing that we'll use a strictly in-place merge algorithm (MergeInPlace)
+                                
+                                if (lastA.length() <= cache.len) {
+                                    mergeExternal(T, items, lastA, Range.init(lastA.end, B_split), lessThan, cache[0..]);
+                                } else if (buffer2.length() > 0) {
+                                    mergeInternal(T, items, lastA, Range.init(lastA.end, B_split), lessThan, buffer2);
+                                } else {
+                                    mergeInPlace(T, items, lastA, Range.init(lastA.end, B_split), lessThan);
+                                }
+                                
+                                if (buffer2.length() > 0 or block_size <= cache.len) {
+                                    // copy the previous A block into the cache or buffer2, since that's where we need it to be when we go to merge it anyway
+                                    if (block_size <= cache.len) {
+                                        mem.copy(T, cache[0..], items[blockA.start..blockA.start + block_size]);
+                                    } else {
+                                        blockSwap(T, items, blockA.start, buffer2.start, block_size);
+                                    }
+                                    
+                                    // this is equivalent to rotating, but faster
+                                    // the area normally taken up by the A block is either the contents of buffer2, or data we don't need anymore since we memcopied it
+                                    // either way, we don't need to retain the order of those items, so instead of rotating we can just block swap B to where it belongs
+                                    blockSwap(T, items, B_split, blockA.start + block_size - B_remaining, B_remaining);
+                                } else {
+                                    // we are unable to use the 'buffer2' trick to speed up the rotation operation since buffer2 doesn't exist, so perform a normal rotation
+                                    mem.rotate(T, items[B_split..blockA.start + block_size], blockA.start - B_split);
+                                }
+                                
+                                // update the range for the remaining A blocks, and the range remaining from the B block after it was split
+                                lastA = Range.init(blockA.start - B_remaining, blockA.start - B_remaining + block_size);
+                                lastB = Range.init(lastA.end, lastA.end + B_remaining);
+                                
+                                // if there are no more A blocks remaining, this step is finished!
+                                blockA.start += block_size;
+                                if (blockA.length() == 0)
+                                    break;
+                                
+                            } else if (blockB.length() < block_size) {
+                                // move the last B block, which is unevenly sized, to before the remaining A blocks, by using a rotation
+                                // the cache is disabled here since it might contain the contents of the previous A block
+                                mem.rotate(T, items[blockA.start..blockB.end], blockB.start - blockA.start);
+                                
+                                lastB = Range.init(blockA.start, blockA.start + blockB.length());
+                                blockA.start += blockB.length();
+                                blockA.end += blockB.length();
+                                blockB.end = blockB.start;
+                            } else {
+                                // roll the leftmost A block to the end by swapping it with the next B block
+                                blockSwap(T, items, blockA.start, blockB.start, block_size);
+                                lastB = Range.init(blockA.start, blockA.start + block_size);
+                                
+                                blockA.start += block_size;
+                                blockA.end += block_size;
+                                blockB.start += block_size;
+                                
+                                if (blockB.end > B.end - block_size) {
+                                    blockB.end = B.end;
+                                } else {
+                                    blockB.end += block_size;
+                                }
+                            }
+                        }
+                    }
+                    
+                    // merge the last A block with the remaining B values
+                    if (lastA.length() <= cache.len) {
+                        mergeExternal(T, items, lastA, Range.init(lastA.end, B.end), lessThan, cache[0..]);
+                    } else if (buffer2.length() > 0) {
+                        mergeInternal(T, items, lastA, Range.init(lastA.end, B.end), lessThan, buffer2);
+                    } else {
+                        mergeInPlace(T, items, lastA, Range.init(lastA.end, B.end), lessThan);
+                    }
+                }
+            }
+            
+            // when we're finished with this merge step we should have the one or two internal buffers left over, where the second buffer is all jumbled up
+            // insertion sort the second buffer, then redistribute the buffers back into the items using the opposite process used for creating the buffer
+            
+            // while an unstable sort like quicksort could be applied here, in benchmarks it was consistently slightly slower than a simple insertion sort,
+            // even for tens of millions of items. this may be because insertion sort is quite fast when the data is already somewhat sorted, like it is here
+            insertionSort(T, items[buffer2.start..buffer2.end], lessThan);
+            
+            pull_index = 0;
+            while (pull_index < 2) : (pull_index += 1) {
+                var unique = pull[pull_index].count * 2;
+                if (pull[pull_index].from > pull[pull_index].to) {
+                    // the values were pulled out to the left, so redistribute them back to the right
+                    var buffer = Range.init(pull[pull_index].range.start, pull[pull_index].range.start + pull[pull_index].count);
+                    while (buffer.length() > 0) {
+                        index = findFirstForward(T, items, items[buffer.start], Range.init(buffer.end, pull[pull_index].range.end), lessThan, unique);
+                        const amount = index - buffer.end;
+                        mem.rotate(T, items[buffer.start..index], buffer.length());
+                        buffer.start += (amount + 1);
+                        buffer.end += amount;
+                        unique -= 2;
+                    }
+                } else if (pull[pull_index].from < pull[pull_index].to) {
+                    // the values were pulled out to the right, so redistribute them back to the left
+                    var buffer = Range.init(pull[pull_index].range.end - pull[pull_index].count, pull[pull_index].range.end);
+                    while (buffer.length() > 0) {
+                        index = findLastBackward(T, items, items[buffer.end - 1], Range.init(pull[pull_index].range.start, buffer.start), lessThan, unique);
+                        const amount = buffer.start - index;
+                        mem.rotate(T, items[index..buffer.end], amount);
+                        buffer.start -= amount;
+                        buffer.end -= (amount + 1);
+                        unique -= 2;
+                    }
+                }
+            }
+        }
+        
+        // double the size of each A and B subarray that will be merged in the next level
+        if (!iterator.nextLevel()) break;
     }
 }
 
-fn quicksort(comptime T: type, array: []T, left: usize, right: usize, comptime cmp: fn(a: &const T, b: &const T)->Cmp) {
-    var i = left;
-    var j = right;
-    const p = (i + j) / 2;
+// merge operation without a buffer
+fn mergeInPlace(comptime T: type, items: []T, A_arg: &const Range, B_arg: &const Range, lessThan: fn(&const T,&const T)->bool) {
+    if (A_arg.length() == 0 or B_arg.length() == 0) return;
+    
+    // this just repeatedly binary searches into B and rotates A into position.
+    // the paper suggests using the 'rotation-based Hwang and Lin algorithm' here,
+    // but I decided to stick with this because it had better situational performance
+    // 
+    // (Hwang and Lin is designed for merging subarrays of very different sizes,
+    // but WikiSort almost always uses subarrays that are roughly the same size)
+    // 
+    // normally this is incredibly suboptimal, but this function is only called
+    // when none of the A or B blocks in any subarray contained 2√A unique values,
+    // which places a hard limit on the number of times this will ACTUALLY need
+    // to binary search and rotate.
+    // 
+    // according to my analysis the worst case is √A rotations performed on √A items
+    // once the constant factors are removed, which ends up being O(n)
+    // 
+    // again, this is NOT a general-purpose solution – it only works well in this case!
+    // kind of like how the O(n^2) insertion sort is used in some places
 
-    while (i <= j) {
-        while (cmp(array[i], array[p]) == Cmp.Less) {
-            i += 1;
+    var A = *A_arg;
+    var B = *B_arg;
+    
+    while (true) {
+        // find the first place in B where the first item in A needs to be inserted
+        const mid = binaryFirst(T, items, items[A.start], B, lessThan);
+        
+        // rotate A into place
+        const amount = mid - A.end;
+        mem.rotate(T, items[A.start..mid], A.length());
+        if (B.end == mid) break;
+        
+        // calculate the new A and B ranges
+        B.start = mid;
+        A = Range.init(A.start + amount, B.start);
+        A.start = binaryLast(T, items, items[A.start], A, lessThan);
+        if (A.length() == 0) break;
+    }
+}
+
+// merge operation using an internal buffer
+fn mergeInternal(comptime T: type, items: []T, A: &const Range, B: &const Range, lessThan: fn(&const T,&const T)->bool, buffer: &const Range) {
+    // whenever we find a value to add to the final array, swap it with the value that's already in that spot
+    // when this algorithm is finished, 'buffer' will contain its original contents, but in a different order
+    var A_count: usize = 0;
+    var B_count: usize = 0;
+    var insert: usize = 0;
+    
+    if (B.length() > 0 and A.length() > 0) {
+        while (true) {
+            if (!lessThan(items[B.start + B_count], items[buffer.start + A_count])) {
+                mem.swap(T, &items[A.start + insert], &items[buffer.start + A_count]);
+                A_count += 1;
+                insert += 1;
+                if (A_count >= A.length()) break;
+            } else {
+                mem.swap(T, &items[A.start + insert], &items[B.start + B_count]);
+                B_count += 1;
+                insert += 1;
+                if (B_count >= B.length()) break;
+            }
         }
-        while (cmp(array[j], array[p]) == Cmp.Greater) {
-            j -= 1;
+    }
+    
+    // swap the remainder of A into the final array
+    blockSwap(T, items, buffer.start + A_count, A.start + insert, A.length() - A_count);
+}
+
+fn blockSwap(comptime T: type, items: []T, start1: usize, start2: usize, block_size: usize) {
+    var index: usize = 0;
+    while (index < block_size) : (index += 1) {
+        mem.swap(T, &items[start1 + index], &items[start2 + index]);
+    }
+}
+
+// combine a linear search with a binary search to reduce the number of comparisons in situations
+// where have some idea as to how many unique values there are and where the next value might be
+fn findFirstForward(comptime T: type, items: []T, value: &const T, range: &const Range, lessThan: fn(&const T,&const T)->bool, unique: usize) -> usize {
+    if (range.length() == 0) return range.start;
+    const skip = math.max(range.length()/unique, usize(1));
+    
+    var index = range.start + skip;
+    while (lessThan(items[index - 1], value)) : (index += skip) {
+        if (index >= range.end - skip) {
+            return binaryFirst(T, items, value, Range.init(index, range.end), lessThan);
         }
-        if (i <= j) {
-            const tmp = array[i];
-            array[i] = array[j];
-            array[j] = tmp;
-            i += 1;
-            if (j > 0) j -= 1;
+    }
+    
+    return binaryFirst(T, items, value, Range.init(index - skip, index), lessThan);
+}
+
+fn findFirstBackward(comptime T: type, items: []T, value: &const T, range: &const Range, lessThan: fn(&const T,&const T)->bool, unique: usize) -> usize {
+    if (range.length() == 0) return range.start;
+    const skip = math.max(range.length()/unique, usize(1));
+    
+    var index = range.end - skip;
+    while (index > range.start and !lessThan(items[index - 1], value)) : (index -= skip) {
+        if (index < range.start + skip) {
+            return binaryFirst(T, items, value, Range.init(range.start, index), lessThan);
         }
     }
+    
+    return binaryFirst(T, items, value, Range.init(index, index + skip), lessThan);
+}
 
-    if (left < j) quicksort(T, array, left, j, cmp);
-    if (i < right) quicksort(T, array, i, right, cmp);
+fn findLastForward(comptime T: type, items: []T, value: &const T, range: &const Range, lessThan: fn(&const T,&const T)->bool, unique: usize) -> usize {
+    if (range.length() == 0) return range.start;
+    const skip = math.max(range.length()/unique, usize(1));
+    
+    var index = range.start + skip;
+    while (!lessThan(value, items[index - 1])) : (index += skip) {
+        if (index >= range.end - skip) {
+            return binaryLast(T, items, value, Range.init(index, range.end), lessThan);
+        }
+    }
+    
+    return binaryLast(T, items, value, Range.init(index - skip, index), lessThan);
 }
 
-pub fn i32asc(a: &const i32, b: &const i32) -> Cmp {
-   return if (*a > *b) Cmp.Greater else if (*a < *b) Cmp.Less else Cmp.Equal
+fn findLastBackward(comptime T: type, items: []T, value: &const T, range: &const Range, lessThan: fn(&const T,&const T)->bool, unique: usize) -> usize {
+    if (range.length() == 0) return range.start;
+    const skip = math.max(range.length()/unique, usize(1));
+    
+    var index = range.end - skip;
+    while (index > range.start and lessThan(value, items[index - 1])) : (index -= skip) {
+        if (index < range.start + skip) {
+            return binaryLast(T, items, value, Range.init(range.start, index), lessThan);
+        }
+    }
+    
+    return binaryLast(T, items, value, Range.init(index, index + skip), lessThan);
 }
 
-pub fn i32desc(a: &const i32, b: &const i32) -> Cmp {
-    reverse(i32asc(a, b))
+fn binaryFirst(comptime T: type, items: []T, value: &const T, range: &const Range, lessThan: fn(&const T,&const T)->bool) -> usize {
+    var start = range.start;
+    var end = range.end - 1;
+    if (range.start >= range.end) return range.end;
+    while (start < end) {
+        const mid = start + (end - start)/2;
+        if (lessThan(items[mid], value)) {
+            start = mid + 1;
+        } else {
+            end = mid;
+        }
+    }
+    if (start == range.end - 1 and lessThan(items[start], value)) {
+        start += 1;
+    }
+    return start;
 }
 
-pub fn u8asc(a: &const u8, b: &const u8) -> Cmp {
-    if (*a > *b) Cmp.Greater else if (*a < *b) Cmp.Less else Cmp.Equal
+fn binaryLast(comptime T: type, items: []T, value: &const T, range: &const Range, lessThan: fn(&const T,&const T)->bool) -> usize {
+    var start = range.start;
+    var end = range.end - 1;
+    if (range.start >= range.end) return range.end;
+    while (start < end) {
+        const mid = start + (end - start)/2;
+        if (!lessThan(value, items[mid])) {
+            start = mid + 1;
+        } else {
+            end = mid;
+        }
+    }
+    if (start == range.end - 1 and !lessThan(value, items[start])) {
+        start += 1;
+    }
+    return start;
 }
 
-pub fn u8desc(a: &const u8, b: &const u8) -> Cmp {
-    reverse(u8asc(a, b))
+fn mergeInto(comptime T: type, from: []T, A: &const Range, B: &const Range, lessThan: fn(&const T,&const T)->bool, into: []T) {
+    var A_index: usize = A.start;
+    var B_index: usize = B.start;
+    const A_last = A.end;
+    const B_last = B.end;
+    var insert_index: usize = 0;
+    
+    while (true) {
+        if (!lessThan(from[B_index], from[A_index])) {
+            into[insert_index] = from[A_index];
+            A_index += 1;
+            insert_index += 1;
+            if (A_index == A_last) {
+                // copy the remainder of B into the final array
+                mem.copy(T, into[insert_index..], from[B_index..B_last]);
+                break;
+            }
+        } else {
+            into[insert_index] = from[B_index];
+            B_index += 1;
+            insert_index += 1;
+            if (B_index == B_last) {
+                // copy the remainder of A into the final array
+                mem.copy(T, into[insert_index..], from[A_index..A_last]);
+                break;
+            }
+        }
+    }
 }
 
-fn reverse(was: Cmp) -> Cmp {
-    if (was == Cmp.Greater) Cmp.Less else if (was == Cmp.Less) Cmp.Greater else Cmp.Equal
+fn mergeExternal(comptime T: type, items: []T, A: &const Range, B: &const Range, lessThan: fn(&const T,&const T)->bool, cache: []T) {
+    // A fits into the cache, so use that instead of the internal buffer
+    var A_index: usize = 0;
+    var B_index: usize = B.start;
+    var insert_index: usize = A.start;
+    const A_last = A.length();
+    const B_last = B.end;
+    
+    if (B.length() > 0 and A.length() > 0) {
+        while (true) {
+            if (!lessThan(items[B_index], cache[A_index])) {
+                items[insert_index] = cache[A_index];
+                A_index += 1;
+                insert_index += 1;
+                if (A_index == A_last) break;
+            } else {
+                items[insert_index] = items[B_index];
+                B_index += 1;
+                insert_index += 1;
+                if (B_index == B_last) break;
+            }
+        }
+    }
+    
+    // copy the remainder of A into the final array
+    mem.copy(T, items[insert_index..], cache[A_index..A_last]);
+}
+
+fn swap(comptime T: type, items: []T, lessThan: fn(lhs: &const T, rhs: &const T)->bool, order: &[8]u8, x: usize, y: usize) {
+    if (lessThan(items[y], items[x]) or
+        ((*order)[x] > (*order)[y] and !lessThan(items[x], items[y])))
+    {
+        mem.swap(T, &items[x], &items[y]);
+        mem.swap(u8, &(*order)[x], &(*order)[y]);
+    }
+}
+
+fn i32asc(lhs: &const i32, rhs: &const i32) -> bool {
+    return *lhs < *rhs;
 }
 
-// ---------------------------------------
-// tests
+fn i32desc(lhs: &const i32, rhs: &const i32) -> bool {
+    return *rhs < *lhs;
+}
+
+fn u8asc(lhs: &const u8, rhs: &const u8) -> bool {
+    return *lhs < *rhs;
+}
+
+fn u8desc(lhs: &const u8, rhs: &const u8) -> bool {
+    return *rhs < *lhs;
+}
 
 test "stable sort" {
     testStableSort();
@@ -113,7 +1004,7 @@ fn testStableSort() {
         },
     };
     for (cases) |*case| {
-        sort_stable(IdAndValue, (*case)[0..], cmpByValue);
+        insertionSort(IdAndValue, (*case)[0..], cmpByValue);
         for (*case) |item, i| {
             assert(item.id == expected[i].id);
             assert(item.value == expected[i].value);
@@ -121,14 +1012,19 @@ fn testStableSort() {
     }
 }
 const IdAndValue = struct {
-    id: i32,
+    id: usize,
     value: i32,
 };
-fn cmpByValue(a: &const IdAndValue, b: &const IdAndValue) -> Cmp {
+fn cmpByValue(a: &const IdAndValue, b: &const IdAndValue) -> bool {
     return i32asc(a.value, b.value);
 }
 
-test "testSort" {
+test "std.sort" {
+    if (builtin.os == builtin.Os.windows and builtin.arch == builtin.Arch.i386) {
+        // TODO get this test passing
+        // https://github.com/zig-lang/zig/issues/537
+        return;
+    }
     const u8cases = [][]const []const u8 {
         [][]const u8{"", ""},
         [][]const u8{"a", "a"},
@@ -164,7 +1060,12 @@ test "testSort" {
     }
 }
 
-test "testSortDesc" {
+test "std.sort descending" {
+    if (builtin.os == builtin.Os.windows and builtin.arch == builtin.Arch.i386) {
+        // TODO get this test passing
+        // https://github.com/zig-lang/zig/issues/537
+        return;
+    }
     const rev_cases = [][]const []const i32 {
         [][]const i32{[]i32{}, []i32{}},
         [][]const i32{[]i32{1}, []i32{1}},
@@ -182,3 +1083,74 @@ test "testSortDesc" {
         assert(mem.eql(i32, slice, case[1]));
     }
 }
+
+test "another sort case" {
+    if (builtin.os == builtin.Os.windows and builtin.arch == builtin.Arch.i386) {
+        // TODO get this test passing
+        // https://github.com/zig-lang/zig/issues/537
+        return;
+    }
+    var arr = []i32{ 5, 3, 1, 2, 4 };
+    sort(i32, arr[0..], i32asc);
+
+    assert(mem.eql(i32, arr, []i32{ 1, 2, 3, 4, 5 }));
+}
+
+test "sort fuzz testing" {
+    if (builtin.os == builtin.Os.windows and builtin.arch == builtin.Arch.i386) {
+        // TODO get this test passing
+        // https://github.com/zig-lang/zig/issues/537
+        return;
+    }
+    var rng = std.rand.Rand.init(0x12345678);
+    const test_case_count = 10;
+    var i: usize = 0;
+    while (i < test_case_count) : (i += 1) {
+        fuzzTest(&rng);
+    }
+}
+
+var fixed_buffer_mem: [100 * 1024]u8 = undefined;
+
+fn fuzzTest(rng: &std.rand.Rand) {
+    const array_size = rng.range(usize, 0, 1000);
+    var fixed_allocator = mem.FixedBufferAllocator.init(fixed_buffer_mem[0..]);
+    var array = %%fixed_allocator.allocator.alloc(IdAndValue, array_size);
+    // populate with random data
+    for (array) |*item, index| {
+        item.id = index;
+        item.value = rng.range(i32, 0, 100);
+    }
+    sort(IdAndValue, array, cmpByValue);
+
+    var index: usize = 1;
+    while (index < array.len) : (index += 1) {
+        if (array[index].value == array[index - 1].value) {
+            assert(array[index].id > array[index - 1].id);
+        } else {
+            assert(array[index].value > array[index - 1].value);
+        }
+    }
+}
+
+pub fn min(comptime T: type, items: []T, lessThan: fn(lhs: &const T, rhs: &const T)->bool) -> T {
+    var i: usize = 0;
+    var smallest = items[0];
+    for (items[1..]) |item| {
+        if (lessThan(item, smallest)) {
+            smallest = item;
+        }
+    }
+    return smallest;
+}
+
+pub fn max(comptime T: type, items: []T, lessThan: fn(lhs: &const T, rhs: &const T)->bool) -> T {
+    var i: usize = 0;
+    var biggest = items[0];
+    for (items[1..]) |item| {
+        if (lessThan(biggest, item)) {
+            biggest = item;
+        }
+    }
+    return biggest;
+}
diff --git a/std/special/bootstrap.zig b/std/special/bootstrap.zig
index 924e537ddd..177e245400 100644
--- a/std/special/bootstrap.zig
+++ b/std/special/bootstrap.zig
@@ -5,20 +5,20 @@ const root = @import("@root");
 const std = @import("std");
 const builtin = @import("builtin");
 
-const is_windows = builtin.os == builtin.Os.windows;
-const want_main_symbol = builtin.link_libc;
-const want_start_symbol = !want_main_symbol and !is_windows;
-const want_WinMainCRTStartup = is_windows and !builtin.link_libc;
-
 var argc_ptr: &usize = undefined;
 
-
-export nakedcc fn _start() -> noreturn {
-    if (!want_start_symbol) {
-        @setGlobalLinkage(_start, builtin.GlobalLinkage.Internal);
-        unreachable;
+comptime {
+    const strong_linkage = builtin.GlobalLinkage.Strong;
+    if (builtin.link_libc) {
+        @export("main", main, strong_linkage);
+    } else if (builtin.os == builtin.Os.windows) {
+        @export("WinMainCRTStartup", WinMainCRTStartup, strong_linkage);
+    } else {
+        @export("_start", _start, strong_linkage);
     }
+}
 
+nakedcc fn _start() -> noreturn {
     switch (builtin.arch) {
         builtin.Arch.x86_64 => {
             argc_ptr = asm("lea (%%rsp), %[argc]": [argc] "=r" (-> &usize));
@@ -28,17 +28,14 @@ export nakedcc fn _start() -> noreturn {
         },
         else => @compileError("unsupported arch"),
     }
-    posixCallMainAndExit()
+    // If LLVM inlines stack variables into _start, they will overwrite
+    // the command line argument data.
+    @noInlineCall(posixCallMainAndExit);
 }
 
-export fn WinMainCRTStartup() -> noreturn {
-    if (!want_WinMainCRTStartup) {
-        @setGlobalLinkage(WinMainCRTStartup, builtin.GlobalLinkage.Internal);
-        unreachable;
-    }
+extern fn WinMainCRTStartup() -> noreturn {
     @setAlignStack(16);
 
-    std.debug.user_main_fn = root.main;
     root.main() %% std.os.windows.ExitProcess(1);
     std.os.windows.ExitProcess(0);
 }
@@ -58,17 +55,10 @@ fn callMain(argc: usize, argv: &&u8, envp: &?&u8) -> %void {
     while (envp[env_count] != null) : (env_count += 1) {}
     std.os.posix_environ_raw = @ptrCast(&&u8, envp)[0..env_count];
 
-    std.debug.user_main_fn = root.main;
-
     return root.main();
 }
 
-export fn main(c_argc: i32, c_argv: &&u8, c_envp: &?&u8) -> i32 {
-    if (!want_main_symbol) {
-        @setGlobalLinkage(main, builtin.GlobalLinkage.Internal);
-        unreachable;
-    }
-
+extern fn main(c_argc: i32, c_argv: &&u8, c_envp: &?&u8) -> i32 {
     callMain(usize(c_argc), c_argv, c_envp) %% return 1;
     return 0;
 }
diff --git a/std/special/bootstrap_lib.zig b/std/special/bootstrap_lib.zig
index 7412d64fa6..3c7789f30a 100644
--- a/std/special/bootstrap_lib.zig
+++ b/std/special/bootstrap_lib.zig
@@ -2,7 +2,11 @@
 
 const std = @import("std");
 
-export stdcallcc fn _DllMainCRTStartup(hinstDLL: std.os.windows.HINSTANCE, fdwReason: std.os.windows.DWORD,
+comptime {
+    @export("_DllMainCRTStartup", _DllMainCRTStartup);
+}
+
+stdcallcc fn _DllMainCRTStartup(hinstDLL: std.os.windows.HINSTANCE, fdwReason: std.os.windows.DWORD,
     lpReserved: std.os.windows.LPVOID) -> std.os.windows.BOOL
 {
     return std.os.windows.TRUE;
diff --git a/std/special/build_runner.zig b/std/special/build_runner.zig
index 430eeb395e..e54d85e6ef 100644
--- a/std/special/build_runner.zig
+++ b/std/special/build_runner.zig
@@ -45,21 +45,17 @@ pub fn main() -> %void {
 
     var stderr_file = io.getStdErr();
     var stderr_file_stream: io.FileOutStream = undefined;
-    var stderr_stream: %&io.OutStream = if (stderr_file) |*f| {
+    var stderr_stream: %&io.OutStream = if (stderr_file) |*f| x: {
         stderr_file_stream = io.FileOutStream.init(f);
-        &stderr_file_stream.stream
-    } else |err| {
-        err
-    };
+        break :x &stderr_file_stream.stream;
+    } else |err| err;
 
     var stdout_file = io.getStdOut();
     var stdout_file_stream: io.FileOutStream = undefined;
-    var stdout_stream: %&io.OutStream = if (stdout_file) |*f| {
+    var stdout_stream: %&io.OutStream = if (stdout_file) |*f| x: {
         stdout_file_stream = io.FileOutStream.init(f);
-        &stdout_file_stream.stream
-    } else |err| {
-        err
-    };
+        break :x &stdout_file_stream.stream;
+    } else |err| err;
 
     while (arg_it.next(allocator)) |err_or_arg| {
         const arg = %return unwrapArg(err_or_arg);
diff --git a/std/special/builtin.zig b/std/special/builtin.zig
index 51e6646574..e6c09863ca 100644
--- a/std/special/builtin.zig
+++ b/std/special/builtin.zig
@@ -35,25 +35,26 @@ export fn memcpy(noalias dest: ?&u8, noalias src: ?&const u8, n: usize) {
         (??dest)[index] = (??src)[index];
 }
 
-export fn __stack_chk_fail() -> noreturn {
-    if (builtin.mode == builtin.Mode.ReleaseFast or builtin.os == builtin.Os.windows) {
-        @setGlobalLinkage(__stack_chk_fail, builtin.GlobalLinkage.Internal);
-        unreachable;
+comptime {
+    if (builtin.mode != builtin.Mode.ReleaseFast and builtin.os != builtin.Os.windows) {
+        @export("__stack_chk_fail", __stack_chk_fail, builtin.GlobalLinkage.Strong);
     }
+}
+extern fn __stack_chk_fail() -> noreturn {
     @panic("stack smashing detected");
 }
 
 const math = @import("../math/index.zig");
 
-export fn fmodf(x: f32, y: f32) -> f32 { generic_fmod(f32, x, y) }
-export fn fmod(x: f64, y: f64) -> f64 { generic_fmod(f64, x, y) }
+export fn fmodf(x: f32, y: f32) -> f32 { return generic_fmod(f32, x, y); }
+export fn fmod(x: f64, y: f64) -> f64 { return generic_fmod(f64, x, y); }
 
 // TODO add intrinsics for these (and probably the double version too)
 // and have the math stuff use the intrinsic. same as @mod and @rem
-export fn floorf(x: f32) -> f32 { math.floor(x) }
-export fn ceilf(x: f32) -> f32 { math.ceil(x) }
-export fn floor(x: f64) -> f64 { math.floor(x) }
-export fn ceil(x: f64) -> f64 { math.ceil(x) }
+export fn floorf(x: f32) -> f32 { return math.floor(x); }
+export fn ceilf(x: f32) -> f32 { return math.ceil(x); }
+export fn floor(x: f64) -> f64 { return math.floor(x); }
+export fn ceil(x: f64) -> f64 { return math.ceil(x); }
 
 fn generic_fmod(comptime T: type, x: T, y: T) -> T {
     @setDebugSafety(this, false);
@@ -83,7 +84,7 @@ fn generic_fmod(comptime T: type, x: T, y: T) -> T {
     // normalize x and y
     if (ex == 0) {
         i = ux << exp_bits;
-        while (i >> bits_minus_1 == 0) : ({ex -= 1; i <<= 1}) {}
+        while (i >> bits_minus_1 == 0) : (b: {ex -= 1; break :b i <<= 1;}) {}
         ux <<= log2uint(@bitCast(u32, -ex + 1));
     } else {
         ux &= @maxValue(uint) >> exp_bits;
@@ -91,7 +92,7 @@ fn generic_fmod(comptime T: type, x: T, y: T) -> T {
     }
     if (ey == 0) {
         i = uy << exp_bits;
-        while (i >> bits_minus_1 == 0) : ({ey -= 1; i <<= 1}) {}
+        while (i >> bits_minus_1 == 0) : (b: {ey -= 1; break :b i <<= 1;}) {}
         uy <<= log2uint(@bitCast(u32, -ey + 1));
     } else {
         uy &= @maxValue(uint) >> exp_bits;
@@ -114,7 +115,7 @@ fn generic_fmod(comptime T: type, x: T, y: T) -> T {
             return 0 * x;
         ux = i;
     }
-    while (ux >> digits == 0) : ({ux <<= 1; ex -= 1}) {}
+    while (ux >> digits == 0) : (b: {ux <<= 1; break :b ex -= 1;}) {}
 
     // scale result up
     if (ex > 0) {
diff --git a/std/special/compiler_rt/aulldiv.zig b/std/special/compiler_rt/aulldiv.zig
index 511aa91f80..9d4faf95b9 100644
--- a/std/special/compiler_rt/aulldiv.zig
+++ b/std/special/compiler_rt/aulldiv.zig
@@ -1,66 +1,55 @@
-const builtin = @import("builtin");
-const linkage = if (builtin.is_test) builtin.GlobalLinkage.Internal else builtin.GlobalLinkage.Strong;
-const is_win32 = builtin.os == builtin.Os.windows and builtin.arch == builtin.Arch.i386;
-
-export nakedcc fn _aulldiv() {
-    if (is_win32) {
-        @setDebugSafety(this, false);
-        @setGlobalLinkage(_aulldiv, linkage);
-        asm volatile (
-            \\.intel_syntax noprefix
-            \\
-            \\         push        ebx
-            \\         push        esi
-            \\         mov         eax,dword ptr [esp+18h]
-            \\         or          eax,eax
-            \\         jne         L1
-            \\         mov         ecx,dword ptr [esp+14h]
-            \\         mov         eax,dword ptr [esp+10h]
-            \\         xor         edx,edx
-            \\         div         ecx
-            \\         mov         ebx,eax
-            \\         mov         eax,dword ptr [esp+0Ch]
-            \\         div         ecx
-            \\         mov         edx,ebx
-            \\         jmp         L2
-            \\ L1:
-            \\         mov         ecx,eax
-            \\         mov         ebx,dword ptr [esp+14h]
-            \\         mov         edx,dword ptr [esp+10h]
-            \\         mov         eax,dword ptr [esp+0Ch]
-            \\ L3:
-            \\         shr         ecx,1
-            \\         rcr         ebx,1
-            \\         shr         edx,1
-            \\         rcr         eax,1
-            \\         or          ecx,ecx
-            \\         jne         L3
-            \\         div         ebx
-            \\         mov         esi,eax
-            \\         mul         dword ptr [esp+18h]
-            \\         mov         ecx,eax
-            \\         mov         eax,dword ptr [esp+14h]
-            \\         mul         esi
-            \\         add         edx,ecx
-            \\         jb          L4
-            \\         cmp         edx,dword ptr [esp+10h]
-            \\         ja          L4
-            \\         jb          L5
-            \\         cmp         eax,dword ptr [esp+0Ch]
-            \\         jbe         L5
-            \\ L4:
-            \\         dec         esi
-            \\ L5:
-            \\         xor         edx,edx
-            \\         mov         eax,esi
-            \\ L2:
-            \\         pop         esi
-            \\         pop         ebx
-            \\         ret         10h
-        );
-        unreachable;
-    }
-
-    @setGlobalLinkage(_aulldiv, builtin.GlobalLinkage.Internal);
-    unreachable;
+pub nakedcc fn _aulldiv() {
+    @setDebugSafety(this, false);
+    asm volatile (
+        \\.intel_syntax noprefix
+        \\
+        \\         push        ebx
+        \\         push        esi
+        \\         mov         eax,dword ptr [esp+18h]
+        \\         or          eax,eax
+        \\         jne         L1
+        \\         mov         ecx,dword ptr [esp+14h]
+        \\         mov         eax,dword ptr [esp+10h]
+        \\         xor         edx,edx
+        \\         div         ecx
+        \\         mov         ebx,eax
+        \\         mov         eax,dword ptr [esp+0Ch]
+        \\         div         ecx
+        \\         mov         edx,ebx
+        \\         jmp         L2
+        \\ L1:
+        \\         mov         ecx,eax
+        \\         mov         ebx,dword ptr [esp+14h]
+        \\         mov         edx,dword ptr [esp+10h]
+        \\         mov         eax,dword ptr [esp+0Ch]
+        \\ L3:
+        \\         shr         ecx,1
+        \\         rcr         ebx,1
+        \\         shr         edx,1
+        \\         rcr         eax,1
+        \\         or          ecx,ecx
+        \\         jne         L3
+        \\         div         ebx
+        \\         mov         esi,eax
+        \\         mul         dword ptr [esp+18h]
+        \\         mov         ecx,eax
+        \\         mov         eax,dword ptr [esp+14h]
+        \\         mul         esi
+        \\         add         edx,ecx
+        \\         jb          L4
+        \\         cmp         edx,dword ptr [esp+10h]
+        \\         ja          L4
+        \\         jb          L5
+        \\         cmp         eax,dword ptr [esp+0Ch]
+        \\         jbe         L5
+        \\ L4:
+        \\         dec         esi
+        \\ L5:
+        \\         xor         edx,edx
+        \\         mov         eax,esi
+        \\ L2:
+        \\         pop         esi
+        \\         pop         ebx
+        \\         ret         10h
+    );
 }
diff --git a/std/special/compiler_rt/aullrem.zig b/std/special/compiler_rt/aullrem.zig
index e218890959..b6c54d33ae 100644
--- a/std/special/compiler_rt/aullrem.zig
+++ b/std/special/compiler_rt/aullrem.zig
@@ -1,67 +1,56 @@
-const builtin = @import("builtin");
-const linkage = if (builtin.is_test) builtin.GlobalLinkage.Internal else builtin.GlobalLinkage.Strong;
-const is_win32 = builtin.os == builtin.Os.windows and builtin.arch == builtin.Arch.i386;
-
-export nakedcc fn _aullrem() {
-    if (is_win32) {
-        @setDebugSafety(this, false);
-        @setGlobalLinkage(_aullrem, linkage);
-        asm volatile (
-            \\.intel_syntax noprefix
-            \\
-            \\         push        ebx
-            \\         mov         eax,dword ptr [esp+14h]
-            \\         or          eax,eax
-            \\         jne         L1a
-            \\         mov         ecx,dword ptr [esp+10h]
-            \\         mov         eax,dword ptr [esp+0Ch]
-            \\         xor         edx,edx
-            \\         div         ecx
-            \\         mov         eax,dword ptr [esp+8]
-            \\         div         ecx
-            \\         mov         eax,edx
-            \\         xor         edx,edx
-            \\         jmp         L2a
-            \\ L1a:
-            \\         mov         ecx,eax
-            \\         mov         ebx,dword ptr [esp+10h]
-            \\         mov         edx,dword ptr [esp+0Ch]
-            \\         mov         eax,dword ptr [esp+8]
-            \\ L3a:
-            \\         shr         ecx,1
-            \\         rcr         ebx,1
-            \\         shr         edx,1
-            \\         rcr         eax,1
-            \\         or          ecx,ecx
-            \\         jne         L3a
-            \\         div         ebx
-            \\         mov         ecx,eax
-            \\         mul         dword ptr [esp+14h]
-            \\         xchg        eax,ecx
-            \\         mul         dword ptr [esp+10h]
-            \\         add         edx,ecx
-            \\         jb          L4a
-            \\         cmp         edx,dword ptr [esp+0Ch]
-            \\         ja          L4a
-            \\         jb          L5a
-            \\         cmp         eax,dword ptr [esp+8]
-            \\         jbe         L5a
-            \\ L4a:
-            \\         sub         eax,dword ptr [esp+10h]
-            \\         sbb         edx,dword ptr [esp+14h]
-            \\ L5a:
-            \\         sub         eax,dword ptr [esp+8]
-            \\         sbb         edx,dword ptr [esp+0Ch]
-            \\         neg         edx
-            \\         neg         eax
-            \\         sbb         edx,0
-            \\ L2a:
-            \\         pop         ebx
-            \\         ret         10h
-        );
-        unreachable;
-    }
-
-    @setGlobalLinkage(_aullrem, builtin.GlobalLinkage.Internal);
-    unreachable;
+pub nakedcc fn _aullrem() {
+    @setDebugSafety(this, false);
+    asm volatile (
+        \\.intel_syntax noprefix
+        \\
+        \\         push        ebx
+        \\         mov         eax,dword ptr [esp+14h]
+        \\         or          eax,eax
+        \\         jne         L1a
+        \\         mov         ecx,dword ptr [esp+10h]
+        \\         mov         eax,dword ptr [esp+0Ch]
+        \\         xor         edx,edx
+        \\         div         ecx
+        \\         mov         eax,dword ptr [esp+8]
+        \\         div         ecx
+        \\         mov         eax,edx
+        \\         xor         edx,edx
+        \\         jmp         L2a
+        \\ L1a:
+        \\         mov         ecx,eax
+        \\         mov         ebx,dword ptr [esp+10h]
+        \\         mov         edx,dword ptr [esp+0Ch]
+        \\         mov         eax,dword ptr [esp+8]
+        \\ L3a:
+        \\         shr         ecx,1
+        \\         rcr         ebx,1
+        \\         shr         edx,1
+        \\         rcr         eax,1
+        \\         or          ecx,ecx
+        \\         jne         L3a
+        \\         div         ebx
+        \\         mov         ecx,eax
+        \\         mul         dword ptr [esp+14h]
+        \\         xchg        eax,ecx
+        \\         mul         dword ptr [esp+10h]
+        \\         add         edx,ecx
+        \\         jb          L4a
+        \\         cmp         edx,dword ptr [esp+0Ch]
+        \\         ja          L4a
+        \\         jb          L5a
+        \\         cmp         eax,dword ptr [esp+8]
+        \\         jbe         L5a
+        \\ L4a:
+        \\         sub         eax,dword ptr [esp+10h]
+        \\         sbb         edx,dword ptr [esp+14h]
+        \\ L5a:
+        \\         sub         eax,dword ptr [esp+8]
+        \\         sbb         edx,dword ptr [esp+0Ch]
+        \\         neg         edx
+        \\         neg         eax
+        \\         sbb         edx,0
+        \\ L2a:
+        \\         pop         ebx
+        \\         ret         10h
+    );
 }
diff --git a/std/special/compiler_rt/comparetf2.zig b/std/special/compiler_rt/comparetf2.zig
index f1552ca61f..b88c35019b 100644
--- a/std/special/compiler_rt/comparetf2.zig
+++ b/std/special/compiler_rt/comparetf2.zig
@@ -20,11 +20,9 @@ const infRep = exponentMask;
 
 const builtin = @import("builtin");
 const is_test = builtin.is_test;
-const linkage = @import("index.zig").linkage;
 
-export fn __letf2(a: f128, b: f128) -> c_int {
+pub extern fn __letf2(a: f128, b: f128) -> c_int {
     @setDebugSafety(this, is_test);
-    @setGlobalLinkage(__letf2, linkage);
 
     const aInt = @bitCast(rep_t, a);
     const bInt = @bitCast(rep_t, b);
@@ -40,35 +38,25 @@ export fn __letf2(a: f128, b: f128) -> c_int {
 
     // If at least one of a and b is positive, we get the same result comparing
     // a and b as signed integers as we would with a floating-point compare.
-    return if ((aInt & bInt) >= 0) {
-        if (aInt < bInt) {
+    return if ((aInt & bInt) >= 0)
+        if (aInt < bInt)
             LE_LESS
-        } else if (aInt == bInt) {
+        else if (aInt == bInt)
             LE_EQUAL
-        } else {
+        else
             LE_GREATER
-        }
-    } else {
+    else
         // Otherwise, both are negative, so we need to flip the sense of the
         // comparison to get the correct result.  (This assumes a twos- or ones-
         // complement integer representation; if integers are represented in a
         // sign-magnitude representation, then this flip is incorrect).
-        if (aInt > bInt) {
+        if (aInt > bInt)
             LE_LESS
-        } else if (aInt == bInt) {
+        else if (aInt == bInt)
             LE_EQUAL
-        } else {
+        else
             LE_GREATER
-        }
-    };
-}
-
-// Alias for libgcc compatibility
-// TODO https://github.com/zig-lang/zig/issues/420
-export fn __cmptf2(a: f128, b: f128) -> c_int {
-    @setGlobalLinkage(__cmptf2, linkage);
-    @setDebugSafety(this, is_test);
-    return __letf2(a, b);
+    ;
 }
 
 // TODO https://github.com/zig-lang/zig/issues/305
@@ -78,8 +66,7 @@ const GE_EQUAL = c_int(0);
 const GE_GREATER = c_int(1);
 const GE_UNORDERED = c_int(-1); // Note: different from LE_UNORDERED
 
-export fn __getf2(a: f128, b: f128) -> c_int {
-    @setGlobalLinkage(__getf2, linkage);
+pub extern fn __getf2(a: f128, b: f128) -> c_int {
     @setDebugSafety(this, is_test);
 
     const aInt = @bitCast(srep_t, a);
@@ -89,57 +76,27 @@ export fn __getf2(a: f128, b: f128) -> c_int {
 
     if (aAbs > infRep or bAbs > infRep) return GE_UNORDERED;
     if ((aAbs | bAbs) == 0) return GE_EQUAL;
-    return if ((aInt & bInt) >= 0) {
-        if (aInt < bInt) {
+    return if ((aInt & bInt) >= 0)
+        if (aInt < bInt)
             GE_LESS
-        } else if (aInt == bInt) {
+        else if (aInt == bInt)
             GE_EQUAL
-        } else {
+        else
             GE_GREATER
-        }
-    } else {
-        if (aInt > bInt) {
+    else
+        if (aInt > bInt)
             GE_LESS
-        } else if (aInt == bInt) {
+        else if (aInt == bInt)
             GE_EQUAL
-        } else {
+        else
             GE_GREATER
-        }
-    };
+    ;
 }
 
-export fn __unordtf2(a: f128, b: f128) -> c_int {
-    @setGlobalLinkage(__unordtf2, linkage);
+pub extern fn __unordtf2(a: f128, b: f128) -> c_int {
     @setDebugSafety(this, is_test);
 
     const aAbs = @bitCast(rep_t, a) & absMask;
     const bAbs = @bitCast(rep_t, b) & absMask;
     return c_int(aAbs > infRep or bAbs > infRep);
 }
-
-// The following are alternative names for the preceding routines.
-// TODO use aliases https://github.com/zig-lang/zig/issues/462
-
-export fn __eqtf2(a: f128, b: f128) -> c_int {
-    @setGlobalLinkage(__eqtf2, linkage);
-    @setDebugSafety(this, is_test);
-    return __letf2(a, b);
-}
-
-export fn __lttf2(a: f128, b: f128) -> c_int {
-    @setGlobalLinkage(__lttf2, linkage);
-    @setDebugSafety(this, is_test);
-    return __letf2(a, b);
-}
-
-export fn __netf2(a: f128, b: f128) -> c_int {
-    @setGlobalLinkage(__netf2, linkage);
-    @setDebugSafety(this, is_test);
-    return __letf2(a, b);
-}
-
-export fn __gttf2(a: f128, b: f128) -> c_int {
-    @setGlobalLinkage(__gttf2, linkage);
-    @setDebugSafety(this, is_test);
-    return __getf2(a, b);
-}
diff --git a/std/special/compiler_rt/fixunsdfdi.zig b/std/special/compiler_rt/fixunsdfdi.zig
index 5f730bbc85..7e33987997 100644
--- a/std/special/compiler_rt/fixunsdfdi.zig
+++ b/std/special/compiler_rt/fixunsdfdi.zig
@@ -1,10 +1,8 @@
 const fixuint = @import("fixuint.zig").fixuint;
 const builtin = @import("builtin");
-const linkage = @import("index.zig").linkage;
 
-export fn __fixunsdfdi(a: f64) -> u64 {
+pub extern fn __fixunsdfdi(a: f64) -> u64 {
     @setDebugSafety(this, builtin.is_test);
-    @setGlobalLinkage(__fixunsdfdi, linkage);
     return fixuint(f64, u64, a);
 }
 
diff --git a/std/special/compiler_rt/fixunsdfsi.zig b/std/special/compiler_rt/fixunsdfsi.zig
index 784d5fde4f..e710e1852b 100644
--- a/std/special/compiler_rt/fixunsdfsi.zig
+++ b/std/special/compiler_rt/fixunsdfsi.zig
@@ -1,10 +1,8 @@
 const fixuint = @import("fixuint.zig").fixuint;
 const builtin = @import("builtin");
-const linkage = @import("index.zig").linkage;
 
-export fn __fixunsdfsi(a: f64) -> u32 {
+pub extern fn __fixunsdfsi(a: f64) -> u32 {
     @setDebugSafety(this, builtin.is_test);
-    @setGlobalLinkage(__fixunsdfsi, linkage);
     return fixuint(f64, u32, a);
 }
 
diff --git a/std/special/compiler_rt/fixunsdfti.zig b/std/special/compiler_rt/fixunsdfti.zig
index 579455c2f9..79d924f0a8 100644
--- a/std/special/compiler_rt/fixunsdfti.zig
+++ b/std/special/compiler_rt/fixunsdfti.zig
@@ -1,10 +1,8 @@
 const fixuint = @import("fixuint.zig").fixuint;
 const builtin = @import("builtin");
-const linkage = @import("index.zig").linkage;
 
-export fn __fixunsdfti(a: f64) -> u128 {
+pub extern fn __fixunsdfti(a: f64) -> u128 {
     @setDebugSafety(this, builtin.is_test);
-    @setGlobalLinkage(__fixunsdfti, linkage);
     return fixuint(f64, u128, a);
 }
 
diff --git a/std/special/compiler_rt/fixunssfdi.zig b/std/special/compiler_rt/fixunssfdi.zig
index eab553d8c9..f72f62d68c 100644
--- a/std/special/compiler_rt/fixunssfdi.zig
+++ b/std/special/compiler_rt/fixunssfdi.zig
@@ -1,10 +1,8 @@
 const fixuint = @import("fixuint.zig").fixuint;
 const builtin = @import("builtin");
-const linkage = @import("index.zig").linkage;
 
-export fn __fixunssfdi(a: f32) -> u64 {
+pub extern fn __fixunssfdi(a: f32) -> u64 {
     @setDebugSafety(this, builtin.is_test);
-    @setGlobalLinkage(__fixunssfdi, linkage);
     return fixuint(f32, u64, a);
 }
 
diff --git a/std/special/compiler_rt/fixunssfsi.zig b/std/special/compiler_rt/fixunssfsi.zig
index 18c0e66677..4c9a5001ab 100644
--- a/std/special/compiler_rt/fixunssfsi.zig
+++ b/std/special/compiler_rt/fixunssfsi.zig
@@ -1,10 +1,8 @@
 const fixuint = @import("fixuint.zig").fixuint;
 const builtin = @import("builtin");
-const linkage = @import("index.zig").linkage;
 
-export fn __fixunssfsi(a: f32) -> u32 {
+pub extern fn __fixunssfsi(a: f32) -> u32 {
     @setDebugSafety(this, builtin.is_test);
-    @setGlobalLinkage(__fixunssfsi, linkage);
     return fixuint(f32, u32, a);
 }
 
diff --git a/std/special/compiler_rt/fixunssfti.zig b/std/special/compiler_rt/fixunssfti.zig
index f513604247..59b94cfc51 100644
--- a/std/special/compiler_rt/fixunssfti.zig
+++ b/std/special/compiler_rt/fixunssfti.zig
@@ -1,10 +1,8 @@
 const fixuint = @import("fixuint.zig").fixuint;
 const builtin = @import("builtin");
-const linkage = @import("index.zig").linkage;
 
-export fn __fixunssfti(a: f32) -> u128 {
+pub extern fn __fixunssfti(a: f32) -> u128 {
     @setDebugSafety(this, builtin.is_test);
-    @setGlobalLinkage(__fixunssfti, linkage);
     return fixuint(f32, u128, a);
 }
 
diff --git a/std/special/compiler_rt/fixunstfdi.zig b/std/special/compiler_rt/fixunstfdi.zig
index 85212e2176..06b117a414 100644
--- a/std/special/compiler_rt/fixunstfdi.zig
+++ b/std/special/compiler_rt/fixunstfdi.zig
@@ -1,10 +1,8 @@
 const fixuint = @import("fixuint.zig").fixuint;
 const builtin = @import("builtin");
-const linkage = @import("index.zig").linkage;
 
-export fn __fixunstfdi(a: f128) -> u64 {
+pub extern fn __fixunstfdi(a: f128) -> u64 {
     @setDebugSafety(this, builtin.is_test);
-    @setGlobalLinkage(__fixunstfdi, linkage);
     return fixuint(f128, u64, a);
 }
 
diff --git a/std/special/compiler_rt/fixunstfsi.zig b/std/special/compiler_rt/fixunstfsi.zig
index 33c85c9224..8a5efe711d 100644
--- a/std/special/compiler_rt/fixunstfsi.zig
+++ b/std/special/compiler_rt/fixunstfsi.zig
@@ -1,10 +1,8 @@
 const fixuint = @import("fixuint.zig").fixuint;
 const builtin = @import("builtin");
-const linkage = @import("index.zig").linkage;
 
-export fn __fixunstfsi(a: f128) -> u32 {
+pub extern fn __fixunstfsi(a: f128) -> u32 {
     @setDebugSafety(this, builtin.is_test);
-    @setGlobalLinkage(__fixunstfsi, linkage);
     return fixuint(f128, u32, a);
 }
 
diff --git a/std/special/compiler_rt/fixunstfti.zig b/std/special/compiler_rt/fixunstfti.zig
index 1bf7fbab4b..d8b654d3a3 100644
--- a/std/special/compiler_rt/fixunstfti.zig
+++ b/std/special/compiler_rt/fixunstfti.zig
@@ -1,10 +1,8 @@
 const fixuint = @import("fixuint.zig").fixuint;
 const builtin = @import("builtin");
-const linkage = @import("index.zig").linkage;
 
-export fn __fixunstfti(a: f128) -> u128 {
+pub extern fn __fixunstfti(a: f128) -> u128 {
     @setDebugSafety(this, builtin.is_test);
-    @setGlobalLinkage(__fixunstfti, linkage);
     return fixuint(f128, u128, a);
 }
 
diff --git a/std/special/compiler_rt/index.zig b/std/special/compiler_rt/index.zig
index b9f93a601a..5e8c91c8d5 100644
--- a/std/special/compiler_rt/index.zig
+++ b/std/special/compiler_rt/index.zig
@@ -1,33 +1,74 @@
-comptime {
-    _ = @import("comparetf2.zig");
-    _ = @import("fixunsdfdi.zig");
-    _ = @import("fixunsdfsi.zig");
-    _ = @import("fixunsdfti.zig");
-    _ = @import("fixunssfdi.zig");
-    _ = @import("fixunssfsi.zig");
-    _ = @import("fixunssfti.zig");
-    _ = @import("fixunstfdi.zig");
-    _ = @import("fixunstfsi.zig");
-    _ = @import("fixunstfti.zig");
-    _ = @import("udivmoddi4.zig");
-    _ = @import("udivmodti4.zig");
-    _ = @import("udivti3.zig");
-    _ = @import("umodti3.zig");
-    _ = @import("aulldiv.zig");
-    _ = @import("aullrem.zig");
-}
-
 const builtin = @import("builtin");
 const is_test = builtin.is_test;
-const assert = @import("../../debug.zig").assert;
 
+comptime {
+    const linkage = if (is_test) builtin.GlobalLinkage.Internal else builtin.GlobalLinkage.Weak;
+    const strong_linkage = if (is_test) builtin.GlobalLinkage.Internal else builtin.GlobalLinkage.Strong;
+
+    @export("__letf2", @import("comparetf2.zig").__letf2, linkage);
+    @export("__getf2", @import("comparetf2.zig").__getf2, linkage);
+
+    if (!is_test) {
+        // only create these aliases when not testing
+        @export("__cmptf2", @import("comparetf2.zig").__letf2, linkage);
+        @export("__eqtf2", @import("comparetf2.zig").__letf2, linkage);
+        @export("__lttf2", @import("comparetf2.zig").__letf2, linkage);
+        @export("__netf2", @import("comparetf2.zig").__letf2, linkage);
+        @export("__gttf2", @import("comparetf2.zig").__getf2, linkage);
+    }
+
+    @export("__unordtf2", @import("comparetf2.zig").__unordtf2, linkage);
+
+    @export("__fixunssfsi", @import("fixunssfsi.zig").__fixunssfsi, linkage);
+    @export("__fixunssfdi", @import("fixunssfdi.zig").__fixunssfdi, linkage);
+    @export("__fixunssfti", @import("fixunssfti.zig").__fixunssfti, linkage);
+
+    @export("__fixunsdfsi", @import("fixunsdfsi.zig").__fixunsdfsi, linkage);
+    @export("__fixunsdfdi", @import("fixunsdfdi.zig").__fixunsdfdi, linkage);
+    @export("__fixunsdfti", @import("fixunsdfti.zig").__fixunsdfti, linkage);
+
+    @export("__fixunstfsi", @import("fixunstfsi.zig").__fixunstfsi, linkage);
+    @export("__fixunstfdi", @import("fixunstfdi.zig").__fixunstfdi, linkage);
+    @export("__fixunstfti", @import("fixunstfti.zig").__fixunstfti, linkage);
+
+    @export("__udivmoddi4", @import("udivmoddi4.zig").__udivmoddi4, linkage);
+    @export("__udivmodti4", @import("udivmodti4.zig").__udivmodti4, linkage);
 
-const win32 = builtin.os == builtin.Os.windows and builtin.arch == builtin.Arch.i386;
-const win64 = builtin.os == builtin.Os.windows and builtin.arch == builtin.Arch.x86_64;
-const win32_nocrt = win32 and !builtin.link_libc;
-const win64_nocrt = win64 and !builtin.link_libc;
-pub const linkage = if (builtin.is_test) builtin.GlobalLinkage.Internal else builtin.GlobalLinkage.Weak;
-const strong_linkage = if (builtin.is_test) builtin.GlobalLinkage.Internal else builtin.GlobalLinkage.Strong;
+    @export("__udivti3", @import("udivti3.zig").__udivti3, linkage);
+    @export("__umodti3", @import("umodti3.zig").__umodti3, linkage);
+
+    @export("__udivsi3", __udivsi3, linkage);
+    @export("__udivdi3", __udivdi3, linkage);
+    @export("__umoddi3", __umoddi3, linkage);
+    @export("__udivmodsi4", __udivmodsi4, linkage);
+
+    if (isArmArch()) {
+        @export("__aeabi_uldivmod", __aeabi_uldivmod, linkage);
+        @export("__aeabi_uidivmod", __aeabi_uidivmod, linkage);
+        @export("__aeabi_uidiv", __udivsi3, linkage);
+    }
+    if (builtin.os == builtin.Os.windows) {
+        switch (builtin.arch) {
+            builtin.Arch.i386 => {
+                if (!builtin.link_libc) {
+                    @export("_chkstk", _chkstk, strong_linkage);
+                    @export("__chkstk_ms", __chkstk_ms, linkage);
+                }
+                @export("_aulldiv", @import("aulldiv.zig")._aulldiv, strong_linkage);
+                @export("_aullrem", @import("aullrem.zig")._aullrem, strong_linkage);
+            },
+            builtin.Arch.x86_64 => {
+                if (!builtin.link_libc) {
+                    @export("__chkstk", __chkstk, strong_linkage);
+                    @export("___chkstk_ms", ___chkstk_ms, linkage);
+                }
+            },
+            else => {},
+        }
+    }
+}
+
+const assert = @import("../../debug.zig").assert;
 
 const __udivmoddi4 = @import("udivmoddi4.zig").__udivmoddi4;
 
@@ -41,15 +82,13 @@ pub coldcc fn panic(msg: []const u8) -> noreturn {
     }
 }
 
-export fn __udivdi3(a: u64, b: u64) -> u64 {
+extern fn __udivdi3(a: u64, b: u64) -> u64 {
     @setDebugSafety(this, is_test);
-    @setGlobalLinkage(__udivdi3, linkage);
     return __udivmoddi4(a, b, null);
 }
 
-export fn __umoddi3(a: u64, b: u64) -> u64 {
+extern fn __umoddi3(a: u64, b: u64) -> u64 {
     @setDebugSafety(this, is_test);
-    @setGlobalLinkage(__umoddi3, linkage);
 
     var r: u64 = undefined;
     _ = __udivmoddi4(a, b, &r);
@@ -60,17 +99,11 @@ const AeabiUlDivModResult = extern struct {
     quot: u64,
     rem: u64,
 };
-export fn __aeabi_uldivmod(numerator: u64, denominator: u64) -> AeabiUlDivModResult {
+extern fn __aeabi_uldivmod(numerator: u64, denominator: u64) -> AeabiUlDivModResult {
     @setDebugSafety(this, is_test);
-    if (comptime isArmArch()) {
-        @setGlobalLinkage(__aeabi_uldivmod, linkage);
-        var result: AeabiUlDivModResult = undefined;
-        result.quot = __udivmoddi4(numerator, denominator, &result.rem);
-        return result;
-    }
-
-    @setGlobalLinkage(__aeabi_uldivmod, builtin.GlobalLinkage.Internal);
-    unreachable;
+    var result: AeabiUlDivModResult = undefined;
+    result.quot = __udivmoddi4(numerator, denominator, &result.rem);
+    return result;
 }
 
 fn isArmArch() -> bool {
@@ -115,156 +148,124 @@ fn isArmArch() -> bool {
     };
 }
 
-export nakedcc fn __aeabi_uidivmod() {
+nakedcc fn __aeabi_uidivmod() {
     @setDebugSafety(this, false);
-
-    if (comptime isArmArch()) {
-        @setGlobalLinkage(__aeabi_uidivmod, linkage);
-        asm volatile (
-            \\ push    { lr }
-            \\ sub     sp, sp, #4
-            \\ mov     r2, sp
-            \\ bl      __udivmodsi4
-            \\ ldr     r1, [sp]
-            \\ add     sp, sp, #4
-            \\ pop     { pc }
-        ::: "r2", "r1");
-        unreachable;
-    }
-
-    @setGlobalLinkage(__aeabi_uidivmod, builtin.GlobalLinkage.Internal);
+    asm volatile (
+        \\ push    { lr }
+        \\ sub     sp, sp, #4
+        \\ mov     r2, sp
+        \\ bl      __udivmodsi4
+        \\ ldr     r1, [sp]
+        \\ add     sp, sp, #4
+        \\ pop     { pc }
+    ::: "r2", "r1");
 }
 
 // _chkstk (_alloca) routine - probe stack between %esp and (%esp-%eax) in 4k increments,
 // then decrement %esp by %eax.  Preserves all registers except %esp and flags.
 // This routine is windows specific
 // http://msdn.microsoft.com/en-us/library/ms648426.aspx
-export nakedcc fn _chkstk() align(4) {
+nakedcc fn _chkstk() align(4) {
     @setDebugSafety(this, false);
 
-    if (win32_nocrt) {
-        @setGlobalLinkage(_chkstk, strong_linkage);
-        asm volatile (
-            \\         push   %%ecx
-            \\         push   %%eax
-            \\         cmp    $0x1000,%%eax
-            \\         lea    12(%%esp),%%ecx
-            \\         jb     1f
-            \\ 2:
-            \\         sub    $0x1000,%%ecx
-            \\         test   %%ecx,(%%ecx)
-            \\         sub    $0x1000,%%eax
-            \\         cmp    $0x1000,%%eax
-            \\         ja     2b
-            \\ 1:
-            \\         sub    %%eax,%%ecx
-            \\         test   %%ecx,(%%ecx)
-            \\         pop    %%eax
-            \\         pop    %%ecx
-            \\         ret
-        );
-        unreachable;
-    }
-
-    @setGlobalLinkage(_chkstk, builtin.GlobalLinkage.Internal);
+    asm volatile (
+        \\         push   %%ecx
+        \\         push   %%eax
+        \\         cmp    $0x1000,%%eax
+        \\         lea    12(%%esp),%%ecx
+        \\         jb     1f
+        \\ 2:
+        \\         sub    $0x1000,%%ecx
+        \\         test   %%ecx,(%%ecx)
+        \\         sub    $0x1000,%%eax
+        \\         cmp    $0x1000,%%eax
+        \\         ja     2b
+        \\ 1:
+        \\         sub    %%eax,%%ecx
+        \\         test   %%ecx,(%%ecx)
+        \\         pop    %%eax
+        \\         pop    %%ecx
+        \\         ret
+    );
 }
 
-export nakedcc fn __chkstk() align(4) {
+nakedcc fn __chkstk() align(4) {
     @setDebugSafety(this, false);
 
-    if (win64_nocrt) {
-        @setGlobalLinkage(__chkstk, strong_linkage);
-        asm volatile (
-            \\        push   %%rcx
-            \\        push   %%rax
-            \\        cmp    $0x1000,%%rax
-            \\        lea    24(%%rsp),%%rcx
-            \\        jb     1f
-            \\2:
-            \\        sub    $0x1000,%%rcx
-            \\        test   %%rcx,(%%rcx)
-            \\        sub    $0x1000,%%rax
-            \\        cmp    $0x1000,%%rax
-            \\        ja     2b
-            \\1:
-            \\        sub    %%rax,%%rcx
-            \\        test   %%rcx,(%%rcx)
-            \\        pop    %%rax
-            \\        pop    %%rcx
-            \\        ret
-        );
-        unreachable;
-    }
-
-    @setGlobalLinkage(__chkstk, builtin.GlobalLinkage.Internal);
+    asm volatile (
+        \\        push   %%rcx
+        \\        push   %%rax
+        \\        cmp    $0x1000,%%rax
+        \\        lea    24(%%rsp),%%rcx
+        \\        jb     1f
+        \\2:
+        \\        sub    $0x1000,%%rcx
+        \\        test   %%rcx,(%%rcx)
+        \\        sub    $0x1000,%%rax
+        \\        cmp    $0x1000,%%rax
+        \\        ja     2b
+        \\1:
+        \\        sub    %%rax,%%rcx
+        \\        test   %%rcx,(%%rcx)
+        \\        pop    %%rax
+        \\        pop    %%rcx
+        \\        ret
+    );
 }
 
 // _chkstk routine
 // This routine is windows specific
 // http://msdn.microsoft.com/en-us/library/ms648426.aspx
-export nakedcc fn __chkstk_ms() align(4) {
+nakedcc fn __chkstk_ms() align(4) {
     @setDebugSafety(this, false);
 
-    if (win32_nocrt) {
-        @setGlobalLinkage(__chkstk_ms, linkage);
-        asm volatile (
-            \\         push   %%ecx
-            \\         push   %%eax
-            \\         cmp    $0x1000,%%eax
-            \\         lea    12(%%esp),%%ecx
-            \\         jb     1f
-            \\ 2:
-            \\         sub    $0x1000,%%ecx
-            \\         test   %%ecx,(%%ecx)
-            \\         sub    $0x1000,%%eax
-            \\         cmp    $0x1000,%%eax
-            \\         ja     2b
-            \\ 1:
-            \\         sub    %%eax,%%ecx
-            \\         test   %%ecx,(%%ecx)
-            \\         pop    %%eax
-            \\         pop    %%ecx
-            \\         ret
-        );
-        unreachable;
-    }
-
-    @setGlobalLinkage(__chkstk_ms, builtin.GlobalLinkage.Internal);
+    asm volatile (
+        \\         push   %%ecx
+        \\         push   %%eax
+        \\         cmp    $0x1000,%%eax
+        \\         lea    12(%%esp),%%ecx
+        \\         jb     1f
+        \\ 2:
+        \\         sub    $0x1000,%%ecx
+        \\         test   %%ecx,(%%ecx)
+        \\         sub    $0x1000,%%eax
+        \\         cmp    $0x1000,%%eax
+        \\         ja     2b
+        \\ 1:
+        \\         sub    %%eax,%%ecx
+        \\         test   %%ecx,(%%ecx)
+        \\         pop    %%eax
+        \\         pop    %%ecx
+        \\         ret
+    );
 }
 
-export nakedcc fn ___chkstk_ms() align(4) {
+nakedcc fn ___chkstk_ms() align(4) {
     @setDebugSafety(this, false);
 
-    if (win64_nocrt) {
-        @setGlobalLinkage(___chkstk_ms, linkage);
-        asm volatile (
-            \\        push   %%rcx
-            \\        push   %%rax
-            \\        cmp    $0x1000,%%rax
-            \\        lea    24(%%rsp),%%rcx
-            \\        jb     1f
-            \\2:
-            \\        sub    $0x1000,%%rcx
-            \\        test   %%rcx,(%%rcx)
-            \\        sub    $0x1000,%%rax
-            \\        cmp    $0x1000,%%rax
-            \\        ja     2b
-            \\1:
-            \\        sub    %%rax,%%rcx
-            \\        test   %%rcx,(%%rcx)
-            \\        pop    %%rax
-            \\        pop    %%rcx
-            \\        ret
-        );
-        unreachable;
-    }
-
-    @setGlobalLinkage(___chkstk_ms, builtin.GlobalLinkage.Internal);
+    asm volatile (
+        \\        push   %%rcx
+        \\        push   %%rax
+        \\        cmp    $0x1000,%%rax
+        \\        lea    24(%%rsp),%%rcx
+        \\        jb     1f
+        \\2:
+        \\        sub    $0x1000,%%rcx
+        \\        test   %%rcx,(%%rcx)
+        \\        sub    $0x1000,%%rax
+        \\        cmp    $0x1000,%%rax
+        \\        ja     2b
+        \\1:
+        \\        sub    %%rax,%%rcx
+        \\        test   %%rcx,(%%rcx)
+        \\        pop    %%rax
+        \\        pop    %%rcx
+        \\        ret
+    );
 }
 
-export fn __udivmodsi4(a: u32, b: u32, rem: &u32) -> u32 {
+extern fn __udivmodsi4(a: u32, b: u32, rem: &u32) -> u32 {
     @setDebugSafety(this, is_test);
-    @setGlobalLinkage(__udivmodsi4, linkage);
 
     const d = __udivsi3(a, b);
     *rem = u32(i32(a) -% (i32(d) * i32(b)));
@@ -272,19 +273,8 @@ export fn __udivmodsi4(a: u32, b: u32, rem: &u32) -> u32 {
 }
 
 
-// TODO make this an alias instead of an extra function call
-// https://github.com/andrewrk/zig/issues/256
-
-export fn __aeabi_uidiv(n: u32, d: u32) -> u32 {
+extern fn __udivsi3(n: u32, d: u32) -> u32 {
     @setDebugSafety(this, is_test);
-    @setGlobalLinkage(__aeabi_uidiv, linkage);
-
-    return __udivsi3(n, d);
-}
-
-export fn __udivsi3(n: u32, d: u32) -> u32 {
-    @setDebugSafety(this, is_test);
-    @setGlobalLinkage(__udivsi3, linkage);
 
     const n_uword_bits: c_uint = u32.bit_count;
     // special cases
@@ -480,4 +470,3 @@ fn test_one_udivsi3(a: u32, b: u32, expected_q: u32) {
     const q: u32 = __udivsi3(a, b);
     assert(q == expected_q);
 }
-
diff --git a/std/special/compiler_rt/udivmoddi4.zig b/std/special/compiler_rt/udivmoddi4.zig
index 8005538d9a..4e2117cfa5 100644
--- a/std/special/compiler_rt/udivmoddi4.zig
+++ b/std/special/compiler_rt/udivmoddi4.zig
@@ -1,10 +1,8 @@
 const udivmod = @import("udivmod.zig").udivmod;
 const builtin = @import("builtin");
-const linkage = @import("index.zig").linkage;
 
-export fn __udivmoddi4(a: u64, b: u64, maybe_rem: ?&u64) -> u64 {
+pub extern fn __udivmoddi4(a: u64, b: u64, maybe_rem: ?&u64) -> u64 {
     @setDebugSafety(this, builtin.is_test);
-    @setGlobalLinkage(__udivmoddi4, linkage);
     return udivmod(u64, a, b, maybe_rem);
 }
 
diff --git a/std/special/compiler_rt/udivmodti4.zig b/std/special/compiler_rt/udivmodti4.zig
index 2ee2fdb57f..c56a958f27 100644
--- a/std/special/compiler_rt/udivmodti4.zig
+++ b/std/special/compiler_rt/udivmodti4.zig
@@ -1,10 +1,8 @@
 const udivmod = @import("udivmod.zig").udivmod;
 const builtin = @import("builtin");
-const linkage = @import("index.zig").linkage;
 
-export fn __udivmodti4(a: u128, b: u128, maybe_rem: ?&u128) -> u128 {
+pub extern fn __udivmodti4(a: u128, b: u128, maybe_rem: ?&u128) -> u128 {
     @setDebugSafety(this, builtin.is_test);
-    @setGlobalLinkage(__udivmodti4, linkage);
     return udivmod(u128, a, b, maybe_rem);
 }
 
diff --git a/std/special/compiler_rt/udivti3.zig b/std/special/compiler_rt/udivti3.zig
index 3764449758..115c748cfb 100644
--- a/std/special/compiler_rt/udivti3.zig
+++ b/std/special/compiler_rt/udivti3.zig
@@ -1,9 +1,7 @@
 const __udivmodti4 = @import("udivmodti4.zig").__udivmodti4;
 const builtin = @import("builtin");
-const linkage = @import("index.zig").linkage;
 
-export fn __udivti3(a: u128, b: u128) -> u128 {
+pub extern fn __udivti3(a: u128, b: u128) -> u128 {
     @setDebugSafety(this, builtin.is_test);
-    @setGlobalLinkage(__udivti3, linkage);
     return __udivmodti4(a, b, null);
 }
diff --git a/std/special/compiler_rt/umodti3.zig b/std/special/compiler_rt/umodti3.zig
index 0ad9e127b3..9f680369eb 100644
--- a/std/special/compiler_rt/umodti3.zig
+++ b/std/special/compiler_rt/umodti3.zig
@@ -1,10 +1,8 @@
 const __udivmodti4 = @import("udivmodti4.zig").__udivmodti4;
 const builtin = @import("builtin");
-const linkage = @import("index.zig").linkage;
 
-export fn __umodti3(a: u128, b: u128) -> u128 {
+pub extern fn __umodti3(a: u128, b: u128) -> u128 {
     @setDebugSafety(this, builtin.is_test);
-    @setGlobalLinkage(__umodti3, linkage);
     var r: u128 = undefined;
     _ = __udivmodti4(a, b, &r);
     return r;
author	Andrew Kelley <superjoe30@gmail.com>	2017-12-23 12:00:25 -0500
committer	Andrew Kelley <superjoe30@gmail.com>	2017-12-23 12:00:25 -0500
commit	fe660462837231353b846bf398637ca84f67bfc9 (patch)
tree	37336cfcf44810d73d111a57207b843a01fd205f /std
parent	fe39ca01bcbee0077b21d5ddc2776df974e8c6d3 (diff)
parent	39c7bd24e4f768b23074b8634ac637b175b7639f (diff)
download	zig-fe660462837231353b846bf398637ca84f67bfc9.tar.gz zig-fe660462837231353b846bf398637ca84f67bfc9.zip