Merge pull request #1198 from ziglang/m-n-threading

M:N threading
author: Andrew Kelley <superjoe30@gmail.com> 2018-07-09 22:06:47 -0400
committer: GitHub <noreply@github.com> 2018-07-09 22:06:47 -0400
commit: ccef60a64033a25dbe2351c27f28257546b2ae5b (patch)
tree: 67390c7e43f9852cf3786f2eed35ebf04e15510d
parent: 10cc49db1ca1f9b3ac63277c0742e05f6412f3c6 (diff)
parent: c89aac85c440ea4cbccf1abdbd6acf84a33077e3 (diff)
download: zig-ccef60a64033a25dbe2351c27f28257546b2ae5b.tar.gz
zig-ccef60a64033a25dbe2351c27f28257546b2ae5b.zip
16 files changed, 1774 insertions, 142 deletions
diff --git a/src-self-hosted/main.zig b/src-self-hosted/main.zig
index d17fc94c82..fe94a4460a 100644
--- a/src-self-hosted/main.zig
+++ b/src-self-hosted/main.zig
@@ -384,7 +384,8 @@ fn buildOutputType(allocator: *Allocator, args: []const []const u8, out_type: Mo
     const zig_lib_dir = introspect.resolveZigLibDir(allocator) catch os.exit(1);
     defer allocator.free(zig_lib_dir);
 
-    var loop = try event.Loop.init(allocator);
+    var loop: event.Loop = undefined;
+    try loop.initMultiThreaded(allocator);
 
     var module = try Module.create(
         &loop,
@@ -493,8 +494,6 @@ async fn processBuildEvents(module: *Module, watch: bool) void {
         switch (build_event) {
             Module.Event.Ok => {
                 std.debug.warn("Build succeeded\n");
-                // for now we stop after 1
-                module.loop.stop();
                 return;
             },
             Module.Event.Error => |err| {
diff --git a/src-self-hosted/module.zig b/src-self-hosted/module.zig
index cf27c826c8..5ce1a7965a 100644
--- a/src-self-hosted/module.zig
+++ b/src-self-hosted/module.zig
@@ -2,6 +2,7 @@ const std = @import("std");
 const os = std.os;
 const io = std.io;
 const mem = std.mem;
+const Allocator = mem.Allocator;
 const Buffer = std.Buffer;
 const llvm = @import("llvm.zig");
 const c = @import("c.zig");
@@ -13,6 +14,7 @@ const ArrayList = std.ArrayList;
 const errmsg = @import("errmsg.zig");
 const ast = std.zig.ast;
 const event = std.event;
+const assert = std.debug.assert;
 
 pub const Module = struct {
     loop: *event.Loop,
@@ -81,6 +83,8 @@ pub const Module = struct {
     link_out_file: ?[]const u8,
     events: *event.Channel(Event),
 
+    exported_symbol_names: event.Locked(Decl.Table),
+
     // TODO handle some of these earlier and report them in a way other than error codes
     pub const BuildError = error{
         OutOfMemory,
@@ -232,6 +236,7 @@ pub const Module = struct {
             .test_name_prefix = null,
             .emit_file_type = Emit.Binary,
             .link_out_file = null,
+            .exported_symbol_names = event.Locked(Decl.Table).init(loop, Decl.Table.init(loop.allocator)),
         });
     }
 
@@ -272,38 +277,91 @@ pub const Module = struct {
                 return;
             };
             await (async self.events.put(Event.Ok) catch unreachable);
+            // for now we stop after 1
+            return;
         }
     }
 
     async fn addRootSrc(self: *Module) !void {
         const root_src_path = self.root_src_path orelse @panic("TODO handle null root src path");
+        // TODO async/await os.path.real
         const root_src_real_path = os.path.real(self.a(), root_src_path) catch |err| {
             try printError("unable to get real path '{}': {}", root_src_path, err);
             return err;
         };
         errdefer self.a().free(root_src_real_path);
 
+        // TODO async/await readFileAlloc()
         const source_code = io.readFileAlloc(self.a(), root_src_real_path) catch |err| {
             try printError("unable to open '{}': {}", root_src_real_path, err);
             return err;
         };
         errdefer self.a().free(source_code);
 
-        var tree = try std.zig.parse(self.a(), source_code);
-        defer tree.deinit();
-
-        //var it = tree.root_node.decls.iterator();
-        //while (it.next()) |decl_ptr| {
-        //    const decl = decl_ptr.*;
-        //    switch (decl.id) {
-        //        ast.Node.Comptime => @panic("TODO"),
-        //        ast.Node.VarDecl => @panic("TODO"),
-        //        ast.Node.UseDecl => @panic("TODO"),
-        //        ast.Node.FnDef => @panic("TODO"),
-        //        ast.Node.TestDecl => @panic("TODO"),
-        //        else => unreachable,
-        //    }
-        //}
+        var parsed_file = ParsedFile{
+            .tree = try std.zig.parse(self.a(), source_code),
+            .realpath = root_src_real_path,
+        };
+        errdefer parsed_file.tree.deinit();
+
+        const tree = &parsed_file.tree;
+
+        // create empty struct for it
+        const decls = try Scope.Decls.create(self.a(), null);
+        errdefer decls.destroy();
+
+        var it = tree.root_node.decls.iterator(0);
+        while (it.next()) |decl_ptr| {
+            const decl = decl_ptr.*;
+            switch (decl.id) {
+                ast.Node.Id.Comptime => @panic("TODO"),
+                ast.Node.Id.VarDecl => @panic("TODO"),
+                ast.Node.Id.FnProto => {
+                    const fn_proto = @fieldParentPtr(ast.Node.FnProto, "base", decl);
+
+                    const name = if (fn_proto.name_token) |name_token| tree.tokenSlice(name_token) else {
+                        @panic("TODO add compile error");
+                        //try self.addCompileError(
+                        //    &parsed_file,
+                        //    fn_proto.fn_token,
+                        //    fn_proto.fn_token + 1,
+                        //    "missing function name",
+                        //);
+                        continue;
+                    };
+
+                    const fn_decl = try self.a().create(Decl.Fn{
+                        .base = Decl{
+                            .id = Decl.Id.Fn,
+                            .name = name,
+                            .visib = parseVisibToken(tree, fn_proto.visib_token),
+                            .resolution = Decl.Resolution.Unresolved,
+                        },
+                        .value = Decl.Fn.Val{ .Unresolved = {} },
+                        .fn_proto = fn_proto,
+                    });
+                    errdefer self.a().destroy(fn_decl);
+
+                    // TODO make this parallel
+                    try await try async self.addTopLevelDecl(tree, &fn_decl.base);
+                },
+                ast.Node.Id.TestDecl => @panic("TODO"),
+                else => unreachable,
+            }
+        }
+    }
+
+    async fn addTopLevelDecl(self: *Module, tree: *ast.Tree, decl: *Decl) !void {
+        const is_export = decl.isExported(tree);
+
+        {
+            const exported_symbol_names = await try async self.exported_symbol_names.acquire();
+            defer exported_symbol_names.release();
+
+            if (try exported_symbol_names.value.put(decl.name, decl)) |other_decl| {
+                @panic("TODO report compile error");
+            }
+        }
     }
 
     pub fn link(self: *Module, out_file: ?[]const u8) !void {
@@ -350,3 +408,172 @@ fn printError(comptime format: []const u8, args: ...) !void {
     const out_stream = &stderr_file_out_stream.stream;
     try out_stream.print(format, args);
 }
+
+fn parseVisibToken(tree: *ast.Tree, optional_token_index: ?ast.TokenIndex) Visib {
+    if (optional_token_index) |token_index| {
+        const token = tree.tokens.at(token_index);
+        assert(token.id == Token.Id.Keyword_pub);
+        return Visib.Pub;
+    } else {
+        return Visib.Private;
+    }
+}
+
+pub const Scope = struct {
+    id: Id,
+    parent: ?*Scope,
+
+    pub const Id = enum {
+        Decls,
+        Block,
+    };
+
+    pub const Decls = struct {
+        base: Scope,
+        table: Decl.Table,
+
+        pub fn create(a: *Allocator, parent: ?*Scope) !*Decls {
+            const self = try a.create(Decls{
+                .base = Scope{
+                    .id = Id.Decls,
+                    .parent = parent,
+                },
+                .table = undefined,
+            });
+            errdefer a.destroy(self);
+
+            self.table = Decl.Table.init(a);
+            errdefer self.table.deinit();
+
+            return self;
+        }
+
+        pub fn destroy(self: *Decls) void {
+            self.table.deinit();
+            self.table.allocator.destroy(self);
+            self.* = undefined;
+        }
+    };
+
+    pub const Block = struct {
+        base: Scope,
+    };
+};
+
+pub const Visib = enum {
+    Private,
+    Pub,
+};
+
+pub const Decl = struct {
+    id: Id,
+    name: []const u8,
+    visib: Visib,
+    resolution: Resolution,
+
+    pub const Table = std.HashMap([]const u8, *Decl, mem.hash_slice_u8, mem.eql_slice_u8);
+
+    pub fn isExported(base: *const Decl, tree: *ast.Tree) bool {
+        switch (base.id) {
+            Id.Fn => {
+                const fn_decl = @fieldParentPtr(Fn, "base", base);
+                return fn_decl.isExported(tree);
+            },
+            else => return false,
+        }
+    }
+
+    pub const Resolution = enum {
+        Unresolved,
+        InProgress,
+        Invalid,
+        Ok,
+    };
+
+    pub const Id = enum {
+        Var,
+        Fn,
+        CompTime,
+    };
+
+    pub const Var = struct {
+        base: Decl,
+    };
+
+    pub const Fn = struct {
+        base: Decl,
+        value: Val,
+        fn_proto: *const ast.Node.FnProto,
+
+        // TODO https://github.com/ziglang/zig/issues/683 and then make this anonymous
+        pub const Val = union {
+            Unresolved: void,
+            Ok: *Value.Fn,
+        };
+
+        pub fn externLibName(self: Fn, tree: *ast.Tree) ?[]const u8 {
+            return if (self.fn_proto.extern_export_inline_token) |tok_index| x: {
+                const token = tree.tokens.at(tok_index);
+                break :x switch (token.id) {
+                    Token.Id.Extern => tree.tokenSlicePtr(token),
+                    else => null,
+                };
+            } else null;
+        }
+
+        pub fn isExported(self: Fn, tree: *ast.Tree) bool {
+            if (self.fn_proto.extern_export_inline_token) |tok_index| {
+                const token = tree.tokens.at(tok_index);
+                return token.id == Token.Id.Keyword_export;
+            } else {
+                return false;
+            }
+        }
+    };
+
+    pub const CompTime = struct {
+        base: Decl,
+    };
+};
+
+pub const Value = struct {
+    pub const Fn = struct {};
+};
+
+pub const Type = struct {
+    id: Id,
+
+    pub const Id = enum {
+        Type,
+        Void,
+        Bool,
+        NoReturn,
+        Int,
+        Float,
+        Pointer,
+        Array,
+        Struct,
+        ComptimeFloat,
+        ComptimeInt,
+        Undefined,
+        Null,
+        Optional,
+        ErrorUnion,
+        ErrorSet,
+        Enum,
+        Union,
+        Fn,
+        Opaque,
+        Promise,
+    };
+
+    pub const Struct = struct {
+        base: Type,
+        decls: *Scope.Decls,
+    };
+};
+
+pub const ParsedFile = struct {
+    tree: ast.Tree,
+    realpath: []const u8,
+};
diff --git a/src/ir.cpp b/src/ir.cpp
index 505a32247e..2dc6ddad2c 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -13278,7 +13278,7 @@ static TypeTableEntry *ir_analyze_instruction_call(IrAnalyze *ira, IrInstruction
             FnTableEntry *fn_table_entry = fn_ref->value.data.x_bound_fn.fn;
             IrInstruction *first_arg_ptr = fn_ref->value.data.x_bound_fn.first_arg;
             return ir_analyze_fn_call(ira, call_instruction, fn_table_entry, fn_table_entry->type_entry,
-                nullptr, first_arg_ptr, is_comptime, call_instruction->fn_inline);
+                fn_ref, first_arg_ptr, is_comptime, call_instruction->fn_inline);
         } else {
             ir_add_error_node(ira, fn_ref->source_node,
                 buf_sprintf("type '%s' not a function", buf_ptr(&fn_ref->value.type->name)));
diff --git a/std/atomic/queue_mpsc.zig b/std/atomic/queue_mpsc.zig
index 8030565d7a..978e189453 100644
--- a/std/atomic/queue_mpsc.zig
+++ b/std/atomic/queue_mpsc.zig
@@ -15,6 +15,8 @@ pub fn QueueMpsc(comptime T: type) type {
 
         pub const Node = std.atomic.Stack(T).Node;
 
+        /// Not thread-safe. The call to init() must complete before any other functions are called.
+        /// No deinitialization required.
         pub fn init() Self {
             return Self{
                 .inboxes = []std.atomic.Stack(T){
@@ -26,12 +28,15 @@ pub fn QueueMpsc(comptime T: type) type {
             };
         }
 
+        /// Fully thread-safe. put() may be called from any thread at any time.
         pub fn put(self: *Self, node: *Node) void {
             const inbox_index = @atomicLoad(usize, &self.inbox_index, AtomicOrder.SeqCst);
             const inbox = &self.inboxes[inbox_index];
             inbox.push(node);
         }
 
+        /// Must be called by only 1 consumer at a time. Every call to get() and isEmpty() must complete before
+        /// the next call to get().
         pub fn get(self: *Self) ?*Node {
             if (self.outbox.pop()) |node| {
                 return node;
@@ -43,6 +48,43 @@ pub fn QueueMpsc(comptime T: type) type {
             }
             return self.outbox.pop();
         }
+
+        /// Must be called by only 1 consumer at a time. Every call to get() and isEmpty() must complete before
+        /// the next call to isEmpty().
+        pub fn isEmpty(self: *Self) bool {
+            if (!self.outbox.isEmpty()) return false;
+            const prev_inbox_index = @atomicRmw(usize, &self.inbox_index, AtomicRmwOp.Xor, 0x1, AtomicOrder.SeqCst);
+            const prev_inbox = &self.inboxes[prev_inbox_index];
+            while (prev_inbox.pop()) |node| {
+                self.outbox.push(node);
+            }
+            return self.outbox.isEmpty();
+        }
+
+        /// For debugging only. No API guarantees about what this does.
+        pub fn dump(self: *Self) void {
+            {
+                var it = self.outbox.root;
+                while (it) |node| {
+                    std.debug.warn("0x{x} -> ", @ptrToInt(node));
+                    it = node.next;
+                }
+            }
+            const inbox_index = self.inbox_index;
+            const inboxes = []*std.atomic.Stack(T){
+                &self.inboxes[self.inbox_index],
+                &self.inboxes[1 - self.inbox_index],
+            };
+            for (inboxes) |inbox| {
+                var it = inbox.root;
+                while (it) |node| {
+                    std.debug.warn("0x{x} -> ", @ptrToInt(node));
+                    it = node.next;
+                }
+            }
+
+            std.debug.warn("null\n");
+        }
     };
 }
 
diff --git a/std/c/darwin.zig b/std/c/darwin.zig
index e3b53d9bea..133ef62f05 100644
--- a/std/c/darwin.zig
+++ b/std/c/darwin.zig
@@ -6,6 +6,30 @@ pub extern "c" fn __getdirentries64(fd: c_int, buf_ptr: [*]u8, buf_len: usize, b
 pub extern "c" fn mach_absolute_time() u64;
 pub extern "c" fn mach_timebase_info(tinfo: ?*mach_timebase_info_data) void;
 
+pub extern "c" fn kqueue() c_int;
+pub extern "c" fn kevent(
+    kq: c_int,
+    changelist: [*]const Kevent,
+    nchanges: c_int,
+    eventlist: [*]Kevent,
+    nevents: c_int,
+    timeout: ?*const timespec,
+) c_int;
+
+pub extern "c" fn kevent64(
+    kq: c_int,
+    changelist: [*]const kevent64_s,
+    nchanges: c_int,
+    eventlist: [*]kevent64_s,
+    nevents: c_int,
+    flags: c_uint,
+    timeout: ?*const timespec,
+) c_int;
+
+pub extern "c" fn sysctl(name: [*]c_int, namelen: c_uint, oldp: ?*c_void, oldlenp: ?*usize, newp: ?*c_void, newlen: usize) c_int;
+pub extern "c" fn sysctlbyname(name: [*]const u8, oldp: ?*c_void, oldlenp: ?*usize, newp: ?*c_void, newlen: usize) c_int;
+pub extern "c" fn sysctlnametomib(name: [*]const u8, mibp: ?*c_int, sizep: ?*usize) c_int;
+
 pub use @import("../os/darwin_errno.zig");
 
 pub const _errno = __error;
@@ -86,3 +110,51 @@ pub const pthread_attr_t = extern struct {
     __sig: c_long,
     __opaque: [56]u8,
 };
+
+/// Renamed from `kevent` to `Kevent` to avoid conflict with function name.
+pub const Kevent = extern struct {
+    ident: usize,
+    filter: i16,
+    flags: u16,
+    fflags: u32,
+    data: isize,
+    udata: usize,
+};
+
+// sys/types.h on macos uses #pragma pack(4) so these checks are
+// to make sure the struct is laid out the same. These values were
+// produced from C code using the offsetof macro.
+const std = @import("../index.zig");
+const assert = std.debug.assert;
+
+comptime {
+    assert(@offsetOf(Kevent, "ident") == 0);
+    assert(@offsetOf(Kevent, "filter") == 8);
+    assert(@offsetOf(Kevent, "flags") == 10);
+    assert(@offsetOf(Kevent, "fflags") == 12);
+    assert(@offsetOf(Kevent, "data") == 16);
+    assert(@offsetOf(Kevent, "udata") == 24);
+}
+
+pub const kevent64_s = extern struct {
+    ident: u64,
+    filter: i16,
+    flags: u16,
+    fflags: u32,
+    data: i64,
+    udata: u64,
+    ext: [2]u64,
+};
+
+// sys/types.h on macos uses #pragma pack() so these checks are
+// to make sure the struct is laid out the same. These values were
+// produced from C code using the offsetof macro.
+comptime {
+    assert(@offsetOf(kevent64_s, "ident") == 0);
+    assert(@offsetOf(kevent64_s, "filter") == 8);
+    assert(@offsetOf(kevent64_s, "flags") == 10);
+    assert(@offsetOf(kevent64_s, "fflags") == 12);
+    assert(@offsetOf(kevent64_s, "data") == 16);
+    assert(@offsetOf(kevent64_s, "udata") == 24);
+    assert(@offsetOf(kevent64_s, "ext") == 32);
+}
diff --git a/std/debug/index.zig b/std/debug/index.zig
index 54a9af4b9e..3070e0b40b 100644
--- a/std/debug/index.zig
+++ b/std/debug/index.zig
@@ -12,6 +12,11 @@ const builtin = @import("builtin");
 pub const FailingAllocator = @import("failing_allocator.zig").FailingAllocator;
 pub const failing_allocator = FailingAllocator.init(global_allocator, 0);
 
+pub const runtime_safety = switch (builtin.mode) {
+    builtin.Mode.Debug, builtin.Mode.ReleaseSafe => true,
+    builtin.Mode.ReleaseFast, builtin.Mode.ReleaseSmall => false,
+};
+
 /// Tries to write to stderr, unbuffered, and ignores any error returned.
 /// Does not append a newline.
 /// TODO atomic/multithread support
@@ -1125,7 +1130,7 @@ fn readILeb128(in_stream: var) !i64 {
 
 /// This should only be used in temporary test programs.
 pub const global_allocator = &global_fixed_allocator.allocator;
-var global_fixed_allocator = std.heap.FixedBufferAllocator.init(global_allocator_mem[0..]);
+var global_fixed_allocator = std.heap.ThreadSafeFixedBufferAllocator.init(global_allocator_mem[0..]);
 var global_allocator_mem: [100 * 1024]u8 = undefined;
 
 // TODO make thread safe
diff --git a/std/event.zig b/std/event.zig
index c6ac04a9d0..de51f8c87e 100644
--- a/std/event.zig
+++ b/std/event.zig
@@ -4,6 +4,7 @@ const assert = std.debug.assert;
 const event = this;
 const mem = std.mem;
 const posix = std.os.posix;
+const windows = std.os.windows;
 const AtomicRmwOp = builtin.AtomicRmwOp;
 const AtomicOrder = builtin.AtomicOrder;
 
@@ -11,53 +12,69 @@ pub const TcpServer = struct {
     handleRequestFn: async<*mem.Allocator> fn (*TcpServer, *const std.net.Address, *const std.os.File) void,
 
     loop: *Loop,
-    sockfd: i32,
+    sockfd: ?i32,
     accept_coro: ?promise,
     listen_address: std.net.Address,
 
     waiting_for_emfile_node: PromiseNode,
+    listen_resume_node: event.Loop.ResumeNode,
 
     const PromiseNode = std.LinkedList(promise).Node;
 
-    pub fn init(loop: *Loop) !TcpServer {
-        const sockfd = try std.os.posixSocket(posix.AF_INET, posix.SOCK_STREAM | posix.SOCK_CLOEXEC | posix.SOCK_NONBLOCK, posix.PROTO_tcp);
-        errdefer std.os.close(sockfd);
-
+    pub fn init(loop: *Loop) TcpServer {
         // TODO can't initialize handler coroutine here because we need well defined copy elision
         return TcpServer{
             .loop = loop,
-            .sockfd = sockfd,
+            .sockfd = null,
             .accept_coro = null,
             .handleRequestFn = undefined,
             .waiting_for_emfile_node = undefined,
             .listen_address = undefined,
+            .listen_resume_node = event.Loop.ResumeNode{
+                .id = event.Loop.ResumeNode.Id.Basic,
+                .handle = undefined,
+            },
         };
     }
 
-    pub fn listen(self: *TcpServer, address: *const std.net.Address, handleRequestFn: async<*mem.Allocator> fn (*TcpServer, *const std.net.Address, *const std.os.File) void) !void {
+    pub fn listen(
+        self: *TcpServer,
+        address: *const std.net.Address,
+        handleRequestFn: async<*mem.Allocator> fn (*TcpServer, *const std.net.Address, *const std.os.File) void,
+    ) !void {
         self.handleRequestFn = handleRequestFn;
 
-        try std.os.posixBind(self.sockfd, &address.os_addr);
-        try std.os.posixListen(self.sockfd, posix.SOMAXCONN);
-        self.listen_address = std.net.Address.initPosix(try std.os.posixGetSockName(self.sockfd));
+        const sockfd = try std.os.posixSocket(posix.AF_INET, posix.SOCK_STREAM | posix.SOCK_CLOEXEC | posix.SOCK_NONBLOCK, posix.PROTO_tcp);
+        errdefer std.os.close(sockfd);
+        self.sockfd = sockfd;
+
+        try std.os.posixBind(sockfd, &address.os_addr);
+        try std.os.posixListen(sockfd, posix.SOMAXCONN);
+        self.listen_address = std.net.Address.initPosix(try std.os.posixGetSockName(sockfd));
 
         self.accept_coro = try async<self.loop.allocator> TcpServer.handler(self);
         errdefer cancel self.accept_coro.?;
 
-        try self.loop.addFd(self.sockfd, self.accept_coro.?);
-        errdefer self.loop.removeFd(self.sockfd);
+        self.listen_resume_node.handle = self.accept_coro.?;
+        try self.loop.addFd(sockfd, &self.listen_resume_node);
+        errdefer self.loop.removeFd(sockfd);
+    }
+
+    /// Stop listening
+    pub fn close(self: *TcpServer) void {
+        self.loop.removeFd(self.sockfd.?);
+        std.os.close(self.sockfd.?);
     }
 
     pub fn deinit(self: *TcpServer) void {
-        self.loop.removeFd(self.sockfd);
         if (self.accept_coro) |accept_coro| cancel accept_coro;
-        std.os.close(self.sockfd);
+        if (self.sockfd) |sockfd| std.os.close(sockfd);
     }
 
     pub async fn handler(self: *TcpServer) void {
         while (true) {
             var accepted_addr: std.net.Address = undefined;
-            if (std.os.posixAccept(self.sockfd, &accepted_addr.os_addr, posix.SOCK_NONBLOCK | posix.SOCK_CLOEXEC)) |accepted_fd| {
+            if (std.os.posixAccept(self.sockfd.?, &accepted_addr.os_addr, posix.SOCK_NONBLOCK | posix.SOCK_CLOEXEC)) |accepted_fd| {
                 var socket = std.os.File.openHandle(accepted_fd);
                 _ = async<self.loop.allocator> self.handleRequestFn(self, accepted_addr, socket) catch |err| switch (err) {
                     error.OutOfMemory => {
@@ -95,46 +112,276 @@ pub const TcpServer = struct {
 
 pub const Loop = struct {
     allocator: *mem.Allocator,
-    keep_running: bool,
     next_tick_queue: std.atomic.QueueMpsc(promise),
     os_data: OsData,
+    final_resume_node: ResumeNode,
+    dispatch_lock: u8, // TODO make this a bool
+    pending_event_count: usize,
+    extra_threads: []*std.os.Thread,
 
-    const OsData = switch (builtin.os) {
-        builtin.Os.linux => struct {
-            epollfd: i32,
-        },
-        else => struct {},
-    };
+    // pre-allocated eventfds. all permanently active.
+    // this is how we send promises to be resumed on other threads.
+    available_eventfd_resume_nodes: std.atomic.Stack(ResumeNode.EventFd),
+    eventfd_resume_nodes: []std.atomic.Stack(ResumeNode.EventFd).Node,
 
     pub const NextTickNode = std.atomic.QueueMpsc(promise).Node;
 
+    pub const ResumeNode = struct {
+        id: Id,
+        handle: promise,
+
+        pub const Id = enum {
+            Basic,
+            Stop,
+            EventFd,
+        };
+
+        pub const EventFd = switch (builtin.os) {
+            builtin.Os.macosx => MacOsEventFd,
+            builtin.Os.linux => struct {
+                base: ResumeNode,
+                epoll_op: u32,
+                eventfd: i32,
+            },
+            builtin.Os.windows => struct {
+                base: ResumeNode,
+                completion_key: usize,
+            },
+            else => @compileError("unsupported OS"),
+        };
+
+        const MacOsEventFd = struct {
+            base: ResumeNode,
+            kevent: posix.Kevent,
+        };
+    };
+
+    /// After initialization, call run().
+    /// TODO copy elision / named return values so that the threads referencing *Loop
+    /// have the correct pointer value.
+    fn initSingleThreaded(self: *Loop, allocator: *mem.Allocator) !void {
+        return self.initInternal(allocator, 1);
+    }
+
     /// The allocator must be thread-safe because we use it for multiplexing
     /// coroutines onto kernel threads.
-    pub fn init(allocator: *mem.Allocator) !Loop {
-        var self = Loop{
-            .keep_running = true,
+    /// After initialization, call run().
+    /// TODO copy elision / named return values so that the threads referencing *Loop
+    /// have the correct pointer value.
+    fn initMultiThreaded(self: *Loop, allocator: *mem.Allocator) !void {
+        const core_count = try std.os.cpuCount(allocator);
+        return self.initInternal(allocator, core_count);
+    }
+
+    /// Thread count is the total thread count. The thread pool size will be
+    /// max(thread_count - 1, 0)
+    fn initInternal(self: *Loop, allocator: *mem.Allocator, thread_count: usize) !void {
+        self.* = Loop{
+            .pending_event_count = 0,
             .allocator = allocator,
             .os_data = undefined,
             .next_tick_queue = std.atomic.QueueMpsc(promise).init(),
+            .dispatch_lock = 1, // start locked so threads go directly into epoll wait
+            .extra_threads = undefined,
+            .available_eventfd_resume_nodes = std.atomic.Stack(ResumeNode.EventFd).init(),
+            .eventfd_resume_nodes = undefined,
+            .final_resume_node = ResumeNode{
+                .id = ResumeNode.Id.Stop,
+                .handle = undefined,
+            },
         };
-        try self.initOsData();
-        errdefer self.deinitOsData();
+        const extra_thread_count = thread_count - 1;
+        self.eventfd_resume_nodes = try self.allocator.alloc(
+            std.atomic.Stack(ResumeNode.EventFd).Node,
+            extra_thread_count,
+        );
+        errdefer self.allocator.free(self.eventfd_resume_nodes);
+
+        self.extra_threads = try self.allocator.alloc(*std.os.Thread, extra_thread_count);
+        errdefer self.allocator.free(self.extra_threads);
 
-        return self;
+        try self.initOsData(extra_thread_count);
+        errdefer self.deinitOsData();
     }
 
     /// must call stop before deinit
     pub fn deinit(self: *Loop) void {
         self.deinitOsData();
+        self.allocator.free(self.extra_threads);
     }
 
-    const InitOsDataError = std.os.LinuxEpollCreateError;
+    const InitOsDataError = std.os.LinuxEpollCreateError || mem.Allocator.Error || std.os.LinuxEventFdError ||
+        std.os.SpawnThreadError || std.os.LinuxEpollCtlError || std.os.BsdKEventError ||
+        std.os.WindowsCreateIoCompletionPortError;
 
-    fn initOsData(self: *Loop) InitOsDataError!void {
+    const wakeup_bytes = []u8{0x1} ** 8;
+
+    fn initOsData(self: *Loop, extra_thread_count: usize) InitOsDataError!void {
         switch (builtin.os) {
             builtin.Os.linux => {
-                self.os_data.epollfd = try std.os.linuxEpollCreate(std.os.linux.EPOLL_CLOEXEC);
+                errdefer {
+                    while (self.available_eventfd_resume_nodes.pop()) |node| std.os.close(node.data.eventfd);
+                }
+                for (self.eventfd_resume_nodes) |*eventfd_node| {
+                    eventfd_node.* = std.atomic.Stack(ResumeNode.EventFd).Node{
+                        .data = ResumeNode.EventFd{
+                            .base = ResumeNode{
+                                .id = ResumeNode.Id.EventFd,
+                                .handle = undefined,
+                            },
+                            .eventfd = try std.os.linuxEventFd(1, posix.EFD_CLOEXEC | posix.EFD_NONBLOCK),
+                            .epoll_op = posix.EPOLL_CTL_ADD,
+                        },
+                        .next = undefined,
+                    };
+                    self.available_eventfd_resume_nodes.push(eventfd_node);
+                }
+
+                self.os_data.epollfd = try std.os.linuxEpollCreate(posix.EPOLL_CLOEXEC);
                 errdefer std.os.close(self.os_data.epollfd);
+
+                self.os_data.final_eventfd = try std.os.linuxEventFd(0, posix.EFD_CLOEXEC | posix.EFD_NONBLOCK);
+                errdefer std.os.close(self.os_data.final_eventfd);
+
+                self.os_data.final_eventfd_event = posix.epoll_event{
+                    .events = posix.EPOLLIN,
+                    .data = posix.epoll_data{ .ptr = @ptrToInt(&self.final_resume_node) },
+                };
+                try std.os.linuxEpollCtl(
+                    self.os_data.epollfd,
+                    posix.EPOLL_CTL_ADD,
+                    self.os_data.final_eventfd,
+                    &self.os_data.final_eventfd_event,
+                );
+
+                var extra_thread_index: usize = 0;
+                errdefer {
+                    // writing 8 bytes to an eventfd cannot fail
+                    std.os.posixWrite(self.os_data.final_eventfd, wakeup_bytes) catch unreachable;
+                    while (extra_thread_index != 0) {
+                        extra_thread_index -= 1;
+                        self.extra_threads[extra_thread_index].wait();
+                    }
+                }
+                while (extra_thread_index < extra_thread_count) : (extra_thread_index += 1) {
+                    self.extra_threads[extra_thread_index] = try std.os.spawnThread(self, workerRun);
+                }
+            },
+            builtin.Os.macosx => {
+                self.os_data.kqfd = try std.os.bsdKQueue();
+                errdefer std.os.close(self.os_data.kqfd);
+
+                self.os_data.kevents = try self.allocator.alloc(posix.Kevent, extra_thread_count);
+                errdefer self.allocator.free(self.os_data.kevents);
+
+                const eventlist = ([*]posix.Kevent)(undefined)[0..0];
+
+                for (self.eventfd_resume_nodes) |*eventfd_node, i| {
+                    eventfd_node.* = std.atomic.Stack(ResumeNode.EventFd).Node{
+                        .data = ResumeNode.EventFd{
+                            .base = ResumeNode{
+                                .id = ResumeNode.Id.EventFd,
+                                .handle = undefined,
+                            },
+                            // this one is for sending events
+                            .kevent = posix.Kevent{
+                                .ident = i,
+                                .filter = posix.EVFILT_USER,
+                                .flags = posix.EV_CLEAR | posix.EV_ADD | posix.EV_DISABLE,
+                                .fflags = 0,
+                                .data = 0,
+                                .udata = @ptrToInt(&eventfd_node.data.base),
+                            },
+                        },
+                        .next = undefined,
+                    };
+                    self.available_eventfd_resume_nodes.push(eventfd_node);
+                    const kevent_array = (*[1]posix.Kevent)(&eventfd_node.data.kevent);
+                    _ = try std.os.bsdKEvent(self.os_data.kqfd, kevent_array, eventlist, null);
+                    eventfd_node.data.kevent.flags = posix.EV_CLEAR | posix.EV_ENABLE;
+                    eventfd_node.data.kevent.fflags = posix.NOTE_TRIGGER;
+                    // this one is for waiting for events
+                    self.os_data.kevents[i] = posix.Kevent{
+                        .ident = i,
+                        .filter = posix.EVFILT_USER,
+                        .flags = 0,
+                        .fflags = 0,
+                        .data = 0,
+                        .udata = @ptrToInt(&eventfd_node.data.base),
+                    };
+                }
+
+                // Pre-add so that we cannot get error.SystemResources
+                // later when we try to activate it.
+                self.os_data.final_kevent = posix.Kevent{
+                    .ident = extra_thread_count,
+                    .filter = posix.EVFILT_USER,
+                    .flags = posix.EV_ADD | posix.EV_DISABLE,
+                    .fflags = 0,
+                    .data = 0,
+                    .udata = @ptrToInt(&self.final_resume_node),
+                };
+                const kevent_array = (*[1]posix.Kevent)(&self.os_data.final_kevent);
+                _ = try std.os.bsdKEvent(self.os_data.kqfd, kevent_array, eventlist, null);
+                self.os_data.final_kevent.flags = posix.EV_ENABLE;
+                self.os_data.final_kevent.fflags = posix.NOTE_TRIGGER;
+
+                var extra_thread_index: usize = 0;
+                errdefer {
+                    _ = std.os.bsdKEvent(self.os_data.kqfd, kevent_array, eventlist, null) catch unreachable;
+                    while (extra_thread_index != 0) {
+                        extra_thread_index -= 1;
+                        self.extra_threads[extra_thread_index].wait();
+                    }
+                }
+                while (extra_thread_index < extra_thread_count) : (extra_thread_index += 1) {
+                    self.extra_threads[extra_thread_index] = try std.os.spawnThread(self, workerRun);
+                }
+            },
+            builtin.Os.windows => {
+                self.os_data.extra_thread_count = extra_thread_count;
+
+                self.os_data.io_port = try std.os.windowsCreateIoCompletionPort(
+                    windows.INVALID_HANDLE_VALUE,
+                    null,
+                    undefined,
+                    undefined,
+                );
+                errdefer std.os.close(self.os_data.io_port);
+
+                for (self.eventfd_resume_nodes) |*eventfd_node, i| {
+                    eventfd_node.* = std.atomic.Stack(ResumeNode.EventFd).Node{
+                        .data = ResumeNode.EventFd{
+                            .base = ResumeNode{
+                                .id = ResumeNode.Id.EventFd,
+                                .handle = undefined,
+                            },
+                            // this one is for sending events
+                            .completion_key = @ptrToInt(&eventfd_node.data.base),
+                        },
+                        .next = undefined,
+                    };
+                    self.available_eventfd_resume_nodes.push(eventfd_node);
+                }
+
+                var extra_thread_index: usize = 0;
+                errdefer {
+                    var i: usize = 0;
+                    while (i < extra_thread_index) : (i += 1) {
+                        while (true) {
+                            const overlapped = @intToPtr(?*windows.OVERLAPPED, 0x1);
+                            std.os.windowsPostQueuedCompletionStatus(self.os_data.io_port, undefined, @ptrToInt(&self.final_resume_node), overlapped) catch continue;
+                            break;
+                        }
+                    }
+                    while (extra_thread_index != 0) {
+                        extra_thread_index -= 1;
+                        self.extra_threads[extra_thread_index].wait();
+                    }
+                }
+                while (extra_thread_index < extra_thread_count) : (extra_thread_index += 1) {
+                    self.extra_threads[extra_thread_index] = try std.os.spawnThread(self, workerRun);
+                }
             },
             else => {},
         }
@@ -142,65 +389,281 @@ pub const Loop = struct {
 
     fn deinitOsData(self: *Loop) void {
         switch (builtin.os) {
-            builtin.Os.linux => std.os.close(self.os_data.epollfd),
+            builtin.Os.linux => {
+                std.os.close(self.os_data.final_eventfd);
+                while (self.available_eventfd_resume_nodes.pop()) |node| std.os.close(node.data.eventfd);
+                std.os.close(self.os_data.epollfd);
+                self.allocator.free(self.eventfd_resume_nodes);
+            },
+            builtin.Os.macosx => {
+                self.allocator.free(self.os_data.kevents);
+                std.os.close(self.os_data.kqfd);
+            },
+            builtin.Os.windows => {
+                std.os.close(self.os_data.io_port);
+            },
             else => {},
         }
     }
 
-    pub fn addFd(self: *Loop, fd: i32, prom: promise) !void {
+    /// resume_node must live longer than the promise that it holds a reference to.
+    pub fn addFd(self: *Loop, fd: i32, resume_node: *ResumeNode) !void {
+        _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Add, 1, AtomicOrder.SeqCst);
+        errdefer {
+            _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+        }
+        try self.modFd(
+            fd,
+            posix.EPOLL_CTL_ADD,
+            std.os.linux.EPOLLIN | std.os.linux.EPOLLOUT | std.os.linux.EPOLLET,
+            resume_node,
+        );
+    }
+
+    pub fn modFd(self: *Loop, fd: i32, op: u32, events: u32, resume_node: *ResumeNode) !void {
         var ev = std.os.linux.epoll_event{
-            .events = std.os.linux.EPOLLIN | std.os.linux.EPOLLOUT | std.os.linux.EPOLLET,
-            .data = std.os.linux.epoll_data{ .ptr = @ptrToInt(prom) },
+            .events = events,
+            .data = std.os.linux.epoll_data{ .ptr = @ptrToInt(resume_node) },
         };
-        try std.os.linuxEpollCtl(self.os_data.epollfd, std.os.linux.EPOLL_CTL_ADD, fd, &ev);
+        try std.os.linuxEpollCtl(self.os_data.epollfd, op, fd, &ev);
     }
 
     pub fn removeFd(self: *Loop, fd: i32) void {
+        self.removeFdNoCounter(fd);
+        _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+    }
+
+    fn removeFdNoCounter(self: *Loop, fd: i32) void {
         std.os.linuxEpollCtl(self.os_data.epollfd, std.os.linux.EPOLL_CTL_DEL, fd, undefined) catch {};
     }
-    async fn waitFd(self: *Loop, fd: i32) !void {
+
+    pub async fn waitFd(self: *Loop, fd: i32) !void {
         defer self.removeFd(fd);
         suspend |p| {
-            try self.addFd(fd, p);
+            // TODO explicitly put this memory in the coroutine frame #1194
+            var resume_node = ResumeNode{
+                .id = ResumeNode.Id.Basic,
+                .handle = p,
+            };
+            try self.addFd(fd, &resume_node);
         }
     }
 
-    pub fn stop(self: *Loop) void {
-        // TODO make atomic
-        self.keep_running = false;
-        // TODO activate an fd in the epoll set which should cancel all the promises
-    }
-
-    /// bring your own linked list node. this means it can't fail.
+    /// Bring your own linked list node. This means it can't fail.
     pub fn onNextTick(self: *Loop, node: *NextTickNode) void {
+        _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Add, 1, AtomicOrder.SeqCst);
         self.next_tick_queue.put(node);
     }
 
     pub fn run(self: *Loop) void {
-        while (self.keep_running) {
-            // TODO multiplex the next tick queue and the epoll event results onto a thread pool
-            while (self.next_tick_queue.get()) |node| {
-                resume node.data;
-            }
-            if (!self.keep_running) break;
-
-            self.dispatchOsEvents();
+        _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+        self.workerRun();
+        for (self.extra_threads) |extra_thread| {
+            extra_thread.wait();
         }
     }
 
-    fn dispatchOsEvents(self: *Loop) void {
-        switch (builtin.os) {
-            builtin.Os.linux => {
-                var events: [16]std.os.linux.epoll_event = undefined;
-                const count = std.os.linuxEpollWait(self.os_data.epollfd, events[0..], -1);
-                for (events[0..count]) |ev| {
-                    const p = @intToPtr(promise, ev.data.ptr);
-                    resume p;
+    fn workerRun(self: *Loop) void {
+        start_over: while (true) {
+            if (@atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst) == 0) {
+                while (self.next_tick_queue.get()) |next_tick_node| {
+                    const handle = next_tick_node.data;
+                    if (self.next_tick_queue.isEmpty()) {
+                        // last node, just resume it
+                        _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+                        resume handle;
+                        _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+                        continue :start_over;
+                    }
+
+                    // non-last node, stick it in the epoll/kqueue set so that
+                    // other threads can get to it
+                    if (self.available_eventfd_resume_nodes.pop()) |resume_stack_node| {
+                        const eventfd_node = &resume_stack_node.data;
+                        eventfd_node.base.handle = handle;
+                        switch (builtin.os) {
+                            builtin.Os.macosx => {
+                                const kevent_array = (*[1]posix.Kevent)(&eventfd_node.kevent);
+                                const eventlist = ([*]posix.Kevent)(undefined)[0..0];
+                                _ = std.os.bsdKEvent(self.os_data.kqfd, kevent_array, eventlist, null) catch {
+                                    // fine, we didn't need it anyway
+                                    _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+                                    self.available_eventfd_resume_nodes.push(resume_stack_node);
+                                    resume handle;
+                                    _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+                                    continue :start_over;
+                                };
+                            },
+                            builtin.Os.linux => {
+                                // the pending count is already accounted for
+                                const epoll_events = posix.EPOLLONESHOT | std.os.linux.EPOLLIN | std.os.linux.EPOLLOUT | std.os.linux.EPOLLET;
+                                self.modFd(eventfd_node.eventfd, eventfd_node.epoll_op, epoll_events, &eventfd_node.base) catch {
+                                    // fine, we didn't need it anyway
+                                    _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+                                    self.available_eventfd_resume_nodes.push(resume_stack_node);
+                                    resume handle;
+                                    _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+                                    continue :start_over;
+                                };
+                            },
+                            builtin.Os.windows => {
+                                // this value is never dereferenced but we need it to be non-null so that
+                                // the consumer code can decide whether to read the completion key.
+                                // it has to do this for normal I/O, so we match that behavior here.
+                                const overlapped = @intToPtr(?*windows.OVERLAPPED, 0x1);
+                                std.os.windowsPostQueuedCompletionStatus(self.os_data.io_port, undefined, eventfd_node.completion_key, overlapped) catch {
+                                    // fine, we didn't need it anyway
+                                    _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+                                    self.available_eventfd_resume_nodes.push(resume_stack_node);
+                                    resume handle;
+                                    _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+                                    continue :start_over;
+                                };
+                            },
+                            else => @compileError("unsupported OS"),
+                        }
+                    } else {
+                        // threads are too busy, can't add another eventfd to wake one up
+                        _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+                        resume handle;
+                        _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+                        continue :start_over;
+                    }
                 }
-            },
-            else => {},
+
+                const pending_event_count = @atomicLoad(usize, &self.pending_event_count, AtomicOrder.SeqCst);
+                if (pending_event_count == 0) {
+                    // cause all the threads to stop
+                    switch (builtin.os) {
+                        builtin.Os.linux => {
+                            // writing 8 bytes to an eventfd cannot fail
+                            std.os.posixWrite(self.os_data.final_eventfd, wakeup_bytes) catch unreachable;
+                            return;
+                        },
+                        builtin.Os.macosx => {
+                            const final_kevent = (*[1]posix.Kevent)(&self.os_data.final_kevent);
+                            const eventlist = ([*]posix.Kevent)(undefined)[0..0];
+                            // cannot fail because we already added it and this just enables it
+                            _ = std.os.bsdKEvent(self.os_data.kqfd, final_kevent, eventlist, null) catch unreachable;
+                            return;
+                        },
+                        builtin.Os.windows => {
+                            var i: usize = 0;
+                            while (i < self.os_data.extra_thread_count) : (i += 1) {
+                                while (true) {
+                                    const overlapped = @intToPtr(?*windows.OVERLAPPED, 0x1);
+                                    std.os.windowsPostQueuedCompletionStatus(self.os_data.io_port, undefined, @ptrToInt(&self.final_resume_node), overlapped) catch continue;
+                                    break;
+                                }
+                            }
+                            return;
+                        },
+                        else => @compileError("unsupported OS"),
+                    }
+                }
+
+                _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+            }
+
+            switch (builtin.os) {
+                builtin.Os.linux => {
+                    // only process 1 event so we don't steal from other threads
+                    var events: [1]std.os.linux.epoll_event = undefined;
+                    const count = std.os.linuxEpollWait(self.os_data.epollfd, events[0..], -1);
+                    for (events[0..count]) |ev| {
+                        const resume_node = @intToPtr(*ResumeNode, ev.data.ptr);
+                        const handle = resume_node.handle;
+                        const resume_node_id = resume_node.id;
+                        switch (resume_node_id) {
+                            ResumeNode.Id.Basic => {},
+                            ResumeNode.Id.Stop => return,
+                            ResumeNode.Id.EventFd => {
+                                const event_fd_node = @fieldParentPtr(ResumeNode.EventFd, "base", resume_node);
+                                event_fd_node.epoll_op = posix.EPOLL_CTL_MOD;
+                                const stack_node = @fieldParentPtr(std.atomic.Stack(ResumeNode.EventFd).Node, "data", event_fd_node);
+                                self.available_eventfd_resume_nodes.push(stack_node);
+                            },
+                        }
+                        resume handle;
+                        if (resume_node_id == ResumeNode.Id.EventFd) {
+                            _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+                        }
+                    }
+                },
+                builtin.Os.macosx => {
+                    var eventlist: [1]posix.Kevent = undefined;
+                    const count = std.os.bsdKEvent(self.os_data.kqfd, self.os_data.kevents, eventlist[0..], null) catch unreachable;
+                    for (eventlist[0..count]) |ev| {
+                        const resume_node = @intToPtr(*ResumeNode, ev.udata);
+                        const handle = resume_node.handle;
+                        const resume_node_id = resume_node.id;
+                        switch (resume_node_id) {
+                            ResumeNode.Id.Basic => {},
+                            ResumeNode.Id.Stop => return,
+                            ResumeNode.Id.EventFd => {
+                                const event_fd_node = @fieldParentPtr(ResumeNode.EventFd, "base", resume_node);
+                                const stack_node = @fieldParentPtr(std.atomic.Stack(ResumeNode.EventFd).Node, "data", event_fd_node);
+                                self.available_eventfd_resume_nodes.push(stack_node);
+                            },
+                        }
+                        resume handle;
+                        if (resume_node_id == ResumeNode.Id.EventFd) {
+                            _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+                        }
+                    }
+                },
+                builtin.Os.windows => {
+                    var completion_key: usize = undefined;
+                    while (true) {
+                        var nbytes: windows.DWORD = undefined;
+                        var overlapped: ?*windows.OVERLAPPED = undefined;
+                        switch (std.os.windowsGetQueuedCompletionStatus(self.os_data.io_port, &nbytes, &completion_key, &overlapped, windows.INFINITE)) {
+                            std.os.WindowsWaitResult.Aborted => return,
+                            std.os.WindowsWaitResult.Normal => {},
+                        }
+                        if (overlapped != null) break;
+                    }
+                    const resume_node = @intToPtr(*ResumeNode, completion_key);
+                    const handle = resume_node.handle;
+                    const resume_node_id = resume_node.id;
+                    switch (resume_node_id) {
+                        ResumeNode.Id.Basic => {},
+                        ResumeNode.Id.Stop => return,
+                        ResumeNode.Id.EventFd => {
+                            const event_fd_node = @fieldParentPtr(ResumeNode.EventFd, "base", resume_node);
+                            const stack_node = @fieldParentPtr(std.atomic.Stack(ResumeNode.EventFd).Node, "data", event_fd_node);
+                            self.available_eventfd_resume_nodes.push(stack_node);
+                        },
+                    }
+                    resume handle;
+                    if (resume_node_id == ResumeNode.Id.EventFd) {
+                        _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst);
+                    }
+                },
+                else => @compileError("unsupported OS"),
+            }
         }
     }
+
+    const OsData = switch (builtin.os) {
+        builtin.Os.linux => struct {
+            epollfd: i32,
+            final_eventfd: i32,
+            final_eventfd_event: std.os.linux.epoll_event,
+        },
+        builtin.Os.macosx => MacOsData,
+        builtin.Os.windows => struct {
+            io_port: windows.HANDLE,
+            extra_thread_count: usize,
+        },
+        else => struct {},
+    };
+
+    const MacOsData = struct {
+        kqfd: i32,
+        final_kevent: posix.Kevent,
+        kevents: []posix.Kevent,
+    };
 };
 
 /// many producer, many consumer, thread-safe, lock-free, runtime configurable buffer size
@@ -304,9 +767,7 @@ pub fn Channel(comptime T: type) type {
             // TODO integrate this function with named return values
             // so we can get rid of this extra result copy
             var result: T = undefined;
-            var debug_handle: usize = undefined;
             suspend |handle| {
-                debug_handle = @ptrToInt(handle);
                 var my_tick_node = Loop.NextTickNode{
                     .next = undefined,
                     .data = handle,
@@ -438,9 +899,8 @@ test "listen on a port, send bytes, receive bytes" {
             const self = @fieldParentPtr(Self, "tcp_server", tcp_server);
             var socket = _socket.*; // TODO https://github.com/ziglang/zig/issues/733
             defer socket.close();
-            const next_handler = async errorableHandler(self, _addr, socket) catch |err| switch (err) {
-                error.OutOfMemory => @panic("unable to handle connection: out of memory"),
-            };
+            // TODO guarantee elision of this allocation
+            const next_handler = async errorableHandler(self, _addr, socket) catch unreachable;
             (await next_handler) catch |err| {
                 std.debug.panic("unable to handle connection: {}\n", err);
             };
@@ -461,17 +921,18 @@ test "listen on a port, send bytes, receive bytes" {
     const ip4addr = std.net.parseIp4("127.0.0.1") catch unreachable;
     const addr = std.net.Address.initIp4(ip4addr, 0);
 
-    var loop = try Loop.init(std.debug.global_allocator);
-    var server = MyServer{ .tcp_server = try TcpServer.init(&loop) };
+    var loop: Loop = undefined;
+    try loop.initSingleThreaded(std.debug.global_allocator);
+    var server = MyServer{ .tcp_server = TcpServer.init(&loop) };
     defer server.tcp_server.deinit();
     try server.tcp_server.listen(addr, MyServer.handler);
 
-    const p = try async<std.debug.global_allocator> doAsyncTest(&loop, server.tcp_server.listen_address);
+    const p = try async<std.debug.global_allocator> doAsyncTest(&loop, server.tcp_server.listen_address, &server.tcp_server);
     defer cancel p;
     loop.run();
 }
 
-async fn doAsyncTest(loop: *Loop, address: *const std.net.Address) void {
+async fn doAsyncTest(loop: *Loop, address: *const std.net.Address, server: *TcpServer) void {
     errdefer @panic("test failure");
 
     var socket_file = try await try async event.connect(loop, address);
@@ -481,7 +942,7 @@ async fn doAsyncTest(loop: *Loop, address: *const std.net.Address) void {
     const amt_read = try socket_file.read(buf[0..]);
     const msg = buf[0..amt_read];
     assert(mem.eql(u8, msg, "hello from server\n"));
-    loop.stop();
+    server.close();
 }
 
 test "std.event.Channel" {
@@ -490,7 +951,9 @@ test "std.event.Channel" {
 
     const allocator = &da.allocator;
 
-    var loop = try Loop.init(allocator);
+    var loop: Loop = undefined;
+    // TODO make a multi threaded test
+    try loop.initSingleThreaded(allocator);
     defer loop.deinit();
 
     const channel = try Channel(i32).create(&loop, 0);
@@ -515,11 +978,246 @@ async fn testChannelGetter(loop: *Loop, channel: *Channel(i32)) void {
     const value2_promise = try async channel.get();
     const value2 = await value2_promise;
     assert(value2 == 4567);
-
-    loop.stop();
 }
 
 async fn testChannelPutter(channel: *Channel(i32)) void {
     await (async channel.put(1234) catch @panic("out of memory"));
     await (async channel.put(4567) catch @panic("out of memory"));
 }
+
+/// Thread-safe async/await lock.
+/// Does not make any syscalls - coroutines which are waiting for the lock are suspended, and
+/// are resumed when the lock is released, in order.
+pub const Lock = struct {
+    loop: *Loop,
+    shared_bit: u8, // TODO make this a bool
+    queue: Queue,
+    queue_empty_bit: u8, // TODO make this a bool
+
+    const Queue = std.atomic.QueueMpsc(promise);
+
+    pub const Held = struct {
+        lock: *Lock,
+
+        pub fn release(self: Held) void {
+            // Resume the next item from the queue.
+            if (self.lock.queue.get()) |node| {
+                self.lock.loop.onNextTick(node);
+                return;
+            }
+
+            // We need to release the lock.
+            _ = @atomicRmw(u8, &self.lock.queue_empty_bit, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst);
+            _ = @atomicRmw(u8, &self.lock.shared_bit, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+
+            // There might be a queue item. If we know the queue is empty, we can be done,
+            // because the other actor will try to obtain the lock.
+            // But if there's a queue item, we are the actor which must loop and attempt
+            // to grab the lock again.
+            if (@atomicLoad(u8, &self.lock.queue_empty_bit, AtomicOrder.SeqCst) == 1) {
+                return;
+            }
+
+            while (true) {
+                const old_bit = @atomicRmw(u8, &self.lock.shared_bit, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst);
+                if (old_bit != 0) {
+                    // We did not obtain the lock. Great, the queue is someone else's problem.
+                    return;
+                }
+
+                // Resume the next item from the queue.
+                if (self.lock.queue.get()) |node| {
+                    self.lock.loop.onNextTick(node);
+                    return;
+                }
+
+                // Release the lock again.
+                _ = @atomicRmw(u8, &self.lock.queue_empty_bit, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst);
+                _ = @atomicRmw(u8, &self.lock.shared_bit, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+
+                // Find out if we can be done.
+                if (@atomicLoad(u8, &self.lock.queue_empty_bit, AtomicOrder.SeqCst) == 1) {
+                    return;
+                }
+            }
+        }
+    };
+
+    pub fn init(loop: *Loop) Lock {
+        return Lock{
+            .loop = loop,
+            .shared_bit = 0,
+            .queue = Queue.init(),
+            .queue_empty_bit = 1,
+        };
+    }
+
+    /// Must be called when not locked. Not thread safe.
+    /// All calls to acquire() and release() must complete before calling deinit().
+    pub fn deinit(self: *Lock) void {
+        assert(self.shared_bit == 0);
+        while (self.queue.get()) |node| cancel node.data;
+    }
+
+    pub async fn acquire(self: *Lock) Held {
+        s: suspend |handle| {
+            // TODO explicitly put this memory in the coroutine frame #1194
+            var my_tick_node = Loop.NextTickNode{
+                .data = handle,
+                .next = undefined,
+            };
+
+            self.queue.put(&my_tick_node);
+
+            // At this point, we are in the queue, so we might have already been resumed and this coroutine
+            // frame might be destroyed. For the rest of the suspend block we cannot access the coroutine frame.
+
+            // We set this bit so that later we can rely on the fact, that if queue_empty_bit is 1, some actor
+            // will attempt to grab the lock.
+            _ = @atomicRmw(u8, &self.queue_empty_bit, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+
+            while (true) {
+                const old_bit = @atomicRmw(u8, &self.shared_bit, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst);
+                if (old_bit != 0) {
+                    // We did not obtain the lock. Trust that our queue entry will resume us, and allow
+                    // suspend to complete.
+                    break;
+                }
+                // We got the lock. However we might have already been resumed from the queue.
+                if (self.queue.get()) |node| {
+                    // Whether this node is us or someone else, we tail resume it.
+                    resume node.data;
+                    break;
+                } else {
+                    // We already got resumed, and there are none left in the queue, which means that
+                    // we aren't even supposed to hold the lock right now.
+                    _ = @atomicRmw(u8, &self.queue_empty_bit, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst);
+                    _ = @atomicRmw(u8, &self.shared_bit, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst);
+
+                    // There might be a queue item. If we know the queue is empty, we can be done,
+                    // because the other actor will try to obtain the lock.
+                    // But if there's a queue item, we are the actor which must loop and attempt
+                    // to grab the lock again.
+                    if (@atomicLoad(u8, &self.queue_empty_bit, AtomicOrder.SeqCst) == 1) {
+                        break;
+                    } else {
+                        continue;
+                    }
+                }
+                unreachable;
+            }
+        }
+
+        return Held{ .lock = self };
+    }
+};
+
+/// Thread-safe async/await lock that protects one piece of data.
+/// Does not make any syscalls - coroutines which are waiting for the lock are suspended, and
+/// are resumed when the lock is released, in order.
+pub fn Locked(comptime T: type) type {
+    return struct {
+        lock: Lock,
+        private_data: T,
+
+        const Self = this;
+
+        pub const HeldLock = struct {
+            value: *T,
+            held: Lock.Held,
+
+            pub fn release(self: HeldLock) void {
+                self.held.release();
+            }
+        };
+
+        pub fn init(loop: *Loop, data: T) Self {
+            return Self{
+                .lock = Lock.init(loop),
+                .private_data = data,
+            };
+        }
+
+        pub fn deinit(self: *Self) void {
+            self.lock.deinit();
+        }
+
+        pub async fn acquire(self: *Self) HeldLock {
+            return HeldLock{
+            // TODO guaranteed allocation elision
+                .held = await (async self.lock.acquire() catch unreachable),
+                .value = &self.private_data,
+            };
+        }
+    };
+}
+
+test "std.event.Lock" {
+    var da = std.heap.DirectAllocator.init();
+    defer da.deinit();
+
+    const allocator = &da.allocator;
+
+    var loop: Loop = undefined;
+    try loop.initMultiThreaded(allocator);
+    defer loop.deinit();
+
+    var lock = Lock.init(&loop);
+    defer lock.deinit();
+
+    const handle = try async<allocator> testLock(&loop, &lock);
+    defer cancel handle;
+    loop.run();
+
+    assert(mem.eql(i32, shared_test_data, [1]i32{3 * @intCast(i32, shared_test_data.len)} ** shared_test_data.len));
+}
+
+async fn testLock(loop: *Loop, lock: *Lock) void {
+    // TODO explicitly put next tick node memory in the coroutine frame #1194
+    suspend |p| {
+        resume p;
+    }
+    const handle1 = async lockRunner(lock) catch @panic("out of memory");
+    var tick_node1 = Loop.NextTickNode{
+        .next = undefined,
+        .data = handle1,
+    };
+    loop.onNextTick(&tick_node1);
+
+    const handle2 = async lockRunner(lock) catch @panic("out of memory");
+    var tick_node2 = Loop.NextTickNode{
+        .next = undefined,
+        .data = handle2,
+    };
+    loop.onNextTick(&tick_node2);
+
+    const handle3 = async lockRunner(lock) catch @panic("out of memory");
+    var tick_node3 = Loop.NextTickNode{
+        .next = undefined,
+        .data = handle3,
+    };
+    loop.onNextTick(&tick_node3);
+
+    await handle1;
+    await handle2;
+    await handle3;
+}
+
+var shared_test_data = [1]i32{0} ** 10;
+var shared_test_index: usize = 0;
+
+async fn lockRunner(lock: *Lock) void {
+    suspend; // resumed by onNextTick
+
+    var i: usize = 0;
+    while (i < shared_test_data.len) : (i += 1) {
+        const lock_promise = async lock.acquire() catch @panic("out of memory");
+        const handle = await lock_promise;
+        defer handle.release();
+
+        shared_test_index = 0;
+        while (shared_test_index < shared_test_data.len) : (shared_test_index += 1) {
+            shared_test_data[shared_test_index] = shared_test_data[shared_test_index] + 1;
+        }
+    }
+}
diff --git a/std/heap.zig b/std/heap.zig
index 2e02733da1..ef22c8d0c5 100644
--- a/std/heap.zig
+++ b/std/heap.zig
@@ -38,7 +38,7 @@ fn cFree(self: *Allocator, old_mem: []u8) void {
 }
 
 /// This allocator makes a syscall directly for every allocation and free.
-/// TODO make this thread-safe. The windows implementation will need some atomics.
+/// Thread-safe and lock-free.
 pub const DirectAllocator = struct {
     allocator: Allocator,
     heap_handle: ?HeapHandle,
@@ -74,34 +74,34 @@ pub const DirectAllocator = struct {
                 const alloc_size = if (alignment <= os.page_size) n else n + alignment;
                 const addr = p.mmap(null, alloc_size, p.PROT_READ | p.PROT_WRITE, p.MAP_PRIVATE | p.MAP_ANONYMOUS, -1, 0);
                 if (addr == p.MAP_FAILED) return error.OutOfMemory;
-
                 if (alloc_size == n) return @intToPtr([*]u8, addr)[0..n];
 
-                var aligned_addr = addr & ~usize(alignment - 1);
-                aligned_addr += alignment;
+                const aligned_addr = (addr & ~usize(alignment - 1)) + alignment;
 
-                //We can unmap the unused portions of our mmap, but we must only
-                //  pass munmap bytes that exist outside our allocated pages or it
-                //  will happily eat us too
+                // We can unmap the unused portions of our mmap, but we must only
+                // pass munmap bytes that exist outside our allocated pages or it
+                // will happily eat us too.
 
-                //Since alignment > page_size, we are by definition on a page boundry
+                // Since alignment > page_size, we are by definition on a page boundary.
                 const unused_start = addr;
                 const unused_len = aligned_addr - 1 - unused_start;
 
-                var err = p.munmap(unused_start, unused_len);
-                debug.assert(p.getErrno(err) == 0);
+                const err = p.munmap(unused_start, unused_len);
+                assert(p.getErrno(err) == 0);
 
-                //It is impossible that there is an unoccupied page at the top of our
-                //  mmap.
+                // It is impossible that there is an unoccupied page at the top of our
+                // mmap.
 
                 return @intToPtr([*]u8, aligned_addr)[0..n];
             },
             Os.windows => {
                 const amt = n + alignment + @sizeOf(usize);
-                const heap_handle = self.heap_handle orelse blk: {
-                    const hh = os.windows.HeapCreate(os.windows.HEAP_NO_SERIALIZE, amt, 0) orelse return error.OutOfMemory;
-                    self.heap_handle = hh;
-                    break :blk hh;
+                const optional_heap_handle = @atomicLoad(?HeapHandle, &self.heap_handle, builtin.AtomicOrder.SeqCst);
+                const heap_handle = optional_heap_handle orelse blk: {
+                    const hh = os.windows.HeapCreate(0, amt, 0) orelse return error.OutOfMemory;
+                    const other_hh = @cmpxchgStrong(?HeapHandle, &self.heap_handle, null, hh, builtin.AtomicOrder.SeqCst, builtin.AtomicOrder.SeqCst) orelse break :blk hh;
+                    _ = os.windows.HeapDestroy(hh);
+                    break :blk other_hh.?; // can't be null because of the cmpxchg
                 };
                 const ptr = os.windows.HeapAlloc(heap_handle, 0, amt) orelse return error.OutOfMemory;
                 const root_addr = @ptrToInt(ptr);
@@ -361,6 +361,73 @@ pub const ThreadSafeFixedBufferAllocator = struct {
     fn free(allocator: *Allocator, bytes: []u8) void {}
 };
 
+pub fn stackFallback(comptime size: usize, fallback_allocator: *Allocator) StackFallbackAllocator(size) {
+    return StackFallbackAllocator(size){
+        .buffer = undefined,
+        .fallback_allocator = fallback_allocator,
+        .fixed_buffer_allocator = undefined,
+        .allocator = Allocator{
+            .allocFn = StackFallbackAllocator(size).alloc,
+            .reallocFn = StackFallbackAllocator(size).realloc,
+            .freeFn = StackFallbackAllocator(size).free,
+        },
+    };
+}
+
+pub fn StackFallbackAllocator(comptime size: usize) type {
+    return struct {
+        const Self = this;
+
+        buffer: [size]u8,
+        allocator: Allocator,
+        fallback_allocator: *Allocator,
+        fixed_buffer_allocator: FixedBufferAllocator,
+
+        pub fn get(self: *Self) *Allocator {
+            self.fixed_buffer_allocator = FixedBufferAllocator.init(self.buffer[0..]);
+            return &self.allocator;
+        }
+
+        fn alloc(allocator: *Allocator, n: usize, alignment: u29) ![]u8 {
+            const self = @fieldParentPtr(Self, "allocator", allocator);
+            return FixedBufferAllocator.alloc(&self.fixed_buffer_allocator.allocator, n, alignment) catch
+                self.fallback_allocator.allocFn(self.fallback_allocator, n, alignment);
+        }
+
+        fn realloc(allocator: *Allocator, old_mem: []u8, new_size: usize, alignment: u29) ![]u8 {
+            const self = @fieldParentPtr(Self, "allocator", allocator);
+            const in_buffer = @ptrToInt(old_mem.ptr) >= @ptrToInt(&self.buffer) and
+                @ptrToInt(old_mem.ptr) < @ptrToInt(&self.buffer) + self.buffer.len;
+            if (in_buffer) {
+                return FixedBufferAllocator.realloc(
+                    &self.fixed_buffer_allocator.allocator,
+                    old_mem,
+                    new_size,
+                    alignment,
+                ) catch {
+                    const result = try self.fallback_allocator.allocFn(
+                        self.fallback_allocator,
+                        new_size,
+                        alignment,
+                    );
+                    mem.copy(u8, result, old_mem);
+                    return result;
+                };
+            }
+            return self.fallback_allocator.reallocFn(self.fallback_allocator, old_mem, new_size, alignment);
+        }
+
+        fn free(allocator: *Allocator, bytes: []u8) void {
+            const self = @fieldParentPtr(Self, "allocator", allocator);
+            const in_buffer = @ptrToInt(bytes.ptr) >= @ptrToInt(&self.buffer) and
+                @ptrToInt(bytes.ptr) < @ptrToInt(&self.buffer) + self.buffer.len;
+            if (!in_buffer) {
+                return self.fallback_allocator.freeFn(self.fallback_allocator, bytes);
+            }
+        }
+    };
+}
+
 test "c_allocator" {
     if (builtin.link_libc) {
         var slice = c_allocator.alloc(u8, 50) catch return;
diff --git a/std/mem.zig b/std/mem.zig
index b52d3e9f68..555e1e249d 100644
--- a/std/mem.zig
+++ b/std/mem.zig
@@ -6,7 +6,7 @@ const builtin = @import("builtin");
 const mem = this;
 
 pub const Allocator = struct {
-    const Error = error{OutOfMemory};
+    pub const Error = error{OutOfMemory};
 
     /// Allocate byte_count bytes and return them in a slice, with the
     /// slice's pointer aligned at least to alignment bytes.
diff --git a/std/os/darwin.zig b/std/os/darwin.zig
index 15e5608343..4134e382fc 100644
--- a/std/os/darwin.zig
+++ b/std/os/darwin.zig
@@ -264,6 +264,224 @@ pub const SIGUSR1 = 30;
 /// user defined signal 2
 pub const SIGUSR2 = 31;
 
+/// no flag value
+pub const KEVENT_FLAG_NONE = 0x000;
+
+/// immediate timeout
+pub const KEVENT_FLAG_IMMEDIATE = 0x001;
+
+/// output events only include change
+pub const KEVENT_FLAG_ERROR_EVENTS = 0x002;
+
+/// add event to kq (implies enable)
+pub const EV_ADD = 0x0001;
+
+/// delete event from kq
+pub const EV_DELETE = 0x0002;
+
+/// enable event
+pub const EV_ENABLE = 0x0004;
+
+/// disable event (not reported)
+pub const EV_DISABLE = 0x0008;
+
+/// only report one occurrence
+pub const EV_ONESHOT = 0x0010;
+
+/// clear event state after reporting
+pub const EV_CLEAR = 0x0020;
+
+/// force immediate event output
+/// ... with or without EV_ERROR
+/// ... use KEVENT_FLAG_ERROR_EVENTS
+///     on syscalls supporting flags
+pub const EV_RECEIPT = 0x0040;
+
+/// disable event after reporting
+pub const EV_DISPATCH = 0x0080;
+
+/// unique kevent per udata value
+pub const EV_UDATA_SPECIFIC = 0x0100;
+
+/// ... in combination with EV_DELETE
+/// will defer delete until udata-specific
+/// event enabled. EINPROGRESS will be
+/// returned to indicate the deferral
+pub const EV_DISPATCH2 = EV_DISPATCH | EV_UDATA_SPECIFIC;
+
+/// report that source has vanished
+/// ... only valid with EV_DISPATCH2
+pub const EV_VANISHED = 0x0200;
+
+/// reserved by system
+pub const EV_SYSFLAGS = 0xF000;
+
+/// filter-specific flag
+pub const EV_FLAG0 = 0x1000;
+
+/// filter-specific flag
+pub const EV_FLAG1 = 0x2000;
+
+/// EOF detected
+pub const EV_EOF = 0x8000;
+
+/// error, data contains errno
+pub const EV_ERROR = 0x4000;
+
+pub const EV_POLL = EV_FLAG0;
+pub const EV_OOBAND = EV_FLAG1;
+
+pub const EVFILT_READ = -1;
+pub const EVFILT_WRITE = -2;
+
+/// attached to aio requests
+pub const EVFILT_AIO = -3;
+
+/// attached to vnodes
+pub const EVFILT_VNODE = -4;
+
+/// attached to struct proc
+pub const EVFILT_PROC = -5;
+
+/// attached to struct proc
+pub const EVFILT_SIGNAL = -6;
+
+/// timers
+pub const EVFILT_TIMER = -7;
+
+/// Mach portsets
+pub const EVFILT_MACHPORT = -8;
+
+/// Filesystem events
+pub const EVFILT_FS = -9;
+
+/// User events
+pub const EVFILT_USER = -10;
+
+/// Virtual memory events
+pub const EVFILT_VM = -12;
+
+/// Exception events
+pub const EVFILT_EXCEPT = -15;
+
+pub const EVFILT_SYSCOUNT = 17;
+
+/// On input, NOTE_TRIGGER causes the event to be triggered for output.
+pub const NOTE_TRIGGER = 0x01000000;
+
+/// ignore input fflags
+pub const NOTE_FFNOP = 0x00000000;
+
+/// and fflags
+pub const NOTE_FFAND = 0x40000000;
+
+/// or fflags
+pub const NOTE_FFOR = 0x80000000;
+
+/// copy fflags
+pub const NOTE_FFCOPY = 0xc0000000;
+
+/// mask for operations
+pub const NOTE_FFCTRLMASK = 0xc0000000;
+pub const NOTE_FFLAGSMASK = 0x00ffffff;
+
+/// low water mark
+pub const NOTE_LOWAT = 0x00000001;
+
+/// OOB data
+pub const NOTE_OOB = 0x00000002;
+
+/// vnode was removed
+pub const NOTE_DELETE = 0x00000001;
+
+/// data contents changed
+pub const NOTE_WRITE = 0x00000002;
+
+/// size increased
+pub const NOTE_EXTEND = 0x00000004;
+
+/// attributes changed
+pub const NOTE_ATTRIB = 0x00000008;
+
+/// link count changed
+pub const NOTE_LINK = 0x00000010;
+
+/// vnode was renamed
+pub const NOTE_RENAME = 0x00000020;
+
+/// vnode access was revoked
+pub const NOTE_REVOKE = 0x00000040;
+
+/// No specific vnode event: to test for EVFILT_READ      activation
+pub const NOTE_NONE = 0x00000080;
+
+/// vnode was unlocked by flock(2)
+pub const NOTE_FUNLOCK = 0x00000100;
+
+/// process exited
+pub const NOTE_EXIT = 0x80000000;
+
+/// process forked
+pub const NOTE_FORK = 0x40000000;
+
+/// process exec'd
+pub const NOTE_EXEC = 0x20000000;
+
+/// shared with EVFILT_SIGNAL
+pub const NOTE_SIGNAL = 0x08000000;
+
+/// exit status to be returned, valid for child       process only
+pub const NOTE_EXITSTATUS = 0x04000000;
+
+/// provide details on reasons for exit
+pub const NOTE_EXIT_DETAIL = 0x02000000;
+
+/// mask for signal & exit status
+pub const NOTE_PDATAMASK = 0x000fffff;
+pub const NOTE_PCTRLMASK = (~NOTE_PDATAMASK);
+
+pub const NOTE_EXIT_DETAIL_MASK = 0x00070000;
+pub const NOTE_EXIT_DECRYPTFAIL = 0x00010000;
+pub const NOTE_EXIT_MEMORY = 0x00020000;
+pub const NOTE_EXIT_CSERROR = 0x00040000;
+
+/// will react on memory          pressure
+pub const NOTE_VM_PRESSURE = 0x80000000;
+
+/// will quit on memory       pressure, possibly after cleaning up dirty state
+pub const NOTE_VM_PRESSURE_TERMINATE = 0x40000000;
+
+/// will quit immediately on      memory pressure
+pub const NOTE_VM_PRESSURE_SUDDEN_TERMINATE = 0x20000000;
+
+/// there was an error
+pub const NOTE_VM_ERROR = 0x10000000;
+
+/// data is seconds
+pub const NOTE_SECONDS = 0x00000001;
+
+/// data is microseconds
+pub const NOTE_USECONDS = 0x00000002;
+
+/// data is nanoseconds
+pub const NOTE_NSECONDS = 0x00000004;
+
+/// absolute timeout
+pub const NOTE_ABSOLUTE = 0x00000008;
+
+/// ext[1] holds leeway for power aware timers
+pub const NOTE_LEEWAY = 0x00000010;
+
+/// system does minimal timer coalescing
+pub const NOTE_CRITICAL = 0x00000020;
+
+/// system does maximum timer coalescing
+pub const NOTE_BACKGROUND = 0x00000040;
+pub const NOTE_MACH_CONTINUOUS_TIME = 0x00000080;
+
+/// data is mach absolute time units
+pub const NOTE_MACHTIME = 0x00000100;
+
 fn wstatus(x: i32) i32 {
     return x & 0o177;
 }
@@ -385,6 +603,31 @@ pub fn getdirentries64(fd: i32, buf_ptr: [*]u8, buf_len: usize, basep: *i64) usi
     return errnoWrap(@bitCast(isize, c.__getdirentries64(fd, buf_ptr, buf_len, basep)));
 }
 
+pub fn kqueue() usize {
+    return errnoWrap(c.kqueue());
+}
+
+pub fn kevent(kq: i32, changelist: []const Kevent, eventlist: []Kevent, timeout: ?*const timespec) usize {
+    return errnoWrap(c.kevent(
+        kq,
+        changelist.ptr,
+        @intCast(c_int, changelist.len),
+        eventlist.ptr,
+        @intCast(c_int, eventlist.len),
+        timeout,
+    ));
+}
+
+pub fn kevent64(
+    kq: i32,
+    changelist: []const kevent64_s,
+    eventlist: []kevent64_s,
+    flags: u32,
+    timeout: ?*const timespec,
+) usize {
+    return errnoWrap(c.kevent64(kq, changelist.ptr, changelist.len, eventlist.ptr, eventlist.len, flags, timeout));
+}
+
 pub fn mkdir(path: [*]const u8, mode: u32) usize {
     return errnoWrap(c.mkdir(path, mode));
 }
@@ -393,6 +636,18 @@ pub fn symlink(existing: [*]const u8, new: [*]const u8) usize {
     return errnoWrap(c.symlink(existing, new));
 }
 
+pub fn sysctl(name: [*]c_int, namelen: c_uint, oldp: ?*c_void, oldlenp: ?*usize, newp: ?*c_void, newlen: usize) usize {
+    return errnoWrap(c.sysctl(name, namelen, oldp, oldlenp, newp, newlen));
+}
+
+pub fn sysctlbyname(name: [*]const u8, oldp: ?*c_void, oldlenp: ?*usize, newp: ?*c_void, newlen: usize) usize {
+    return errnoWrap(c.sysctlbyname(name, oldp, oldlenp, newp, newlen));
+}
+
+pub fn sysctlnametomib(name: [*]const u8, mibp: ?*c_int, sizep: ?*usize) usize {
+    return errnoWrap(c.sysctlnametomib(name, wibp, sizep));
+}
+
 pub fn rename(old: [*]const u8, new: [*]const u8) usize {
     return errnoWrap(c.rename(old, new));
 }
@@ -474,6 +729,10 @@ pub const dirent = c.dirent;
 pub const sa_family_t = c.sa_family_t;
 pub const sockaddr = c.sockaddr;
 
+/// Renamed from `kevent` to `Kevent` to avoid conflict with the syscall.
+pub const Kevent = c.Kevent;
+pub const kevent64_s = c.kevent64_s;
+
 /// Renamed from `sigaction` to `Sigaction` to avoid conflict with the syscall.
 pub const Sigaction = struct {
     handler: extern fn (i32) void,
diff --git a/std/os/index.zig b/std/os/index.zig
index 0f9aea914d..79b2d2ff53 100644
--- a/std/os/index.zig
+++ b/std/os/index.zig
@@ -61,6 +61,15 @@ pub const windowsLoadDll = windows_util.windowsLoadDll;
 pub const windowsUnloadDll = windows_util.windowsUnloadDll;
 pub const createWindowsEnvBlock = windows_util.createWindowsEnvBlock;
 
+pub const WindowsCreateIoCompletionPortError = windows_util.WindowsCreateIoCompletionPortError;
+pub const windowsCreateIoCompletionPort = windows_util.windowsCreateIoCompletionPort;
+
+pub const WindowsPostQueuedCompletionStatusError = windows_util.WindowsPostQueuedCompletionStatusError;
+pub const windowsPostQueuedCompletionStatus = windows_util.windowsPostQueuedCompletionStatus;
+
+pub const WindowsWaitResult = windows_util.WindowsWaitResult;
+pub const windowsGetQueuedCompletionStatus = windows_util.windowsGetQueuedCompletionStatus;
+
 pub const WindowsWaitError = windows_util.WaitError;
 pub const WindowsOpenError = windows_util.OpenError;
 pub const WindowsWriteError = windows_util.WriteError;
@@ -2317,6 +2326,30 @@ pub fn linuxEpollWait(epfd: i32, events: []linux.epoll_event, timeout: i32) usiz
     }
 }
 
+pub const LinuxEventFdError = error{
+    InvalidFlagValue,
+    SystemResources,
+    ProcessFdQuotaExceeded,
+    SystemFdQuotaExceeded,
+
+    Unexpected,
+};
+
+pub fn linuxEventFd(initval: u32, flags: u32) LinuxEventFdError!i32 {
+    const rc = posix.eventfd(initval, flags);
+    const err = posix.getErrno(rc);
+    switch (err) {
+        0 => return @intCast(i32, rc),
+        else => return unexpectedErrorPosix(err),
+
+        posix.EINVAL => return LinuxEventFdError.InvalidFlagValue,
+        posix.EMFILE => return LinuxEventFdError.ProcessFdQuotaExceeded,
+        posix.ENFILE => return LinuxEventFdError.SystemFdQuotaExceeded,
+        posix.ENODEV => return LinuxEventFdError.SystemResources,
+        posix.ENOMEM => return LinuxEventFdError.SystemResources,
+    }
+}
+
 pub const PosixGetSockNameError = error{
     /// Insufficient resources were available in the system to perform the operation.
     SystemResources,
@@ -2576,11 +2609,17 @@ pub fn spawnThread(context: var, comptime startFn: var) SpawnThreadError!*Thread
                 thread: Thread,
                 inner: Context,
             };
-            extern fn threadMain(arg: windows.LPVOID) windows.DWORD {
-                if (@sizeOf(Context) == 0) {
-                    return startFn({});
-                } else {
-                    return startFn(@ptrCast(*Context, @alignCast(@alignOf(Context), arg)).*);
+            extern fn threadMain(raw_arg: windows.LPVOID) windows.DWORD {
+                const arg = if (@sizeOf(Context) == 0) {} else @ptrCast(*Context, @alignCast(@alignOf(Context), raw_arg)).*;
+                switch (@typeId(@typeOf(startFn).ReturnType)) {
+                    builtin.TypeId.Int => {
+                        return startFn(arg);
+                    },
+                    builtin.TypeId.Void => {
+                        startFn(arg);
+                        return 0;
+                    },
+                    else => @compileError("expected return type of startFn to be 'u8', 'noreturn', 'void', or '!void'"),
                 }
             }
         };
@@ -2613,10 +2652,17 @@ pub fn spawnThread(context: var, comptime startFn: var) SpawnThreadError!*Thread
 
     const MainFuncs = struct {
         extern fn linuxThreadMain(ctx_addr: usize) u8 {
-            if (@sizeOf(Context) == 0) {
-                return startFn({});
-            } else {
-                return startFn(@intToPtr(*const Context, ctx_addr).*);
+            const arg = if (@sizeOf(Context) == 0) {} else @intToPtr(*const Context, ctx_addr).*;
+
+            switch (@typeId(@typeOf(startFn).ReturnType)) {
+                builtin.TypeId.Int => {
+                    return startFn(arg);
+                },
+                builtin.TypeId.Void => {
+                    startFn(arg);
+                    return 0;
+                },
+                else => @compileError("expected return type of startFn to be 'u8', 'noreturn', 'void', or '!void'"),
             }
         }
         extern fn posixThreadMain(ctx: ?*c_void) ?*c_void {
@@ -2725,3 +2771,128 @@ pub fn posixFStat(fd: i32) !posix.Stat {
 
     return stat;
 }
+
+pub const CpuCountError = error{
+    OutOfMemory,
+    PermissionDenied,
+    Unexpected,
+};
+
+pub fn cpuCount(fallback_allocator: *mem.Allocator) CpuCountError!usize {
+    switch (builtin.os) {
+        builtin.Os.macosx => {
+            var count: c_int = undefined;
+            var count_len: usize = @sizeOf(c_int);
+            const rc = posix.sysctlbyname(c"hw.ncpu", @ptrCast(*c_void, &count), &count_len, null, 0);
+            const err = posix.getErrno(rc);
+            switch (err) {
+                0 => return @intCast(usize, count),
+                posix.EFAULT => unreachable,
+                posix.EINVAL => unreachable,
+                posix.ENOMEM => return CpuCountError.OutOfMemory,
+                posix.ENOTDIR => unreachable,
+                posix.EISDIR => unreachable,
+                posix.ENOENT => unreachable,
+                posix.EPERM => unreachable,
+                else => return os.unexpectedErrorPosix(err),
+            }
+        },
+        builtin.Os.linux => {
+            const usize_count = 16;
+            const allocator = std.heap.stackFallback(usize_count * @sizeOf(usize), fallback_allocator).get();
+
+            var set = try allocator.alloc(usize, usize_count);
+            defer allocator.free(set);
+
+            while (true) {
+                const rc = posix.sched_getaffinity(0, set);
+                const err = posix.getErrno(rc);
+                switch (err) {
+                    0 => {
+                        if (rc < set.len * @sizeOf(usize)) {
+                            const result = set[0 .. rc / @sizeOf(usize)];
+                            var sum: usize = 0;
+                            for (result) |x| {
+                                sum += @popCount(x);
+                            }
+                            return sum;
+                        } else {
+                            set = try allocator.realloc(usize, set, set.len * 2);
+                            continue;
+                        }
+                    },
+                    posix.EFAULT => unreachable,
+                    posix.EINVAL => unreachable,
+                    posix.EPERM => return CpuCountError.PermissionDenied,
+                    posix.ESRCH => unreachable,
+                    else => return os.unexpectedErrorPosix(err),
+                }
+            }
+        },
+        builtin.Os.windows => {
+            var system_info: windows.SYSTEM_INFO = undefined;
+            windows.GetSystemInfo(&system_info);
+            return @intCast(usize, system_info.dwNumberOfProcessors);
+        },
+        else => @compileError("unsupported OS"),
+    }
+}
+
+pub const BsdKQueueError = error{
+    /// The per-process limit on the number of open file descriptors has been reached.
+    ProcessFdQuotaExceeded,
+
+    /// The system-wide limit on the total number of open files has been reached.
+    SystemFdQuotaExceeded,
+
+    Unexpected,
+};
+
+pub fn bsdKQueue() BsdKQueueError!i32 {
+    const rc = posix.kqueue();
+    const err = posix.getErrno(rc);
+    switch (err) {
+        0 => return @intCast(i32, rc),
+        posix.EMFILE => return BsdKQueueError.ProcessFdQuotaExceeded,
+        posix.ENFILE => return BsdKQueueError.SystemFdQuotaExceeded,
+        else => return unexpectedErrorPosix(err),
+    }
+}
+
+pub const BsdKEventError = error{
+    /// The process does not have permission to register a filter.
+    AccessDenied,
+
+    /// The event could not be found to be modified or deleted.
+    EventNotFound,
+
+    /// No memory was available to register the event.
+    SystemResources,
+
+    /// The specified process to attach to does not exist.
+    ProcessNotFound,
+};
+
+pub fn bsdKEvent(
+    kq: i32,
+    changelist: []const posix.Kevent,
+    eventlist: []posix.Kevent,
+    timeout: ?*const posix.timespec,
+) BsdKEventError!usize {
+    while (true) {
+        const rc = posix.kevent(kq, changelist, eventlist, timeout);
+        const err = posix.getErrno(rc);
+        switch (err) {
+            0 => return rc,
+            posix.EACCES => return BsdKEventError.AccessDenied,
+            posix.EFAULT => unreachable,
+            posix.EBADF => unreachable,
+            posix.EINTR => continue,
+            posix.EINVAL => unreachable,
+            posix.ENOENT => return BsdKEventError.EventNotFound,
+            posix.ENOMEM => return BsdKEventError.SystemResources,
+            posix.ESRCH => return BsdKEventError.ProcessNotFound,
+            else => unreachable,
+        }
+    }
+}
diff --git a/std/os/linux/index.zig b/std/os/linux/index.zig
index 65aa659c82..69bc30bad0 100644
--- a/std/os/linux/index.zig
+++ b/std/os/linux/index.zig
@@ -523,6 +523,10 @@ pub const CLONE_NEWPID = 0x20000000;
 pub const CLONE_NEWNET = 0x40000000;
 pub const CLONE_IO = 0x80000000;
 
+pub const EFD_SEMAPHORE = 1;
+pub const EFD_CLOEXEC = O_CLOEXEC;
+pub const EFD_NONBLOCK = O_NONBLOCK;
+
 pub const MS_RDONLY = 1;
 pub const MS_NOSUID = 2;
 pub const MS_NODEV = 4;
@@ -1193,6 +1197,10 @@ pub fn fremovexattr(fd: usize, name: [*]const u8) usize {
     return syscall2(SYS_fremovexattr, fd, @ptrToInt(name));
 }
 
+pub fn sched_getaffinity(pid: i32, set: []usize) usize {
+    return syscall3(SYS_sched_getaffinity, @bitCast(usize, isize(pid)), set.len * @sizeOf(usize), @ptrToInt(set.ptr));
+}
+
 pub const epoll_data = packed union {
     ptr: usize,
     fd: i32,
@@ -1221,6 +1229,10 @@ pub fn epoll_wait(epoll_fd: i32, events: [*]epoll_event, maxevents: u32, timeout
     return syscall4(SYS_epoll_wait, @intCast(usize, epoll_fd), @ptrToInt(events), @intCast(usize, maxevents), @intCast(usize, timeout));
 }
 
+pub fn eventfd(count: u32, flags: u32) usize {
+    return syscall2(SYS_eventfd2, count, flags);
+}
+
 pub fn timerfd_create(clockid: i32, flags: u32) usize {
     return syscall2(SYS_timerfd_create, @intCast(usize, clockid), @intCast(usize, flags));
 }
diff --git a/std/os/test.zig b/std/os/test.zig
index 5a977a569a..52e6ffdc1c 100644
--- a/std/os/test.zig
+++ b/std/os/test.zig
@@ -58,3 +58,8 @@ fn start2(ctx: *i32) u8 {
     _ = @atomicRmw(i32, ctx, AtomicRmwOp.Add, 1, AtomicOrder.SeqCst);
     return 0;
 }
+
+test "cpu count" {
+    const cpu_count = try std.os.cpuCount(a);
+    assert(cpu_count >= 1);
+}
diff --git a/std/os/windows/index.zig b/std/os/windows/index.zig
index d631c6adbf..f73b8ec261 100644
--- a/std/os/windows/index.zig
+++ b/std/os/windows/index.zig
@@ -59,6 +59,9 @@ pub extern "kernel32" stdcallcc fn CreateSymbolicLinkA(
     dwFlags: DWORD,
 ) BOOLEAN;
 
+
+pub extern "kernel32" stdcallcc fn CreateIoCompletionPort(FileHandle: HANDLE, ExistingCompletionPort: ?HANDLE, CompletionKey: ULONG_PTR, NumberOfConcurrentThreads: DWORD) ?HANDLE;
+
 pub extern "kernel32" stdcallcc fn CreateThread(lpThreadAttributes: ?LPSECURITY_ATTRIBUTES, dwStackSize: SIZE_T, lpStartAddress: LPTHREAD_START_ROUTINE, lpParameter: ?LPVOID, dwCreationFlags: DWORD, lpThreadId: ?LPDWORD) ?HANDLE;
 
 pub extern "kernel32" stdcallcc fn DeleteFileA(lpFileName: LPCSTR) BOOL;
@@ -106,7 +109,9 @@ pub extern "kernel32" stdcallcc fn GetFinalPathNameByHandleA(
 ) DWORD;
 
 pub extern "kernel32" stdcallcc fn GetProcessHeap() ?HANDLE;
+pub extern "kernel32" stdcallcc fn GetQueuedCompletionStatus(CompletionPort: HANDLE, lpNumberOfBytesTransferred: LPDWORD, lpCompletionKey: *ULONG_PTR, lpOverlapped: *?*OVERLAPPED, dwMilliseconds: DWORD) BOOL;
 
+pub extern "kernel32" stdcallcc fn GetSystemInfo(lpSystemInfo: *SYSTEM_INFO) void;
 pub extern "kernel32" stdcallcc fn GetSystemTimeAsFileTime(*FILETIME) void;
 
 pub extern "kernel32" stdcallcc fn HeapCreate(flOptions: DWORD, dwInitialSize: SIZE_T, dwMaximumSize: SIZE_T) ?HANDLE;
@@ -129,6 +134,9 @@ pub extern "kernel32" stdcallcc fn MoveFileExA(
     dwFlags: DWORD,
 ) BOOL;
 
+
+pub extern "kernel32" stdcallcc fn PostQueuedCompletionStatus(CompletionPort: HANDLE, dwNumberOfBytesTransferred: DWORD, dwCompletionKey: ULONG_PTR, lpOverlapped: ?*OVERLAPPED) BOOL;
+
 pub extern "kernel32" stdcallcc fn QueryPerformanceCounter(lpPerformanceCount: *LARGE_INTEGER) BOOL;
 
 pub extern "kernel32" stdcallcc fn QueryPerformanceFrequency(lpFrequency: *LARGE_INTEGER) BOOL;
@@ -204,6 +212,7 @@ pub const SIZE_T = usize;
 pub const TCHAR = if (UNICODE) WCHAR else u8;
 pub const UINT = c_uint;
 pub const ULONG_PTR = usize;
+pub const DWORD_PTR = ULONG_PTR;
 pub const UNICODE = false;
 pub const WCHAR = u16;
 pub const WORD = u16;
@@ -413,3 +422,22 @@ pub const FILETIME = extern struct {
     dwLowDateTime: DWORD,
     dwHighDateTime: DWORD,
 };
+
+pub const SYSTEM_INFO = extern struct {
+    anon1: extern union {
+        dwOemId: DWORD,
+        anon2: extern struct {
+            wProcessorArchitecture: WORD,
+            wReserved: WORD,
+        },
+    },
+    dwPageSize: DWORD,
+    lpMinimumApplicationAddress: LPVOID,
+    lpMaximumApplicationAddress: LPVOID,
+    dwActiveProcessorMask: DWORD_PTR,
+    dwNumberOfProcessors: DWORD,
+    dwProcessorType: DWORD,
+    dwAllocationGranularity: DWORD,
+    wProcessorLevel: WORD,
+    wProcessorRevision: WORD,
+};
diff --git a/std/os/windows/util.zig b/std/os/windows/util.zig
index 45b205451d..b04e8efc4b 100644
--- a/std/os/windows/util.zig
+++ b/std/os/windows/util.zig
@@ -214,3 +214,50 @@ pub fn windowsFindNextFile(handle: windows.HANDLE, find_file_data: *windows.WIN3
     }
     return true;
 }
+
+
+pub const WindowsCreateIoCompletionPortError = error {
+    Unexpected,
+};
+
+pub fn windowsCreateIoCompletionPort(file_handle: windows.HANDLE, existing_completion_port: ?windows.HANDLE, completion_key: usize, concurrent_thread_count: windows.DWORD) !windows.HANDLE {
+    const handle = windows.CreateIoCompletionPort(file_handle, existing_completion_port, completion_key, concurrent_thread_count) orelse {
+        const err = windows.GetLastError();
+        switch (err) {
+            else => return os.unexpectedErrorWindows(err),
+        }
+    };
+    return handle;
+}
+
+pub const WindowsPostQueuedCompletionStatusError = error {
+    Unexpected,
+};
+
+pub fn windowsPostQueuedCompletionStatus(completion_port: windows.HANDLE, bytes_transferred_count: windows.DWORD, completion_key: usize, lpOverlapped: ?*windows.OVERLAPPED) WindowsPostQueuedCompletionStatusError!void {
+    if (windows.PostQueuedCompletionStatus(completion_port, bytes_transferred_count, completion_key, lpOverlapped) == 0) {
+        const err = windows.GetLastError();
+        switch (err) {
+            else => return os.unexpectedErrorWindows(err),
+        }
+    }
+}
+
+pub const WindowsWaitResult = error {
+    Normal,
+    Aborted,
+};
+
+pub fn windowsGetQueuedCompletionStatus(completion_port: windows.HANDLE, bytes_transferred_count: *windows.DWORD, lpCompletionKey: *usize, lpOverlapped: *?*windows.OVERLAPPED, dwMilliseconds: windows.DWORD) WindowsWaitResult {
+    if (windows.GetQueuedCompletionStatus(completion_port, bytes_transferred_count, lpCompletionKey, lpOverlapped, dwMilliseconds) == windows.FALSE) {
+        if (std.debug.runtime_safety) {
+            const err = windows.GetLastError();
+            if (err != windows.ERROR.ABANDONED_WAIT_0) {
+                std.debug.warn("err: {}\n", err);
+            }
+            assert(err == windows.ERROR.ABANDONED_WAIT_0);
+        }
+        return WindowsWaitResult.Aborted;
+    }
+    return WindowsWaitResult.Normal;
+}
diff --git a/std/special/compiler_rt/extendXfYf2_test.zig b/std/special/compiler_rt/extendXfYf2_test.zig
index 185c83a0ef..9969607011 100644
--- a/std/special/compiler_rt/extendXfYf2_test.zig
+++ b/std/special/compiler_rt/extendXfYf2_test.zig
@@ -31,7 +31,7 @@ fn test__extendhfsf2(a: u16, expected: u32) void {
 
     if (rep == expected) {
         if (rep & 0x7fffffff > 0x7f800000) {
-            return;  // NaN is always unequal.
+            return; // NaN is always unequal.
         }
         if (x == @bitCast(f32, expected)) {
             return;
@@ -86,33 +86,33 @@ test "extenddftf2" {
 }
 
 test "extendhfsf2" {
-    test__extendhfsf2(0x7e00, 0x7fc00000);  // qNaN
-    test__extendhfsf2(0x7f00, 0x7fe00000);  // sNaN
-    test__extendhfsf2(0x7c01, 0x7f802000);  // sNaN
+    test__extendhfsf2(0x7e00, 0x7fc00000); // qNaN
+    test__extendhfsf2(0x7f00, 0x7fe00000); // sNaN
+    test__extendhfsf2(0x7c01, 0x7f802000); // sNaN
 
-    test__extendhfsf2(0, 0);  // 0
-    test__extendhfsf2(0x8000, 0x80000000);  // -0
+    test__extendhfsf2(0, 0); // 0
+    test__extendhfsf2(0x8000, 0x80000000); // -0
 
-    test__extendhfsf2(0x7c00, 0x7f800000);  // inf
-    test__extendhfsf2(0xfc00, 0xff800000);  // -inf
+    test__extendhfsf2(0x7c00, 0x7f800000); // inf
+    test__extendhfsf2(0xfc00, 0xff800000); // -inf
 
-    test__extendhfsf2(0x0001, 0x33800000);  // denormal (min), 2**-24
-    test__extendhfsf2(0x8001, 0xb3800000);  // denormal (min), -2**-24
+    test__extendhfsf2(0x0001, 0x33800000); // denormal (min), 2**-24
+    test__extendhfsf2(0x8001, 0xb3800000); // denormal (min), -2**-24
 
-    test__extendhfsf2(0x03ff, 0x387fc000);  // denormal (max), 2**-14 - 2**-24
-    test__extendhfsf2(0x83ff, 0xb87fc000);  // denormal (max), -2**-14 + 2**-24
+    test__extendhfsf2(0x03ff, 0x387fc000); // denormal (max), 2**-14 - 2**-24
+    test__extendhfsf2(0x83ff, 0xb87fc000); // denormal (max), -2**-14 + 2**-24
 
-    test__extendhfsf2(0x0400, 0x38800000);  // normal (min), 2**-14
-    test__extendhfsf2(0x8400, 0xb8800000);  // normal (min), -2**-14
+    test__extendhfsf2(0x0400, 0x38800000); // normal (min), 2**-14
+    test__extendhfsf2(0x8400, 0xb8800000); // normal (min), -2**-14
 
-    test__extendhfsf2(0x7bff, 0x477fe000);  // normal (max), 65504
-    test__extendhfsf2(0xfbff, 0xc77fe000);  // normal (max), -65504
+    test__extendhfsf2(0x7bff, 0x477fe000); // normal (max), 65504
+    test__extendhfsf2(0xfbff, 0xc77fe000); // normal (max), -65504
 
-    test__extendhfsf2(0x3c01, 0x3f802000);  // normal, 1 + 2**-10
-    test__extendhfsf2(0xbc01, 0xbf802000);  // normal, -1 - 2**-10
+    test__extendhfsf2(0x3c01, 0x3f802000); // normal, 1 + 2**-10
+    test__extendhfsf2(0xbc01, 0xbf802000); // normal, -1 - 2**-10
 
-    test__extendhfsf2(0x3555, 0x3eaaa000);  // normal, approx. 1/3
-    test__extendhfsf2(0xb555, 0xbeaaa000);  // normal, approx. -1/3
+    test__extendhfsf2(0x3555, 0x3eaaa000); // normal, approx. 1/3
+    test__extendhfsf2(0xb555, 0xbeaaa000); // normal, approx. -1/3
 }
 
 test "extendsftf2" {
author	Andrew Kelley <superjoe30@gmail.com>	2018-07-09 22:06:47 -0400
committer	GitHub <noreply@github.com>	2018-07-09 22:06:47 -0400
commit	ccef60a64033a25dbe2351c27f28257546b2ae5b (patch)
tree	67390c7e43f9852cf3786f2eed35ebf04e15510d
parent	10cc49db1ca1f9b3ac63277c0742e05f6412f3c6 (diff)
parent	c89aac85c440ea4cbccf1abdbd6acf84a33077e3 (diff)
download	zig-ccef60a64033a25dbe2351c27f28257546b2ae5b.tar.gz zig-ccef60a64033a25dbe2351c27f28257546b2ae5b.zip