From 9395162a7c41689bcd1c0c48f9eabffc1485fc74 Mon Sep 17 00:00:00 2001 From: Isaac Hier Date: Mon, 2 Jul 2018 16:56:40 -0400 Subject: Debug enum issue --- src/ir.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'src/ir.cpp') diff --git a/src/ir.cpp b/src/ir.cpp index b40c2dc36d..c16f3c09b8 100644 --- a/src/ir.cpp +++ b/src/ir.cpp @@ -19149,6 +19149,7 @@ static TypeTableEntry *ir_analyze_instruction_check_switch_prongs(IrAnalyze *ira if (!end_val) return ira->codegen->builtin_types.entry_invalid; + printf("%s\n", buf_ptr(&start_val->type->name)); assert(start_val->type->id == TypeTableEntryIdInt || start_val->type->id == TypeTableEntryIdComptimeInt); assert(end_val->type->id == TypeTableEntryIdInt || end_val->type->id == TypeTableEntryIdComptimeInt); AstNode *prev_node = rangeset_add_range(&rs, &start_val->data.x_bigint, &end_val->data.x_bigint, -- cgit v1.2.3 From 9cff23dbf9ff3da716a1c4397f9411eba09f6cac Mon Sep 17 00:00:00 2001 From: Isaac Hier Date: Wed, 4 Jul 2018 13:27:10 -0400 Subject: Fix assertion crash on enum switch values --- src/ir.cpp | 7 ++++++- test/behavior.zig | 1 - test/cases/switch_usize_enum_prongs.zig | 11 ----------- test/compile_errors.zig | 18 ++++++++++++++++++ 4 files changed, 24 insertions(+), 13 deletions(-) delete mode 100644 test/cases/switch_usize_enum_prongs.zig (limited to 'src/ir.cpp') diff --git a/src/ir.cpp b/src/ir.cpp index c16f3c09b8..37d673bbd7 100644 --- a/src/ir.cpp +++ b/src/ir.cpp @@ -19149,9 +19149,14 @@ static TypeTableEntry *ir_analyze_instruction_check_switch_prongs(IrAnalyze *ira if (!end_val) return ira->codegen->builtin_types.entry_invalid; - printf("%s\n", buf_ptr(&start_val->type->name)); + if (start_val->type->id == TypeTableEntryIdEnum) + return ira->codegen->builtin_types.entry_invalid; assert(start_val->type->id == TypeTableEntryIdInt || start_val->type->id == TypeTableEntryIdComptimeInt); + + if (end_val->type->id == TypeTableEntryIdEnum) + return ira->codegen->builtin_types.entry_invalid; assert(end_val->type->id == TypeTableEntryIdInt || end_val->type->id == TypeTableEntryIdComptimeInt); + AstNode *prev_node = rangeset_add_range(&rs, &start_val->data.x_bigint, &end_val->data.x_bigint, start_value->source_node); if (prev_node != nullptr) { diff --git a/test/behavior.zig b/test/behavior.zig index 803d4a5a08..d47eb8fd6c 100644 --- a/test/behavior.zig +++ b/test/behavior.zig @@ -52,7 +52,6 @@ comptime { _ = @import("cases/switch.zig"); _ = @import("cases/switch_prong_err_enum.zig"); _ = @import("cases/switch_prong_implicit_cast.zig"); - _ = @import("cases/switch_usize_enum_prongs.zig"); _ = @import("cases/syntax.zig"); _ = @import("cases/this.zig"); _ = @import("cases/try.zig"); diff --git a/test/cases/switch_usize_enum_prongs.zig b/test/cases/switch_usize_enum_prongs.zig deleted file mode 100644 index b49615e887..0000000000 --- a/test/cases/switch_usize_enum_prongs.zig +++ /dev/null @@ -1,11 +0,0 @@ -const E = enum(usize) { One, Two }; - -test "aoeou" { - foo(1); -} - -fn foo(x: usize) void { - switch (x) { - E.One => {}, - } -} diff --git a/test/compile_errors.zig b/test/compile_errors.zig index 7291a48a8f..8bd5480395 100644 --- a/test/compile_errors.zig +++ b/test/compile_errors.zig @@ -358,6 +358,24 @@ pub fn addCases(cases: *tests.CompileErrorContext) void { ".tmp_source.zig:3:14: note: other value is here", ); + + cases.add( + "invalid cast from integral type to enum", + \\const E = enum(usize) { One, Two }; + \\ + \\export fn entry() void { + \\ foo(1); + \\} + \\ + \\fn foo(x: usize) void { + \\ switch (x) { + \\ E.One => 
{}, + \\ } + \\} + , + ".tmp_source.zig:9:10: error: expected type 'usize', found 'E'" + ); + cases.add( "range operator in switch used on error set", \\export fn entry() void { -- cgit v1.2.3 From 1a5bd8888174ef2eb1881c1dd81d418b44625cc7 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 6 Jul 2018 12:03:07 -0400 Subject: alternate implementation of previous commit This strategy adds another field to the SwitchBr instruction, which is the result of the CheckSwitchProngs instruction. The type of the result is void, and is unused, except that the SwitchBr instruction will not perform analysis if the CheckSwitchProngs instruction did not pass analysis. This allows the CheckSwitchProngs instruction to do implicit casting for its type checking, while preventing duplicate compile error messages. --- src/all_types.hpp | 1 + src/ir.cpp | 44 +++++++++++++++++++++++++++----------------- 2 files changed, 28 insertions(+), 17 deletions(-) (limited to 'src/ir.cpp') diff --git a/src/all_types.hpp b/src/all_types.hpp index 5d449491c8..4d97be468c 100644 --- a/src/all_types.hpp +++ b/src/all_types.hpp @@ -2193,6 +2193,7 @@ struct IrInstructionSwitchBr { size_t case_count; IrInstructionSwitchBrCase *cases; IrInstruction *is_comptime; + IrInstruction *switch_prongs_void; }; struct IrInstructionSwitchVar { diff --git a/src/ir.cpp b/src/ir.cpp index 37d673bbd7..204ebb332a 100644 --- a/src/ir.cpp +++ b/src/ir.cpp @@ -1719,7 +1719,8 @@ static IrInstruction *ir_build_ctz_from(IrBuilder *irb, IrInstruction *old_instr } static IrInstruction *ir_build_switch_br(IrBuilder *irb, Scope *scope, AstNode *source_node, IrInstruction *target_value, - IrBasicBlock *else_block, size_t case_count, IrInstructionSwitchBrCase *cases, IrInstruction *is_comptime) + IrBasicBlock *else_block, size_t case_count, IrInstructionSwitchBrCase *cases, IrInstruction *is_comptime, + IrInstruction *switch_prongs_void) { IrInstructionSwitchBr *instruction = ir_build_instruction(irb, scope, source_node); instruction->base.value.type = irb->codegen->builtin_types.entry_unreachable; @@ -1729,10 +1730,12 @@ static IrInstruction *ir_build_switch_br(IrBuilder *irb, Scope *scope, AstNode * instruction->case_count = case_count; instruction->cases = cases; instruction->is_comptime = is_comptime; + instruction->switch_prongs_void = switch_prongs_void; ir_ref_instruction(target_value, irb->current_basic_block); if (is_comptime) ir_ref_instruction(is_comptime, irb->current_basic_block); ir_ref_bb(else_block); + if (switch_prongs_void) ir_ref_instruction(switch_prongs_void, irb->current_basic_block); for (size_t i = 0; i < case_count; i += 1) { ir_ref_instruction(cases[i].value, irb->current_basic_block); @@ -1744,10 +1747,10 @@ static IrInstruction *ir_build_switch_br(IrBuilder *irb, Scope *scope, AstNode * static IrInstruction *ir_build_switch_br_from(IrBuilder *irb, IrInstruction *old_instruction, IrInstruction *target_value, IrBasicBlock *else_block, size_t case_count, - IrInstructionSwitchBrCase *cases, IrInstruction *is_comptime) + IrInstructionSwitchBrCase *cases, IrInstruction *is_comptime, IrInstruction *switch_prongs_void) { IrInstruction *new_instruction = ir_build_switch_br(irb, old_instruction->scope, old_instruction->source_node, - target_value, else_block, case_count, cases, is_comptime); + target_value, else_block, case_count, cases, is_comptime, switch_prongs_void); ir_link_new_instruction(new_instruction, old_instruction); return new_instruction; } @@ -6035,13 +6038,13 @@ static IrInstruction *ir_gen_switch_expr(IrBuilder *irb, Scope 
*scope, AstNode * } - ir_build_check_switch_prongs(irb, scope, node, target_value, check_ranges.items, check_ranges.length, + IrInstruction *switch_prongs_void = ir_build_check_switch_prongs(irb, scope, node, target_value, check_ranges.items, check_ranges.length, else_prong != nullptr); if (cases.length == 0) { ir_build_br(irb, scope, node, else_block, is_comptime); } else { - ir_build_switch_br(irb, scope, node, target_value, else_block, cases.length, cases.items, is_comptime); + ir_build_switch_br(irb, scope, node, target_value, else_block, cases.length, cases.items, is_comptime, switch_prongs_void); } if (!else_prong) { @@ -6692,7 +6695,7 @@ static IrInstruction *ir_gen_await_expr(IrBuilder *irb, Scope *parent_scope, Ast cases[1].value = ir_build_const_u8(irb, parent_scope, node, 1); cases[1].block = cleanup_block; ir_build_switch_br(irb, parent_scope, node, suspend_code, irb->exec->coro_suspend_block, - 2, cases, const_bool_false); + 2, cases, const_bool_false, nullptr); ir_set_cursor_at_end_and_append_block(irb, cleanup_block); ir_gen_defers_for_block(irb, parent_scope, outer_scope, true); @@ -6773,7 +6776,7 @@ static IrInstruction *ir_gen_suspend(IrBuilder *irb, Scope *parent_scope, AstNod cases[1].value = ir_mark_gen(ir_build_const_u8(irb, parent_scope, node, 1)); cases[1].block = cleanup_block; ir_mark_gen(ir_build_switch_br(irb, parent_scope, node, suspend_code, irb->exec->coro_suspend_block, - 2, cases, const_bool_false)); + 2, cases, const_bool_false, nullptr)); ir_set_cursor_at_end_and_append_block(irb, cleanup_block); ir_gen_defers_for_block(irb, parent_scope, outer_scope, true); @@ -7078,7 +7081,7 @@ bool ir_gen(CodeGen *codegen, AstNode *node, Scope *scope, IrExecutable *ir_exec cases[0].block = invalid_resume_block; cases[1].value = ir_build_const_u8(irb, scope, node, 1); cases[1].block = irb->exec->coro_final_cleanup_block; - ir_build_switch_br(irb, scope, node, suspend_code, irb->exec->coro_suspend_block, 2, cases, const_bool_false); + ir_build_switch_br(irb, scope, node, suspend_code, irb->exec->coro_suspend_block, 2, cases, const_bool_false, nullptr); ir_set_cursor_at_end_and_append_block(irb, irb->exec->coro_suspend_block); ir_build_coro_end(irb, scope, node); @@ -15297,6 +15300,13 @@ static TypeTableEntry *ir_analyze_instruction_switch_br(IrAnalyze *ira, if (type_is_invalid(target_value->value.type)) return ir_unreach_error(ira); + if (switch_br_instruction->switch_prongs_void != nullptr) { + if (type_is_invalid(switch_br_instruction->switch_prongs_void->other->value.type)) { + return ir_unreach_error(ira); + } + } + + size_t case_count = switch_br_instruction->case_count; bool is_comptime; @@ -15387,7 +15397,7 @@ static TypeTableEntry *ir_analyze_instruction_switch_br(IrAnalyze *ira, IrBasicBlock *new_else_block = ir_get_new_bb(ira, switch_br_instruction->else_block, &switch_br_instruction->base); ir_build_switch_br_from(&ira->new_irb, &switch_br_instruction->base, - target_value, new_else_block, case_count, cases, nullptr); + target_value, new_else_block, case_count, cases, nullptr, nullptr); return ir_finish_anal(ira, ira->codegen->builtin_types.entry_unreachable); } @@ -19136,27 +19146,27 @@ static TypeTableEntry *ir_analyze_instruction_check_switch_prongs(IrAnalyze *ira IrInstruction *start_value = range->start->other; if (type_is_invalid(start_value->value.type)) return ira->codegen->builtin_types.entry_invalid; + IrInstruction *casted_start_value = ir_implicit_cast(ira, start_value, switch_type); + if (type_is_invalid(casted_start_value->value.type)) + return 
ira->codegen->builtin_types.entry_invalid; IrInstruction *end_value = range->end->other; if (type_is_invalid(end_value->value.type)) return ira->codegen->builtin_types.entry_invalid; + IrInstruction *casted_end_value = ir_implicit_cast(ira, end_value, switch_type); + if (type_is_invalid(casted_end_value->value.type)) + return ira->codegen->builtin_types.entry_invalid; - ConstExprValue *start_val = ir_resolve_const(ira, start_value, UndefBad); + ConstExprValue *start_val = ir_resolve_const(ira, casted_start_value, UndefBad); if (!start_val) return ira->codegen->builtin_types.entry_invalid; - ConstExprValue *end_val = ir_resolve_const(ira, end_value, UndefBad); + ConstExprValue *end_val = ir_resolve_const(ira, casted_end_value, UndefBad); if (!end_val) return ira->codegen->builtin_types.entry_invalid; - if (start_val->type->id == TypeTableEntryIdEnum) - return ira->codegen->builtin_types.entry_invalid; assert(start_val->type->id == TypeTableEntryIdInt || start_val->type->id == TypeTableEntryIdComptimeInt); - - if (end_val->type->id == TypeTableEntryIdEnum) - return ira->codegen->builtin_types.entry_invalid; assert(end_val->type->id == TypeTableEntryIdInt || end_val->type->id == TypeTableEntryIdComptimeInt); - AstNode *prev_node = rangeset_add_range(&rs, &start_val->data.x_bigint, &end_val->data.x_bigint, start_value->source_node); if (prev_node != nullptr) { -- cgit v1.2.3 From 1cf7511dc9d449473748675a5e734e81ea7c85c2 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 6 Jul 2018 16:20:46 -0400 Subject: add compile error notes for where struct definitions are closes #1202 --- src/analyze.cpp | 37 +++++++++++++++++++++++++++++++++++++ src/analyze.hpp | 1 + src/ir.cpp | 47 ++++++++++++++++++++++++++++++++++++----------- test/compile_errors.zig | 42 ++++++++++++++++++++++++++++++++++++------ 4 files changed, 110 insertions(+), 17 deletions(-) (limited to 'src/ir.cpp') diff --git a/src/analyze.cpp b/src/analyze.cpp index ca582dfc4c..643a85634e 100644 --- a/src/analyze.cpp +++ b/src/analyze.cpp @@ -212,6 +212,43 @@ static uint8_t bits_needed_for_unsigned(uint64_t x) { return (upper >= x) ? 
base : (base + 1); } +AstNode *type_decl_node(TypeTableEntry *type_entry) { + switch (type_entry->id) { + case TypeTableEntryIdInvalid: + zig_unreachable(); + case TypeTableEntryIdStruct: + return type_entry->data.structure.decl_node; + case TypeTableEntryIdEnum: + return type_entry->data.enumeration.decl_node; + case TypeTableEntryIdUnion: + return type_entry->data.unionation.decl_node; + case TypeTableEntryIdOpaque: + case TypeTableEntryIdMetaType: + case TypeTableEntryIdVoid: + case TypeTableEntryIdBool: + case TypeTableEntryIdUnreachable: + case TypeTableEntryIdInt: + case TypeTableEntryIdFloat: + case TypeTableEntryIdPointer: + case TypeTableEntryIdArray: + case TypeTableEntryIdComptimeFloat: + case TypeTableEntryIdComptimeInt: + case TypeTableEntryIdUndefined: + case TypeTableEntryIdNull: + case TypeTableEntryIdOptional: + case TypeTableEntryIdErrorUnion: + case TypeTableEntryIdErrorSet: + case TypeTableEntryIdFn: + case TypeTableEntryIdNamespace: + case TypeTableEntryIdBlock: + case TypeTableEntryIdBoundFn: + case TypeTableEntryIdArgTuple: + case TypeTableEntryIdPromise: + return nullptr; + } + zig_unreachable(); +} + bool type_is_complete(TypeTableEntry *type_entry) { switch (type_entry->id) { case TypeTableEntryIdInvalid: diff --git a/src/analyze.hpp b/src/analyze.hpp index c2730197e2..5168509fe0 100644 --- a/src/analyze.hpp +++ b/src/analyze.hpp @@ -202,5 +202,6 @@ uint32_t get_coro_frame_align_bytes(CodeGen *g); bool fn_type_can_fail(FnTypeId *fn_type_id); bool type_can_fail(TypeTableEntry *type_entry); bool fn_eval_cacheable(Scope *scope, TypeTableEntry *return_type); +AstNode *type_decl_node(TypeTableEntry *type_entry); #endif diff --git a/src/ir.cpp b/src/ir.cpp index 204ebb332a..3ad7c77645 100644 --- a/src/ir.cpp +++ b/src/ir.cpp @@ -82,6 +82,7 @@ struct ConstCastSliceMismatch; struct ConstCastErrUnionErrSetMismatch; struct ConstCastErrUnionPayloadMismatch; struct ConstCastErrSetMismatch; +struct ConstCastTypeMismatch; struct ConstCastOnly { ConstCastResultId id; @@ -92,6 +93,7 @@ struct ConstCastOnly { ConstCastOptionalMismatch *optional; ConstCastErrUnionPayloadMismatch *error_union_payload; ConstCastErrUnionErrSetMismatch *error_union_error_set; + ConstCastTypeMismatch *type_mismatch; ConstCastOnly *return_type; ConstCastOnly *async_allocator_type; ConstCastOnly *null_wrap_ptr_child; @@ -100,6 +102,11 @@ struct ConstCastOnly { } data; }; +struct ConstCastTypeMismatch { + TypeTableEntry *wanted_type; + TypeTableEntry *actual_type; +}; + struct ConstCastOptionalMismatch { ConstCastOnly child; TypeTableEntry *wanted_child; @@ -8128,15 +8135,7 @@ static ConstCastOnly types_match_const_cast_only(IrAnalyze *ira, TypeTableEntry } // pointer const - if (wanted_type->id == TypeTableEntryIdPointer && - actual_type->id == TypeTableEntryIdPointer && - (actual_type->data.pointer.ptr_len == wanted_type->data.pointer.ptr_len) && - (!actual_type->data.pointer.is_const || wanted_type->data.pointer.is_const) && - (!actual_type->data.pointer.is_volatile || wanted_type->data.pointer.is_volatile) && - actual_type->data.pointer.bit_offset == wanted_type->data.pointer.bit_offset && - actual_type->data.pointer.unaligned_bit_count == wanted_type->data.pointer.unaligned_bit_count && - actual_type->data.pointer.alignment >= wanted_type->data.pointer.alignment) - { + if (wanted_type->id == TypeTableEntryIdPointer && actual_type->id == TypeTableEntryIdPointer) { ConstCastOnly child = types_match_const_cast_only(ira, wanted_type->data.pointer.child_type, actual_type->data.pointer.child_type, 
source_node, !wanted_type->data.pointer.is_const); if (child.id != ConstCastResultIdOk) { @@ -8145,8 +8144,17 @@ static ConstCastOnly types_match_const_cast_only(IrAnalyze *ira, TypeTableEntry result.data.pointer_mismatch->child = child; result.data.pointer_mismatch->wanted_child = wanted_type->data.pointer.child_type; result.data.pointer_mismatch->actual_child = actual_type->data.pointer.child_type; + return result; + } + if ((actual_type->data.pointer.ptr_len == wanted_type->data.pointer.ptr_len) && + (!actual_type->data.pointer.is_const || wanted_type->data.pointer.is_const) && + (!actual_type->data.pointer.is_volatile || wanted_type->data.pointer.is_volatile) && + actual_type->data.pointer.bit_offset == wanted_type->data.pointer.bit_offset && + actual_type->data.pointer.unaligned_bit_count == wanted_type->data.pointer.unaligned_bit_count && + actual_type->data.pointer.alignment >= wanted_type->data.pointer.alignment) + { + return result; } - return result; } // slice const @@ -8341,6 +8349,9 @@ static ConstCastOnly types_match_const_cast_only(IrAnalyze *ira, TypeTableEntry } result.id = ConstCastResultIdType; + result.data.type_mismatch = allocate_nonzero(1); + result.data.type_mismatch->wanted_type = wanted_type; + result.data.type_mismatch->actual_type = actual_type; return result; } @@ -10154,6 +10165,21 @@ static void report_recursive_error(IrAnalyze *ira, AstNode *source_node, ConstCa report_recursive_error(ira, source_node, &cast_result->data.error_union_payload->child, msg); break; } + case ConstCastResultIdType: { + AstNode *wanted_decl_node = type_decl_node(cast_result->data.type_mismatch->wanted_type); + AstNode *actual_decl_node = type_decl_node(cast_result->data.type_mismatch->actual_type); + if (wanted_decl_node != nullptr) { + add_error_note(ira->codegen, parent_msg, wanted_decl_node, + buf_sprintf("%s declared here", + buf_ptr(&cast_result->data.type_mismatch->wanted_type->name))); + } + if (actual_decl_node != nullptr) { + add_error_note(ira->codegen, parent_msg, actual_decl_node, + buf_sprintf("%s declared here", + buf_ptr(&cast_result->data.type_mismatch->actual_type->name))); + } + break; + } case ConstCastResultIdFnAlign: // TODO case ConstCastResultIdFnCC: // TODO case ConstCastResultIdFnVarArgs: // TODO @@ -10163,7 +10189,6 @@ static void report_recursive_error(IrAnalyze *ira, AstNode *source_node, ConstCa case ConstCastResultIdFnGenericArgCount: // TODO case ConstCastResultIdFnArg: // TODO case ConstCastResultIdFnArgNoAlias: // TODO - case ConstCastResultIdType: // TODO case ConstCastResultIdUnresolvedInferredErrSet: // TODO case ConstCastResultIdAsyncAllocatorType: // TODO case ConstCastResultIdNullWrapPtr: // TODO diff --git a/test/compile_errors.zig b/test/compile_errors.zig index 8bd5480395..d508c7c36c 100644 --- a/test/compile_errors.zig +++ b/test/compile_errors.zig @@ -1,6 +1,40 @@ const tests = @import("tests.zig"); pub fn addCases(cases: *tests.CompileErrorContext) void { + cases.addCase(x: { + const tc = cases.create( + "wrong same named struct", + \\const a = @import("a.zig"); + \\const b = @import("b.zig"); + \\ + \\export fn entry() void { + \\ var a1: a.Foo = undefined; + \\ bar(&a1); + \\} + \\ + \\fn bar(x: *b.Foo) void {} + , + ".tmp_source.zig:6:10: error: expected type '*Foo', found '*Foo'", + ".tmp_source.zig:6:10: note: pointer type child 'Foo' cannot cast into pointer type child 'Foo'", + "a.zig:1:17: note: Foo declared here", + "b.zig:1:17: note: Foo declared here", + ); + + tc.addSourceFile("a.zig", + \\pub const Foo = struct { + \\ x: 
i32, + \\}; + ); + + tc.addSourceFile("b.zig", + \\pub const Foo = struct { + \\ z: f64, + \\}; + ); + + break :x tc; + }); + cases.add( "enum field value references enum", \\pub const Foo = extern enum { @@ -358,9 +392,7 @@ pub fn addCases(cases: *tests.CompileErrorContext) void { ".tmp_source.zig:3:14: note: other value is here", ); - - cases.add( - "invalid cast from integral type to enum", + cases.add("invalid cast from integral type to enum", \\const E = enum(usize) { One, Two }; \\ \\export fn entry() void { @@ -372,9 +404,7 @@ pub fn addCases(cases: *tests.CompileErrorContext) void { \\ E.One => {}, \\ } \\} - , - ".tmp_source.zig:9:10: error: expected type 'usize', found 'E'" - ); + , ".tmp_source.zig:9:10: error: expected type 'usize', found 'E'"); cases.add( "range operator in switch used on error set", -- cgit v1.2.3 From d8295c188946b0f07d62420c2f08c940f70b03ac Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sat, 7 Jul 2018 00:25:32 -0400 Subject: add @popCount intrinsic --- doc/langref.html.in | 15 +++++++++-- src/all_types.hpp | 12 +++++++++ src/analyze.cpp | 4 +++ src/bigint.cpp | 31 ++++++++++++++++++++++ src/bigint.hpp | 2 ++ src/codegen.cpp | 21 ++++++++++++++- src/ir.cpp | 68 +++++++++++++++++++++++++++++++++++++++++++++++++ src/ir_print.cpp | 9 +++++++ test/behavior.zig | 7 ++--- test/cases/popcount.zig | 24 +++++++++++++++++ test/compile_errors.zig | 18 +++++++++++++ 11 files changed, 205 insertions(+), 6 deletions(-) create mode 100644 test/cases/popcount.zig (limited to 'src/ir.cpp') diff --git a/doc/langref.html.in b/doc/langref.html.in index 5c1cc130ac..8eaffb64ad 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -5013,7 +5013,7 @@ comptime {

If x is zero, @clz returns T.bit_count.

- + {#see_also|@ctz|@popCount#} {#header_close#} {#header_open|@cmpxchgStrong#}
@cmpxchgStrong(comptime T: type, ptr: *T, expected_value: T, new_value: T, success_order: AtomicOrder, fail_order: AtomicOrder) ?T
@@ -5149,6 +5149,7 @@ test "main" {

If x is zero, @ctz returns T.bit_count.

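The @ctz/@clz behavior referenced above can be exercised with a small test. This is an illustrative sketch only, not part of the patch; it assumes the single-argument builtin forms that this patch series documents (see the ctz/clz builtin registrations with one parameter in codegen.cpp below):

    const assert = @import("std").debug.assert;

    test "ctz and clz" {
        var x: u32 = 0b0100;
        assert(@ctz(x) == 2); // number of trailing zero bits
        assert(@clz(x) == 29); // number of leading zero bits in the 32-bit value

        var zero: u32 = 0;
        assert(@ctz(zero) == 32); // when x is zero, the result is T.bit_count
        assert(@clz(zero) == 32);
    }
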
+ {#see_also|@clz|@popCount#} {#header_close#} {#header_open|@divExact#}
@divExact(numerator: T, denominator: T) T
@@ -5631,6 +5632,16 @@ test "call foo" { {#see_also|Root Source File#} {#header_close#} + {#header_open|@popCount#} +
@popCount(integer: var) var
+

Counts the number of bits set in an integer.

+

+ If integer is known at {#link|comptime#}, the return type is comptime_int. + Otherwise, the return type is an unsigned integer with the minimum number + of bits that can represent the bit count of the integer type. +

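A usage sketch of the rule above (illustrative only, not part of the patch; it mirrors the test/cases/popcount.zig case added later in this commit and assumes the single-argument @popCount form shown here):

    const assert = @import("std").debug.assert;

    test "popCount result types" {
        var x: u32 = 0xaa; // runtime-known operand: result type is the smallest
        assert(@popCount(x) == 4); // unsigned integer that fits the bit count (u6 for a u32)

        var y: i16 = -1; // signed operands count the set bits of the
        assert(@popCount(y) == 16); // two's complement representation

        comptime {
            assert(@popCount(0xFF) == 8); // comptime-known operand: result is comptime_int;
            // a negative comptime_int operand is a compile error (see compile_errors.zig below)
        }
    }
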
+ {#see_also|@ctz|@clz#} + {#header_close#} {#header_open|@ptrCast#}
@ptrCast(comptime DestType: type, value: var) DestType

@@ -7337,7 +7348,7 @@ hljs.registerLanguage("zig", function(t) { a = t.IR + "\\s*\\(", c = { keyword: "const align var extern stdcallcc nakedcc volatile export pub noalias inline struct packed enum union break return try catch test continue unreachable comptime and or asm defer errdefer if else switch while for fn use bool f32 f64 void type noreturn error i8 u8 i16 u16 i32 u32 i64 u64 isize usize i8w u8w i16w i32w u32w i64w u64w isizew usizew c_short c_ushort c_int c_uint c_long c_ulong c_longlong c_ulonglong resume cancel await async orelse", - built_in: "atomicLoad breakpoint returnAddress frameAddress fieldParentPtr setFloatMode IntType OpaqueType compileError compileLog setCold setRuntimeSafety setEvalBranchQuota offsetOf memcpy inlineCall setGlobalLinkage divTrunc divFloor enumTagName intToPtr ptrToInt panic ptrCast intCast floatCast intToFloat floatToInt boolToInt bytesToSlice sliceToBytes errSetCast bitCast rem mod memset sizeOf alignOf alignCast maxValue minValue memberCount memberName memberType typeOf addWithOverflow subWithOverflow mulWithOverflow shlWithOverflow shlExact shrExact cInclude cDefine cUndef ctz clz import cImport errorName embedFile cmpxchgStrong cmpxchgWeak fence divExact truncate atomicRmw sqrt field typeInfo typeName newStackCall errorToInt intToError enumToInt intToEnum", + built_in: "atomicLoad breakpoint returnAddress frameAddress fieldParentPtr setFloatMode IntType OpaqueType compileError compileLog setCold setRuntimeSafety setEvalBranchQuota offsetOf memcpy inlineCall setGlobalLinkage divTrunc divFloor enumTagName intToPtr ptrToInt panic ptrCast intCast floatCast intToFloat floatToInt boolToInt bytesToSlice sliceToBytes errSetCast bitCast rem mod memset sizeOf alignOf alignCast maxValue minValue memberCount memberName memberType typeOf addWithOverflow subWithOverflow mulWithOverflow shlWithOverflow shlExact shrExact cInclude cDefine cUndef ctz clz popCount import cImport errorName embedFile cmpxchgStrong cmpxchgWeak fence divExact truncate atomicRmw sqrt field typeInfo typeName newStackCall errorToInt intToError enumToInt intToEnum", literal: "true false null undefined" }, n = [e, t.CLCM, t.CBCM, s, r]; diff --git a/src/all_types.hpp b/src/all_types.hpp index 4d97be468c..6dcf1894d8 100644 --- a/src/all_types.hpp +++ b/src/all_types.hpp @@ -1352,6 +1352,7 @@ enum BuiltinFnId { BuiltinFnIdCompileLog, BuiltinFnIdCtz, BuiltinFnIdClz, + BuiltinFnIdPopCount, BuiltinFnIdImport, BuiltinFnIdCImport, BuiltinFnIdErrName, @@ -1477,6 +1478,7 @@ bool type_id_eql(TypeId a, TypeId b); enum ZigLLVMFnId { ZigLLVMFnIdCtz, ZigLLVMFnIdClz, + ZigLLVMFnIdPopCount, ZigLLVMFnIdOverflowArithmetic, ZigLLVMFnIdFloor, ZigLLVMFnIdCeil, @@ -1499,6 +1501,9 @@ struct ZigLLVMFnKey { struct { uint32_t bit_count; } clz; + struct { + uint32_t bit_count; + } pop_count; struct { uint32_t bit_count; } floating; @@ -2050,6 +2055,7 @@ enum IrInstructionId { IrInstructionIdUnionTag, IrInstructionIdClz, IrInstructionIdCtz, + IrInstructionIdPopCount, IrInstructionIdImport, IrInstructionIdCImport, IrInstructionIdCInclude, @@ -2545,6 +2551,12 @@ struct IrInstructionClz { IrInstruction *value; }; +struct IrInstructionPopCount { + IrInstruction base; + + IrInstruction *value; +}; + struct IrInstructionUnionTag { IrInstruction base; diff --git a/src/analyze.cpp b/src/analyze.cpp index 643a85634e..9b60f7374a 100644 --- a/src/analyze.cpp +++ b/src/analyze.cpp @@ -5976,6 +5976,8 @@ uint32_t zig_llvm_fn_key_hash(ZigLLVMFnKey x) { return (uint32_t)(x.data.ctz.bit_count) * (uint32_t)810453934; case 
ZigLLVMFnIdClz: return (uint32_t)(x.data.clz.bit_count) * (uint32_t)2428952817; + case ZigLLVMFnIdPopCount: + return (uint32_t)(x.data.clz.bit_count) * (uint32_t)101195049; case ZigLLVMFnIdFloor: return (uint32_t)(x.data.floating.bit_count) * (uint32_t)1899859168; case ZigLLVMFnIdCeil: @@ -5998,6 +6000,8 @@ bool zig_llvm_fn_key_eql(ZigLLVMFnKey a, ZigLLVMFnKey b) { return a.data.ctz.bit_count == b.data.ctz.bit_count; case ZigLLVMFnIdClz: return a.data.clz.bit_count == b.data.clz.bit_count; + case ZigLLVMFnIdPopCount: + return a.data.pop_count.bit_count == b.data.pop_count.bit_count; case ZigLLVMFnIdFloor: case ZigLLVMFnIdCeil: case ZigLLVMFnIdSqrt: diff --git a/src/bigint.cpp b/src/bigint.cpp index bb227a7c3d..bf18b9a1bf 100644 --- a/src/bigint.cpp +++ b/src/bigint.cpp @@ -1593,6 +1593,37 @@ void bigint_append_buf(Buf *buf, const BigInt *op, uint64_t base) { } } +size_t bigint_popcount_unsigned(const BigInt *bi) { + assert(!bi->is_negative); + if (bi->digit_count == 0) + return 0; + + size_t count = 0; + size_t bit_count = bi->digit_count * 64; + for (size_t i = 0; i < bit_count; i += 1) { + if (bit_at_index(bi, i)) + count += 1; + } + return count; +} + +size_t bigint_popcount_signed(const BigInt *bi, size_t bit_count) { + if (bit_count == 0) + return 0; + if (bi->digit_count == 0) + return 0; + + BigInt twos_comp = {0}; + to_twos_complement(&twos_comp, bi, bit_count); + + size_t count = 0; + for (size_t i = 0; i < bit_count; i += 1) { + if (bit_at_index(&twos_comp, i)) + count += 1; + } + return count; +} + size_t bigint_ctz(const BigInt *bi, size_t bit_count) { if (bit_count == 0) return 0; diff --git a/src/bigint.hpp b/src/bigint.hpp index 9f044c8722..48b222a227 100644 --- a/src/bigint.hpp +++ b/src/bigint.hpp @@ -81,6 +81,8 @@ void bigint_append_buf(Buf *buf, const BigInt *op, uint64_t base); size_t bigint_ctz(const BigInt *bi, size_t bit_count); size_t bigint_clz(const BigInt *bi, size_t bit_count); +size_t bigint_popcount_signed(const BigInt *bi, size_t bit_count); +size_t bigint_popcount_unsigned(const BigInt *bi); size_t bigint_bits_needed(const BigInt *op); diff --git a/src/codegen.cpp b/src/codegen.cpp index 26ee106959..54e2da7d61 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -3426,14 +3426,22 @@ static LLVMValueRef ir_render_unwrap_maybe(CodeGen *g, IrExecutable *executable, static LLVMValueRef get_int_builtin_fn(CodeGen *g, TypeTableEntry *int_type, BuiltinFnId fn_id) { ZigLLVMFnKey key = {}; const char *fn_name; + uint32_t n_args; if (fn_id == BuiltinFnIdCtz) { fn_name = "cttz"; + n_args = 2; key.id = ZigLLVMFnIdCtz; key.data.ctz.bit_count = (uint32_t)int_type->data.integral.bit_count; } else if (fn_id == BuiltinFnIdClz) { fn_name = "ctlz"; + n_args = 2; key.id = ZigLLVMFnIdClz; key.data.clz.bit_count = (uint32_t)int_type->data.integral.bit_count; + } else if (fn_id == BuiltinFnIdPopCount) { + fn_name = "ctpop"; + n_args = 1; + key.id = ZigLLVMFnIdPopCount; + key.data.pop_count.bit_count = (uint32_t)int_type->data.integral.bit_count; } else { zig_unreachable(); } @@ -3448,7 +3456,7 @@ static LLVMValueRef get_int_builtin_fn(CodeGen *g, TypeTableEntry *int_type, Bui int_type->type_ref, LLVMInt1Type(), }; - LLVMTypeRef fn_type = LLVMFunctionType(int_type->type_ref, param_types, 2, false); + LLVMTypeRef fn_type = LLVMFunctionType(int_type->type_ref, param_types, n_args, false); LLVMValueRef fn_val = LLVMAddFunction(g->module, llvm_name, fn_type); assert(LLVMGetIntrinsicID(fn_val)); @@ -3481,6 +3489,14 @@ static LLVMValueRef ir_render_ctz(CodeGen *g, IrExecutable 
*executable, IrInstru return gen_widen_or_shorten(g, false, int_type, instruction->base.value.type, wrong_size_int); } +static LLVMValueRef ir_render_pop_count(CodeGen *g, IrExecutable *executable, IrInstructionPopCount *instruction) { + TypeTableEntry *int_type = instruction->value->value.type; + LLVMValueRef fn_val = get_int_builtin_fn(g, int_type, BuiltinFnIdPopCount); + LLVMValueRef operand = ir_llvm_value(g, instruction->value); + LLVMValueRef wrong_size_int = LLVMBuildCall(g->builder, fn_val, &operand, 1, ""); + return gen_widen_or_shorten(g, false, int_type, instruction->base.value.type, wrong_size_int); +} + static LLVMValueRef ir_render_switch_br(CodeGen *g, IrExecutable *executable, IrInstructionSwitchBr *instruction) { LLVMValueRef target_value = ir_llvm_value(g, instruction->target_value); LLVMBasicBlockRef else_block = instruction->else_block->llvm_block; @@ -4831,6 +4847,8 @@ static LLVMValueRef ir_render_instruction(CodeGen *g, IrExecutable *executable, return ir_render_clz(g, executable, (IrInstructionClz *)instruction); case IrInstructionIdCtz: return ir_render_ctz(g, executable, (IrInstructionCtz *)instruction); + case IrInstructionIdPopCount: + return ir_render_pop_count(g, executable, (IrInstructionPopCount *)instruction); case IrInstructionIdSwitchBr: return ir_render_switch_br(g, executable, (IrInstructionSwitchBr *)instruction); case IrInstructionIdPhi: @@ -6342,6 +6360,7 @@ static void define_builtin_fns(CodeGen *g) { create_builtin_fn(g, BuiltinFnIdCUndef, "cUndef", 1); create_builtin_fn(g, BuiltinFnIdCtz, "ctz", 1); create_builtin_fn(g, BuiltinFnIdClz, "clz", 1); + create_builtin_fn(g, BuiltinFnIdPopCount, "popCount", 1); create_builtin_fn(g, BuiltinFnIdImport, "import", 1); create_builtin_fn(g, BuiltinFnIdCImport, "cImport", 1); create_builtin_fn(g, BuiltinFnIdErrName, "errorName", 1); diff --git a/src/ir.cpp b/src/ir.cpp index 3ad7c77645..98b1bd85ad 100644 --- a/src/ir.cpp +++ b/src/ir.cpp @@ -427,6 +427,10 @@ static constexpr IrInstructionId ir_instruction_id(IrInstructionCtz *) { return IrInstructionIdCtz; } +static constexpr IrInstructionId ir_instruction_id(IrInstructionPopCount *) { + return IrInstructionIdPopCount; +} + static constexpr IrInstructionId ir_instruction_id(IrInstructionUnionTag *) { return IrInstructionIdUnionTag; } @@ -1725,6 +1729,15 @@ static IrInstruction *ir_build_ctz_from(IrBuilder *irb, IrInstruction *old_instr return new_instruction; } +static IrInstruction *ir_build_pop_count(IrBuilder *irb, Scope *scope, AstNode *source_node, IrInstruction *value) { + IrInstructionPopCount *instruction = ir_build_instruction(irb, scope, source_node); + instruction->value = value; + + ir_ref_instruction(value, irb->current_basic_block); + + return &instruction->base; +} + static IrInstruction *ir_build_switch_br(IrBuilder *irb, Scope *scope, AstNode *source_node, IrInstruction *target_value, IrBasicBlock *else_block, size_t case_count, IrInstructionSwitchBrCase *cases, IrInstruction *is_comptime, IrInstruction *switch_prongs_void) @@ -3841,6 +3854,16 @@ static IrInstruction *ir_gen_builtin_fn_call(IrBuilder *irb, Scope *scope, AstNo IrInstruction *ctz = ir_build_ctz(irb, scope, node, arg0_value); return ir_lval_wrap(irb, scope, ctz, lval); } + case BuiltinFnIdPopCount: + { + AstNode *arg0_node = node->data.fn_call_expr.params.at(0); + IrInstruction *arg0_value = ir_gen_node(irb, arg0_node, scope); + if (arg0_value == irb->codegen->invalid_instruction) + return arg0_value; + + IrInstruction *instr = ir_build_pop_count(irb, scope, node, arg0_value); + 
return ir_lval_wrap(irb, scope, instr, lval); + } case BuiltinFnIdClz: { AstNode *arg0_node = node->data.fn_call_expr.params.at(0); @@ -15275,6 +15298,48 @@ static TypeTableEntry *ir_analyze_instruction_clz(IrAnalyze *ira, IrInstructionC } } +static TypeTableEntry *ir_analyze_instruction_pop_count(IrAnalyze *ira, IrInstructionPopCount *instruction) { + IrInstruction *value = instruction->value->other; + if (type_is_invalid(value->value.type)) + return ira->codegen->builtin_types.entry_invalid; + + if (value->value.type->id != TypeTableEntryIdInt && value->value.type->id != TypeTableEntryIdComptimeInt) { + ir_add_error(ira, value, + buf_sprintf("expected integer type, found '%s'", buf_ptr(&value->value.type->name))); + return ira->codegen->builtin_types.entry_invalid; + } + + if (instr_is_comptime(value)) { + ConstExprValue *val = ir_resolve_const(ira, value, UndefBad); + if (!val) + return ira->codegen->builtin_types.entry_invalid; + if (bigint_cmp_zero(&val->data.x_bigint) != CmpLT) { + size_t result = bigint_popcount_unsigned(&val->data.x_bigint); + ConstExprValue *out_val = ir_build_const_from(ira, &instruction->base); + bigint_init_unsigned(&out_val->data.x_bigint, result); + return ira->codegen->builtin_types.entry_num_lit_int; + } + if (value->value.type->id == TypeTableEntryIdComptimeInt) { + Buf *val_buf = buf_alloc(); + bigint_append_buf(val_buf, &val->data.x_bigint, 10); + ir_add_error(ira, &instruction->base, + buf_sprintf("@popCount on negative %s value %s", + buf_ptr(&value->value.type->name), buf_ptr(val_buf))); + return ira->codegen->builtin_types.entry_invalid; + } + size_t result = bigint_popcount_signed(&val->data.x_bigint, value->value.type->data.integral.bit_count); + ConstExprValue *out_val = ir_build_const_from(ira, &instruction->base); + bigint_init_unsigned(&out_val->data.x_bigint, result); + return ira->codegen->builtin_types.entry_num_lit_int; + } + + IrInstruction *result = ir_build_pop_count(&ira->new_irb, instruction->base.scope, + instruction->base.source_node, value); + result->value.type = get_smallest_unsigned_int_type(ira->codegen, value->value.type->data.integral.bit_count); + ir_link_new_instruction(result, &instruction->base); + return result->value.type; +} + static IrInstruction *ir_analyze_union_tag(IrAnalyze *ira, IrInstruction *source_instr, IrInstruction *value) { if (type_is_invalid(value->value.type)) return ira->codegen->invalid_instruction; @@ -20534,6 +20599,8 @@ static TypeTableEntry *ir_analyze_instruction_nocast(IrAnalyze *ira, IrInstructi return ir_analyze_instruction_clz(ira, (IrInstructionClz *)instruction); case IrInstructionIdCtz: return ir_analyze_instruction_ctz(ira, (IrInstructionCtz *)instruction); + case IrInstructionIdPopCount: + return ir_analyze_instruction_pop_count(ira, (IrInstructionPopCount *)instruction); case IrInstructionIdSwitchBr: return ir_analyze_instruction_switch_br(ira, (IrInstructionSwitchBr *)instruction); case IrInstructionIdSwitchTarget: @@ -20892,6 +20959,7 @@ bool ir_has_side_effects(IrInstruction *instruction) { case IrInstructionIdUnwrapOptional: case IrInstructionIdClz: case IrInstructionIdCtz: + case IrInstructionIdPopCount: case IrInstructionIdSwitchVar: case IrInstructionIdSwitchTarget: case IrInstructionIdUnionTag: diff --git a/src/ir_print.cpp b/src/ir_print.cpp index 5e5a71382c..780cf9e756 100644 --- a/src/ir_print.cpp +++ b/src/ir_print.cpp @@ -501,6 +501,12 @@ static void ir_print_ctz(IrPrint *irp, IrInstructionCtz *instruction) { fprintf(irp->f, ")"); } +static void ir_print_pop_count(IrPrint 
*irp, IrInstructionPopCount *instruction) { + fprintf(irp->f, "@popCount("); + ir_print_other_instruction(irp, instruction->value); + fprintf(irp->f, ")"); +} + static void ir_print_switch_br(IrPrint *irp, IrInstructionSwitchBr *instruction) { fprintf(irp->f, "switch ("); ir_print_other_instruction(irp, instruction->target_value); @@ -1425,6 +1431,9 @@ static void ir_print_instruction(IrPrint *irp, IrInstruction *instruction) { case IrInstructionIdCtz: ir_print_ctz(irp, (IrInstructionCtz *)instruction); break; + case IrInstructionIdPopCount: + ir_print_pop_count(irp, (IrInstructionPopCount *)instruction); + break; case IrInstructionIdClz: ir_print_clz(irp, (IrInstructionClz *)instruction); break; diff --git a/test/behavior.zig b/test/behavior.zig index d47eb8fd6c..450dded56c 100644 --- a/test/behavior.zig +++ b/test/behavior.zig @@ -8,17 +8,17 @@ comptime { _ = @import("cases/atomics.zig"); _ = @import("cases/bitcast.zig"); _ = @import("cases/bool.zig"); + _ = @import("cases/bugs/1111.zig"); _ = @import("cases/bugs/394.zig"); _ = @import("cases/bugs/655.zig"); _ = @import("cases/bugs/656.zig"); _ = @import("cases/bugs/828.zig"); _ = @import("cases/bugs/920.zig"); - _ = @import("cases/bugs/1111.zig"); _ = @import("cases/byval_arg_var.zig"); _ = @import("cases/cast.zig"); _ = @import("cases/const_slice_child.zig"); - _ = @import("cases/coroutines.zig"); _ = @import("cases/coroutine_await_struct.zig"); + _ = @import("cases/coroutines.zig"); _ = @import("cases/defer.zig"); _ = @import("cases/enum.zig"); _ = @import("cases/enum_with_members.zig"); @@ -36,11 +36,12 @@ comptime { _ = @import("cases/math.zig"); _ = @import("cases/merge_error_sets.zig"); _ = @import("cases/misc.zig"); - _ = @import("cases/optional.zig"); _ = @import("cases/namespace_depends_on_compile_var/index.zig"); _ = @import("cases/new_stack_call.zig"); _ = @import("cases/null.zig"); + _ = @import("cases/optional.zig"); _ = @import("cases/pointers.zig"); + _ = @import("cases/popcount.zig"); _ = @import("cases/pub_enum/index.zig"); _ = @import("cases/ref_var_in_if_after_if_2nd_switch_prong.zig"); _ = @import("cases/reflection.zig"); diff --git a/test/cases/popcount.zig b/test/cases/popcount.zig new file mode 100644 index 0000000000..7dc7f28c0e --- /dev/null +++ b/test/cases/popcount.zig @@ -0,0 +1,24 @@ +const assert = @import("std").debug.assert; + +test "@popCount" { + comptime testPopCount(); + testPopCount(); +} + +fn testPopCount() void { + { + var x: u32 = 0xaa; + assert(@popCount(x) == 4); + } + { + var x: u32 = 0xaaaaaaaa; + assert(@popCount(x) == 16); + } + { + var x: i16 = -1; + assert(@popCount(x) == 16); + } + comptime { + assert(@popCount(0b11111111000110001100010000100001000011000011100101010001) == 24); + } +} diff --git a/test/compile_errors.zig b/test/compile_errors.zig index d508c7c36c..9071f0ad7e 100644 --- a/test/compile_errors.zig +++ b/test/compile_errors.zig @@ -1,6 +1,24 @@ const tests = @import("tests.zig"); pub fn addCases(cases: *tests.CompileErrorContext) void { + cases.add( + "@popCount - non-integer", + \\export fn entry(x: f32) u32 { + \\ return @popCount(x); + \\} + , + ".tmp_source.zig:2:22: error: expected integer type, found 'f32'", + ); + + cases.add( + "@popCount - negative comptime_int", + \\comptime { + \\ _ = @popCount(-1); + \\} + , + ".tmp_source.zig:2:9: error: @popCount on negative comptime_int value -1", + ); + cases.addCase(x: { const tc = cases.create( "wrong same named struct", -- cgit v1.2.3 From eb326e15530dd6dca4ccbe7dbfde7bf048de813e Mon Sep 17 00:00:00 2001 From: Andrew 
Kelley Date: Thu, 5 Jul 2018 15:09:02 -0400 Subject: M:N threading * add std.atomic.QueueMpsc.isEmpty * make std.debug.global_allocator thread-safe * std.event.Loop: now you have to choose between - initSingleThreaded - initMultiThreaded * std.event.Loop multiplexes coroutines onto kernel threads * Remove std.event.Loop.stop. Instead the event loop run() function returns once there are no pending coroutines. * fix crash in ir.cpp for calling methods under some conditions * small progress self-hosted compiler, analyzing top level declarations * Introduce std.event.Lock for synchronizing coroutines * introduce std.event.Locked(T) for data that only 1 coroutine should modify at once. * make the self hosted compiler use multi threaded event loop * make std.heap.DirectAllocator thread-safe See #174 TODO: * call sched_getaffinity instead of hard coding thread pool size 4 * support for Windows and MacOS * #1194 * #1197 --- src-self-hosted/main.zig | 5 +- src-self-hosted/module.zig | 257 ++++++++++++++++++-- src/ir.cpp | 2 +- std/atomic/queue_mpsc.zig | 17 ++ std/debug/index.zig | 7 +- std/event.zig | 580 +++++++++++++++++++++++++++++++++++++++------ std/heap.zig | 30 +-- std/mem.zig | 2 +- std/os/index.zig | 39 ++- std/os/linux/index.zig | 8 + 10 files changed, 833 insertions(+), 114 deletions(-) (limited to 'src/ir.cpp') diff --git a/src-self-hosted/main.zig b/src-self-hosted/main.zig index d17fc94c82..fe94a4460a 100644 --- a/src-self-hosted/main.zig +++ b/src-self-hosted/main.zig @@ -384,7 +384,8 @@ fn buildOutputType(allocator: *Allocator, args: []const []const u8, out_type: Mo const zig_lib_dir = introspect.resolveZigLibDir(allocator) catch os.exit(1); defer allocator.free(zig_lib_dir); - var loop = try event.Loop.init(allocator); + var loop: event.Loop = undefined; + try loop.initMultiThreaded(allocator); var module = try Module.create( &loop, @@ -493,8 +494,6 @@ async fn processBuildEvents(module: *Module, watch: bool) void { switch (build_event) { Module.Event.Ok => { std.debug.warn("Build succeeded\n"); - // for now we stop after 1 - module.loop.stop(); return; }, Module.Event.Error => |err| { diff --git a/src-self-hosted/module.zig b/src-self-hosted/module.zig index cf27c826c8..5ce1a7965a 100644 --- a/src-self-hosted/module.zig +++ b/src-self-hosted/module.zig @@ -2,6 +2,7 @@ const std = @import("std"); const os = std.os; const io = std.io; const mem = std.mem; +const Allocator = mem.Allocator; const Buffer = std.Buffer; const llvm = @import("llvm.zig"); const c = @import("c.zig"); @@ -13,6 +14,7 @@ const ArrayList = std.ArrayList; const errmsg = @import("errmsg.zig"); const ast = std.zig.ast; const event = std.event; +const assert = std.debug.assert; pub const Module = struct { loop: *event.Loop, @@ -81,6 +83,8 @@ pub const Module = struct { link_out_file: ?[]const u8, events: *event.Channel(Event), + exported_symbol_names: event.Locked(Decl.Table), + // TODO handle some of these earlier and report them in a way other than error codes pub const BuildError = error{ OutOfMemory, @@ -232,6 +236,7 @@ pub const Module = struct { .test_name_prefix = null, .emit_file_type = Emit.Binary, .link_out_file = null, + .exported_symbol_names = event.Locked(Decl.Table).init(loop, Decl.Table.init(loop.allocator)), }); } @@ -272,38 +277,91 @@ pub const Module = struct { return; }; await (async self.events.put(Event.Ok) catch unreachable); + // for now we stop after 1 + return; } } async fn addRootSrc(self: *Module) !void { const root_src_path = self.root_src_path orelse @panic("TODO handle null root src 
path"); + // TODO async/await os.path.real const root_src_real_path = os.path.real(self.a(), root_src_path) catch |err| { try printError("unable to get real path '{}': {}", root_src_path, err); return err; }; errdefer self.a().free(root_src_real_path); + // TODO async/await readFileAlloc() const source_code = io.readFileAlloc(self.a(), root_src_real_path) catch |err| { try printError("unable to open '{}': {}", root_src_real_path, err); return err; }; errdefer self.a().free(source_code); - var tree = try std.zig.parse(self.a(), source_code); - defer tree.deinit(); - - //var it = tree.root_node.decls.iterator(); - //while (it.next()) |decl_ptr| { - // const decl = decl_ptr.*; - // switch (decl.id) { - // ast.Node.Comptime => @panic("TODO"), - // ast.Node.VarDecl => @panic("TODO"), - // ast.Node.UseDecl => @panic("TODO"), - // ast.Node.FnDef => @panic("TODO"), - // ast.Node.TestDecl => @panic("TODO"), - // else => unreachable, - // } - //} + var parsed_file = ParsedFile{ + .tree = try std.zig.parse(self.a(), source_code), + .realpath = root_src_real_path, + }; + errdefer parsed_file.tree.deinit(); + + const tree = &parsed_file.tree; + + // create empty struct for it + const decls = try Scope.Decls.create(self.a(), null); + errdefer decls.destroy(); + + var it = tree.root_node.decls.iterator(0); + while (it.next()) |decl_ptr| { + const decl = decl_ptr.*; + switch (decl.id) { + ast.Node.Id.Comptime => @panic("TODO"), + ast.Node.Id.VarDecl => @panic("TODO"), + ast.Node.Id.FnProto => { + const fn_proto = @fieldParentPtr(ast.Node.FnProto, "base", decl); + + const name = if (fn_proto.name_token) |name_token| tree.tokenSlice(name_token) else { + @panic("TODO add compile error"); + //try self.addCompileError( + // &parsed_file, + // fn_proto.fn_token, + // fn_proto.fn_token + 1, + // "missing function name", + //); + continue; + }; + + const fn_decl = try self.a().create(Decl.Fn{ + .base = Decl{ + .id = Decl.Id.Fn, + .name = name, + .visib = parseVisibToken(tree, fn_proto.visib_token), + .resolution = Decl.Resolution.Unresolved, + }, + .value = Decl.Fn.Val{ .Unresolved = {} }, + .fn_proto = fn_proto, + }); + errdefer self.a().destroy(fn_decl); + + // TODO make this parallel + try await try async self.addTopLevelDecl(tree, &fn_decl.base); + }, + ast.Node.Id.TestDecl => @panic("TODO"), + else => unreachable, + } + } + } + + async fn addTopLevelDecl(self: *Module, tree: *ast.Tree, decl: *Decl) !void { + const is_export = decl.isExported(tree); + + { + const exported_symbol_names = await try async self.exported_symbol_names.acquire(); + defer exported_symbol_names.release(); + + if (try exported_symbol_names.value.put(decl.name, decl)) |other_decl| { + @panic("TODO report compile error"); + } + } } pub fn link(self: *Module, out_file: ?[]const u8) !void { @@ -350,3 +408,172 @@ fn printError(comptime format: []const u8, args: ...) 
!void { const out_stream = &stderr_file_out_stream.stream; try out_stream.print(format, args); } + +fn parseVisibToken(tree: *ast.Tree, optional_token_index: ?ast.TokenIndex) Visib { + if (optional_token_index) |token_index| { + const token = tree.tokens.at(token_index); + assert(token.id == Token.Id.Keyword_pub); + return Visib.Pub; + } else { + return Visib.Private; + } +} + +pub const Scope = struct { + id: Id, + parent: ?*Scope, + + pub const Id = enum { + Decls, + Block, + }; + + pub const Decls = struct { + base: Scope, + table: Decl.Table, + + pub fn create(a: *Allocator, parent: ?*Scope) !*Decls { + const self = try a.create(Decls{ + .base = Scope{ + .id = Id.Decls, + .parent = parent, + }, + .table = undefined, + }); + errdefer a.destroy(self); + + self.table = Decl.Table.init(a); + errdefer self.table.deinit(); + + return self; + } + + pub fn destroy(self: *Decls) void { + self.table.deinit(); + self.table.allocator.destroy(self); + self.* = undefined; + } + }; + + pub const Block = struct { + base: Scope, + }; +}; + +pub const Visib = enum { + Private, + Pub, +}; + +pub const Decl = struct { + id: Id, + name: []const u8, + visib: Visib, + resolution: Resolution, + + pub const Table = std.HashMap([]const u8, *Decl, mem.hash_slice_u8, mem.eql_slice_u8); + + pub fn isExported(base: *const Decl, tree: *ast.Tree) bool { + switch (base.id) { + Id.Fn => { + const fn_decl = @fieldParentPtr(Fn, "base", base); + return fn_decl.isExported(tree); + }, + else => return false, + } + } + + pub const Resolution = enum { + Unresolved, + InProgress, + Invalid, + Ok, + }; + + pub const Id = enum { + Var, + Fn, + CompTime, + }; + + pub const Var = struct { + base: Decl, + }; + + pub const Fn = struct { + base: Decl, + value: Val, + fn_proto: *const ast.Node.FnProto, + + // TODO https://github.com/ziglang/zig/issues/683 and then make this anonymous + pub const Val = union { + Unresolved: void, + Ok: *Value.Fn, + }; + + pub fn externLibName(self: Fn, tree: *ast.Tree) ?[]const u8 { + return if (self.fn_proto.extern_export_inline_token) |tok_index| x: { + const token = tree.tokens.at(tok_index); + break :x switch (token.id) { + Token.Id.Extern => tree.tokenSlicePtr(token), + else => null, + }; + } else null; + } + + pub fn isExported(self: Fn, tree: *ast.Tree) bool { + if (self.fn_proto.extern_export_inline_token) |tok_index| { + const token = tree.tokens.at(tok_index); + return token.id == Token.Id.Keyword_export; + } else { + return false; + } + } + }; + + pub const CompTime = struct { + base: Decl, + }; +}; + +pub const Value = struct { + pub const Fn = struct {}; +}; + +pub const Type = struct { + id: Id, + + pub const Id = enum { + Type, + Void, + Bool, + NoReturn, + Int, + Float, + Pointer, + Array, + Struct, + ComptimeFloat, + ComptimeInt, + Undefined, + Null, + Optional, + ErrorUnion, + ErrorSet, + Enum, + Union, + Fn, + Opaque, + Promise, + }; + + pub const Struct = struct { + base: Type, + decls: *Scope.Decls, + }; +}; + +pub const ParsedFile = struct { + tree: ast.Tree, + realpath: []const u8, +}; diff --git a/src/ir.cpp b/src/ir.cpp index 98b1bd85ad..3fc8306339 100644 --- a/src/ir.cpp +++ b/src/ir.cpp @@ -13278,7 +13278,7 @@ static TypeTableEntry *ir_analyze_instruction_call(IrAnalyze *ira, IrInstruction FnTableEntry *fn_table_entry = fn_ref->value.data.x_bound_fn.fn; IrInstruction *first_arg_ptr = fn_ref->value.data.x_bound_fn.first_arg; return ir_analyze_fn_call(ira, call_instruction, fn_table_entry, fn_table_entry->type_entry, - nullptr, first_arg_ptr, is_comptime, 
call_instruction->fn_inline); + fn_ref, first_arg_ptr, is_comptime, call_instruction->fn_inline); } else { ir_add_error_node(ira, fn_ref->source_node, buf_sprintf("type '%s' not a function", buf_ptr(&fn_ref->value.type->name))); diff --git a/std/atomic/queue_mpsc.zig b/std/atomic/queue_mpsc.zig index 8030565d7a..bc0a94258b 100644 --- a/std/atomic/queue_mpsc.zig +++ b/std/atomic/queue_mpsc.zig @@ -15,6 +15,8 @@ pub fn QueueMpsc(comptime T: type) type { pub const Node = std.atomic.Stack(T).Node; + /// Not thread-safe. The call to init() must complete before any other functions are called. + /// No deinitialization required. pub fn init() Self { return Self{ .inboxes = []std.atomic.Stack(T){ @@ -26,12 +28,15 @@ pub fn QueueMpsc(comptime T: type) type { }; } + /// Fully thread-safe. put() may be called from any thread at any time. pub fn put(self: *Self, node: *Node) void { const inbox_index = @atomicLoad(usize, &self.inbox_index, AtomicOrder.SeqCst); const inbox = &self.inboxes[inbox_index]; inbox.push(node); } + /// Must be called by only 1 consumer at a time. Every call to get() and isEmpty() must complete before + /// the next call to get(). pub fn get(self: *Self) ?*Node { if (self.outbox.pop()) |node| { return node; @@ -43,6 +48,18 @@ pub fn QueueMpsc(comptime T: type) type { } return self.outbox.pop(); } + + /// Must be called by only 1 consumer at a time. Every call to get() and isEmpty() must complete before + /// the next call to isEmpty(). + pub fn isEmpty(self: *Self) bool { + if (!self.outbox.isEmpty()) return false; + const prev_inbox_index = @atomicRmw(usize, &self.inbox_index, AtomicRmwOp.Xor, 0x1, AtomicOrder.SeqCst); + const prev_inbox = &self.inboxes[prev_inbox_index]; + while (prev_inbox.pop()) |node| { + self.outbox.push(node); + } + return self.outbox.isEmpty(); + } }; } diff --git a/std/debug/index.zig b/std/debug/index.zig index 57b2dfc300..a5e1c313f0 100644 --- a/std/debug/index.zig +++ b/std/debug/index.zig @@ -11,6 +11,11 @@ const builtin = @import("builtin"); pub const FailingAllocator = @import("failing_allocator.zig").FailingAllocator; +pub const runtime_safety = switch (builtin.mode) { + builtin.Mode.Debug, builtin.Mode.ReleaseSafe => true, + builtin.Mode.ReleaseFast, builtin.Mode.ReleaseSmall => false, +}; + /// Tries to write to stderr, unbuffered, and ignores any error returned. /// Does not append a newline. /// TODO atomic/multithread support @@ -1098,7 +1103,7 @@ fn readILeb128(in_stream: var) !i64 { /// This should only be used in temporary test programs. 
pub const global_allocator = &global_fixed_allocator.allocator; -var global_fixed_allocator = std.heap.FixedBufferAllocator.init(global_allocator_mem[0..]); +var global_fixed_allocator = std.heap.ThreadSafeFixedBufferAllocator.init(global_allocator_mem[0..]); var global_allocator_mem: [100 * 1024]u8 = undefined; // TODO make thread safe diff --git a/std/event.zig b/std/event.zig index c6ac04a9d0..2d69d0cb16 100644 --- a/std/event.zig +++ b/std/event.zig @@ -11,53 +11,69 @@ pub const TcpServer = struct { handleRequestFn: async<*mem.Allocator> fn (*TcpServer, *const std.net.Address, *const std.os.File) void, loop: *Loop, - sockfd: i32, + sockfd: ?i32, accept_coro: ?promise, listen_address: std.net.Address, waiting_for_emfile_node: PromiseNode, + listen_resume_node: event.Loop.ResumeNode, const PromiseNode = std.LinkedList(promise).Node; - pub fn init(loop: *Loop) !TcpServer { - const sockfd = try std.os.posixSocket(posix.AF_INET, posix.SOCK_STREAM | posix.SOCK_CLOEXEC | posix.SOCK_NONBLOCK, posix.PROTO_tcp); - errdefer std.os.close(sockfd); - + pub fn init(loop: *Loop) TcpServer { // TODO can't initialize handler coroutine here because we need well defined copy elision return TcpServer{ .loop = loop, - .sockfd = sockfd, + .sockfd = null, .accept_coro = null, .handleRequestFn = undefined, .waiting_for_emfile_node = undefined, .listen_address = undefined, + .listen_resume_node = event.Loop.ResumeNode{ + .id = event.Loop.ResumeNode.Id.Basic, + .handle = undefined, + }, }; } - pub fn listen(self: *TcpServer, address: *const std.net.Address, handleRequestFn: async<*mem.Allocator> fn (*TcpServer, *const std.net.Address, *const std.os.File) void) !void { + pub fn listen( + self: *TcpServer, + address: *const std.net.Address, + handleRequestFn: async<*mem.Allocator> fn (*TcpServer, *const std.net.Address, *const std.os.File) void, + ) !void { self.handleRequestFn = handleRequestFn; - try std.os.posixBind(self.sockfd, &address.os_addr); - try std.os.posixListen(self.sockfd, posix.SOMAXCONN); - self.listen_address = std.net.Address.initPosix(try std.os.posixGetSockName(self.sockfd)); + const sockfd = try std.os.posixSocket(posix.AF_INET, posix.SOCK_STREAM | posix.SOCK_CLOEXEC | posix.SOCK_NONBLOCK, posix.PROTO_tcp); + errdefer std.os.close(sockfd); + self.sockfd = sockfd; + + try std.os.posixBind(sockfd, &address.os_addr); + try std.os.posixListen(sockfd, posix.SOMAXCONN); + self.listen_address = std.net.Address.initPosix(try std.os.posixGetSockName(sockfd)); self.accept_coro = try async TcpServer.handler(self); errdefer cancel self.accept_coro.?; - try self.loop.addFd(self.sockfd, self.accept_coro.?); - errdefer self.loop.removeFd(self.sockfd); + self.listen_resume_node.handle = self.accept_coro.?; + try self.loop.addFd(sockfd, &self.listen_resume_node); + errdefer self.loop.removeFd(sockfd); + } + + /// Stop listening + pub fn close(self: *TcpServer) void { + self.loop.removeFd(self.sockfd.?); + std.os.close(self.sockfd.?); } pub fn deinit(self: *TcpServer) void { - self.loop.removeFd(self.sockfd); if (self.accept_coro) |accept_coro| cancel accept_coro; - std.os.close(self.sockfd); + if (self.sockfd) |sockfd| std.os.close(sockfd); } pub async fn handler(self: *TcpServer) void { while (true) { var accepted_addr: std.net.Address = undefined; - if (std.os.posixAccept(self.sockfd, &accepted_addr.os_addr, posix.SOCK_NONBLOCK | posix.SOCK_CLOEXEC)) |accepted_fd| { + if (std.os.posixAccept(self.sockfd.?, &accepted_addr.os_addr, posix.SOCK_NONBLOCK | posix.SOCK_CLOEXEC)) |accepted_fd| { var socket = 
std.os.File.openHandle(accepted_fd); _ = async self.handleRequestFn(self, accepted_addr, socket) catch |err| switch (err) { error.OutOfMemory => { @@ -95,32 +111,65 @@ pub const TcpServer = struct { pub const Loop = struct { allocator: *mem.Allocator, - keep_running: bool, next_tick_queue: std.atomic.QueueMpsc(promise), os_data: OsData, + dispatch_lock: u8, // TODO make this a bool + pending_event_count: usize, + extra_threads: []*std.os.Thread, + final_resume_node: ResumeNode, - const OsData = switch (builtin.os) { - builtin.Os.linux => struct { - epollfd: i32, - }, - else => struct {}, + pub const NextTickNode = std.atomic.QueueMpsc(promise).Node; + + pub const ResumeNode = struct { + id: Id, + handle: promise, + + pub const Id = enum { + Basic, + Stop, + EventFd, + }; + + pub const EventFd = struct { + base: ResumeNode, + eventfd: i32, + }; }; - pub const NextTickNode = std.atomic.QueueMpsc(promise).Node; + /// After initialization, call run(). + /// TODO copy elision / named return values so that the threads referencing *Loop + /// have the correct pointer value. + fn initSingleThreaded(self: *Loop, allocator: *mem.Allocator) !void { + return self.initInternal(allocator, 1); + } /// The allocator must be thread-safe because we use it for multiplexing /// coroutines onto kernel threads. - pub fn init(allocator: *mem.Allocator) !Loop { - var self = Loop{ - .keep_running = true, + /// After initialization, call run(). + /// TODO copy elision / named return values so that the threads referencing *Loop + /// have the correct pointer value. + fn initMultiThreaded(self: *Loop, allocator: *mem.Allocator) !void { + // TODO check the actual cpu core count + return self.initInternal(allocator, 4); + } + + /// Thread count is the total thread count. The thread pool size will be + /// max(thread_count - 1, 0) + fn initInternal(self: *Loop, allocator: *mem.Allocator, thread_count: usize) !void { + self.* = Loop{ + .pending_event_count = 0, .allocator = allocator, .os_data = undefined, .next_tick_queue = std.atomic.QueueMpsc(promise).init(), + .dispatch_lock = 1, // start locked so threads go directly into epoll wait + .extra_threads = undefined, + .final_resume_node = ResumeNode{ + .id = ResumeNode.Id.Stop, + .handle = undefined, + }, }; - try self.initOsData(); + try self.initOsData(thread_count); errdefer self.deinitOsData(); - - return self; } /// must call stop before deinit @@ -128,13 +177,70 @@ pub const Loop = struct { self.deinitOsData(); } - const InitOsDataError = std.os.LinuxEpollCreateError; + const InitOsDataError = std.os.LinuxEpollCreateError || mem.Allocator.Error || std.os.LinuxEventFdError || + std.os.SpawnThreadError || std.os.LinuxEpollCtlError; + + const wakeup_bytes = []u8{0x1} ** 8; - fn initOsData(self: *Loop) InitOsDataError!void { + fn initOsData(self: *Loop, thread_count: usize) InitOsDataError!void { switch (builtin.os) { builtin.Os.linux => { - self.os_data.epollfd = try std.os.linuxEpollCreate(std.os.linux.EPOLL_CLOEXEC); + const extra_thread_count = thread_count - 1; + self.os_data.available_eventfd_resume_nodes = std.atomic.Stack(ResumeNode.EventFd).init(); + self.os_data.eventfd_resume_nodes = try self.allocator.alloc( + std.atomic.Stack(ResumeNode.EventFd).Node, + extra_thread_count, + ); + errdefer self.allocator.free(self.os_data.eventfd_resume_nodes); + + errdefer { + while (self.os_data.available_eventfd_resume_nodes.pop()) |node| std.os.close(node.data.eventfd); + } + for (self.os_data.eventfd_resume_nodes) |*eventfd_node| { + eventfd_node.* = 
std.atomic.Stack(ResumeNode.EventFd).Node{ + .data = ResumeNode.EventFd{ + .base = ResumeNode{ + .id = ResumeNode.Id.EventFd, + .handle = undefined, + }, + .eventfd = try std.os.linuxEventFd(1, posix.EFD_CLOEXEC | posix.EFD_NONBLOCK), + }, + .next = undefined, + }; + self.os_data.available_eventfd_resume_nodes.push(eventfd_node); + } + + self.os_data.epollfd = try std.os.linuxEpollCreate(posix.EPOLL_CLOEXEC); errdefer std.os.close(self.os_data.epollfd); + + self.os_data.final_eventfd = try std.os.linuxEventFd(0, posix.EFD_CLOEXEC | posix.EFD_NONBLOCK); + errdefer std.os.close(self.os_data.final_eventfd); + + self.os_data.final_eventfd_event = posix.epoll_event{ + .events = posix.EPOLLIN, + .data = posix.epoll_data{ .ptr = @ptrToInt(&self.final_resume_node) }, + }; + try std.os.linuxEpollCtl( + self.os_data.epollfd, + posix.EPOLL_CTL_ADD, + self.os_data.final_eventfd, + &self.os_data.final_eventfd_event, + ); + self.extra_threads = try self.allocator.alloc(*std.os.Thread, extra_thread_count); + errdefer self.allocator.free(self.extra_threads); + + var extra_thread_index: usize = 0; + errdefer { + while (extra_thread_index != 0) { + extra_thread_index -= 1; + // writing 8 bytes to an eventfd cannot fail + std.os.posixWrite(self.os_data.final_eventfd, wakeup_bytes) catch unreachable; + self.extra_threads[extra_thread_index].wait(); + } + } + while (extra_thread_index < extra_thread_count) : (extra_thread_index += 1) { + self.extra_threads[extra_thread_index] = try std.os.spawnThread(self, workerRun); + } }, else => {}, } @@ -142,65 +248,154 @@ pub const Loop = struct { fn deinitOsData(self: *Loop) void { switch (builtin.os) { - builtin.Os.linux => std.os.close(self.os_data.epollfd), + builtin.Os.linux => { + std.os.close(self.os_data.final_eventfd); + while (self.os_data.available_eventfd_resume_nodes.pop()) |node| std.os.close(node.data.eventfd); + std.os.close(self.os_data.epollfd); + self.allocator.free(self.os_data.eventfd_resume_nodes); + self.allocator.free(self.extra_threads); + }, else => {}, } } - pub fn addFd(self: *Loop, fd: i32, prom: promise) !void { + /// resume_node must live longer than the promise that it holds a reference to. 
+ pub fn addFd(self: *Loop, fd: i32, resume_node: *ResumeNode) !void { + _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Add, 1, AtomicOrder.SeqCst); + errdefer { + _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst); + } + try self.addFdNoCounter(fd, resume_node); + } + + fn addFdNoCounter(self: *Loop, fd: i32, resume_node: *ResumeNode) !void { var ev = std.os.linux.epoll_event{ .events = std.os.linux.EPOLLIN | std.os.linux.EPOLLOUT | std.os.linux.EPOLLET, - .data = std.os.linux.epoll_data{ .ptr = @ptrToInt(prom) }, + .data = std.os.linux.epoll_data{ .ptr = @ptrToInt(resume_node) }, }; try std.os.linuxEpollCtl(self.os_data.epollfd, std.os.linux.EPOLL_CTL_ADD, fd, &ev); } pub fn removeFd(self: *Loop, fd: i32) void { + self.removeFdNoCounter(fd); + _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst); + } + + fn removeFdNoCounter(self: *Loop, fd: i32) void { std.os.linuxEpollCtl(self.os_data.epollfd, std.os.linux.EPOLL_CTL_DEL, fd, undefined) catch {}; } - async fn waitFd(self: *Loop, fd: i32) !void { + + pub async fn waitFd(self: *Loop, fd: i32) !void { defer self.removeFd(fd); + var resume_node = ResumeNode{ + .id = ResumeNode.Id.Basic, + .handle = undefined, + }; suspend |p| { - try self.addFd(fd, p); + resume_node.handle = p; + try self.addFd(fd, &resume_node); } + var a = &resume_node; // TODO better way to explicitly put memory in coro frame } - pub fn stop(self: *Loop) void { - // TODO make atomic - self.keep_running = false; - // TODO activate an fd in the epoll set which should cancel all the promises - } - - /// bring your own linked list node. this means it can't fail. + /// Bring your own linked list node. This means it can't fail. pub fn onNextTick(self: *Loop, node: *NextTickNode) void { + _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Add, 1, AtomicOrder.SeqCst); self.next_tick_queue.put(node); } pub fn run(self: *Loop) void { - while (self.keep_running) { - // TODO multiplex the next tick queue and the epoll event results onto a thread pool - while (self.next_tick_queue.get()) |node| { - resume node.data; - } - if (!self.keep_running) break; - - self.dispatchOsEvents(); + _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst); + self.workerRun(); + for (self.extra_threads) |extra_thread| { + extra_thread.wait(); } } - fn dispatchOsEvents(self: *Loop) void { - switch (builtin.os) { - builtin.Os.linux => { - var events: [16]std.os.linux.epoll_event = undefined; - const count = std.os.linuxEpollWait(self.os_data.epollfd, events[0..], -1); - for (events[0..count]) |ev| { - const p = @intToPtr(promise, ev.data.ptr); - resume p; + fn workerRun(self: *Loop) void { + start_over: while (true) { + if (@atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst) == 0) { + while (self.next_tick_queue.get()) |next_tick_node| { + const handle = next_tick_node.data; + if (self.next_tick_queue.isEmpty()) { + // last node, just resume it + _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst); + resume handle; + _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst); + continue :start_over; + } + + // non-last node, stick it in the epoll set so that + // other threads can get to it + if (self.os_data.available_eventfd_resume_nodes.pop()) |resume_stack_node| { + const eventfd_node = &resume_stack_node.data; + eventfd_node.base.handle = handle; + // the pending count is already accounted for + 
self.addFdNoCounter(eventfd_node.eventfd, &eventfd_node.base) catch |_| { + // fine, we didn't need it anyway + _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst); + self.os_data.available_eventfd_resume_nodes.push(resume_stack_node); + resume handle; + _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst); + continue :start_over; + }; + } else { + // threads are too busy, can't add another eventfd to wake one up + _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst); + resume handle; + _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst); + continue :start_over; + } } - }, - else => {}, + + const pending_event_count = @atomicLoad(usize, &self.pending_event_count, AtomicOrder.SeqCst); + if (pending_event_count == 0) { + // cause all the threads to stop + // writing 8 bytes to an eventfd cannot fail + std.os.posixWrite(self.os_data.final_eventfd, wakeup_bytes) catch unreachable; + return; + } + + _ = @atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst); + } + + // only process 1 event so we don't steal from other threads + var events: [1]std.os.linux.epoll_event = undefined; + const count = std.os.linuxEpollWait(self.os_data.epollfd, events[0..], -1); + for (events[0..count]) |ev| { + const resume_node = @intToPtr(*ResumeNode, ev.data.ptr); + const handle = resume_node.handle; + const resume_node_id = resume_node.id; + switch (resume_node_id) { + ResumeNode.Id.Basic => {}, + ResumeNode.Id.Stop => return, + ResumeNode.Id.EventFd => { + const event_fd_node = @fieldParentPtr(ResumeNode.EventFd, "base", resume_node); + self.removeFdNoCounter(event_fd_node.eventfd); + const stack_node = @fieldParentPtr(std.atomic.Stack(ResumeNode.EventFd).Node, "data", event_fd_node); + self.os_data.available_eventfd_resume_nodes.push(stack_node); + }, + } + resume handle; + if (resume_node_id == ResumeNode.Id.EventFd) { + _ = @atomicRmw(usize, &self.pending_event_count, AtomicRmwOp.Sub, 1, AtomicOrder.SeqCst); + } + } } } + + const OsData = switch (builtin.os) { + builtin.Os.linux => struct { + epollfd: i32, + // pre-allocated eventfds. all permanently active. + // this is how we send promises to be resumed on other threads. 
+ available_eventfd_resume_nodes: std.atomic.Stack(ResumeNode.EventFd), + eventfd_resume_nodes: []std.atomic.Stack(ResumeNode.EventFd).Node, + final_eventfd: i32, + final_eventfd_event: posix.epoll_event, + }, + else => struct {}, + }; }; /// many producer, many consumer, thread-safe, lock-free, runtime configurable buffer size @@ -304,9 +499,7 @@ pub fn Channel(comptime T: type) type { // TODO integrate this function with named return values // so we can get rid of this extra result copy var result: T = undefined; - var debug_handle: usize = undefined; suspend |handle| { - debug_handle = @ptrToInt(handle); var my_tick_node = Loop.NextTickNode{ .next = undefined, .data = handle, @@ -438,9 +631,8 @@ test "listen on a port, send bytes, receive bytes" { const self = @fieldParentPtr(Self, "tcp_server", tcp_server); var socket = _socket.*; // TODO https://github.com/ziglang/zig/issues/733 defer socket.close(); - const next_handler = async errorableHandler(self, _addr, socket) catch |err| switch (err) { - error.OutOfMemory => @panic("unable to handle connection: out of memory"), - }; + // TODO guarantee elision of this allocation + const next_handler = async errorableHandler(self, _addr, socket) catch unreachable; (await next_handler) catch |err| { std.debug.panic("unable to handle connection: {}\n", err); }; @@ -461,17 +653,18 @@ test "listen on a port, send bytes, receive bytes" { const ip4addr = std.net.parseIp4("127.0.0.1") catch unreachable; const addr = std.net.Address.initIp4(ip4addr, 0); - var loop = try Loop.init(std.debug.global_allocator); - var server = MyServer{ .tcp_server = try TcpServer.init(&loop) }; + var loop: Loop = undefined; + try loop.initSingleThreaded(std.debug.global_allocator); + var server = MyServer{ .tcp_server = TcpServer.init(&loop) }; defer server.tcp_server.deinit(); try server.tcp_server.listen(addr, MyServer.handler); - const p = try async doAsyncTest(&loop, server.tcp_server.listen_address); + const p = try async doAsyncTest(&loop, server.tcp_server.listen_address, &server.tcp_server); defer cancel p; loop.run(); } -async fn doAsyncTest(loop: *Loop, address: *const std.net.Address) void { +async fn doAsyncTest(loop: *Loop, address: *const std.net.Address, server: *TcpServer) void { errdefer @panic("test failure"); var socket_file = try await try async event.connect(loop, address); @@ -481,7 +674,7 @@ async fn doAsyncTest(loop: *Loop, address: *const std.net.Address) void { const amt_read = try socket_file.read(buf[0..]); const msg = buf[0..amt_read]; assert(mem.eql(u8, msg, "hello from server\n")); - loop.stop(); + server.close(); } test "std.event.Channel" { @@ -490,7 +683,9 @@ test "std.event.Channel" { const allocator = &da.allocator; - var loop = try Loop.init(allocator); + var loop: Loop = undefined; + // TODO make a multi threaded test + try loop.initSingleThreaded(allocator); defer loop.deinit(); const channel = try Channel(i32).create(&loop, 0); @@ -515,11 +710,248 @@ async fn testChannelGetter(loop: *Loop, channel: *Channel(i32)) void { const value2_promise = try async channel.get(); const value2 = await value2_promise; assert(value2 == 4567); - - loop.stop(); } async fn testChannelPutter(channel: *Channel(i32)) void { await (async channel.put(1234) catch @panic("out of memory")); await (async channel.put(4567) catch @panic("out of memory")); } + +/// Thread-safe async/await lock. +/// Does not make any syscalls - coroutines which are waiting for the lock are suspended, and +/// are resumed when the lock is released, in order. 
+pub const Lock = struct { + loop: *Loop, + shared_bit: u8, // TODO make this a bool + queue: Queue, + queue_empty_bit: u8, // TODO make this a bool + + const Queue = std.atomic.QueueMpsc(promise); + + pub const Held = struct { + lock: *Lock, + + pub fn release(self: Held) void { + // Resume the next item from the queue. + if (self.lock.queue.get()) |node| { + self.lock.loop.onNextTick(node); + return; + } + + // We need to release the lock. + _ = @atomicRmw(u8, &self.lock.queue_empty_bit, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst); + _ = @atomicRmw(u8, &self.lock.shared_bit, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst); + + // There might be a queue item. If we know the queue is empty, we can be done, + // because the other actor will try to obtain the lock. + // But if there's a queue item, we are the actor which must loop and attempt + // to grab the lock again. + if (@atomicLoad(u8, &self.lock.queue_empty_bit, AtomicOrder.SeqCst) == 1) { + return; + } + + while (true) { + const old_bit = @atomicRmw(u8, &self.lock.shared_bit, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst); + if (old_bit != 0) { + // We did not obtain the lock. Great, the queue is someone else's problem. + return; + } + + // Resume the next item from the queue. + if (self.lock.queue.get()) |node| { + self.lock.loop.onNextTick(node); + return; + } + + // Release the lock again. + _ = @atomicRmw(u8, &self.lock.queue_empty_bit, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst); + _ = @atomicRmw(u8, &self.lock.shared_bit, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst); + + // Find out if we can be done. + if (@atomicLoad(u8, &self.lock.queue_empty_bit, AtomicOrder.SeqCst) == 1) { + return; + } + } + } + }; + + pub fn init(loop: *Loop) Lock { + return Lock{ + .loop = loop, + .shared_bit = 0, + .queue = Queue.init(), + .queue_empty_bit = 1, + }; + } + + /// Must be called when not locked. Not thread safe. + /// All calls to acquire() and release() must complete before calling deinit(). + pub fn deinit(self: *Lock) void { + assert(self.shared_bit == 0); + while (self.queue.get()) |node| cancel node.data; + } + + pub async fn acquire(self: *Lock) Held { + var my_tick_node: Loop.NextTickNode = undefined; + + s: suspend |handle| { + my_tick_node.data = handle; + self.queue.put(&my_tick_node); + + // At this point, we are in the queue, so we might have already been resumed and this coroutine + // frame might be destroyed. For the rest of the suspend block we cannot access the coroutine frame. + + // We set this bit so that later we can rely on the fact, that if queue_empty_bit is 1, some actor + // will attempt to grab the lock. + _ = @atomicRmw(u8, &self.queue_empty_bit, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst); + + while (true) { + const old_bit = @atomicRmw(u8, &self.shared_bit, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst); + if (old_bit != 0) { + // We did not obtain the lock. Trust that our queue entry will resume us, and allow + // suspend to complete. + break; + } + // We got the lock. However we might have already been resumed from the queue. + if (self.queue.get()) |node| { + // Whether this node is us or someone else, we tail resume it. + resume node.data; + break; + } else { + // We already got resumed, and there are none left in the queue, which means that + // we aren't even supposed to hold the lock right now. + _ = @atomicRmw(u8, &self.queue_empty_bit, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst); + _ = @atomicRmw(u8, &self.shared_bit, AtomicRmwOp.Xchg, 0, AtomicOrder.SeqCst); + + // There might be a queue item. 
If we know the queue is empty, we can be done, + // because the other actor will try to obtain the lock. + // But if there's a queue item, we are the actor which must loop and attempt + // to grab the lock again. + if (@atomicLoad(u8, &self.queue_empty_bit, AtomicOrder.SeqCst) == 1) { + break; + } else { + continue; + } + } + unreachable; + } + } + + // TODO this workaround to force my_tick_node to be in the coroutine frame should + // not be necessary + var trash1 = &my_tick_node; + + return Held{ .lock = self }; + } +}; + +/// Thread-safe async/await lock that protects one piece of data. +/// Does not make any syscalls - coroutines which are waiting for the lock are suspended, and +/// are resumed when the lock is released, in order. +pub fn Locked(comptime T: type) type { + return struct { + lock: Lock, + private_data: T, + + const Self = this; + + pub const HeldLock = struct { + value: *T, + held: Lock.Held, + + pub fn release(self: HeldLock) void { + self.held.release(); + } + }; + + pub fn init(loop: *Loop, data: T) Self { + return Self{ + .lock = Lock.init(loop), + .private_data = data, + }; + } + + pub fn deinit(self: *Self) void { + self.lock.deinit(); + } + + pub async fn acquire(self: *Self) HeldLock { + return HeldLock{ + // TODO guaranteed allocation elision + .held = await (async self.lock.acquire() catch unreachable), + .value = &self.private_data, + }; + } + }; +} + +test "std.event.Lock" { + var da = std.heap.DirectAllocator.init(); + defer da.deinit(); + + const allocator = &da.allocator; + + var loop: Loop = undefined; + try loop.initMultiThreaded(allocator); + defer loop.deinit(); + + var lock = Lock.init(&loop); + defer lock.deinit(); + + const handle = try async testLock(&loop, &lock); + defer cancel handle; + loop.run(); + + assert(mem.eql(i32, shared_test_data, [1]i32{3 * 10} ** 10)); +} + +async fn testLock(loop: *Loop, lock: *Lock) void { + const handle1 = async lockRunner(lock) catch @panic("out of memory"); + var tick_node1 = Loop.NextTickNode{ + .next = undefined, + .data = handle1, + }; + loop.onNextTick(&tick_node1); + + const handle2 = async lockRunner(lock) catch @panic("out of memory"); + var tick_node2 = Loop.NextTickNode{ + .next = undefined, + .data = handle2, + }; + loop.onNextTick(&tick_node2); + + const handle3 = async lockRunner(lock) catch @panic("out of memory"); + var tick_node3 = Loop.NextTickNode{ + .next = undefined, + .data = handle3, + }; + loop.onNextTick(&tick_node3); + + await handle1; + await handle2; + await handle3; + + // TODO this is to force tick node memory to be in the coro frame + // there should be a way to make it explicit where the memory is + var a = &tick_node1; + var b = &tick_node2; + var c = &tick_node3; +} + +var shared_test_data = [1]i32{0} ** 10; +var shared_test_index: usize = 0; + +async fn lockRunner(lock: *Lock) void { + suspend; // resumed by onNextTick + + var i: usize = 0; + while (i < 10) : (i += 1) { + const handle = await (async lock.acquire() catch @panic("out of memory")); + defer handle.release(); + + shared_test_index = 0; + while (shared_test_index < shared_test_data.len) : (shared_test_index += 1) { + shared_test_data[shared_test_index] = shared_test_data[shared_test_index] + 1; + } + } +} diff --git a/std/heap.zig b/std/heap.zig index 2e02733da1..bcace34afe 100644 --- a/std/heap.zig +++ b/std/heap.zig @@ -38,7 +38,7 @@ fn cFree(self: *Allocator, old_mem: []u8) void { } /// This allocator makes a syscall directly for every allocation and free. -/// TODO make this thread-safe. 
The windows implementation will need some atomics. +/// Thread-safe and lock-free. pub const DirectAllocator = struct { allocator: Allocator, heap_handle: ?HeapHandle, @@ -74,34 +74,34 @@ pub const DirectAllocator = struct { const alloc_size = if (alignment <= os.page_size) n else n + alignment; const addr = p.mmap(null, alloc_size, p.PROT_READ | p.PROT_WRITE, p.MAP_PRIVATE | p.MAP_ANONYMOUS, -1, 0); if (addr == p.MAP_FAILED) return error.OutOfMemory; - if (alloc_size == n) return @intToPtr([*]u8, addr)[0..n]; - var aligned_addr = addr & ~usize(alignment - 1); - aligned_addr += alignment; + const aligned_addr = (addr & ~usize(alignment - 1)) + alignment; - //We can unmap the unused portions of our mmap, but we must only - // pass munmap bytes that exist outside our allocated pages or it - // will happily eat us too + // We can unmap the unused portions of our mmap, but we must only + // pass munmap bytes that exist outside our allocated pages or it + // will happily eat us too. - //Since alignment > page_size, we are by definition on a page boundry + // Since alignment > page_size, we are by definition on a page boundary. const unused_start = addr; const unused_len = aligned_addr - 1 - unused_start; - var err = p.munmap(unused_start, unused_len); - debug.assert(p.getErrno(err) == 0); + const err = p.munmap(unused_start, unused_len); + assert(p.getErrno(err) == 0); - //It is impossible that there is an unoccupied page at the top of our - // mmap. + // It is impossible that there is an unoccupied page at the top of our + // mmap. return @intToPtr([*]u8, aligned_addr)[0..n]; }, Os.windows => { const amt = n + alignment + @sizeOf(usize); - const heap_handle = self.heap_handle orelse blk: { + const optional_heap_handle = @atomicLoad(?HeapHandle, ?self.heap_handle, builtin.AtomicOrder.SeqCst); + const heap_handle = optional_heap_handle orelse blk: { const hh = os.windows.HeapCreate(os.windows.HEAP_NO_SERIALIZE, amt, 0) orelse return error.OutOfMemory; - self.heap_handle = hh; - break :blk hh; + const other_hh = @cmpxchgStrong(?HeapHandle, &self.heap_handle, null, hh, builtin.AtomicOrder.SeqCst, builtin.AtomicOrder.SeqCst) orelse break :blk hh; + _ = os.windows.HeapDestroy(hh); + break :blk other_hh; }; const ptr = os.windows.HeapAlloc(heap_handle, 0, amt) orelse return error.OutOfMemory; const root_addr = @ptrToInt(ptr); diff --git a/std/mem.zig b/std/mem.zig index b52d3e9f68..555e1e249d 100644 --- a/std/mem.zig +++ b/std/mem.zig @@ -6,7 +6,7 @@ const builtin = @import("builtin"); const mem = this; pub const Allocator = struct { - const Error = error{OutOfMemory}; + pub const Error = error{OutOfMemory}; /// Allocate byte_count bytes and return them in a slice, with the /// slice's pointer aligned at least to alignment bytes. 
diff --git a/std/os/index.zig b/std/os/index.zig index 52b36c351c..74a1b64f6e 100644 --- a/std/os/index.zig +++ b/std/os/index.zig @@ -2309,6 +2309,30 @@ pub fn linuxEpollWait(epfd: i32, events: []linux.epoll_event, timeout: i32) usiz } } +pub const LinuxEventFdError = error{ + InvalidFlagValue, + SystemResources, + ProcessFdQuotaExceeded, + SystemFdQuotaExceeded, + + Unexpected, +}; + +pub fn linuxEventFd(initval: u32, flags: u32) LinuxEventFdError!i32 { + const rc = posix.eventfd(initval, flags); + const err = posix.getErrno(rc); + switch (err) { + 0 => return @intCast(i32, rc), + else => return unexpectedErrorPosix(err), + + posix.EINVAL => return LinuxEventFdError.InvalidFlagValue, + posix.EMFILE => return LinuxEventFdError.ProcessFdQuotaExceeded, + posix.ENFILE => return LinuxEventFdError.SystemFdQuotaExceeded, + posix.ENODEV => return LinuxEventFdError.SystemResources, + posix.ENOMEM => return LinuxEventFdError.SystemResources, + } +} + pub const PosixGetSockNameError = error{ /// Insufficient resources were available in the system to perform the operation. SystemResources, @@ -2605,10 +2629,17 @@ pub fn spawnThread(context: var, comptime startFn: var) SpawnThreadError!*Thread const MainFuncs = struct { extern fn linuxThreadMain(ctx_addr: usize) u8 { - if (@sizeOf(Context) == 0) { - return startFn({}); - } else { - return startFn(@intToPtr(*const Context, ctx_addr).*); + const arg = if (@sizeOf(Context) == 0) {} else @intToPtr(*const Context, ctx_addr).*; + + switch (@typeId(@typeOf(startFn).ReturnType)) { + builtin.TypeId.Int => { + return startFn(arg); + }, + builtin.TypeId.Void => { + startFn(arg); + return 0; + }, + else => @compileError("expected return type of startFn to be 'u8', 'noreturn', 'void', or '!void'"), } } extern fn posixThreadMain(ctx: ?*c_void) ?*c_void { diff --git a/std/os/linux/index.zig b/std/os/linux/index.zig index 65aa659c82..1c15be4887 100644 --- a/std/os/linux/index.zig +++ b/std/os/linux/index.zig @@ -523,6 +523,10 @@ pub const CLONE_NEWPID = 0x20000000; pub const CLONE_NEWNET = 0x40000000; pub const CLONE_IO = 0x80000000; +pub const EFD_SEMAPHORE = 1; +pub const EFD_CLOEXEC = O_CLOEXEC; +pub const EFD_NONBLOCK = O_NONBLOCK; + pub const MS_RDONLY = 1; pub const MS_NOSUID = 2; pub const MS_NODEV = 4; @@ -1221,6 +1225,10 @@ pub fn epoll_wait(epoll_fd: i32, events: [*]epoll_event, maxevents: u32, timeout return syscall4(SYS_epoll_wait, @intCast(usize, epoll_fd), @ptrToInt(events), @intCast(usize, maxevents), @intCast(usize, timeout)); } +pub fn eventfd(count: u32, flags: u32) usize { + return syscall2(SYS_eventfd2, count, flags); +} + pub fn timerfd_create(clockid: i32, flags: u32) usize { return syscall2(SYS_timerfd_create, @intCast(usize, clockid), @intCast(usize, flags)); } -- cgit v1.2.3 From 9eb51e20ed1a040a617541303db760f80ffd3aa1 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 9 Jul 2018 10:43:29 -0400 Subject: fix crash on @ptrToInt of a *void closes #1192 --- src/ir.cpp | 6 ++++++ test/compile_errors.zig | 9 +++++++++ 2 files changed, 15 insertions(+) (limited to 'src/ir.cpp') diff --git a/src/ir.cpp b/src/ir.cpp index 98b1bd85ad..5e4c847e14 100644 --- a/src/ir.cpp +++ b/src/ir.cpp @@ -19796,6 +19796,12 @@ static TypeTableEntry *ir_analyze_instruction_ptr_to_int(IrAnalyze *ira, IrInstr return ira->codegen->builtin_types.entry_invalid; } + if (!type_has_bits(target->value.type)) { + ir_add_error(ira, target, + buf_sprintf("pointer to size 0 type has no address")); + return ira->codegen->builtin_types.entry_invalid; + } + if 
(instr_is_comptime(target)) { ConstExprValue *val = ir_resolve_const(ira, target, UndefBad); if (!val) diff --git a/test/compile_errors.zig b/test/compile_errors.zig index 9071f0ad7e..4ed65e449d 100644 --- a/test/compile_errors.zig +++ b/test/compile_errors.zig @@ -1,6 +1,15 @@ const tests = @import("tests.zig"); pub fn addCases(cases: *tests.CompileErrorContext) void { + cases.add( + "@ptrToInt on *void", + \\export fn entry() bool { + \\ return @ptrToInt(&{}) == @ptrToInt(&{}); + \\} + , + ".tmp_source.zig:2:23: error: pointer to size 0 type has no address", + ); + cases.add( "@popCount - non-integer", \\export fn entry(x: f32) u32 { -- cgit v1.2.3 From 2ee67b7642cfeef36d8ebbc08080202b5b1d1958 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 9 Jul 2018 11:13:29 -0400 Subject: langref: docs for invalid error set cast and incorrect pointer alignment also add detection of incorrect pointer alignment at compile-time of pointers that were constructed with `@intToPtr`. --- doc/langref.html.in | 54 ++++++++++++++++++++++++++++++++++++++++++++++--- src/ir.cpp | 9 +++++++++ test/compile_errors.zig | 10 +++++++++ 3 files changed, 70 insertions(+), 3 deletions(-) (limited to 'src/ir.cpp') diff --git a/doc/langref.html.in b/doc/langref.html.in index 8eaffb64ad..16e9023f26 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -6649,12 +6649,60 @@ pub fn main() void { {#header_close#} {#header_open|Invalid Error Set Cast#} -
<p>TODO</p> + <p>At compile-time:</p>
+ {#code_begin|test_err|error.B not a member of error set 'Set2'#} +const Set1 = error{ + A, + B, +}; +const Set2 = error{ + A, + C, +}; +comptime { + _ = @errSetCast(Set2, Set1.B); +} + {#code_end#} +
At runtime:

+ {#code_begin|exe_err#} +const Set1 = error{ + A, + B, +}; +const Set2 = error{ + A, + C, +}; +pub fn main() void { + _ = foo(Set1.B); +} +fn foo(set1: Set1) Set2 { + return @errSetCast(Set2, set1); +} + {#code_end#} {#header_close#} {#header_open|Incorrect Pointer Alignment#} -
<p>TODO</p> - + <p>At compile-time:</p>
+ {#code_begin|test_err|pointer address 0x1 is not aligned to 4 bytes#} +comptime { + const ptr = @intToPtr(*i32, 0x1); + const aligned = @alignCast(4, ptr); +} + {#code_end#} +
<p>At runtime:</p>
+ {#code_begin|exe_err#} +pub fn main() !void { + var array align(4) = []u32{ 0x11111111, 0x11111111 }; + const bytes = @sliceToBytes(array[0..]); + if (foo(bytes) != 0x11111111) return error.Wrong; +} +fn foo(bytes: []u8) u32 { + const slice4 = bytes[1..5]; + const int_slice = @bytesToSlice(u32, @alignCast(4, slice4)); + return int_slice[0]; +} + {#code_end#} {#header_close#} {#header_open|Wrong Union Field Access#}
<p>TODO</p>
diff --git a/src/ir.cpp b/src/ir.cpp index 5e4c847e14..dcd39ccfe5 100644 --- a/src/ir.cpp +++ b/src/ir.cpp @@ -19370,6 +19370,15 @@ static IrInstruction *ir_align_cast(IrAnalyze *ira, IrInstruction *target, uint3 if (!val) return ira->codegen->invalid_instruction; + if (val->data.x_ptr.special == ConstPtrSpecialHardCodedAddr && + val->data.x_ptr.data.hard_coded_addr.addr % align_bytes != 0) + { + ir_add_error(ira, target, + buf_sprintf("pointer address 0x%lx is not aligned to %" PRIu32 " bytes", + val->data.x_ptr.data.hard_coded_addr.addr, align_bytes)); + return ira->codegen->invalid_instruction; + } + IrInstruction *result = ir_create_const(&ira->new_irb, target->scope, target->source_node, result_type); copy_const_val(&result->value, val, false); result->value.type = result_type; diff --git a/test/compile_errors.zig b/test/compile_errors.zig index 4ed65e449d..1b76c01564 100644 --- a/test/compile_errors.zig +++ b/test/compile_errors.zig @@ -1,6 +1,16 @@ const tests = @import("tests.zig"); pub fn addCases(cases: *tests.CompileErrorContext) void { + cases.add( + "bad @alignCast at comptime", + \\comptime { + \\ const ptr = @intToPtr(*i32, 0x1); + \\ const aligned = @alignCast(4, ptr); + \\} + , + ".tmp_source.zig:3:35: error: pointer address 0x1 is not aligned to 4 bytes", + ); + cases.add( "@ptrToInt on *void", \\export fn entry() bool { -- cgit v1.2.3 From 0ac1b83885c7f2a97a8ac25657afcb5c9b80afb4 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 9 Jul 2018 17:13:31 -0400 Subject: fix non-portable format specifier --- src/ir.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/ir.cpp') diff --git a/src/ir.cpp b/src/ir.cpp index dcd39ccfe5..505a32247e 100644 --- a/src/ir.cpp +++ b/src/ir.cpp @@ -19374,7 +19374,7 @@ static IrInstruction *ir_align_cast(IrAnalyze *ira, IrInstruction *target, uint3 val->data.x_ptr.data.hard_coded_addr.addr % align_bytes != 0) { ir_add_error(ira, target, - buf_sprintf("pointer address 0x%lx is not aligned to %" PRIu32 " bytes", + buf_sprintf("pointer address 0x%" ZIG_PRI_x64 " is not aligned to %" PRIu32 " bytes", val->data.x_ptr.data.hard_coded_addr.addr, align_bytes)); return ira->codegen->invalid_instruction; } -- cgit v1.2.3 From 28f9230b40ee7aa179705c39616aaf2a5f303b73 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 10 Jul 2018 10:12:08 -0400 Subject: fix crash when calling comptime-known undefined function ptr closes #880 closes #1212 --- src/ir.cpp | 2 ++ test/compile_errors.zig | 13 +++++++++++++ 2 files changed, 15 insertions(+) (limited to 'src/ir.cpp') diff --git a/src/ir.cpp b/src/ir.cpp index 2dc6ddad2c..10ce3254fd 100644 --- a/src/ir.cpp +++ b/src/ir.cpp @@ -13271,6 +13271,8 @@ static TypeTableEntry *ir_analyze_instruction_call(IrAnalyze *ira, IrInstruction return ir_finish_anal(ira, cast_instruction->value.type); } else if (fn_ref->value.type->id == TypeTableEntryIdFn) { FnTableEntry *fn_table_entry = ir_resolve_fn(ira, fn_ref); + if (fn_table_entry == nullptr) + return ira->codegen->builtin_types.entry_invalid; return ir_analyze_fn_call(ira, call_instruction, fn_table_entry, fn_table_entry->type_entry, fn_ref, nullptr, is_comptime, call_instruction->fn_inline); } else if (fn_ref->value.type->id == TypeTableEntryIdBoundFn) { diff --git a/test/compile_errors.zig b/test/compile_errors.zig index 1b76c01564..a6db8d50b4 100644 --- a/test/compile_errors.zig +++ b/test/compile_errors.zig @@ -1,6 +1,19 @@ const tests = @import("tests.zig"); pub fn addCases(cases: *tests.CompileErrorContext) void { + cases.add( + "use of 
comptime-known undefined function value", + \\const Cmd = struct { + \\ exec: fn () void, + \\}; + \\export fn entry() void { + \\ const command = Cmd{ .exec = undefined }; + \\ command.exec(); + \\} + , + ".tmp_source.zig:6:12: error: use of undefined value", + ); + cases.add( "bad @alignCast at comptime", \\comptime { -- cgit v1.2.3 From 0ce6934e2631eb3beca817d3bce12ecb13aafa13 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 10 Jul 2018 11:44:47 -0400 Subject: allow var args calls to async functions --- src/ir.cpp | 53 +++++++++++++++++++++++++++++++++-------------------- std/event/loop.zig | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 20 deletions(-) (limited to 'src/ir.cpp') diff --git a/src/ir.cpp b/src/ir.cpp index 10ce3254fd..7f7436010e 100644 --- a/src/ir.cpp +++ b/src/ir.cpp @@ -12721,14 +12721,22 @@ static TypeTableEntry *ir_analyze_fn_call(IrAnalyze *ira, IrInstructionCall *cal // for extern functions, the var args argument is not counted. // for zig functions, it is. size_t var_args_1_or_0; - if (fn_type_id->cc == CallingConventionUnspecified) { - var_args_1_or_0 = fn_type_id->is_var_args ? 1 : 0; - } else { + if (fn_type_id->cc == CallingConventionC) { var_args_1_or_0 = 0; + } else { + var_args_1_or_0 = fn_type_id->is_var_args ? 1 : 0; } size_t src_param_count = fn_type_id->param_count - var_args_1_or_0; size_t call_param_count = call_instruction->arg_count + first_arg_1_or_0; + for (size_t i = 0; i < call_instruction->arg_count; i += 1) { + ConstExprValue *arg_tuple_value = &call_instruction->args[i]->other->value; + if (arg_tuple_value->type->id == TypeTableEntryIdArgTuple) { + call_param_count -= 1; + call_param_count += arg_tuple_value->data.x_arg_tuple.end_index - + arg_tuple_value->data.x_arg_tuple.start_index; + } + } AstNode *source_node = call_instruction->base.source_node; AstNode *fn_proto_node = fn_entry ? fn_entry->proto_node : nullptr;; @@ -12909,11 +12917,6 @@ static TypeTableEntry *ir_analyze_fn_call(IrAnalyze *ira, IrInstructionCall *cal buf_sprintf("calling a generic function requires compile-time known function value")); return ira->codegen->builtin_types.entry_invalid; } - if (call_instruction->is_async && fn_type_id->is_var_args) { - ir_add_error(ira, call_instruction->fn_ref, - buf_sprintf("compiler bug: TODO: implement var args async functions. 
https://github.com/ziglang/zig/issues/557")); - return ira->codegen->builtin_types.entry_invalid; - } // Count the arguments of the function type id we are creating size_t new_fn_arg_count = first_arg_1_or_0; @@ -12988,18 +12991,18 @@ static TypeTableEntry *ir_analyze_fn_call(IrAnalyze *ira, IrInstructionCall *cal if (type_is_invalid(arg->value.type)) return ira->codegen->builtin_types.entry_invalid; - AstNode *param_decl_node = fn_proto_node->data.fn_proto.params.at(next_proto_i); - assert(param_decl_node->type == NodeTypeParamDecl); - bool is_var_args = param_decl_node->data.param_decl.is_var_args; - if (is_var_args && !found_first_var_arg) { - first_var_arg = inst_fn_type_id.param_count; - found_first_var_arg = true; - } - if (arg->value.type->id == TypeTableEntryIdArgTuple) { for (size_t arg_tuple_i = arg->value.data.x_arg_tuple.start_index; arg_tuple_i < arg->value.data.x_arg_tuple.end_index; arg_tuple_i += 1) { + AstNode *param_decl_node = fn_proto_node->data.fn_proto.params.at(next_proto_i); + assert(param_decl_node->type == NodeTypeParamDecl); + bool is_var_args = param_decl_node->data.param_decl.is_var_args; + if (is_var_args && !found_first_var_arg) { + first_var_arg = inst_fn_type_id.param_count; + found_first_var_arg = true; + } + VariableTableEntry *arg_var = get_fn_var_by_index(parent_fn_entry, arg_tuple_i); if (arg_var == nullptr) { ir_add_error(ira, arg, @@ -13020,10 +13023,20 @@ static TypeTableEntry *ir_analyze_fn_call(IrAnalyze *ira, IrInstructionCall *cal return ira->codegen->builtin_types.entry_invalid; } } - } else if (!ir_analyze_fn_call_generic_arg(ira, fn_proto_node, arg, &impl_fn->child_scope, - &next_proto_i, generic_id, &inst_fn_type_id, casted_args, impl_fn)) - { - return ira->codegen->builtin_types.entry_invalid; + } else { + AstNode *param_decl_node = fn_proto_node->data.fn_proto.params.at(next_proto_i); + assert(param_decl_node->type == NodeTypeParamDecl); + bool is_var_args = param_decl_node->data.param_decl.is_var_args; + if (is_var_args && !found_first_var_arg) { + first_var_arg = inst_fn_type_id.param_count; + found_first_var_arg = true; + } + + if (!ir_analyze_fn_call_generic_arg(ira, fn_proto_node, arg, &impl_fn->child_scope, + &next_proto_i, generic_id, &inst_fn_type_id, casted_args, impl_fn)) + { + return ira->codegen->builtin_types.entry_invalid; + } } } diff --git a/std/event/loop.zig b/std/event/loop.zig index 613d4f48a4..646f15875f 100644 --- a/std/event/loop.zig +++ b/std/event/loop.zig @@ -360,6 +360,28 @@ pub const Loop = struct { } } + /// This is equivalent to an async call, except instead of beginning execution of the async function, + /// it immediately returns to the caller, and the async function is queued in the event loop. It still + /// returns a promise to be awaited. + pub fn call(self: *Loop, comptime func: var, args: ...) !(promise->@typeOf(func).ReturnType) { + const S = struct { + async fn asyncFunc(loop: *Loop, handle: *promise->@typeOf(func).ReturnType, args2: ...) 
@typeOf(func).ReturnType { + suspend |p| { + handle.* = p; + var my_tick_node = Loop.NextTickNode{ + .next = undefined, + .data = p, + }; + loop.onNextTick(&my_tick_node); + } + // TODO guaranteed allocation elision for await in same func as async + return await (async func(args2) catch unreachable); + } + }; + var handle: promise->@typeOf(func).ReturnType = undefined; + return async S.asyncFunc(self, &handle, args); + } + fn workerRun(self: *Loop) void { start_over: while (true) { if (@atomicRmw(u8, &self.dispatch_lock, AtomicRmwOp.Xchg, 1, AtomicOrder.SeqCst) == 0) { @@ -575,3 +597,33 @@ test "std.event.Loop - basic" { loop.run(); } + +test "std.event.Loop - call" { + var da = std.heap.DirectAllocator.init(); + defer da.deinit(); + + const allocator = &da.allocator; + + var loop: Loop = undefined; + try loop.initMultiThreaded(allocator); + defer loop.deinit(); + + var did_it = false; + const handle = try loop.call(testEventLoop); + const handle2 = try loop.call(testEventLoop2, handle, &did_it); + defer cancel handle2; + + loop.run(); + + assert(did_it); +} + +async fn testEventLoop() i32 { + return 1234; +} + +async fn testEventLoop2(h: promise->i32, did_it: *bool) void { + const value = await h; + assert(value == 1234); + did_it.* = true; +} -- cgit v1.2.3
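
To make the effect of the final commit concrete, here is a condensed usage sketch of the new Loop.call API. It follows the shape of the "std.event.Loop - call" test in that diff; the helper names (compute, check), the test body, and the assumption that Loop is reachable as std.event.Loop are illustrative rather than part of the patch, and in this version of Zig the returned promise still has to be awaited from inside another coroutine.

const std = @import("std");
const Loop = std.event.Loop;
const assert = std.debug.assert;

// Queued on the event loop by Loop.call; runs when a worker thread resumes it.
async fn compute() i32 {
    return 42;
}

// await must happen inside a coroutine, so the result is checked here.
async fn check(h: promise->i32, ok: *bool) void {
    const value = await h;
    ok.* = value == 42;
}

test "queue async work with Loop.call" {
    var da = std.heap.DirectAllocator.init();
    defer da.deinit();

    var loop: Loop = undefined;
    try loop.initMultiThreaded(&da.allocator);
    defer loop.deinit();

    var ok = false;
    // call() queues the coroutine and returns its promise without starting it;
    // extra arguments are forwarded to the async function via the new var args support.
    const h1 = try loop.call(compute);
    const h2 = try loop.call(check, h1, &ok);
    defer cancel h2;

    loop.run();
    assert(ok);
}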