1 files changed, 162 insertions, 179 deletions
diff --git a/src/ir.cpp b/src/ir.cpp
index f62a58e37e..cbc00f0cfe 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -11049,6 +11049,19 @@ static ZigType *ir_resolve_type(IrAnalyze *ira, IrInstruction *type_value) {
     return ir_resolve_const_type(ira->codegen, ira->new_irb.exec, type_value->source_node, val);
 }
 
+static ZigType *ir_resolve_vector_elem_type(IrAnalyze *ira, IrInstruction *elem_type_value) {
+    ZigType *elem_type = ir_resolve_type(ira, elem_type_value);
+    if (type_is_invalid(elem_type))
+        return ira->codegen->builtin_types.entry_invalid;
+    if (!is_valid_vector_elem_type(elem_type)) {
+        ir_add_error(ira, elem_type_value,
+            buf_sprintf("vector element type must be integer, float, bool, or pointer; '%s' is invalid",
+                buf_ptr(&elem_type->name)));
+        return ira->codegen->builtin_types.entry_invalid;
+    }
+    return elem_type;
+}
+
 static ZigType *ir_resolve_int_type(IrAnalyze *ira, IrInstruction *type_value) {
     ZigType *ty = ir_resolve_type(ira, type_value);
     if (type_is_invalid(ty))
@@ -22096,242 +22109,212 @@ static IrInstruction *ir_analyze_instruction_vector_type(IrAnalyze *ira, IrInstr
     if (!ir_resolve_unsigned(ira, instruction->len->child, ira->codegen->builtin_types.entry_u32, &len))
         return ira->codegen->invalid_instruction;
 
-    ZigType *elem_type = ir_resolve_type(ira, instruction->elem_type->child);
+    ZigType *elem_type = ir_resolve_vector_elem_type(ira, instruction->elem_type->child);
     if (type_is_invalid(elem_type))
         return ira->codegen->invalid_instruction;
 
-    if (!is_valid_vector_elem_type(elem_type)) {
-        ir_add_error(ira, instruction->elem_type,
-            buf_sprintf("vector element type must be integer, float, bool, or pointer; '%s' is invalid",
-                buf_ptr(&elem_type->name)));
-        return ira->codegen->invalid_instruction;
-    }
-
     ZigType *vector_type = get_vector_type(ira->codegen, len, elem_type);
 
     return ir_const_type(ira, &instruction->base, vector_type);
 }
 
 static IrInstruction *ir_analyze_shuffle_vector(IrAnalyze *ira, IrInstruction *source_instr,
-    ZigType *scalar_type, IrInstruction *a, IrInstruction *b, IrInstruction *mask) {
-    assert(source_instr && scalar_type && a && b && mask);
-    assert(scalar_type->id == ZigTypeIdBool ||
-           scalar_type->id == ZigTypeIdInt ||
-           scalar_type->id == ZigTypeIdFloat ||
-           scalar_type->id == ZigTypeIdPointer);
-
-    ZigType *mask_type = mask->value.type;
-    if (type_is_invalid(mask_type))
+    ZigType *scalar_type, IrInstruction *a, IrInstruction *b, IrInstruction *mask)
+{
+    ir_assert(source_instr && scalar_type && a && b && mask, source_instr);
+    ir_assert(is_valid_vector_elem_type(scalar_type), source_instr);
+
+    uint32_t len_mask;
+    if (mask->value.type->id == ZigTypeIdVector) {
+        len_mask = mask->value.type->data.vector.len;
+    } else if (mask->value.type->id == ZigTypeIdArray) {
+        len_mask = mask->value.type->data.array.len;
+    } else {
+        ir_add_error(ira, mask,
+            buf_sprintf("expected vector or array, found '%s'",
+                buf_ptr(&mask->value.type->name)));
         return ira->codegen->invalid_instruction;
-
-    const char *shuffle_mask_fail_fmt = "@shuffle mask operand must be a vector of signed 32-bit integers, got '%s'";
-
-    if (mask_type->id == ZigTypeIdArray) {
-        ZigType *vector_type = get_vector_type(ira->codegen, mask_type->data.array.len, mask_type->data.array.child_type);
-        mask = ir_analyze_array_to_vector(ira, mask, mask, vector_type);
-        if (!mask)
-            return ira->codegen->invalid_instruction;
-        mask_type = vector_type;
     }
+    mask = ir_implicit_cast(ira, mask, get_vector_type(ira->codegen, len_mask,
+                ira->codegen->builtin_types.entry_i32));
+    if (type_is_invalid(mask->value.type))
+        return ira->codegen->invalid_instruction;
 
-    if (mask_type->id != ZigTypeIdVector) {
-        ir_add_error(ira, mask,
-            buf_sprintf(shuffle_mask_fail_fmt, buf_ptr(&mask->value.type->name)));
+    uint32_t len_a;
+    if (a->value.type->id == ZigTypeIdVector) {
+        len_a = a->value.type->data.vector.len;
+    } else if (a->value.type->id == ZigTypeIdArray) {
+        len_a = a->value.type->data.array.len;
+    } else if (a->value.type->id == ZigTypeIdUndefined) {
+        len_a = UINT32_MAX;
+    } else {
+        ir_add_error(ira, a,
+            buf_sprintf("expected vector or array with element type '%s', found '%s'",
+                buf_ptr(&scalar_type->name),
+                buf_ptr(&a->value.type->name)));
         return ira->codegen->invalid_instruction;
     }
 
-    ZigType *mask_scalar_type = mask_type->data.array.child_type;
-    if (mask_scalar_type->id != ZigTypeIdInt) {
-        ir_add_error(ira, mask,
-            buf_sprintf(shuffle_mask_fail_fmt, buf_ptr(&mask->value.type->name)));
+    uint32_t len_b;
+    if (b->value.type->id == ZigTypeIdVector) {
+        len_b = b->value.type->data.vector.len;
+    } else if (b->value.type->id == ZigTypeIdArray) {
+        len_b = b->value.type->data.array.len;
+    } else if (b->value.type->id == ZigTypeIdUndefined) {
+        len_b = UINT32_MAX;
+    } else {
+        ir_add_error(ira, b,
+            buf_sprintf("expected vector or array with element type '%s', found '%s'",
+                buf_ptr(&scalar_type->name),
+                buf_ptr(&b->value.type->name)));
         return ira->codegen->invalid_instruction;
     }
 
-    if (mask_scalar_type->data.integral.bit_count != 32 ||
-        mask_scalar_type->data.integral.is_signed == false) {
-        ir_add_error(ira, mask,
-            buf_sprintf(shuffle_mask_fail_fmt, buf_ptr(&mask->value.type->name)));
-        return ira->codegen->invalid_instruction;
+    if (len_a == UINT32_MAX && len_b == UINT32_MAX) {
+        return ir_const_undef(ira, a, get_vector_type(ira->codegen, len_mask, scalar_type));
     }
 
-    uint64_t len_a, len_b, len_c = mask->value.type->data.vector.len;
-    if (a->value.type->id != ZigTypeIdVector) {
-        if (a->value.type->id != ZigTypeIdUndefined) {
-            ir_add_error(ira, a,
-                buf_sprintf("expected vector of element type '%s' got '%s'",
-                    buf_ptr(&scalar_type->name),
-                    buf_ptr(&a->value.type->name)));
+    if (len_a == UINT32_MAX) {
+        len_a = len_b;
+        a = ir_const_undef(ira, a, get_vector_type(ira->codegen, len_a, scalar_type));
+    } else {
+        a = ir_implicit_cast(ira, a, get_vector_type(ira->codegen, len_a, scalar_type));
+        if (type_is_invalid(a->value.type))
             return ira->codegen->invalid_instruction;
-        }
+    }
+
+    if (len_b == UINT32_MAX) {
+        len_b = len_a;
+        b = ir_const_undef(ira, b, get_vector_type(ira->codegen, len_b, scalar_type));
     } else {
-        len_a = a->value.type->data.vector.len;
+        b = ir_implicit_cast(ira, b, get_vector_type(ira->codegen, len_b, scalar_type));
+        if (type_is_invalid(b->value.type))
+            return ira->codegen->invalid_instruction;
     }
 
-    if (b->value.type->id != ZigTypeIdVector) {
-        if (b->value.type->id != ZigTypeIdUndefined) {
-            ir_add_error(ira, b,
-                buf_sprintf("expected vector of element type '%s' got '%s'",
-                    buf_ptr(&scalar_type->name),
-                    buf_ptr(&b->value.type->name)));
+    ConstExprValue *mask_val = ir_resolve_const(ira, mask, UndefOk);
+    if (mask_val == nullptr)
+        return ira->codegen->invalid_instruction;
+
+    expand_undef_array(ira->codegen, mask_val);
+
+    for (uint32_t i = 0; i < len_mask; i += 1) {
+        ConstExprValue *mask_elem_val = &mask_val->data.x_array.data.s_none.elements[i];
+        if (mask_elem_val->special == ConstValSpecialUndef)
+            continue;
+        int32_t v_i32 = bigint_as_signed(&mask_elem_val->data.x_bigint);
+        uint32_t v;
+        IrInstruction *chosen_operand;
+        if (v_i32 >= 0) {
+            v = (uint32_t)v_i32;
+            chosen_operand = a;
+        } else {
+            v = (uint32_t)~v_i32;
+            chosen_operand = b;
+        }
+        if (v >= chosen_operand->value.type->data.vector.len) {
+            ErrorMsg *msg = ir_add_error(ira, mask,
+                buf_sprintf("mask index '%u' has out-of-bounds selection", i));
+            add_error_note(ira->codegen, msg, chosen_operand->source_node,
+                buf_sprintf("selected index '%u' out of bounds of %s", v,
+                    buf_ptr(&chosen_operand->value.type->name)));
+            if (chosen_operand == a && v < len_a + len_b) {
+                add_error_note(ira->codegen, msg, b->source_node,
+                    buf_create_from_str("selections from the second vector are specified with negative numbers"));
+            }
             return ira->codegen->invalid_instruction;
         }
-    } else {
-        len_b = b->value.type->data.vector.len;
     }
 
-    if (a->value.type->id == ZigTypeIdUndefined && b->value.type->id == ZigTypeIdUndefined) {
-        return ir_const_undef(ira, a, get_vector_type(ira->codegen, len_c, scalar_type));
-    }
+    ZigType *result_type = get_vector_type(ira->codegen, len_mask, scalar_type);
+    if (instr_is_comptime(a) && instr_is_comptime(b)) {
+        ConstExprValue *a_val = ir_resolve_const(ira, a, UndefOk);
+        if (a_val == nullptr)
+            return ira->codegen->invalid_instruction;
 
-    // undefined is a vector up to length of the other vector.
-    if (a->value.type->id == ZigTypeIdUndefined) {
-        a = ir_const_undef(ira, a, b->value.type);
-        len_a = b->value.type->data.vector.len;
-    } else if (b->value.type->id == ZigTypeIdUndefined) {
-        b = ir_const_undef(ira, b, a->value.type);
-        len_b = a->value.type->data.vector.len;
-    }
+        ConstExprValue *b_val = ir_resolve_const(ira, b, UndefOk);
+        if (b_val == nullptr)
+            return ira->codegen->invalid_instruction;
 
-    // FIXME I think this needs to be more sophisticated
-    if (a->value.type->data.vector.elem_type != scalar_type) {
-        ir_add_error(ira, a,
-            buf_sprintf("element type '%s' does not match '%s'",
-                buf_ptr(&a->value.type->data.vector.elem_type->name),
-                buf_ptr(&scalar_type->name)));
-        return ira->codegen->invalid_instruction;
-    }
-    if (b->value.type->data.vector.elem_type != scalar_type) {
-        ir_add_error(ira, b,
-            buf_sprintf("element type '%s' does not match '%s'",
-                buf_ptr(&b->value.type->data.vector.elem_type->name),
-                buf_ptr(&scalar_type->name)));
-        return ira->codegen->invalid_instruction;
+        expand_undef_array(ira->codegen, a_val);
+        expand_undef_array(ira->codegen, b_val);
+
+        IrInstruction *result = ir_const(ira, source_instr, result_type);
+        result->value.data.x_array.data.s_none.elements = create_const_vals(len_mask);
+        for (uint32_t i = 0; i < mask_val->type->data.vector.len; i += 1) {
+            ConstExprValue *mask_elem_val = &mask_val->data.x_array.data.s_none.elements[i];
+            ConstExprValue *result_elem_val = &result->value.data.x_array.data.s_none.elements[i];
+            if (mask_elem_val->special == ConstValSpecialUndef) {
+                result_elem_val->special = ConstValSpecialUndef;
+                continue;
+            }
+            int32_t v = bigint_as_signed(&mask_elem_val->data.x_bigint);
+            // The mask validation loop above already rejected out-of-bounds indices, so v and ~v are in range.
+            ConstExprValue *src_elem_val = (v >= 0) ?
+                &a->value.data.x_array.data.s_none.elements[v] :
+                &b->value.data.x_array.data.s_none.elements[~v];
+            copy_const_val(result_elem_val, src_elem_val, false);
+
+            ir_assert(result_elem_val->special == ConstValSpecialStatic, source_instr);
+        }
+        result->value.special = ConstValSpecialStatic;
+        return result;
     }
 
-    if (a->value.type != b->value.type) {
-        assert(len_a != len_b);
-        uint32_t len_max = max(len_a, len_b), len_min = min(len_a, len_b);
-        bool expand_b = len_b < len_a;
+    // All static analysis passed, and not comptime.
+    // For runtime codegen, vectors a and b must be the same length. Here we
+    // recursively @shuffle the shorter vector with an undefined vector of the
+    // same length, padding it with undefined elements up to the length of the
+    // longer vector. The recursion is only one level deep because both operands
+    // of the nested call have length len_min, so len_a == len_b holds there.
+    if (len_a != len_b) {
+        uint32_t len_min = min(len_a, len_b);
+        uint32_t len_max = max(len_a, len_b);
+
         IrInstruction *expand_mask = ir_const(ira, mask,
             get_vector_type(ira->codegen, len_max, ira->codegen->builtin_types.entry_i32));
         expand_mask->value.data.x_array.data.s_none.elements = create_const_vals(len_max);
         uint32_t i = 0;
-        for (; i < len_min; i++)
+        for (; i < len_min; i += 1)
             bigint_init_unsigned(&expand_mask->value.data.x_array.data.s_none.elements[i].data.x_bigint, i);
-        for (; i < len_max; i++)
+        for (; i < len_max; i += 1)
             bigint_init_signed(&expand_mask->value.data.x_array.data.s_none.elements[i].data.x_bigint, -1);
+
         IrInstruction *undef = ir_const_undef(ira, source_instr,
             get_vector_type(ira->codegen, len_min, scalar_type));
-        if (expand_b) {
-            if (instr_is_comptime(b)) {
-                ConstExprValue *old = b->value.data.x_array.data.s_none.elements;
-                b->value.data.x_array.data.s_none.elements =
-                    allocate<ConstExprValue>(len_a);
-                memcpy(b->value.data.x_array.data.s_none.elements, old,
-                    b->value.type->data.vector.len * sizeof(ConstExprValue));
-            } else {
-                b = ir_build_shuffle_vector(&ira->new_irb,
-                    source_instr->scope, source_instr->source_node,
-                    nullptr, b, undef, expand_mask);
-                b->value.special = ConstValSpecialRuntime;
-            }
-            b->value.type = get_vector_type(ira->codegen, len_max, scalar_type);
-        } else {
-            if (instr_is_comptime(a)) {
-                ConstExprValue *old = a->value.data.x_array.data.s_none.elements;
-                a->value.data.x_array.data.s_none.elements =
-                    allocate<ConstExprValue>(len_b);
-                memcpy(a->value.data.x_array.data.s_none.elements, old,
-                    a->value.type->data.vector.len * sizeof(ConstExprValue));
-            } else {
-                a = ir_build_shuffle_vector(&ira->new_irb,
-                    source_instr->scope, source_instr->source_node,
-                    nullptr, a, undef, expand_mask);
-                a->value.special = ConstValSpecialRuntime;
-            }
-            a->value.type = get_vector_type(ira->codegen, len_max, scalar_type);
-        }
-    }
-    ConstExprValue *mask_val = ir_resolve_const(ira, mask, UndefOk);
-    if (!mask_val) {
-        ir_add_error(ira, mask,
-            buf_sprintf("mask must be comptime"));
-        return ira->codegen->invalid_instruction;
-    }
-    for (uint32_t i = 0;i < mask->value.type->data.vector.len;i++) {
-        if (mask->value.data.x_array.data.s_none.elements[i].special == ConstValSpecialUndef)
-            continue;
-        int64_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint);
-        if (v >= 0 && (uint64_t)v + 1 > len_a) {
-            ErrorMsg *msg = ir_add_error(ira, mask,
-                buf_sprintf("mask index out of bounds"));
-            add_error_note(ira->codegen, msg, mask->source_node,
-                buf_sprintf("when computing vector element at index %" ZIG_PRI_usize, (uintptr_t)i));
-            if ((uint64_t)v <= len_a + len_b)
-                add_error_note(ira->codegen, msg, mask->source_node,
-                    buf_sprintf("selections from the second vector are specified with negative numbers"));
-        } else if (v < 0 && (uint64_t)~v + 1 > len_b) {
-            ErrorMsg *msg = ir_add_error(ira, mask,
-                buf_sprintf("mask index out of bounds"));
-            add_error_note(ira->codegen, msg, mask->source_node,
-                buf_sprintf("when computing vector element at index %" ZIG_PRI_usize, (uintptr_t)i));
-        }
-        else
-            continue;
-        return ira->codegen->invalid_instruction;
-    }
 
-    ZigType *result_type = get_vector_type(ira->codegen, len_c, scalar_type);
-    if (instr_is_comptime(a) &&
-        instr_is_comptime(b)) {
-        IrInstruction *result = ir_const(ira, source_instr, result_type);
-        result->value.data.x_array.data.s_none.elements = create_const_vals(len_c);
-        for (uint32_t i = 0;i < mask->value.type->data.vector.len;i++) {
-            if (mask->value.data.x_array.data.s_none.elements[i].special == ConstValSpecialUndef)
-                result->value.data.x_array.data.s_none.elements[i].special =
-                    ConstValSpecialUndef;
-            int64_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint);
-            if (v >= 0)
-                result->value.data.x_array.data.s_none.elements[i] =
-                    a->value.data.x_array.data.s_none.elements[v];
-            else if (v < 0)
-                result->value.data.x_array.data.s_none.elements[i] =
-                    b->value.data.x_array.data.s_none.elements[~v];
-            else
-                zig_unreachable();
-            result->value.data.x_array.data.s_none.elements[i].special =
-                ConstValSpecialStatic;
+        if (len_b < len_a) {
+            b = ir_analyze_shuffle_vector(ira, source_instr, scalar_type, b, undef, expand_mask);
+        } else {
+            a = ir_analyze_shuffle_vector(ira, source_instr, scalar_type, a, undef, expand_mask);
         }
-        result->value.special = ConstValSpecialStatic;
-        return result;
     }
 
-    // All static analysis passed, and not comptime
     IrInstruction *result = ir_build_shuffle_vector(&ira->new_irb,
         source_instr->scope, source_instr->source_node,
         nullptr, a, b, mask);
     result->value.type = result_type;
-    result->value.special = ConstValSpecialRuntime;
     return result;
 }
 
 static IrInstruction *ir_analyze_instruction_shuffle_vector(IrAnalyze *ira, IrInstructionShuffleVector *instruction) {
-    ZigType *scalar_type = ir_resolve_type(ira, instruction->scalar_type);
-    assert(scalar_type);
+    ZigType *scalar_type = ir_resolve_vector_elem_type(ira, instruction->scalar_type);
     if (type_is_invalid(scalar_type))
         return ira->codegen->invalid_instruction;
 
-    if (scalar_type->id != ZigTypeIdBool &&
-        scalar_type->id != ZigTypeIdInt &&
-        scalar_type->id != ZigTypeIdFloat &&
-        scalar_type->id != ZigTypeIdPointer) {
-        ir_add_error(ira, instruction->scalar_type,
-            buf_sprintf("vector element type must be integer, float, bool, or pointer; '%s' is invalid",
-                buf_ptr(&scalar_type->name)));
+    IrInstruction *a = instruction->a->child;
+    if (type_is_invalid(a->value.type))
+        return ira->codegen->invalid_instruction;
+
+    IrInstruction *b = instruction->b->child;
+    if (type_is_invalid(b->value.type))
+        return ira->codegen->invalid_instruction;
+
+    IrInstruction *mask = instruction->mask->child;
+    if (type_is_invalid(mask->value.type))
         return ira->codegen->invalid_instruction;
-    }
 
-    return ir_analyze_shuffle_vector(ira, &instruction->base, scalar_type, instruction->a->child, instruction->b->child, instruction->mask->child);
+    return ir_analyze_shuffle_vector(ira, &instruction->base, scalar_type, a, b, mask);
 }
 
 static IrInstruction *ir_analyze_instruction_bool_not(IrAnalyze *ira, IrInstructionBoolNot *instruction) {