From 193604c837df75ab0c3fa5860f8b234263fe5b50 Mon Sep 17 00:00:00 2001
From: Shawn Landden <shawn@git.icu>
Date: Sat, 29 Jun 2019 11:32:26 -0500
Subject: stage1: add @shuffle() shufflevector support

I change the semantics of the mask operand, to make it a little more
flexible. There is no real danger in this because it is a compile-error
if you do it the LLVM way (and there is an appropriate error to tell you
this).

v2: avoid problems with double-free
---
 doc/langref.html.in | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'doc')
diff --git a/doc/langref.html.in b/doc/langref.html.in
index 374fbfcde5..7ae0ee7c1c 100644
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@@ -8226,6 +8226,28 @@ fn foo(comptime T: type, ptr: *T) T {
       {#link|pointer|Pointers#}.
       </p>
       {#header_close#}
+
+      {#header_open|@shuffle#}
+      <pre>{#syntax#}@shuffle(comptime ElemType: type, a: @Vector(_, ElemType), b: @Vector(_, ElemType), comptime mask: @Vector(_, u32)) @Vector(mask.len, ElemType){#endsyntax#}</pre>
+      <p>
+      Does the {#syntax#}shufflevector{#endsyntax#} instruction. Each element in {#syntax#}comptime{#endsyntax#}
+      (and always {#syntax#}i32{#endsyntax#}) {#syntax#}mask{#endsyntax#} selects a element from either {#syntax#}a{#endsyntax#} or {#syntax#}b{#endsyntax#}.
+      Positive numbers select from {#syntax#}a{#endsyntax#} (starting at 0), while negative values select
+      from {#syntax#}b{#endsyntax#} (starting at -1 and going down). It is recommended to use the {#syntax#}~{#endsyntax#}
+      operator from indexes from b so that both indexes can start from 0 (i.e. ~0 is -1). If either the {#syntax#}mask{#endsyntax#}
+      value or the value from {#syntax#}a{#endsyntax#} or {#syntax#}b{#endsyntax#} that it selects are {#syntax#}undefined{#endsyntax#}
+      then the resulting value is {#syntax#}undefined{#endsyntax#}. Also see {#link|SIMD#} and
+      the relevent <a href="https://llvm.org/docs/LangRef.html#i-shufflevector">LLVM Documentation on
+      {#syntax#}shufflevector{#endsyntax#}</a>, although note that the mask values are interpreted differently than in LLVM-IR.
+      Also, unlike LLVM-IR, the number of elements in {#syntax#}a{#endsyntax#} and {#syntax#}b{#endsyntax#} do not have to match.
+      The {#syntax#}undefined{#endsyntax#} identifier can be selected from up to the length of the other vector,
+      and yields {#syntax#}undefined{#endsyntax#}. If both vectors are {#syntax#}undefined{#endsyntax#}, yields an
+      {#syntax#}undefined{#endsyntax#} {#syntax#}ElemType{#endsyntax#} vector with length of {#syntax#}mask{#endsyntax#}.</p>
+      <p>
+      {#syntax#}ElemType{#endsyntax#} must be an {#link|integer|Integers#}, a {#link|float|Floats#}, or a
+      {#link|pointer|Pointers#}. The mask may be any vector length that the target supports, and its' length determines the result length.
+      </p>
+      {#header_close#}
       {#header_close#}
 
       {#header_open|Build Mode#}
-- 
cgit v1.2.3


From 2038f4d45a597cc672380c0a5fc8dd98e928d24c Mon Sep 17 00:00:00 2001
From: Andrew Kelley <andrew@ziglang.org>
Date: Wed, 18 Sep 2019 15:41:56 -0400
Subject: rework the implementation

 * update documentation
   - move `@shuffle` to be sorted alphabetically
   - remove mention of LLVM
   - minor clarifications & rewording
 * introduce ir_resolve_vector_elem_type to avoid duplicate compile
   error message and duplicate vector element checking logic
 * rework ir_analyze_shuffle_vector to solve various issues
 * improve `@shuffle` to allow implicit cast of arrays
 * the shuffle tests weren't being run
---
 doc/langref.html.in              |  59 ++++---
 src/codegen.cpp                  |  19 +--
 src/ir.cpp                       | 341 +++++++++++++++++++--------------------
 test/compile_errors.zig          |  14 +-
 test/stage1/behavior.zig         |   1 +
 test/stage1/behavior/shuffle.zig |  32 ++--
 6 files changed, 233 insertions(+), 233 deletions(-)

(limited to 'doc')

diff --git a/doc/langref.html.in b/doc/langref.html.in
index 7ae0ee7c1c..8a303640e6 100644
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@@ -7673,6 +7673,43 @@ test "@setRuntimeSafety" {
       {#see_also|@shlExact|@shlWithOverflow#}
       {#header_close#}
 
+      {#header_open|@shuffle#}
+      <pre>{#syntax#}@shuffle(comptime E: type, a: @Vector(a_len, E), b: @Vector(b_len, E), comptime mask: @Vector(mask_len, i32)) @Vector(mask_len, E){#endsyntax#}</pre>
+      <p>
+      Constructs a new {#link|vector|Vectors#} by selecting elements from {#syntax#}a{#endsyntax#} and
+      {#syntax#}b{#endsyntax#} based on {#syntax#}mask{#endsyntax#}.
+      </p>
+      <p>
+      Each element in {#syntax#}mask{#endsyntax#} selects an element from either {#syntax#}a{#endsyntax#} or
+      {#syntax#}b{#endsyntax#}. Non-negative numbers select from {#syntax#}a{#endsyntax#} starting at 0.
+      Negative values select from {#syntax#}b{#endsyntax#}, starting at {#syntax#}-1{#endsyntax#} and going down.
+      It is recommended to use the {#syntax#}~{#endsyntax#} operator for indexes from {#syntax#}b{#endsyntax#}
+      so that both indexes can start from {#syntax#}0{#endsyntax#} (i.e. {#syntax#}~i32(0){#endsyntax#} is
+      {#syntax#}-1{#endsyntax#}).
+      </p>
+      <p>
+      For each element of {#syntax#}mask{#endsyntax#}, if it or the selected value from
+      {#syntax#}a{#endsyntax#} or {#syntax#}b{#endsyntax#} is {#syntax#}undefined{#endsyntax#},
+      then the resulting element is {#syntax#}undefined{#endsyntax#}.
+      </p>
+      <p>
+      {#syntax#}a_len{#endsyntax#} and {#syntax#}b_len{#endsyntax#} may differ. Out-of-bounds element
+      indexes in {#syntax#}mask{#endsyntax#} result in compile errors.
+      </p>
+      <p>
+      If {#syntax#}a{#endsyntax#} or {#syntax#}b{#endsyntax#} is {#syntax#}undefined{#endsyntax#}, it
+      is equivalent to a vector of all {#syntax#}undefined{#endsyntax#} with the same length as the other vector.
+      If both vectors are {#syntax#}undefined{#endsyntax#}, {#syntax#}@shuffle{#endsyntax#} returns
+      a vector with all elements {#syntax#}undefined{#endsyntax#}.
+      </p>
+      <p>
+      {#syntax#}E{#endsyntax#} must be an {#link|integer|Integers#}, {#link|float|Floats#},
+      {#link|pointer|Pointers#}, or {#syntax#}bool{#endsyntax#}. The mask may be any vector length, and its
+      length determines the result length.
+      </p>
+      {#see_also|SIMD#}
+      {#header_close#}
+
       {#header_open|@sizeOf#}
       <pre>{#syntax#}@sizeOf(comptime T: type) comptime_int{#endsyntax#}</pre>
       <p>
@@ -8226,28 +8263,6 @@ fn foo(comptime T: type, ptr: *T) T {
       {#link|pointer|Pointers#}.
       </p>
       {#header_close#}
-
-      {#header_open|@shuffle#}
-      <pre>{#syntax#}@shuffle(comptime ElemType: type, a: @Vector(_, ElemType), b: @Vector(_, ElemType), comptime mask: @Vector(_, u32)) @Vector(mask.len, ElemType){#endsyntax#}</pre>
-      <p>
-      Does the {#syntax#}shufflevector{#endsyntax#} instruction. Each element in {#syntax#}comptime{#endsyntax#}
-      (and always {#syntax#}i32{#endsyntax#}) {#syntax#}mask{#endsyntax#} selects a element from either {#syntax#}a{#endsyntax#} or {#syntax#}b{#endsyntax#}.
-      Positive numbers select from {#syntax#}a{#endsyntax#} (starting at 0), while negative values select
-      from {#syntax#}b{#endsyntax#} (starting at -1 and going down). It is recommended to use the {#syntax#}~{#endsyntax#}
-      operator from indexes from b so that both indexes can start from 0 (i.e. ~0 is -1). If either the {#syntax#}mask{#endsyntax#}
-      value or the value from {#syntax#}a{#endsyntax#} or {#syntax#}b{#endsyntax#} that it selects are {#syntax#}undefined{#endsyntax#}
-      then the resulting value is {#syntax#}undefined{#endsyntax#}. Also see {#link|SIMD#} and
-      the relevent <a href="https://llvm.org/docs/LangRef.html#i-shufflevector">LLVM Documentation on
-      {#syntax#}shufflevector{#endsyntax#}</a>, although note that the mask values are interpreted differently than in LLVM-IR.
-      Also, unlike LLVM-IR, the number of elements in {#syntax#}a{#endsyntax#} and {#syntax#}b{#endsyntax#} do not have to match.
-      The {#syntax#}undefined{#endsyntax#} identifier can be selected from up to the length of the other vector,
-      and yields {#syntax#}undefined{#endsyntax#}. If both vectors are {#syntax#}undefined{#endsyntax#}, yields an
-      {#syntax#}undefined{#endsyntax#} {#syntax#}ElemType{#endsyntax#} vector with length of {#syntax#}mask{#endsyntax#}.</p>
-      <p>
-      {#syntax#}ElemType{#endsyntax#} must be an {#link|integer|Integers#}, a {#link|float|Floats#}, or a
-      {#link|pointer|Pointers#}. The mask may be any vector length that the target supports, and its' length determines the result length.
-      </p>
-      {#header_close#}
       {#header_close#}
 
       {#header_open|Build Mode#}
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 2f1488635a..7676b3bbd0 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -4583,7 +4583,7 @@ static LLVMValueRef ir_render_ctz(CodeGen *g, IrExecutable *executable, IrInstru
 
 static LLVMValueRef ir_render_shuffle_vector(CodeGen *g, IrExecutable *executable, IrInstructionShuffleVector *instruction) {
     uint64_t len_a = instruction->a->value.type->data.vector.len;
-    uint64_t len_c = instruction->mask->value.type->data.vector.len;
+    uint64_t len_mask = instruction->mask->value.type->data.vector.len;
 
     // LLVM uses integers larger than the length of the first array to
     // index into the second array. This was deemed unnecessarily fragile
@@ -4591,23 +4591,24 @@ static LLVMValueRef ir_render_shuffle_vector(CodeGen *g, IrExecutable *executabl
     // second vector. These start at -1 and go down, and are easiest to use
     // with the ~ operator. Here we convert between the two formats.
     IrInstruction *mask = instruction->mask;
-    LLVMValueRef *values = allocate<LLVMValueRef>(len_c);
-    for (uint64_t i = 0;i < len_c;i++) {
+    LLVMValueRef *values = allocate<LLVMValueRef>(len_mask);
+    for (uint64_t i = 0; i < len_mask; i++) {
         if (mask->value.data.x_array.data.s_none.elements[i].special == ConstValSpecialUndef) {
             values[i] = LLVMGetUndef(LLVMInt32Type());
         } else {
-            int64_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint);
-            if (v < 0)
-                v = (uint32_t)~v + (uint32_t)len_a;
-            values[i] = LLVMConstInt(LLVMInt32Type(), v, false);
+            int32_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint);
+            uint32_t index_val = (v >= 0) ? (uint32_t)v : (uint32_t)~v + (uint32_t)len_a;
+            values[i] = LLVMConstInt(LLVMInt32Type(), index_val, false);
         }
     }
 
+    LLVMValueRef llvm_mask_value = LLVMConstVector(values, len_mask);
+    free(values);
+
     return LLVMBuildShuffleVector(g->builder,
         ir_llvm_value(g, instruction->a),
         ir_llvm_value(g, instruction->b),
-        LLVMConstVector(values, len_c),
-        "");
+        llvm_mask_value, "");
 }
 
 static LLVMValueRef ir_render_pop_count(CodeGen *g, IrExecutable *executable, IrInstructionPopCount *instruction) {
diff --git a/src/ir.cpp b/src/ir.cpp
index f62a58e37e..cbc00f0cfe 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -11049,6 +11049,19 @@ static ZigType *ir_resolve_type(IrAnalyze *ira, IrInstruction *type_value) {
     return ir_resolve_const_type(ira->codegen, ira->new_irb.exec, type_value->source_node, val);
 }
 
+static ZigType *ir_resolve_vector_elem_type(IrAnalyze *ira, IrInstruction *elem_type_value) {
+    ZigType *elem_type = ir_resolve_type(ira, elem_type_value);
+    if (type_is_invalid(elem_type))
+        return ira->codegen->builtin_types.entry_invalid;
+    if (!is_valid_vector_elem_type(elem_type)) {
+        ir_add_error(ira, elem_type_value,
+            buf_sprintf("vector element type must be integer, float, bool, or pointer; '%s' is invalid",
+                buf_ptr(&elem_type->name)));
+        return ira->codegen->builtin_types.entry_invalid;
+    }
+    return elem_type;
+}
+
 static ZigType *ir_resolve_int_type(IrAnalyze *ira, IrInstruction *type_value) {
     ZigType *ty = ir_resolve_type(ira, type_value);
     if (type_is_invalid(ty))
@@ -22096,242 +22109,212 @@ static IrInstruction *ir_analyze_instruction_vector_type(IrAnalyze *ira, IrInstr
     if (!ir_resolve_unsigned(ira, instruction->len->child, ira->codegen->builtin_types.entry_u32, &len))
         return ira->codegen->invalid_instruction;
 
-    ZigType *elem_type = ir_resolve_type(ira, instruction->elem_type->child);
+    ZigType *elem_type = ir_resolve_vector_elem_type(ira, instruction->elem_type->child);
     if (type_is_invalid(elem_type))
         return ira->codegen->invalid_instruction;
 
-    if (!is_valid_vector_elem_type(elem_type)) {
-        ir_add_error(ira, instruction->elem_type,
-            buf_sprintf("vector element type must be integer, float, bool, or pointer; '%s' is invalid",
-                buf_ptr(&elem_type->name)));
-        return ira->codegen->invalid_instruction;
-    }
-
     ZigType *vector_type = get_vector_type(ira->codegen, len, elem_type);
 
     return ir_const_type(ira, &instruction->base, vector_type);
 }
 
 static IrInstruction *ir_analyze_shuffle_vector(IrAnalyze *ira, IrInstruction *source_instr,
-    ZigType *scalar_type, IrInstruction *a, IrInstruction *b, IrInstruction *mask) {
-    assert(source_instr && scalar_type && a && b && mask);
-    assert(scalar_type->id == ZigTypeIdBool ||
-           scalar_type->id == ZigTypeIdInt ||
-           scalar_type->id == ZigTypeIdFloat ||
-           scalar_type->id == ZigTypeIdPointer);
-
-    ZigType *mask_type = mask->value.type;
-    if (type_is_invalid(mask_type))
+    ZigType *scalar_type, IrInstruction *a, IrInstruction *b, IrInstruction *mask)
+{
+    ir_assert(source_instr && scalar_type && a && b && mask, source_instr);
+    ir_assert(is_valid_vector_elem_type(scalar_type), source_instr);
+
+    uint32_t len_mask;
+    if (mask->value.type->id == ZigTypeIdVector) {
+        len_mask = mask->value.type->data.vector.len;
+    } else if (mask->value.type->id == ZigTypeIdArray) {
+        len_mask = mask->value.type->data.array.len;
+    } else {
+        ir_add_error(ira, mask,
+            buf_sprintf("expected vector or array, found '%s'",
+                buf_ptr(&mask->value.type->name)));
         return ira->codegen->invalid_instruction;
-
-    const char *shuffle_mask_fail_fmt = "@shuffle mask operand must be a vector of signed 32-bit integers, got '%s'";
-
-    if (mask_type->id == ZigTypeIdArray) {
-        ZigType *vector_type = get_vector_type(ira->codegen, mask_type->data.array.len, mask_type->data.array.child_type);
-        mask = ir_analyze_array_to_vector(ira, mask, mask, vector_type);
-        if (!mask)
-            return ira->codegen->invalid_instruction;
-        mask_type = vector_type;
     }
+    mask = ir_implicit_cast(ira, mask, get_vector_type(ira->codegen, len_mask,
+                ira->codegen->builtin_types.entry_i32));
+    if (type_is_invalid(mask->value.type))
+        return ira->codegen->invalid_instruction;
 
-    if (mask_type->id != ZigTypeIdVector) {
-        ir_add_error(ira, mask,
-            buf_sprintf(shuffle_mask_fail_fmt, buf_ptr(&mask->value.type->name)));
+    uint32_t len_a;
+    if (a->value.type->id == ZigTypeIdVector) {
+        len_a = a->value.type->data.vector.len;
+    } else if (a->value.type->id == ZigTypeIdArray) {
+        len_a = a->value.type->data.array.len;
+    } else if (a->value.type->id == ZigTypeIdUndefined) {
+        len_a = UINT32_MAX;
+    } else {
+        ir_add_error(ira, a,
+            buf_sprintf("expected vector or array with element type '%s', found '%s'",
+                buf_ptr(&scalar_type->name),
+                buf_ptr(&a->value.type->name)));
         return ira->codegen->invalid_instruction;
     }
 
-    ZigType *mask_scalar_type = mask_type->data.array.child_type;
-    if (mask_scalar_type->id != ZigTypeIdInt) {
-        ir_add_error(ira, mask,
-            buf_sprintf(shuffle_mask_fail_fmt, buf_ptr(&mask->value.type->name)));
+    uint32_t len_b;
+    if (b->value.type->id == ZigTypeIdVector) {
+        len_b = b->value.type->data.vector.len;
+    } else if (b->value.type->id == ZigTypeIdArray) {
+        len_b = b->value.type->data.array.len;
+    } else if (b->value.type->id == ZigTypeIdUndefined) {
+        len_b = UINT32_MAX;
+    } else {
+        ir_add_error(ira, b,
+            buf_sprintf("expected vector or array with element type '%s', found '%s'",
+                buf_ptr(&scalar_type->name),
+                buf_ptr(&b->value.type->name)));
         return ira->codegen->invalid_instruction;
     }
 
-    if (mask_scalar_type->data.integral.bit_count != 32 ||
-        mask_scalar_type->data.integral.is_signed == false) {
-        ir_add_error(ira, mask,
-            buf_sprintf(shuffle_mask_fail_fmt, buf_ptr(&mask->value.type->name)));
-        return ira->codegen->invalid_instruction;
+    if (len_a == UINT32_MAX && len_b == UINT32_MAX) {
+        return ir_const_undef(ira, a, get_vector_type(ira->codegen, len_mask, scalar_type));
     }
 
-    uint64_t len_a, len_b, len_c = mask->value.type->data.vector.len;
-    if (a->value.type->id != ZigTypeIdVector) {
-        if (a->value.type->id != ZigTypeIdUndefined) {
-            ir_add_error(ira, a,
-                buf_sprintf("expected vector of element type '%s' got '%s'",
-                    buf_ptr(&scalar_type->name),
-                    buf_ptr(&a->value.type->name)));
+    if (len_a == UINT32_MAX) {
+        len_a = len_b;
+        a = ir_const_undef(ira, a, get_vector_type(ira->codegen, len_a, scalar_type));
+    } else {
+        a = ir_implicit_cast(ira, a, get_vector_type(ira->codegen, len_a, scalar_type));
+        if (type_is_invalid(a->value.type))
             return ira->codegen->invalid_instruction;
-        }
+    }
+
+    if (len_b == UINT32_MAX) {
+        len_b = len_a;
+        b = ir_const_undef(ira, b, get_vector_type(ira->codegen, len_b, scalar_type));
     } else {
-        len_a = a->value.type->data.vector.len;
+        b = ir_implicit_cast(ira, b, get_vector_type(ira->codegen, len_b, scalar_type));
+        if (type_is_invalid(b->value.type))
+            return ira->codegen->invalid_instruction;
     }
 
-    if (b->value.type->id != ZigTypeIdVector) {
-        if (b->value.type->id != ZigTypeIdUndefined) {
-            ir_add_error(ira, b,
-                buf_sprintf("expected vector of element type '%s' got '%s'",
-                    buf_ptr(&scalar_type->name),
-                    buf_ptr(&b->value.type->name)));
+    ConstExprValue *mask_val = ir_resolve_const(ira, mask, UndefOk);
+    if (mask_val == nullptr)
+        return ira->codegen->invalid_instruction;
+
+    expand_undef_array(ira->codegen, mask_val);
+
+    for (uint32_t i = 0; i < len_mask; i += 1) {
+        ConstExprValue *mask_elem_val = &mask_val->data.x_array.data.s_none.elements[i];
+        if (mask_elem_val->special == ConstValSpecialUndef)
+            continue;
+        int32_t v_i32 = bigint_as_signed(&mask_elem_val->data.x_bigint);
+        uint32_t v;
+        IrInstruction *chosen_operand;
+        if (v_i32 >= 0) {
+            v = (uint32_t)v_i32;
+            chosen_operand = a;
+        } else {
+            v = (uint32_t)~v_i32;
+            chosen_operand = b;
+        }
+        if (v >= chosen_operand->value.type->data.vector.len) {
+            ErrorMsg *msg = ir_add_error(ira, mask,
+                buf_sprintf("mask index '%u' has out-of-bounds selection", i));
+            add_error_note(ira->codegen, msg, chosen_operand->source_node,
+                buf_sprintf("selected index '%u' out of bounds of %s", v,
+                    buf_ptr(&chosen_operand->value.type->name)));
+            if (chosen_operand == a && v < len_a + len_b) {
+                add_error_note(ira->codegen, msg, b->source_node,
+                    buf_create_from_str("selections from the second vector are specified with negative numbers"));
+            }
             return ira->codegen->invalid_instruction;
         }
-    } else {
-        len_b = b->value.type->data.vector.len;
     }
 
-    if (a->value.type->id == ZigTypeIdUndefined && b->value.type->id == ZigTypeIdUndefined) {
-        return ir_const_undef(ira, a, get_vector_type(ira->codegen, len_c, scalar_type));
-    }
+    ZigType *result_type = get_vector_type(ira->codegen, len_mask, scalar_type);
+    if (instr_is_comptime(a) && instr_is_comptime(b)) {
+        ConstExprValue *a_val = ir_resolve_const(ira, a, UndefOk);
+        if (a_val == nullptr)
+            return ira->codegen->invalid_instruction;
 
-    // undefined is a vector up to length of the other vector.
-    if (a->value.type->id == ZigTypeIdUndefined) {
-        a = ir_const_undef(ira, a, b->value.type);
-        len_a = b->value.type->data.vector.len;
-    } else if (b->value.type->id == ZigTypeIdUndefined) {
-        b = ir_const_undef(ira, b, a->value.type);
-        len_b = a->value.type->data.vector.len;
-    }
+        ConstExprValue *b_val = ir_resolve_const(ira, b, UndefOk);
+        if (b_val == nullptr)
+            return ira->codegen->invalid_instruction;
 
-    // FIXME I think this needs to be more sophisticated
-    if (a->value.type->data.vector.elem_type != scalar_type) {
-        ir_add_error(ira, a,
-            buf_sprintf("element type '%s' does not match '%s'",
-                buf_ptr(&a->value.type->data.vector.elem_type->name),
-                buf_ptr(&scalar_type->name)));
-        return ira->codegen->invalid_instruction;
-    }
-    if (b->value.type->data.vector.elem_type != scalar_type) {
-        ir_add_error(ira, b,
-            buf_sprintf("element type '%s' does not match '%s'",
-                buf_ptr(&b->value.type->data.vector.elem_type->name),
-                buf_ptr(&scalar_type->name)));
-        return ira->codegen->invalid_instruction;
+        expand_undef_array(ira->codegen, a_val);
+        expand_undef_array(ira->codegen, b_val);
+
+        IrInstruction *result = ir_const(ira, source_instr, result_type);
+        result->value.data.x_array.data.s_none.elements = create_const_vals(len_mask);
+        for (uint32_t i = 0; i < mask_val->type->data.vector.len; i += 1) {
+            ConstExprValue *mask_elem_val = &mask_val->data.x_array.data.s_none.elements[i];
+            ConstExprValue *result_elem_val = &result->value.data.x_array.data.s_none.elements[i];
+            if (mask_elem_val->special == ConstValSpecialUndef) {
+                result_elem_val->special = ConstValSpecialUndef;
+                continue;
+            }
+            int32_t v = bigint_as_signed(&mask_elem_val->data.x_bigint);
+            // We've already checked for and emitted compile errors for index out of bounds here.
+            ConstExprValue *src_elem_val = (v >= 0) ?
+                &a->value.data.x_array.data.s_none.elements[v] :
+                &b->value.data.x_array.data.s_none.elements[~v];
+            copy_const_val(result_elem_val, src_elem_val, false);
+
+            ir_assert(result_elem_val->special == ConstValSpecialStatic, source_instr);
+        }
+        result->value.special = ConstValSpecialStatic;
+        return result;
     }
 
-    if (a->value.type != b->value.type) {
-        assert(len_a != len_b);
-        uint32_t len_max = max(len_a, len_b), len_min = min(len_a, len_b);
-        bool expand_b = len_b < len_a;
+    // All static analysis passed, and not comptime.
+    // For runtime codegen, vectors a and b must be the same length. Here we
+    // recursively @shuffle the smaller vector to append undefined elements
+    // to it up to the length of the longer vector. This recursion terminates
+    // in 1 call because these calls to ir_analyze_shuffle_vector guarantee
+    // len_a == len_b.
+    if (len_a != len_b) {
+        uint32_t len_min = min(len_a, len_b);
+        uint32_t len_max = max(len_a, len_b);
+
         IrInstruction *expand_mask = ir_const(ira, mask,
             get_vector_type(ira->codegen, len_max, ira->codegen->builtin_types.entry_i32));
         expand_mask->value.data.x_array.data.s_none.elements = create_const_vals(len_max);
         uint32_t i = 0;
-        for (; i < len_min; i++)
+        for (; i < len_min; i += 1)
             bigint_init_unsigned(&expand_mask->value.data.x_array.data.s_none.elements[i].data.x_bigint, i);
-        for (; i < len_max; i++)
+        for (; i < len_max; i += 1)
             bigint_init_signed(&expand_mask->value.data.x_array.data.s_none.elements[i].data.x_bigint, -1);
+
         IrInstruction *undef = ir_const_undef(ira, source_instr,
             get_vector_type(ira->codegen, len_min, scalar_type));
-        if (expand_b) {
-            if (instr_is_comptime(b)) {
-                ConstExprValue *old = b->value.data.x_array.data.s_none.elements;
-                b->value.data.x_array.data.s_none.elements =
-                    allocate<ConstExprValue>(len_a);
-                memcpy(b->value.data.x_array.data.s_none.elements, old,
-                    b->value.type->data.vector.len * sizeof(ConstExprValue));
-            } else {
-                b = ir_build_shuffle_vector(&ira->new_irb,
-                    source_instr->scope, source_instr->source_node,
-                    nullptr, b, undef, expand_mask);
-                b->value.special = ConstValSpecialRuntime;
-            }
-            b->value.type = get_vector_type(ira->codegen, len_max, scalar_type);
-        } else {
-            if (instr_is_comptime(a)) {
-                ConstExprValue *old = a->value.data.x_array.data.s_none.elements;
-                a->value.data.x_array.data.s_none.elements =
-                    allocate<ConstExprValue>(len_b);
-                memcpy(a->value.data.x_array.data.s_none.elements, old,
-                    a->value.type->data.vector.len * sizeof(ConstExprValue));
-            } else {
-                a = ir_build_shuffle_vector(&ira->new_irb,
-                    source_instr->scope, source_instr->source_node,
-                    nullptr, a, undef, expand_mask);
-                a->value.special = ConstValSpecialRuntime;
-            }
-            a->value.type = get_vector_type(ira->codegen, len_max, scalar_type);
-        }
-    }
-    ConstExprValue *mask_val = ir_resolve_const(ira, mask, UndefOk);
-    if (!mask_val) {
-        ir_add_error(ira, mask,
-            buf_sprintf("mask must be comptime"));
-        return ira->codegen->invalid_instruction;
-    }
-    for (uint32_t i = 0;i < mask->value.type->data.vector.len;i++) {
-        if (mask->value.data.x_array.data.s_none.elements[i].special == ConstValSpecialUndef)
-            continue;
-        int64_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint);
-        if (v >= 0 && (uint64_t)v + 1 > len_a) {
-            ErrorMsg *msg = ir_add_error(ira, mask,
-                buf_sprintf("mask index out of bounds"));
-            add_error_note(ira->codegen, msg, mask->source_node,
-                buf_sprintf("when computing vector element at index %" ZIG_PRI_usize, (uintptr_t)i));
-            if ((uint64_t)v <= len_a + len_b)
-                add_error_note(ira->codegen, msg, mask->source_node,
-                    buf_sprintf("selections from the second vector are specified with negative numbers"));
-        } else if (v < 0 && (uint64_t)~v + 1 > len_b) {
-            ErrorMsg *msg = ir_add_error(ira, mask,
-                buf_sprintf("mask index out of bounds"));
-            add_error_note(ira->codegen, msg, mask->source_node,
-                buf_sprintf("when computing vector element at index %" ZIG_PRI_usize, (uintptr_t)i));
-        }
-        else
-            continue;
-        return ira->codegen->invalid_instruction;
-    }
 
-    ZigType *result_type = get_vector_type(ira->codegen, len_c, scalar_type);
-    if (instr_is_comptime(a) &&
-        instr_is_comptime(b)) {
-        IrInstruction *result = ir_const(ira, source_instr, result_type);
-        result->value.data.x_array.data.s_none.elements = create_const_vals(len_c);
-        for (uint32_t i = 0;i < mask->value.type->data.vector.len;i++) {
-            if (mask->value.data.x_array.data.s_none.elements[i].special == ConstValSpecialUndef)
-                result->value.data.x_array.data.s_none.elements[i].special =
-                    ConstValSpecialUndef;
-            int64_t v = bigint_as_signed(&mask->value.data.x_array.data.s_none.elements[i].data.x_bigint);
-            if (v >= 0)
-                result->value.data.x_array.data.s_none.elements[i] =
-                    a->value.data.x_array.data.s_none.elements[v];
-            else if (v < 0)
-                result->value.data.x_array.data.s_none.elements[i] =
-                    b->value.data.x_array.data.s_none.elements[~v];
-            else
-                zig_unreachable();
-            result->value.data.x_array.data.s_none.elements[i].special =
-                ConstValSpecialStatic;
+        if (len_b < len_a) {
+            b = ir_analyze_shuffle_vector(ira, source_instr, scalar_type, b, undef, expand_mask);
+        } else {
+            a = ir_analyze_shuffle_vector(ira, source_instr, scalar_type, a, undef, expand_mask);
         }
-        result->value.special = ConstValSpecialStatic;
-        return result;
     }
 
-    // All static analysis passed, and not comptime
     IrInstruction *result = ir_build_shuffle_vector(&ira->new_irb,
         source_instr->scope, source_instr->source_node,
         nullptr, a, b, mask);
     result->value.type = result_type;
-    result->value.special = ConstValSpecialRuntime;
     return result;
 }
 
 static IrInstruction *ir_analyze_instruction_shuffle_vector(IrAnalyze *ira, IrInstructionShuffleVector *instruction) {
-    ZigType *scalar_type = ir_resolve_type(ira, instruction->scalar_type);
-    assert(scalar_type);
+    ZigType *scalar_type = ir_resolve_vector_elem_type(ira, instruction->scalar_type);
     if (type_is_invalid(scalar_type))
         return ira->codegen->invalid_instruction;
 
-    if (scalar_type->id != ZigTypeIdBool &&
-        scalar_type->id != ZigTypeIdInt &&
-        scalar_type->id != ZigTypeIdFloat &&
-        scalar_type->id != ZigTypeIdPointer) {
-        ir_add_error(ira, instruction->scalar_type,
-            buf_sprintf("vector element type must be integer, float, bool, or pointer; '%s' is invalid",
-                buf_ptr(&scalar_type->name)));
+    IrInstruction *a = instruction->a->child;
+    if (type_is_invalid(a->value.type))
+        return ira->codegen->invalid_instruction;
+
+    IrInstruction *b = instruction->b->child;
+    if (type_is_invalid(b->value.type))
+        return ira->codegen->invalid_instruction;
+
+    IrInstruction *mask = instruction->mask->child;
+    if (type_is_invalid(mask->value.type))
         return ira->codegen->invalid_instruction;
-    }
 
-    return ir_analyze_shuffle_vector(ira, &instruction->base, scalar_type, instruction->a->child, instruction->b->child, instruction->mask->child);
+    return ir_analyze_shuffle_vector(ira, &instruction->base, scalar_type, a, b, mask);
 }
 
 static IrInstruction *ir_analyze_instruction_bool_not(IrAnalyze *ira, IrInstructionBoolNot *instruction) {
diff --git a/test/compile_errors.zig b/test/compile_errors.zig
index d9b4ee6a95..1fe3fc58ab 100644
--- a/test/compile_errors.zig
+++ b/test/compile_errors.zig
@@ -6485,16 +6485,16 @@ pub fn addCases(cases: *tests.CompileErrorContext) void {
     );
 
     cases.addTest(
-        "using LLVM syntax for @shuffle",
+        "@shuffle with selected index past first vector length",
         \\export fn entry() void {
-        \\    const v: @Vector(4, u32) = [4]u32{0, 1, 2, 3};
-        \\    const x: @Vector(4, u32) = [4]u32{4, 5, 6, 7};
-        \\    var z = @shuffle(u32, v, x, [8]i32{0, 1, 2, 3, 4, 5, 6, 7});
+        \\    const v: @Vector(4, u32) = [4]u32{ 10, 11, 12, 13 };
+        \\    const x: @Vector(4, u32) = [4]u32{ 14, 15, 16, 17 };
+        \\    var z = @shuffle(u32, v, x, [8]i32{ 0, 1, 2, 3, 7, 6, 5, 4 });
         \\}
     ,
-        "tmp.zig:4:39: error: mask index out of bounds",
-        "tmp.zig:4:39: note: when computing vector element at index 4",
-        "tmp.zig:4:39: note: selections from the second vector are specified with negative numbers",
+        "tmp.zig:4:39: error: mask index '4' has out-of-bounds selection",
+        "tmp.zig:4:27: note: selected index '7' out of bounds of @Vector(4, u32)",
+        "tmp.zig:4:30: note: selections from the second vector are specified with negative numbers",
     );
 
     cases.addTest(
diff --git a/test/stage1/behavior.zig b/test/stage1/behavior.zig
index db6cdad3b1..e56fc7ba7f 100644
--- a/test/stage1/behavior.zig
+++ b/test/stage1/behavior.zig
@@ -80,6 +80,7 @@ comptime {
     _ = @import("behavior/pub_enum.zig");
     _ = @import("behavior/ref_var_in_if_after_if_2nd_switch_prong.zig");
     _ = @import("behavior/reflection.zig");
+    _ = @import("behavior/shuffle.zig");
     _ = @import("behavior/sizeof_and_typeof.zig");
     _ = @import("behavior/slice.zig");
     _ = @import("behavior/slicetobytes.zig");
diff --git a/test/stage1/behavior/shuffle.zig b/test/stage1/behavior/shuffle.zig
index 70bff5991e..2029ec582f 100644
--- a/test/stage1/behavior/shuffle.zig
+++ b/test/stage1/behavior/shuffle.zig
@@ -7,46 +7,46 @@ test "@shuffle" {
         fn doTheTest() void {
             var v: @Vector(4, i32) = [4]i32{ 2147483647, -2, 30, 40 };
             var x: @Vector(4, i32) = [4]i32{ 1, 2147483647, 3, 4 };
-            const mask: @Vector(4, i32) = [4]i32{ 0, ~i32(2), 3, ~i32(3)};
+            const mask: @Vector(4, i32) = [4]i32{ 0, ~i32(2), 3, ~i32(3) };
             var res = @shuffle(i32, v, x, mask);
             expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, 40, 4 }));
 
             // Implicit cast from array (of mask)
-            res = @shuffle(i32, v, x, [4]i32{ 0, ~i32(2), 3, ~i32(3)});
+            res = @shuffle(i32, v, x, [4]i32{ 0, ~i32(2), 3, ~i32(3) });
             expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, 40, 4 }));
 
             // Undefined
-            const mask2: @Vector(4, i32) = [4]i32{ 3, 1, 2, 0};
+            const mask2: @Vector(4, i32) = [4]i32{ 3, 1, 2, 0 };
             res = @shuffle(i32, v, undefined, mask2);
-            expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 40, -2, 30, 2147483647}));
+            expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 40, -2, 30, 2147483647 }));
 
             // Upcasting of b
-            var v2: @Vector(2, i32) = [2]i32{ 2147483647, undefined};
-            const mask3: @Vector(4, i32) = [4]i32{ ~i32(0), 2, ~i32(0), 3};
+            var v2: @Vector(2, i32) = [2]i32{ 2147483647, undefined };
+            const mask3: @Vector(4, i32) = [4]i32{ ~i32(0), 2, ~i32(0), 3 };
             res = @shuffle(i32, x, v2, mask3);
             expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, 2147483647, 4 }));
 
             // Upcasting of a
-            var v3: @Vector(2, i32) = [2]i32{ 2147483647, -2};
-            const mask4: @Vector(4, i32) = [4]i32{ 0, ~i32(2), 1, ~i32(3)};
+            var v3: @Vector(2, i32) = [2]i32{ 2147483647, -2 };
+            const mask4: @Vector(4, i32) = [4]i32{ 0, ~i32(2), 1, ~i32(3) };
             res = @shuffle(i32, v3, x, mask4);
             expect(mem.eql(i32, ([4]i32)(res), [4]i32{ 2147483647, 3, -2, 4 }));
 
             // bool
             {
-                var x2: @Vector(4, bool) = [4]bool{ false, true, false, true};
-                var v4: @Vector(2, bool) = [2]bool{ true, false};
-                const mask5: @Vector(4, i32) = [4]i32{ 0, ~i32(1), 1, 2};
+                var x2: @Vector(4, bool) = [4]bool{ false, true, false, true };
+                var v4: @Vector(2, bool) = [2]bool{ true, false };
+                const mask5: @Vector(4, i32) = [4]i32{ 0, ~i32(1), 1, 2 };
                 var res2 = @shuffle(bool, x2, v4, mask5);
                 expect(mem.eql(bool, ([4]bool)(res2), [4]bool{ false, false, true, false }));
             }
 
-            // FIXME re-enable when LLVM codegen is fixed
-            // https://bugs.llvm.org/show_bug.cgi?id=42803
+            // TODO re-enable when LLVM codegen is fixed
+            // https://github.com/ziglang/zig/issues/3246
             if (false) {
-                var x2: @Vector(3, bool) = [3]bool{ false, true, false};
-                var v4: @Vector(2, bool) = [2]bool{ true, false};
-                const mask5: @Vector(4, i32) = [4]i32{ 0, ~i32(1), 1, 2};
+                var x2: @Vector(3, bool) = [3]bool{ false, true, false };
+                var v4: @Vector(2, bool) = [2]bool{ true, false };
+                const mask5: @Vector(4, i32) = [4]i32{ 0, ~i32(1), 1, 2 };
                 var res2 = @shuffle(bool, x2, v4, mask5);
                 expect(mem.eql(bool, ([4]bool)(res2), [4]bool{ false, false, true, false }));
             }
-- 
cgit v1.2.3


From 380c8ec2c95fa8d732c141c705d9940629eb2012 Mon Sep 17 00:00:00 2001
From: Andrew Kelley <andrew@ziglang.org>
Date: Thu, 19 Sep 2019 00:59:04 -0400
Subject: implement runtime `@byteSwap` and other fixups

 * update docs for `@byteSwap`.
 * fix hash & eql functions for ZigLLVMFnIdBswap, which had not been
   updated to include vector len. this was causing the incorrect bswap
   function to be called in unrelated code
 * fix `@byteSwap` behavior tests only testing comptime and not
   runtime operations
 * implement runtime `@byteSwap`
 * fix incorrect logic in ir_render_vector_to_array and
   ir_render_array_to_vector with regards to whether or not to bitcast
 * `@byteSwap` accepts an array operand which it will cast to vector
 * simplify `@byteSwap` semantic analysis code and various fixes
---
 doc/langref.html.in               | 11 ++++-
 src/analyze.cpp                   |  6 ++-
 src/codegen.cpp                   | 23 ++++++----
 src/ir.cpp                        | 91 ++++++++++++++++++++-------------------
 test/stage1/behavior/byteswap.zig | 89 +++++++++++++++++++++++---------------
 5 files changed, 129 insertions(+), 91 deletions(-)

(limited to 'doc')

diff --git a/doc/langref.html.in b/doc/langref.html.in
index 8a303640e6..61fc06fd02 100644
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@@ -6542,12 +6542,21 @@ async fn func(y: *i32) void {
       {#header_close#}
 
       {#header_open|@byteSwap#}
-      <pre>{#syntax#}@byteSwap(comptime T: type, integer: T) T{#endsyntax#}</pre>
+      <pre>{#syntax#}@byteSwap(comptime T: type, operand: T) T{#endsyntax#}</pre>
       <p>{#syntax#}T{#endsyntax#} must be an integer type with bit count evenly divisible by 8.</p>
+      <p>{#syntax#}operand{#endsyntax#} may be an {#link|integer|Integers#} or {#link|vector|Vectors#}.</p>
       <p>
       Swaps the byte order of the integer. This converts a big endian integer to a little endian integer,
       and converts a little endian integer to a big endian integer.
       </p>
+      <p>
+      Note that for the purposes of memory layout with respect to endianness, the integer type should be
+      related to the number of bytes reported by {#link|@sizeOf#}. This is demonstrated with
+      {#syntax#}u24{#endsyntax#}. {#syntax#}@sizeOf(u24) == 4{#endsyntax#}, which means that a
+      {#syntax#}u24{#endsyntax#} stored in memory takes 4 bytes, and those 4 bytes are what are swapped on
+      a little vs big endian system. On the other hand, if {#syntax#}T{#endsyntax#} is specified to
+      be {#syntax#}u24{#endsyntax#}, then only 3 bytes are reversed.
+      </p>
       {#header_close#}
 
       {#header_open|@bitReverse#}
diff --git a/src/analyze.cpp b/src/analyze.cpp
index ac70d5646f..66b72b935d 100644
--- a/src/analyze.cpp
+++ b/src/analyze.cpp
@@ -6896,7 +6896,8 @@ uint32_t zig_llvm_fn_key_hash(ZigLLVMFnKey x) {
             return (uint32_t)(x.data.floating.bit_count) * ((uint32_t)x.id + 1025) +
                    (uint32_t)(x.data.floating.vector_len) * (((uint32_t)x.id << 5) + 1025);
         case ZigLLVMFnIdBswap:
-            return (uint32_t)(x.data.bswap.bit_count) * (uint32_t)3661994335;
+            return (uint32_t)(x.data.bswap.bit_count) * ((uint32_t)3661994335) +
+                   (uint32_t)(x.data.bswap.vector_len) * (((uint32_t)x.id << 5) + 1025);
         case ZigLLVMFnIdBitReverse:
             return (uint32_t)(x.data.bit_reverse.bit_count) * (uint32_t)2621398431;
         case ZigLLVMFnIdOverflowArithmetic:
@@ -6919,7 +6920,8 @@ bool zig_llvm_fn_key_eql(ZigLLVMFnKey a, ZigLLVMFnKey b) {
         case ZigLLVMFnIdPopCount:
             return a.data.pop_count.bit_count == b.data.pop_count.bit_count;
         case ZigLLVMFnIdBswap:
-            return a.data.bswap.bit_count == b.data.bswap.bit_count;
+            return a.data.bswap.bit_count == b.data.bswap.bit_count &&
+                   a.data.bswap.vector_len == b.data.bswap.vector_len;
         case ZigLLVMFnIdBitReverse:
             return a.data.bit_reverse.bit_count == b.data.bit_reverse.bit_count;
         case ZigLLVMFnIdFloatOp:
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 6a575d32a2..54c02b288a 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -4509,9 +4509,7 @@ static LLVMValueRef get_int_builtin_fn(CodeGen *g, ZigType *expr_type, BuiltinFn
     bool is_vector = expr_type->id == ZigTypeIdVector;
     ZigType *int_type = is_vector ? expr_type->data.vector.elem_type : expr_type;
     assert(int_type->id == ZigTypeIdInt);
-    uint32_t vector_len = 0;
-    if (is_vector)
-        vector_len = expr_type->data.vector.len;
+    uint32_t vector_len = is_vector ? expr_type->data.vector.len : 0;
     ZigLLVMFnKey key = {};
     const char *fn_name;
     uint32_t n_args;
@@ -5563,16 +5561,23 @@ static LLVMValueRef ir_render_bswap(CodeGen *g, IrExecutable *executable, IrInst
     // Not an even number of bytes, so we zext 1 byte, then bswap, shift right 1 byte, truncate
     ZigType *extended_type = get_int_type(g, int_type->data.integral.is_signed,
             int_type->data.integral.bit_count + 8);
-    if (is_vector)
+    LLVMValueRef shift_amt = LLVMConstInt(get_llvm_type(g, extended_type), 8, false);
+    if (is_vector) {
         extended_type = get_vector_type(g, expr_type->data.vector.len, extended_type);
+        LLVMValueRef *values = allocate_nonzero<LLVMValueRef>(expr_type->data.vector.len);
+        for (uint32_t i = 0; i < expr_type->data.vector.len; i += 1) {
+            values[i] = shift_amt;
+        }
+        shift_amt = LLVMConstVector(values, expr_type->data.vector.len);
+        free(values);
+    }
     // aabbcc
     LLVMValueRef extended = LLVMBuildZExt(g->builder, op, get_llvm_type(g, extended_type), "");
     // 00aabbcc
     LLVMValueRef fn_val = get_int_builtin_fn(g, extended_type, BuiltinFnIdBswap);
     LLVMValueRef swapped = LLVMBuildCall(g->builder, fn_val, &extended, 1, "");
     // ccbbaa00
-    LLVMValueRef shifted = ZigLLVMBuildLShrExact(g->builder, swapped,
-            LLVMConstInt(get_llvm_type(g, extended_type), 8, false), "");
+    LLVMValueRef shifted = ZigLLVMBuildLShrExact(g->builder, swapped, shift_amt, "");
     // 00ccbbaa
     return LLVMBuildTrunc(g->builder, shifted, get_llvm_type(g, expr_type), "");
 }
@@ -5595,7 +5600,7 @@ static LLVMValueRef ir_render_vector_to_array(CodeGen *g, IrExecutable *executab
     LLVMValueRef vector = ir_llvm_value(g, instruction->vector);
 
     ZigType *elem_type = array_type->data.array.child_type;
-    bool bitcast_ok = (elem_type->size_in_bits * 8) == elem_type->abi_size;
+    bool bitcast_ok = elem_type->size_in_bits == elem_type->abi_size * 8;
     if (bitcast_ok) {
         LLVMValueRef casted_ptr = LLVMBuildBitCast(g->builder, result_loc,
                 LLVMPointerType(get_llvm_type(g, instruction->vector->value.type), 0), "");
@@ -5629,7 +5634,7 @@ static LLVMValueRef ir_render_array_to_vector(CodeGen *g, IrExecutable *executab
     LLVMTypeRef vector_type_ref = get_llvm_type(g, vector_type);
 
     ZigType *elem_type = vector_type->data.vector.elem_type;
-    bool bitcast_ok = (elem_type->size_in_bits * 8) == elem_type->abi_size;
+    bool bitcast_ok = elem_type->size_in_bits == elem_type->abi_size * 8;
     if (bitcast_ok) {
         LLVMValueRef casted_ptr = LLVMBuildBitCast(g->builder, array_ptr,
                 LLVMPointerType(vector_type_ref, 0), "");
@@ -8902,7 +8907,7 @@ void add_cc_args(CodeGen *g, ZigList<const char *> &args, const char *out_dep_pa
         args.append(g->framework_dirs.at(i));
     }
 
-    //note(dimenus): appending libc headers before c_headers breaks intrinsics 
+    //note(dimenus): appending libc headers before c_headers breaks intrinsics
     //and other compiler specific items
     // According to Rich Felker libc headers are supposed to go before C language headers.
     args.append("-isystem");
diff --git a/src/ir.cpp b/src/ir.cpp
index e8ef45a116..1eba53ef45 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -11068,8 +11068,15 @@ static ZigType *ir_resolve_int_type(IrAnalyze *ira, IrInstruction *type_value) {
         return ira->codegen->builtin_types.entry_invalid;
 
     if (ty->id != ZigTypeIdInt) {
-        ir_add_error(ira, type_value,
+        ErrorMsg *msg = ir_add_error(ira, type_value,
             buf_sprintf("expected integer type, found '%s'", buf_ptr(&ty->name)));
+        if (ty->id == ZigTypeIdVector &&
+            ty->data.vector.elem_type->id == ZigTypeIdInt)
+        {
+            add_error_note(ira->codegen, msg, type_value->source_node,
+                buf_sprintf("represent vectors with their element types, i.e. '%s'",
+                    buf_ptr(&ty->data.vector.elem_type->name)));
+        }
         return ira->codegen->builtin_types.entry_invalid;
     }
 
@@ -25253,47 +25260,35 @@ static IrInstruction *ir_analyze_instruction_float_op(IrAnalyze *ira, IrInstruct
 }
 
 static IrInstruction *ir_analyze_instruction_bswap(IrAnalyze *ira, IrInstructionBswap *instruction) {
-    IrInstruction *op = instruction->op->child;
-    ZigType *type_expr = ir_resolve_type(ira, instruction->type->child);
-    if (type_is_invalid(type_expr))
+    Error err;
+
+    ZigType *int_type = ir_resolve_int_type(ira, instruction->type->child);
+    if (type_is_invalid(int_type))
         return ira->codegen->invalid_instruction;
 
-    if (type_expr->id != ZigTypeIdInt) {
-        ir_add_error(ira, instruction->type,
-            buf_sprintf("expected integer type, found '%s'", buf_ptr(&type_expr->name)));
-        if (type_expr->id == ZigTypeIdVector &&
-            type_expr->data.vector.elem_type->id == ZigTypeIdInt)
-            ir_add_error(ira, instruction->type,
-                buf_sprintf("represent vectors with their scalar types, i.e. '%s'",
-                    buf_ptr(&type_expr->data.vector.elem_type->name)));
+    IrInstruction *uncasted_op = instruction->op->child;
+    if (type_is_invalid(uncasted_op->value.type))
         return ira->codegen->invalid_instruction;
+
+    uint32_t vector_len; // UINT32_MAX means not a vector
+    if (uncasted_op->value.type->id == ZigTypeIdArray &&
+        is_valid_vector_elem_type(uncasted_op->value.type->data.array.child_type))
+    {
+        vector_len = uncasted_op->value.type->data.array.len;
+    } else if (uncasted_op->value.type->id == ZigTypeIdVector) {
+        vector_len = uncasted_op->value.type->data.vector.len;
+    } else {
+        vector_len = UINT32_MAX;
     }
-    ZigType *int_type = type_expr;
 
-    ZigType *expr_type = op->value.type;
-    bool is_vector = expr_type->id == ZigTypeIdVector;
-    ZigType *ret_type = int_type;
-    if (is_vector)
-        ret_type = get_vector_type(ira->codegen, expr_type->data.vector.len, int_type);
+    bool is_vector = (vector_len != UINT32_MAX);
+    ZigType *op_type = is_vector ? get_vector_type(ira->codegen, vector_len, int_type) : int_type;
 
-    op = ir_implicit_cast(ira, instruction->op->child, ret_type);
+    IrInstruction *op = ir_implicit_cast(ira, uncasted_op, op_type);
     if (type_is_invalid(op->value.type))
         return ira->codegen->invalid_instruction;
 
-    if (int_type->data.integral.bit_count == 0) {
-        IrInstruction *result = ir_const(ira, &instruction->base, ret_type);
-        if (is_vector) {
-            expand_undef_array(ira->codegen, &result->value);
-            result->value.data.x_array.data.s_none.elements =
-                allocate<ConstExprValue>(expr_type->data.vector.len);
-            for (unsigned i = 0; i < expr_type->data.vector.len; i++)
-                bigint_init_unsigned(&result->value.data.x_array.data.s_none.elements[i].data.x_bigint, 0);
-        }
-        bigint_init_unsigned(&result->value.data.x_bigint, 0);
-        return result;
-    }
-
-    if (int_type->data.integral.bit_count == 8)
+    if (int_type->data.integral.bit_count == 8 || int_type->data.integral.bit_count == 0)
         return op;
 
     if (int_type->data.integral.bit_count % 8 != 0) {
@@ -25308,21 +25303,28 @@ static IrInstruction *ir_analyze_instruction_bswap(IrAnalyze *ira, IrInstruction
         if (val == nullptr)
             return ira->codegen->invalid_instruction;
         if (val->special == ConstValSpecialUndef)
-            return ir_const_undef(ira, &instruction->base, ret_type);
+            return ir_const_undef(ira, &instruction->base, op_type);
 
-        IrInstruction *result = ir_const(ira, &instruction->base, ret_type);
+        IrInstruction *result = ir_const(ira, &instruction->base, op_type);
         size_t buf_size = int_type->data.integral.bit_count / 8;
         uint8_t *buf = allocate_nonzero<uint8_t>(buf_size);
         if (is_vector) {
-            expand_undef_array(ira->codegen, &result->value);
-            result->value.data.x_array.data.s_none.elements =
-                allocate<ConstExprValue>(expr_type->data.vector.len);
-            for (unsigned i = 0; i < expr_type->data.vector.len; i++) {
-                ConstExprValue *cur = &val->data.x_array.data.s_none.elements[i];
-                result->value.data.x_array.data.s_none.elements[i].special = cur->special;
-                if (cur->special == ConstValSpecialUndef)
+            expand_undef_array(ira->codegen, val);
+            result->value.data.x_array.data.s_none.elements = create_const_vals(op_type->data.vector.len);
+            for (unsigned i = 0; i < op_type->data.vector.len; i += 1) {
+                ConstExprValue *op_elem_val = &val->data.x_array.data.s_none.elements[i];
+                if ((err = ir_resolve_const_val(ira->codegen, ira->new_irb.exec, instruction->base.source_node,
+                    op_elem_val, UndefOk)))
+                {
+                    return ira->codegen->invalid_instruction;
+                }
+                ConstExprValue *result_elem_val = &result->value.data.x_array.data.s_none.elements[i];
+                result_elem_val->type = int_type;
+                result_elem_val->special = op_elem_val->special;
+                if (op_elem_val->special == ConstValSpecialUndef)
                     continue;
-                bigint_write_twos_complement(&cur->data.x_bigint, buf, int_type->data.integral.bit_count, true);
+
+                bigint_write_twos_complement(&op_elem_val->data.x_bigint, buf, int_type->data.integral.bit_count, true);
                 bigint_read_twos_complement(&result->value.data.x_array.data.s_none.elements[i].data.x_bigint,
                         buf, int_type->data.integral.bit_count, false,
                         int_type->data.integral.is_signed);
@@ -25332,12 +25334,13 @@ static IrInstruction *ir_analyze_instruction_bswap(IrAnalyze *ira, IrInstruction
             bigint_read_twos_complement(&result->value.data.x_bigint, buf, int_type->data.integral.bit_count, false,
                     int_type->data.integral.is_signed);
         }
+        free(buf);
         return result;
     }
 
     IrInstruction *result = ir_build_bswap(&ira->new_irb, instruction->base.scope,
             instruction->base.source_node, nullptr, op);
-    result->value.type = ret_type;
+    result->value.type = op_type;
     return result;
 }
 
diff --git a/test/stage1/behavior/byteswap.zig b/test/stage1/behavior/byteswap.zig
index 249db155b7..d8fc554808 100644
--- a/test/stage1/behavior/byteswap.zig
+++ b/test/stage1/behavior/byteswap.zig
@@ -1,43 +1,62 @@
 const std = @import("std");
 const expect = std.testing.expect;
 
-test "@byteSwap" {
-    comptime testByteSwap();
-    testByteSwap();
-}
+test "@byteSwap integers" {
+    const ByteSwapIntTest = struct {
+        fn run() void {
+            t(u0, 0, 0);
+            t(u8, 0x12, 0x12);
+            t(u16, 0x1234, 0x3412);
+            t(u24, 0x123456, 0x563412);
+            t(u32, 0x12345678, 0x78563412);
+            t(u40, 0x123456789a, 0x9a78563412);
+            t(i48, 0x123456789abc, @bitCast(i48, u48(0xbc9a78563412)));
+            t(u56, 0x123456789abcde, 0xdebc9a78563412);
+            t(u64, 0x123456789abcdef1, 0xf1debc9a78563412);
+            t(u128, 0x123456789abcdef11121314151617181, 0x8171615141312111f1debc9a78563412);
 
-test "@byteSwap on vectors" {
-    comptime testVectorByteSwap();
-    testVectorByteSwap();
+            t(u0, u0(0), 0);
+            t(i8, i8(-50), -50);
+            t(i16, @bitCast(i16, u16(0x1234)), @bitCast(i16, u16(0x3412)));
+            t(i24, @bitCast(i24, u24(0x123456)), @bitCast(i24, u24(0x563412)));
+            t(i32, @bitCast(i32, u32(0x12345678)), @bitCast(i32, u32(0x78563412)));
+            t(u40, @bitCast(i40, u40(0x123456789a)), u40(0x9a78563412));
+            t(i48, @bitCast(i48, u48(0x123456789abc)), @bitCast(i48, u48(0xbc9a78563412)));
+            t(i56, @bitCast(i56, u56(0x123456789abcde)), @bitCast(i56, u56(0xdebc9a78563412)));
+            t(i64, @bitCast(i64, u64(0x123456789abcdef1)), @bitCast(i64, u64(0xf1debc9a78563412)));
+            t(
+                i128,
+                @bitCast(i128, u128(0x123456789abcdef11121314151617181)),
+                @bitCast(i128, u128(0x8171615141312111f1debc9a78563412)),
+            );
+        }
+        fn t(comptime I: type, input: I, expected_output: I) void {
+            std.testing.expectEqual(expected_output, @byteSwap(I, input));
+        }
+    };
+    comptime ByteSwapIntTest.run();
+    ByteSwapIntTest.run();
 }
 
-fn testByteSwap() void {
-    expect(@byteSwap(u0, 0) == 0);
-    expect(@byteSwap(u8, 0x12) == 0x12);
-    expect(@byteSwap(u16, 0x1234) == 0x3412);
-    expect(@byteSwap(u24, 0x123456) == 0x563412);
-    expect(@byteSwap(u32, 0x12345678) == 0x78563412);
-    expect(@byteSwap(u40, 0x123456789a) == 0x9a78563412);
-    expect(@byteSwap(i48, 0x123456789abc) == @bitCast(i48, u48(0xbc9a78563412)));
-    expect(@byteSwap(u56, 0x123456789abcde) == 0xdebc9a78563412);
-    expect(@byteSwap(u64, 0x123456789abcdef1) == 0xf1debc9a78563412);
-    expect(@byteSwap(u128, 0x123456789abcdef11121314151617181) == 0x8171615141312111f1debc9a78563412);
-
-    expect(@byteSwap(u0, u0(0)) == 0);
-    expect(@byteSwap(i8, i8(-50)) == -50);
-    expect(@byteSwap(i16, @bitCast(i16, u16(0x1234))) == @bitCast(i16, u16(0x3412)));
-    expect(@byteSwap(i24, @bitCast(i24, u24(0x123456))) == @bitCast(i24, u24(0x563412)));
-    expect(@byteSwap(i32, @bitCast(i32, u32(0x12345678))) == @bitCast(i32, u32(0x78563412)));
-    expect(@byteSwap(u40, @bitCast(i40, u40(0x123456789a))) == u40(0x9a78563412));
-    expect(@byteSwap(i48, @bitCast(i48, u48(0x123456789abc))) == @bitCast(i48, u48(0xbc9a78563412)));
-    expect(@byteSwap(i56, @bitCast(i56, u56(0x123456789abcde))) == @bitCast(i56, u56(0xdebc9a78563412)));
-    expect(@byteSwap(i64, @bitCast(i64, u64(0x123456789abcdef1))) == @bitCast(i64, u64(0xf1debc9a78563412)));
-    expect(@byteSwap(i128, @bitCast(i128, u128(0x123456789abcdef11121314151617181))) ==
-        @bitCast(i128, u128(0x8171615141312111f1debc9a78563412)));
-}
+test "@byteSwap vectors" {
+    const ByteSwapVectorTest = struct {
+        fn run() void {
+            t(u8, 2, [_]u8{ 0x12, 0x13 }, [_]u8{ 0x12, 0x13 });
+            t(u16, 2, [_]u16{ 0x1234, 0x2345 }, [_]u16{ 0x3412, 0x4523 });
+            t(u24, 2, [_]u24{ 0x123456, 0x234567 }, [_]u24{ 0x563412, 0x674523 });
+        }
 
-fn testVectorByteSwap() void {
-    expect((@byteSwap(u8, @Vector(2, u8)([2]u8{0x12, 0x13})) == @Vector(2, u8)([2]u8{0x12, 0x13})).all);
-    expect((@byteSwap(u16, @Vector(2, u16)([2]u16{0x1234, 0x2345})) == @Vector(2, u16)([2]u16{0x3412, 0x4523})).all);
-    expect((@byteSwap(u24, @Vector(2, u24)([2]u24{0x123456, 0x234567})) == @Vector(2, u24)([2]u24{0x563412, 0x674523})).all);
+        fn t(
+            comptime I: type,
+            comptime n: comptime_int,
+            input: @Vector(n, I),
+            expected_vector: @Vector(n, I),
+        ) void {
+            const actual_output: [n]I = @byteSwap(I, input);
+            const expected_output: [n]I = expected_vector;
+            std.testing.expectEqual(expected_output, actual_output);
+        }
+    };
+    comptime ByteSwapVectorTest.run();
+    ByteSwapVectorTest.run();
 }
-- 
cgit v1.2.3


From 28c7fe60b6de6e3c32e082a0abfb5a7bac8fc45a Mon Sep 17 00:00:00 2001
From: Andrew Kelley <andrew@ziglang.org>
Date: Thu, 19 Sep 2019 11:14:42 -0400
Subject: add docs for `@splat`

---
 doc/langref.html.in | 36 ++++++++++++++++++++++++++++++------
 1 file changed, 30 insertions(+), 6 deletions(-)

(limited to 'doc')

diff --git a/doc/langref.html.in b/doc/langref.html.in
index 61fc06fd02..1158135dab 100644
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@@ -5864,7 +5864,7 @@ volatile (
     : [number] "{rax}" (number),
         [arg1] "{rdi}" (arg1)
 // Next is the list of clobbers. These declare a set of registers whose
-// values will not be preserved by the execution of this assembly code. 
+// values will not be preserved by the execution of this assembly code.
 // These do not include output or input registers. The special clobber
 // value of "memory" means that the assembly writes to arbitrary undeclared
 // memory locations - not only the memory pointed to by a declared indirect
@@ -5885,7 +5885,7 @@ volatile (
       </p>
       {#header_open|Output Constraints#}
       <p>
-      Output constraints are still considered to be unstable in Zig, and 
+      Output constraints are still considered to be unstable in Zig, and
       so
       <a href="http://releases.llvm.org/8.0.0/docs/LangRef.html#inline-asm-constraint-string">LLVM documentation</a>
       and
@@ -5900,7 +5900,7 @@ volatile (
 
       {#header_open|Input Constraints#}
       <p>
-      Input constraints are still considered to be unstable in Zig, and 
+      Input constraints are still considered to be unstable in Zig, and
       so
       <a href="http://releases.llvm.org/8.0.0/docs/LangRef.html#inline-asm-constraint-string">LLVM documentation</a>
       and
@@ -5919,7 +5919,7 @@ volatile (
       the assembly code. These do not include output or input registers. The special clobber
       value of {#syntax#}"memory"{#endsyntax#} means that the assembly causes writes to
       arbitrary undeclared memory locations - not only the memory pointed to by a declared
-      indirect output. 
+      indirect output.
       </p>
       <p>
       Failure to declare the full set of clobbers for a given inline assembly
@@ -7746,6 +7746,30 @@ test "@setRuntimeSafety" {
       </p>
       {#header_close#}
 
+      {#header_open|@splat#}
+      <pre>{#syntax#}@splat(comptime len: u32, scalar: var) @Vector(len, @typeOf(scalar)){#endsyntax#}</pre>
+      <p>
+      Produces a vector of length {#syntax#}len{#endsyntax#} where each element is the value
+      {#syntax#}scalar{#endsyntax#}:
+      </p>
+      {#code_begin|test#}
+const std = @import("std");
+const assert = std.debug.assert;
+
+test "vector @splat" {
+    const scalar: u32 = 5;
+    const result = @splat(4, scalar);
+    comptime assert(@typeOf(result) == @Vector(4, u32));
+    assert(std.mem.eql(u32, ([4]u32)(result), [_]u32{ 5, 5, 5, 5 }));
+}
+      {#code_end#}
+      <p>
+      {#syntax#}scalar{#endsyntax#} must be an {#link|integer|Integers#}, {#link|bool|Primitive Types#},
+      {#link|float|Floats#}, or {#link|pointer|Pointers#}.
+      </p>
+      {#see_also|Vectors|@shuffle#}
+      {#header_close#}
+
       {#header_open|@sqrt#}
       <pre>{#syntax#}@sqrt(comptime T: type, value: T) T{#endsyntax#}</pre>
       <p>
@@ -9456,8 +9480,8 @@ const c = @cImport({
         <li>Does not support Zig-only pointer attributes such as alignment. Use normal {#link|Pointers#}
         please!</li>
       </ul>
-      <p>When a C pointer is pointing to a single struct (not an array), deference the C pointer to 
-        access to the struct's fields or member data. That syntax looks like 
+      <p>When a C pointer is pointing to a single struct (not an array), dereference the C pointer to
+        access the struct's fields or member data. That syntax looks like
         this: </p>
         <p>{#syntax#}ptr_to_struct.*.struct_member{#endsyntax#}</p>
         <p>This is comparable to doing {#syntax#}->{#endsyntax#} in C.</p>
-- 
cgit v1.2.3


From ff9f3275dede031cdbea67272f648bb91c79c574 Mon Sep 17 00:00:00 2001
From: Shawn Landden <shawn@git.icu>
Date: Wed, 18 Sep 2019 18:34:40 -0500
Subject: docs: clarify @clz and @ctz terminology to not be endian-specific.

This was brought up in IRC a few days ago.
---
 doc/langref.html.in | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'doc')

diff --git a/doc/langref.html.in b/doc/langref.html.in
index 1158135dab..d9750a6635 100644
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@@ -6650,7 +6650,7 @@ async fn func(y: *i32) void {
       {#header_open|@clz#}
       <pre>{#syntax#}@clz(comptime T: type, integer: T){#endsyntax#}</pre>
       <p>
-      This function counts the number of leading zeroes in {#syntax#}integer{#endsyntax#}.
+      This function counts the number of most-significant (leading in a big-endian sense) zeroes in {#syntax#}integer{#endsyntax#}.
       </p>
       <p>
       If {#syntax#}integer{#endsyntax#} is known at {#link|comptime#},
@@ -6792,7 +6792,7 @@ test "main" {
       {#header_open|@ctz#}
       <pre>{#syntax#}@ctz(comptime T: type, integer: T){#endsyntax#}</pre>
       <p>
-      This function counts the number of trailing zeroes in {#syntax#}integer{#endsyntax#}.
+      This function counts the number of least-significant (trailing in a big-endian sense) zeroes in {#syntax#}integer{#endsyntax#}.
       </p>
       <p>
       If {#syntax#}integer{#endsyntax#} is known at {#link|comptime#},
-- 
cgit v1.2.3