Merge pull request #15481 from ziglang/use-mem-intrinsics

actually use the new memory intrinsics
author: Andrew Kelley <andrew@ziglang.org> 2023-04-29 00:19:55 -0700
committer: GitHub <noreply@github.com> 2023-04-29 00:19:55 -0700
commit: d65b42e07caa00dfe2f2fbf221c593ce57882784 (patch)
tree: 7926cbea1499e0affe930bf6d7455dc24adf014e /lib/std/compress
parent: fd6200eda6d4fe19c34a59430a88a9ce38d6d7a4 (diff)
parent: fa200ca0cad2705bad40eb723dedf4e3bf11f2ff (diff)
download: zig-d65b42e07caa00dfe2f2fbf221c593ce57882784.tar.gz
zig-d65b42e07caa00dfe2f2fbf221c593ce57882784.zip
13 files changed, 60 insertions, 62 deletions
diff --git a/lib/std/compress/deflate.zig b/lib/std/compress/deflate.zig
index 7b079a6039..2fe5969067 100644
--- a/lib/std/compress/deflate.zig
+++ b/lib/std/compress/deflate.zig
@@ -12,6 +12,16 @@ pub const Decompressor = inflate.Decompressor;
 pub const compressor = deflate.compressor;
 pub const decompressor = inflate.decompressor;
 
+/// Copies bytes from `src` into `dst`, stopping at the end of the shorter slice.
+/// Never fails: when `dst` is too small the copy is simply truncated.
+/// Returns the number of bytes copied, which is `@min(dst.len, src.len)`.
+/// TODO: remove this smelly function
+pub fn copy(dst: []u8, src: []const u8) usize {
+    const n = @min(dst.len, src.len);
+    @memcpy(dst[0..n], src[0..n]);
+    return n;
+}
+
 test {
     _ = @import("deflate/token.zig");
     _ = @import("deflate/bits_utils.zig");
diff --git a/lib/std/compress/deflate/compressor.zig b/lib/std/compress/deflate/compressor.zig
index 6c21875941..e2cbafe520 100644
--- a/lib/std/compress/deflate/compressor.zig
+++ b/lib/std/compress/deflate/compressor.zig
@@ -10,7 +10,6 @@ const Allocator = std.mem.Allocator;
 const deflate_const = @import("deflate_const.zig");
 const fast = @import("deflate_fast.zig");
 const hm_bw = @import("huffman_bit_writer.zig");
-const mu = @import("mem_utils.zig");
 const token = @import("token.zig");
 
 pub const Compression = enum(i5) {
@@ -296,7 +295,7 @@ pub fn Compressor(comptime WriterType: anytype) type {
         fn fillDeflate(self: *Self, b: []const u8) u32 {
             if (self.index >= 2 * window_size - (min_match_length + max_match_length)) {
                 // shift the window by window_size
-                mem.copy(u8, self.window, self.window[window_size .. 2 * window_size]);
+                mem.copyForwards(u8, self.window, self.window[window_size .. 2 * window_size]);
                 self.index -= window_size;
                 self.window_end -= window_size;
                 if (self.block_start >= window_size) {
@@ -328,7 +327,7 @@ pub fn Compressor(comptime WriterType: anytype) type {
                     }
                 }
             }
-            var n = mu.copy(self.window[self.window_end..], b);
+            const n = std.compress.deflate.copy(self.window[self.window_end..], b);
             self.window_end += n;
             return @intCast(u32, n);
         }
@@ -369,7 +368,7 @@ pub fn Compressor(comptime WriterType: anytype) type {
                 b = b[b.len - window_size ..];
             }
             // Add all to window.
-            mem.copy(u8, self.window, b);
+            @memcpy(self.window[0..b.len], b);
             var n = b.len;
 
             // Calculate 256 hashes at the time (more L1 cache hits)
@@ -543,7 +542,7 @@ pub fn Compressor(comptime WriterType: anytype) type {
             self.hash_offset = 1;
             self.tokens = try self.allocator.alloc(token.Token, max_flate_block_tokens);
             self.tokens_count = 0;
-            mem.set(token.Token, self.tokens, 0);
+            @memset(self.tokens, 0);
             self.length = min_match_length - 1;
             self.offset = 0;
             self.byte_available = false;
@@ -706,7 +705,7 @@ pub fn Compressor(comptime WriterType: anytype) type {
         }
 
         fn fillStore(self: *Self, b: []const u8) u32 {
-            var n = mu.copy(self.window[self.window_end..], b);
+            const n = std.compress.deflate.copy(self.window[self.window_end..], b);
             self.window_end += n;
             return @intCast(u32, n);
         }
@@ -841,9 +840,9 @@ pub fn Compressor(comptime WriterType: anytype) type {
             s.hash_head = try allocator.alloc(u32, hash_size);
             s.hash_prev = try allocator.alloc(u32, window_size);
             s.hash_match = try allocator.alloc(u32, max_match_length - 1);
-            mem.set(u32, s.hash_head, 0);
-            mem.set(u32, s.hash_prev, 0);
-            mem.set(u32, s.hash_match, 0);
+            @memset(s.hash_head, 0);
+            @memset(s.hash_prev, 0);
+            @memset(s.hash_match, 0);
 
             switch (options.level) {
                 .no_compression => {
@@ -936,8 +935,8 @@ pub fn Compressor(comptime WriterType: anytype) type {
                 .best_compression,
                 => {
                     self.chain_head = 0;
-                    mem.set(u32, self.hash_head, 0);
-                    mem.set(u32, self.hash_prev, 0);
+                    @memset(self.hash_head, 0);
+                    @memset(self.hash_prev, 0);
                     self.hash_offset = 1;
                     self.index = 0;
                     self.window_end = 0;
@@ -1091,8 +1090,8 @@ test "bulkHash4" {
         // double the test data
         var out = try testing.allocator.alloc(u8, x.out.len * 2);
         defer testing.allocator.free(out);
-        mem.copy(u8, out[0..x.out.len], x.out);
-        mem.copy(u8, out[x.out.len..], x.out);
+        @memcpy(out[0..x.out.len], x.out);
+        @memcpy(out[x.out.len..], x.out);
 
         var j: usize = 4;
         while (j < out.len) : (j += 1) {
diff --git a/lib/std/compress/deflate/decompressor.zig b/lib/std/compress/deflate/decompressor.zig
index e5cfbb0f6b..6c232c598e 100644
--- a/lib/std/compress/deflate/decompressor.zig
+++ b/lib/std/compress/deflate/decompressor.zig
@@ -9,7 +9,6 @@ const ArrayList = std.ArrayList;
 const bu = @import("bits_utils.zig");
 const ddec = @import("dict_decoder.zig");
 const deflate_const = @import("deflate_const.zig");
-const mu = @import("mem_utils.zig");
 
 const max_match_offset = deflate_const.max_match_offset;
 const end_block_marker = deflate_const.end_block_marker;
@@ -159,7 +158,7 @@ const HuffmanDecoder = struct {
                 if (sanity) {
                     // initialize to a known invalid chunk code (0) to see if we overwrite
                     // this value later on
-                    mem.set(u16, self.links[off], 0);
+                    @memset(self.links[off], 0);
                 }
                 try self.sub_chunks.append(off);
             }
@@ -451,7 +450,7 @@ pub fn Decompressor(comptime ReaderType: type) type {
         pub fn read(self: *Self, output: []u8) Error!usize {
             while (true) {
                 if (self.to_read.len > 0) {
-                    var n = mu.copy(output, self.to_read);
+                    const n = std.compress.deflate.copy(output, self.to_read);
                     self.to_read = self.to_read[n..];
                     if (self.to_read.len == 0 and
                         self.err != null)
diff --git a/lib/std/compress/deflate/deflate_fast.zig b/lib/std/compress/deflate/deflate_fast.zig
index 2009af2611..c86d181cb5 100644
--- a/lib/std/compress/deflate/deflate_fast.zig
+++ b/lib/std/compress/deflate/deflate_fast.zig
@@ -237,7 +237,7 @@ pub const DeflateFast = struct {
         }
         self.cur += @intCast(i32, src.len);
         self.prev_len = @intCast(u32, src.len);
-        mem.copy(u8, self.prev[0..self.prev_len], src);
+        @memcpy(self.prev[0..self.prev_len], src);
         return;
     }
 
@@ -566,11 +566,11 @@ test "best speed match 2/2" {
     for (cases) |c| {
         var previous = try testing.allocator.alloc(u8, c.previous);
         defer testing.allocator.free(previous);
-        mem.set(u8, previous, 0);
+        @memset(previous, 0);
 
         var current = try testing.allocator.alloc(u8, c.current);
         defer testing.allocator.free(current);
-        mem.set(u8, current, 0);
+        @memset(current, 0);
 
         var e = DeflateFast{
             .prev = previous,
diff --git a/lib/std/compress/deflate/deflate_fast_test.zig b/lib/std/compress/deflate/deflate_fast_test.zig
index f8efa80630..1c771d925a 100644
--- a/lib/std/compress/deflate/deflate_fast_test.zig
+++ b/lib/std/compress/deflate/deflate_fast_test.zig
@@ -123,13 +123,13 @@ test "best speed max match offset" {
                 var src = try testing.allocator.alloc(u8, src_len);
                 defer testing.allocator.free(src);
 
-                mem.copy(u8, src, abc);
+                @memcpy(src[0..abc.len], abc);
                 if (!do_match_before) {
-                    var src_offset: usize = @intCast(usize, offset - @as(i32, xyz.len));
-                    mem.copy(u8, src[src_offset..], xyz);
+                    const src_offset: usize = @intCast(usize, offset - @as(i32, xyz.len));
+                    @memcpy(src[src_offset..][0..xyz.len], xyz);
                 }
-                var src_offset: usize = @intCast(usize, offset);
-                mem.copy(u8, src[src_offset..], abc);
+                const src_offset: usize = @intCast(usize, offset);
+                @memcpy(src[src_offset..][0..abc.len], abc);
 
                 var compressed = ArrayList(u8).init(testing.allocator);
                 defer compressed.deinit();
diff --git a/lib/std/compress/deflate/dict_decoder.zig b/lib/std/compress/deflate/dict_decoder.zig
index bf21572827..d9f240e7b4 100644
--- a/lib/std/compress/deflate/dict_decoder.zig
+++ b/lib/std/compress/deflate/dict_decoder.zig
@@ -47,7 +47,8 @@ pub const DictDecoder = struct {
         self.wr_pos = 0;
 
         if (dict != null) {
-            mem.copy(u8, self.hist, dict.?[dict.?.len -| self.hist.len..]);
+            const src = dict.?[dict.?.len -| self.hist.len..];
+            @memcpy(self.hist[0..src.len], src);
             self.wr_pos = @intCast(u32, dict.?.len);
         }
 
@@ -103,12 +104,15 @@ pub const DictDecoder = struct {
         self.wr_pos += 1;
     }
 
+    /// TODO: eliminate this function because the callsites should care about whether
+    /// or not their arguments alias and then they should directly call `@memcpy` or
+    /// `mem.copyForwards`.
     fn copy(dst: []u8, src: []const u8) u32 {
         if (src.len > dst.len) {
-            mem.copy(u8, dst, src[0..dst.len]);
+            mem.copyForwards(u8, dst, src[0..dst.len]);
             return @intCast(u32, dst.len);
         }
-        mem.copy(u8, dst, src);
+        mem.copyForwards(u8, dst[0..src.len], src);
         return @intCast(u32, src.len);
     }
 
diff --git a/lib/std/compress/deflate/huffman_code.zig b/lib/std/compress/deflate/huffman_code.zig
index e911e5219b..cf1dd71c75 100644
--- a/lib/std/compress/deflate/huffman_code.zig
+++ b/lib/std/compress/deflate/huffman_code.zig
@@ -202,7 +202,7 @@ pub const HuffmanEncoder = struct {
                     // more values in the level below
                     l.last_freq = l.next_pair_freq;
                     // Take leaf counts from the lower level, except counts[level] remains the same.
-                    mem.copy(u32, leaf_counts[level][0..level], leaf_counts[level - 1][0..level]);
+                    @memcpy(leaf_counts[level][0..level], leaf_counts[level - 1][0..level]);
                     levels[l.level - 1].needed = 2;
                 }
 
diff --git a/lib/std/compress/deflate/mem_utils.zig b/lib/std/compress/deflate/mem_utils.zig
deleted file mode 100644
index 32b55a1bb6..0000000000
--- a/lib/std/compress/deflate/mem_utils.zig
+++ /dev/null
@@ -1,15 +0,0 @@
-const std = @import("std");
-const math = std.math;
-const mem = std.mem;
-
-// Copies elements from a source `src` slice into a destination `dst` slice.
-// The copy never returns an error but might not be complete if the destination is too small.
-// Returns the number of elements copied, which will be the minimum of `src.len` and `dst.len`.
-pub fn copy(dst: []u8, src: []const u8) usize {
-    if (dst.len <= src.len) {
-        mem.copy(u8, dst[0..], src[0..dst.len]);
-    } else {
-        mem.copy(u8, dst[0..src.len], src[0..]);
-    }
-    return math.min(dst.len, src.len);
-}
diff --git a/lib/std/compress/lzma.zig b/lib/std/compress/lzma.zig
index 8bb8c19da1..ff05bc1c8b 100644
--- a/lib/std/compress/lzma.zig
+++ b/lib/std/compress/lzma.zig
@@ -75,9 +75,11 @@ pub fn Decompress(comptime ReaderType: type) type {
                 }
             }
             const input = self.to_read.items;
-            const n = math.min(input.len, output.len);
-            mem.copy(u8, output[0..n], input[0..n]);
-            mem.copy(u8, input, input[n..]);
+            const n = @min(input.len, output.len);
+            @memcpy(output[0..n], input[0..n]);
+            // The unread tail slides down inside the same buffer, so the two ranges can
+            // overlap; `@memcpy` forbids aliasing, hence the forward-ordered copy.
+            mem.copyForwards(u8, input[0 .. input.len - n], input[n..]);
             self.to_read.shrinkRetainingCapacity(input.len - n);
             return n;
         }
diff --git a/lib/std/compress/lzma/decode/rangecoder.zig b/lib/std/compress/lzma/decode/rangecoder.zig
index 5df10be060..5a2f309fe4 100644
--- a/lib/std/compress/lzma/decode/rangecoder.zig
+++ b/lib/std/compress/lzma/decode/rangecoder.zig
@@ -143,7 +143,7 @@ pub fn BitTree(comptime num_bits: usize) type {
         }
 
         pub fn reset(self: *Self) void {
-            mem.set(u16, &self.probs, 0x400);
+            @memset(&self.probs, 0x400);
         }
     };
 }
diff --git a/lib/std/compress/lzma/vec2d.zig b/lib/std/compress/lzma/vec2d.zig
index 1372d3592c..1ea3d1e8d5 100644
--- a/lib/std/compress/lzma/vec2d.zig
+++ b/lib/std/compress/lzma/vec2d.zig
@@ -13,7 +13,7 @@ pub fn Vec2D(comptime T: type) type {
         pub fn init(allocator: Allocator, value: T, size: struct { usize, usize }) !Self {
             const len = try math.mul(usize, size[0], size[1]);
             const data = try allocator.alloc(T, len);
-            mem.set(T, data, value);
+            @memset(data, value);
             return Self{
                 .data = data,
                 .cols = size[1],
@@ -26,7 +26,7 @@ pub fn Vec2D(comptime T: type) type {
         }
 
         pub fn fill(self: *Self, value: T) void {
-            mem.set(T, self.data, value);
+            @memset(self.data, value);
         }
 
         inline fn _get(self: Self, row: usize) ![]T {
diff --git a/lib/std/compress/xz/block.zig b/lib/std/compress/xz/block.zig
index 8d3d8f0353..520c335794 100644
--- a/lib/std/compress/xz/block.zig
+++ b/lib/std/compress/xz/block.zig
@@ -59,9 +59,9 @@ pub fn Decoder(comptime ReaderType: type) type {
             while (true) {
                 if (self.to_read.items.len > 0) {
                     const input = self.to_read.items;
-                    const n = std.math.min(input.len, output.len);
-                    std.mem.copy(u8, output[0..n], input[0..n]);
-                    std.mem.copy(u8, input, input[n..]);
+                    const n = @min(input.len, output.len);
+                    @memcpy(output[0..n], input[0..n]);
+                    std.mem.copyForwards(u8, input, input[n..]);
                     self.to_read.shrinkRetainingCapacity(input.len - n);
                     if (self.to_read.items.len == 0 and self.err != null) {
                         if (self.err.? == DecodeError.EndOfStreamWithNoError) {
diff --git a/lib/std/compress/zstandard/decode/block.zig b/lib/std/compress/zstandard/decode/block.zig
index 4b7353f63c..e2042650c6 100644
--- a/lib/std/compress/zstandard/decode/block.zig
+++ b/lib/std/compress/zstandard/decode/block.zig
@@ -293,10 +293,10 @@ pub const DecodeState = struct {
 
         try self.decodeLiteralsSlice(dest[write_pos..], sequence.literal_length);
         const copy_start = write_pos + sequence.literal_length - sequence.offset;
-        const copy_end = copy_start + sequence.match_length;
-        // NOTE: we ignore the usage message for std.mem.copy and copy with dest.ptr >= src.ptr
-        //       to allow repeats
-        std.mem.copy(u8, dest[write_pos + sequence.literal_length ..], dest[copy_start..copy_end]);
+        for (
+            dest[write_pos + sequence.literal_length ..][0..sequence.match_length],
+            dest[copy_start..][0..sequence.match_length],
+        ) |*d, s| d.* = s;
         self.written_count += sequence.match_length;
     }
 
@@ -311,7 +311,6 @@ pub const DecodeState = struct {
         try self.decodeLiteralsRingBuffer(dest, sequence.literal_length);
         const copy_start = dest.write_index + dest.data.len - sequence.offset;
         const copy_slice = dest.sliceAt(copy_start, sequence.match_length);
-        // TODO: would std.mem.copy and figuring out dest slice be better/faster?
         for (copy_slice.first) |b| dest.writeAssumeCapacity(b);
         for (copy_slice.second) |b| dest.writeAssumeCapacity(b);
         self.written_count += sequence.match_length;
@@ -444,9 +443,8 @@ pub const DecodeState = struct {
 
         switch (self.literal_header.block_type) {
             .raw => {
-                const literals_end = self.literal_written_count + len;
-                const literal_data = self.literal_streams.one[self.literal_written_count..literals_end];
-                std.mem.copy(u8, dest, literal_data);
+                const literal_data = self.literal_streams.one[self.literal_written_count..][0..len];
+                @memcpy(dest[0..len], literal_data);
                 self.literal_written_count += len;
                 self.written_count += len;
             },
@@ -615,8 +613,7 @@ pub fn decodeBlock(
         .raw => {
             if (src.len < block_size) return error.MalformedBlockSize;
             if (dest[written_count..].len < block_size) return error.DestTooSmall;
-            const data = src[0..block_size];
-            std.mem.copy(u8, dest[written_count..], data);
+            @memcpy(dest[written_count..][0..block_size], src[0..block_size]);
             consumed_count.* += block_size;
             decode_state.written_count += block_size;
             return block_size;
author	Andrew Kelley <andrew@ziglang.org>	2023-04-29 00:19:55 -0700
committer	GitHub <noreply@github.com>	2023-04-29 00:19:55 -0700
commit	d65b42e07caa00dfe2f2fbf221c593ce57882784 (patch)
tree	7926cbea1499e0affe930bf6d7455dc24adf014e /lib/std/compress
parent	fd6200eda6d4fe19c34a59430a88a9ce38d6d7a4 (diff)
parent	fa200ca0cad2705bad40eb723dedf4e3bf11f2ff (diff)
download	zig-d65b42e07caa00dfe2f2fbf221c593ce57882784.tar.gz zig-d65b42e07caa00dfe2f2fbf221c593ce57882784.zip