aboutsummaryrefslogtreecommitdiff
path: root/lib/std/os/linux/bpf.zig
diff options
context:
space:
mode:
authorAndrew Kelley <andrew@ziglang.org>2020-09-22 11:41:21 -0700
committerAndrew Kelley <andrew@ziglang.org>2020-09-22 11:41:21 -0700
commite2d1f9874df2a9221aaa9ec55bd2974b70601f64 (patch)
treeeff7919b0717e193aa53b70fcee862d6f33deddb /lib/std/os/linux/bpf.zig
parent52b8239a22aa37fe3914427cd4e2905231769e59 (diff)
parent58ee5f4e61cd9b7a9ba65798e2214efa3753a733 (diff)
downloadzig-e2d1f9874df2a9221aaa9ec55bd2974b70601f64.tar.gz
zig-e2d1f9874df2a9221aaa9ec55bd2974b70601f64.zip
Merge remote-tracking branch 'origin/master' into llvm11
Diffstat (limited to 'lib/std/os/linux/bpf.zig')
-rw-r--r--lib/std/os/linux/bpf.zig829
1 files changed, 761 insertions, 68 deletions
diff --git a/lib/std/os/linux/bpf.zig b/lib/std/os/linux/bpf.zig
index 928c157c42..44c938feb8 100644
--- a/lib/std/os/linux/bpf.zig
+++ b/lib/std/os/linux/bpf.zig
@@ -3,9 +3,16 @@
// This file is part of [zig](https://ziglang.org/), which is MIT licensed.
// The MIT license requires this copyright notice to be included in all copies
// and substantial portions of the software.
-usingnamespace std.os;
+usingnamespace std.os.linux;
const std = @import("../../std.zig");
+const errno = getErrno;
+const unexpectedErrno = std.os.unexpectedErrno;
const expectEqual = std.testing.expectEqual;
+const expectError = std.testing.expectError;
+const expect = std.testing.expect;
+
+pub const btf = @import("bpf/btf.zig");
+pub const kern = @import("bpf/kern.zig");
// instruction classes
pub const LD = 0x00;
@@ -62,6 +69,7 @@ pub const MAXINSNS = 4096;
// instruction classes
/// jmp mode in word width
pub const JMP32 = 0x06;
+
/// alu mode in double word width
pub const ALU64 = 0x07;
@@ -72,14 +80,17 @@ pub const XADD = 0xc0;
// alu/jmp fields
/// mov reg to reg
pub const MOV = 0xb0;
+
/// sign extending arithmetic shift right
pub const ARSH = 0xc0;
// change endianness of a register
/// flags for endianness conversion:
pub const END = 0xd0;
+
/// convert to little-endian
pub const TO_LE = 0x00;
+
/// convert to big-endian
pub const TO_BE = 0x08;
pub const FROM_LE = TO_LE;
@@ -88,29 +99,39 @@ pub const FROM_BE = TO_BE;
// jmp encodings
/// jump !=
pub const JNE = 0x50;
+
/// LT is unsigned, '<'
pub const JLT = 0xa0;
+
/// LE is unsigned, '<='
pub const JLE = 0xb0;
+
/// SGT is signed '>', GT in x86
pub const JSGT = 0x60;
+
/// SGE is signed '>=', GE in x86
pub const JSGE = 0x70;
+
/// SLT is signed, '<'
pub const JSLT = 0xc0;
+
/// SLE is signed, '<='
pub const JSLE = 0xd0;
+
/// function call
pub const CALL = 0x80;
+
/// function return
pub const EXIT = 0x90;
/// Flag for prog_attach command. If a sub-cgroup installs some bpf program, the
/// program in this cgroup yields to sub-cgroup program.
pub const F_ALLOW_OVERRIDE = 0x1;
+
/// Flag for prog_attach command. If a sub-cgroup installs some bpf program,
/// that cgroup program gets run in addition to the program in this cgroup.
pub const F_ALLOW_MULTI = 0x2;
+
/// Flag for prog_attach command.
pub const F_REPLACE = 0x4;
@@ -164,47 +185,61 @@ pub const PSEUDO_CALL = 1;
/// flag for BPF_MAP_UPDATE_ELEM command. create new element or update existing
pub const ANY = 0;
+
/// flag for BPF_MAP_UPDATE_ELEM command. create new element if it didn't exist
pub const NOEXIST = 1;
+
/// flag for BPF_MAP_UPDATE_ELEM command. update existing element
pub const EXIST = 2;
+
/// flag for BPF_MAP_UPDATE_ELEM command. spin_lock-ed map_lookup/map_update
pub const F_LOCK = 4;
/// flag for BPF_MAP_CREATE command
pub const BPF_F_NO_PREALLOC = 0x1;
+
/// flag for BPF_MAP_CREATE command. Instead of having one common LRU list in
/// the BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list which can
/// scale and perform better. Note, the LRU nodes (including free nodes) cannot
/// be moved across different LRU lists.
pub const BPF_F_NO_COMMON_LRU = 0x2;
+
/// flag for BPF_MAP_CREATE command. Specify numa node during map creation
pub const BPF_F_NUMA_NODE = 0x4;
+
/// flag for BPF_MAP_CREATE command. Flags for BPF object read access from
/// syscall side
pub const BPF_F_RDONLY = 0x8;
+
/// flag for BPF_MAP_CREATE command. Flags for BPF object write access from
/// syscall side
pub const BPF_F_WRONLY = 0x10;
+
/// flag for BPF_MAP_CREATE command. Flag for stack_map, store build_id+offset
/// instead of pointer
pub const BPF_F_STACK_BUILD_ID = 0x20;
+
/// flag for BPF_MAP_CREATE command. Zero-initialize hash function seed. This
/// should only be used for testing.
pub const BPF_F_ZERO_SEED = 0x40;
+
/// flag for BPF_MAP_CREATE command. Flags for accessing BPF object from program
/// side.
pub const BPF_F_RDONLY_PROG = 0x80;
+
/// flag for BPF_MAP_CREATE command. Flags for accessing BPF object from program
/// side.
pub const BPF_F_WRONLY_PROG = 0x100;
+
/// flag for BPF_MAP_CREATE command. Clone map from listener for newly accepted
/// socket
pub const BPF_F_CLONE = 0x200;
+
/// flag for BPF_MAP_CREATE command. Enable memory-mapping BPF map
pub const BPF_F_MMAPABLE = 0x400;
-/// These values correspond to "syscalls" within the BPF program's environment
+/// These values correspond to "syscalls" within the BPF program's environment;
+/// each one is documented in std.os.linux.BPF.kern
pub const Helper = enum(i32) {
unspec,
map_lookup_elem,
@@ -325,9 +360,34 @@ pub const Helper = enum(i32) {
tcp_send_ack,
send_signal_thread,
jiffies64,
+ read_branch_records,
+ get_ns_current_pid_tgid,
+ xdp_output,
+ get_netns_cookie,
+ get_current_ancestor_cgroup_id,
+ sk_assign,
+ ktime_get_boot_ns,
+ seq_printf,
+ seq_write,
+ sk_cgroup_id,
+ sk_ancestor_cgroup_id,
+ ringbuf_output,
+ ringbuf_reserve,
+ ringbuf_submit,
+ ringbuf_discard,
+ ringbuf_query,
+ csum_level,
+ skc_to_tcp6_sock,
+ skc_to_tcp_sock,
+ skc_to_tcp_timewait_sock,
+ skc_to_tcp_request_sock,
+ skc_to_udp6_sock,
+ get_task_stack,
_,
};
+// TODO: determine that this is the expected bit layout for both little and big
+// endian systems
/// a single BPF instruction
pub const Insn = packed struct {
code: u8,
@@ -340,19 +400,30 @@ pub const Insn = packed struct {
/// frame
pub const Reg = packed enum(u4) { r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10 };
const Source = packed enum(u1) { reg, imm };
+
+ /// Addressing-mode bits of a load opcode. `ld` ORs the chosen value into
+ /// `Insn.code` together with the access size and the LD class.
+ const Mode = packed enum(u8) {
+ imm = IMM,
+ abs = ABS,
+ ind = IND,
+ mem = MEM,
+ len = LEN,
+ msh = MSH,
+ };
+
/// Operation selector handed to `alu` by the public ALU helpers (`add`,
/// `sub`, `mul`, ...). Each tag takes the value of the matching opcode
/// constant.
const AluOp = packed enum(u8) {
add = ADD,
sub = SUB,
mul = MUL,
div = DIV,
- op_or = OR,
- op_and = AND,
+ alu_or = OR,
+ alu_and = AND,
lsh = LSH,
rsh = RSH,
neg = NEG,
mod = MOD,
xor = XOR,
mov = MOV,
+ arsh = ARSH,
};
pub const Size = packed enum(u8) {
@@ -368,6 +439,13 @@ pub const Insn = packed struct {
jgt = JGT,
jge = JGE,
jset = JSET,
+ jlt = JLT,
+ jle = JLE,
+ jne = JNE,
+ jsgt = JSGT,
+ jsge = JSGE,
+ jslt = JSLT,
+ jsle = JSLE,
};
const ImmOrReg = union(Source) {
@@ -419,22 +497,100 @@ pub const Insn = packed struct {
return alu(64, .add, dst, src);
}
+ /// 64-bit ALU `sub` instruction. `src` is forwarded to `alu` unchanged, so
+ /// it may be a register or an immediate.
+ pub fn sub(dst: Reg, src: anytype) Insn {
+     const op: AluOp = .sub;
+     return alu(64, op, dst, src);
+ }
+
+ /// 64-bit ALU `mul` instruction. `src` is forwarded to `alu` unchanged, so
+ /// it may be a register or an immediate.
+ pub fn mul(dst: Reg, src: anytype) Insn {
+     const op: AluOp = .mul;
+     return alu(64, op, dst, src);
+ }
+
+ /// 64-bit ALU `div` instruction. `src` is forwarded to `alu` unchanged, so
+ /// it may be a register or an immediate.
+ pub fn div(dst: Reg, src: anytype) Insn {
+     const op: AluOp = .div;
+     return alu(64, op, dst, src);
+ }
+
+ /// 64-bit ALU `or` instruction (named `alu_or` because `or` is a Zig
+ /// keyword). `src` may be a register or an immediate.
+ pub fn alu_or(dst: Reg, src: anytype) Insn {
+     const op: AluOp = .alu_or;
+     return alu(64, op, dst, src);
+ }
+
+ /// 64-bit ALU `and` instruction (named `alu_and` because `and` is a Zig
+ /// keyword). `src` may be a register or an immediate.
+ pub fn alu_and(dst: Reg, src: anytype) Insn {
+     const op: AluOp = .alu_and;
+     return alu(64, op, dst, src);
+ }
+
+ /// 64-bit ALU `lsh` (left shift) instruction. `src` may be a register or
+ /// an immediate.
+ pub fn lsh(dst: Reg, src: anytype) Insn {
+     const op: AluOp = .lsh;
+     return alu(64, op, dst, src);
+ }
+
+ /// 64-bit ALU `rsh` (right shift) instruction. `src` may be a register or
+ /// an immediate.
+ pub fn rsh(dst: Reg, src: anytype) Insn {
+     const op: AluOp = .rsh;
+     return alu(64, op, dst, src);
+ }
+
+ /// 64-bit ALU `neg` instruction. It takes no source operand, so an
+ /// immediate of 0 is passed to `alu`.
+ pub fn neg(dst: Reg) Insn {
+     const op: AluOp = .neg;
+     return alu(64, op, dst, 0);
+ }
+
+ /// 64-bit ALU `mod` instruction. `src` is forwarded to `alu` unchanged, so
+ /// it may be a register or an immediate.
+ pub fn mod(dst: Reg, src: anytype) Insn {
+     const op: AluOp = .mod;
+     return alu(64, op, dst, src);
+ }
+
+ /// 64-bit ALU `xor` instruction. `src` is forwarded to `alu` unchanged, so
+ /// it may be a register or an immediate.
+ pub fn xor(dst: Reg, src: anytype) Insn {
+     const op: AluOp = .xor;
+     return alu(64, op, dst, src);
+ }
+
+ /// 64-bit ALU `arsh` (sign extending arithmetic shift right) instruction.
+ /// `src` may be a register or an immediate.
+ pub fn arsh(dst: Reg, src: anytype) Insn {
+     const op: AluOp = .arsh;
+     return alu(64, op, dst, src);
+ }
+
/// Shared builder for the jump helpers: ORs the JMP class with the
/// operation bits and lets `imm_reg` encode `dst`, `src` and `off`.
fn jmp(op: JmpOp, dst: Reg, src: anytype, off: i16) Insn {
    const opcode = JMP | @enumToInt(op);
    return imm_reg(opcode, dst, src, off);
}
+ /// Jump by `off` with the `ja` operation. It has no operands to compare,
+ /// so `.r0` and an immediate of 0 are passed as placeholders.
+ pub fn ja(off: i16) Insn {
+     const op: JmpOp = .ja;
+     return jmp(op, .r0, 0, off);
+ }
+
/// `jeq` jump: compares `dst` with `src` (register or immediate) and
/// carries `off` as the jump offset.
pub fn jeq(dst: Reg, src: anytype, off: i16) Insn {
    const op: JmpOp = .jeq;
    return jmp(op, dst, src, off);
}
- pub fn stx_mem(size: Size, dst: Reg, src: Reg, off: i16) Insn {
- return Insn{
- .code = STX | @enumToInt(size) | MEM,
- .dst = @enumToInt(dst),
- .src = @enumToInt(src),
- .off = off,
- .imm = 0,
- };
+ /// `jgt` jump: compares `dst` with `src` (register or immediate) and
+ /// carries `off` as the jump offset.
+ pub fn jgt(dst: Reg, src: anytype, off: i16) Insn {
+     const op: JmpOp = .jgt;
+     return jmp(op, dst, src, off);
+ }
+
+ /// `jge` jump: compares `dst` with `src` (register or immediate) and
+ /// carries `off` as the jump offset.
+ pub fn jge(dst: Reg, src: anytype, off: i16) Insn {
+     const op: JmpOp = .jge;
+     return jmp(op, dst, src, off);
+ }
+
+ /// `jlt` jump (unsigned '<'): compares `dst` with `src` (register or
+ /// immediate) and carries `off` as the jump offset.
+ pub fn jlt(dst: Reg, src: anytype, off: i16) Insn {
+     const op: JmpOp = .jlt;
+     return jmp(op, dst, src, off);
+ }
+
+ /// `jle` jump (unsigned '<='): compares `dst` with `src` (register or
+ /// immediate) and carries `off` as the jump offset.
+ pub fn jle(dst: Reg, src: anytype, off: i16) Insn {
+     const op: JmpOp = .jle;
+     return jmp(op, dst, src, off);
+ }
+
+ /// `jset` jump: takes `dst`, `src` (register or immediate) and the jump
+ /// offset `off`.
+ pub fn jset(dst: Reg, src: anytype, off: i16) Insn {
+     const op: JmpOp = .jset;
+     return jmp(op, dst, src, off);
+ }
+
+ /// `jne` jump ('!='): compares `dst` with `src` (register or immediate)
+ /// and carries `off` as the jump offset.
+ pub fn jne(dst: Reg, src: anytype, off: i16) Insn {
+     const op: JmpOp = .jne;
+     return jmp(op, dst, src, off);
+ }
+
+ /// `jsgt` jump (signed '>'): compares `dst` with `src` (register or
+ /// immediate) and carries `off` as the jump offset.
+ pub fn jsgt(dst: Reg, src: anytype, off: i16) Insn {
+     const op: JmpOp = .jsgt;
+     return jmp(op, dst, src, off);
+ }
+
+ /// `jsge` jump (signed '>='): compares `dst` with `src` (register or
+ /// immediate) and carries `off` as the jump offset.
+ pub fn jsge(dst: Reg, src: anytype, off: i16) Insn {
+     const op: JmpOp = .jsge;
+     return jmp(op, dst, src, off);
+ }
+
+ /// `jslt` jump (signed '<'): compares `dst` with `src` (register or
+ /// immediate) and carries `off` as the jump offset.
+ pub fn jslt(dst: Reg, src: anytype, off: i16) Insn {
+     const op: JmpOp = .jslt;
+     return jmp(op, dst, src, off);
+ }
+
+ /// `jsle` jump (signed '<='): compares `dst` with `src` (register or
+ /// immediate) and carries `off` as the jump offset.
+ pub fn jsle(dst: Reg, src: anytype, off: i16) Insn {
+     const op: JmpOp = .jsle;
+     return jmp(op, dst, src, off);
}
pub fn xadd(dst: Reg, src: Reg) Insn {
@@ -447,17 +603,34 @@ pub const Insn = packed struct {
};
}
- /// direct packet access, R0 = *(uint *)(skb->data + imm32)
- pub fn ld_abs(size: Size, imm: i32) Insn {
+ /// Shared builder for LD-class loads: ORs the mode, size and LD class into
+ /// the opcode, encodes `dst` and `src`, leaves `off` at 0 and stores `imm`.
+ fn ld(mode: Mode, size: Size, dst: Reg, src: Reg, imm: i32) Insn {
return Insn{
- .code = LD | @enumToInt(size) | ABS,
- .dst = 0,
- .src = 0,
+ .code = @enumToInt(mode) | @enumToInt(size) | LD,
+ .dst = @enumToInt(dst),
+ .src = @enumToInt(src),
.off = 0,
.imm = imm,
};
}
+ /// LD-class load using the `abs` addressing mode; `imm` is stored in the
+ /// instruction's immediate field.
+ pub fn ld_abs(size: Size, dst: Reg, src: Reg, imm: i32) Insn {
+     const mode: Mode = .abs;
+     return ld(mode, size, dst, src, imm);
+ }
+
+ /// LD-class load using the `ind` addressing mode; `imm` is stored in the
+ /// instruction's immediate field.
+ pub fn ld_ind(size: Size, dst: Reg, src: Reg, imm: i32) Insn {
+     const mode: Mode = .ind;
+     return ld(mode, size, dst, src, imm);
+ }
+
+ /// LDX-class load in MEM mode: encodes `dst`, `src` and the offset `off`;
+ /// the immediate field is left at 0.
+ pub fn ldx(size: Size, dst: Reg, src: Reg, off: i16) Insn {
+     const opcode: u8 = MEM | @enumToInt(size) | LDX;
+     const dst_bits = @enumToInt(dst);
+     const src_bits = @enumToInt(src);
+     return Insn{ .code = opcode, .dst = dst_bits, .src = src_bits, .off = off, .imm = 0 };
+ }
+
fn ld_imm_impl1(dst: Reg, src: Reg, imm: u64) Insn {
return Insn{
.code = LD | DW | IMM,
@@ -478,6 +651,14 @@ pub const Insn = packed struct {
};
}
+ /// First of the two consecutive instructions that load a 64-bit
+ /// immediate into `dst`; the source register field is set to `.r0`.
+ pub fn ld_dw1(dst: Reg, imm: u64) Insn {
+     const no_src: Reg = .r0;
+     return ld_imm_impl1(dst, no_src, imm);
+ }
+
+ /// Second of the two consecutive instructions that load a 64-bit
+ /// immediate; pass the same `imm` that was given to `ld_dw1`.
+ pub fn ld_dw2(imm: u64) Insn {
+     const second_half = ld_imm_impl2(imm);
+     return second_half;
+ }
+
/// First of the two consecutive instructions that load a map file
/// descriptor into `dst`. The source register field carries PSEUDO_MAP_FD
/// to mark the immediate as a map fd.
pub fn ld_map_fd1(dst: Reg, map_fd: fd_t) Insn {
    const pseudo_src = @intToEnum(Reg, PSEUDO_MAP_FD);
    const fd_bits = @intCast(u64, map_fd);
    return ld_imm_impl1(dst, pseudo_src, fd_bits);
}
@@ -486,6 +667,53 @@ pub const Insn = packed struct {
return ld_imm_impl2(@intCast(u64, map_fd));
}
+ /// ST-class store of the immediate `imm` in MEM mode, addressed through
+ /// `dst` and `off`. Double-word stores are rejected at compile time.
+ pub fn st(comptime size: Size, dst: Reg, off: i16, imm: i32) Insn {
+     switch (size) {
+         .double_word => @compileError("TODO: need to determine how to correctly handle double words"),
+         else => {},
+     }
+     const opcode: u8 = MEM | @enumToInt(size) | ST;
+     return Insn{
+         .code = opcode,
+         .dst = @enumToInt(dst),
+         .src = 0,
+         .off = off,
+         .imm = imm,
+     };
+ }
+
+ /// STX-class store of register `src` in MEM mode, addressed through `dst`
+ /// and `off`; the immediate field is left at 0.
+ pub fn stx(size: Size, dst: Reg, off: i16, src: Reg) Insn {
+     const opcode: u8 = MEM | @enumToInt(size) | STX;
+     const dst_bits = @enumToInt(dst);
+     const src_bits = @enumToInt(src);
+     return Insn{ .code = opcode, .dst = dst_bits, .src = src_bits, .off = off, .imm = 0 };
+ }
+
+ /// Shared builder for the byte-swap instructions. The opcode is 0xdc for
+ /// big-endian and 0xd4 for little-endian; the immediate holds the operand
+ /// width in bits (16, 32 or 64). A single byte cannot be swapped and is
+ /// rejected at compile time.
+ fn endian_swap(endian: std.builtin.Endian, comptime size: Size, dst: Reg) Insn {
+     const bit_width: i32 = switch (size) {
+         .byte => @compileError("can't swap a single byte"),
+         .half_word => 16,
+         .word => 32,
+         .double_word => 64,
+     };
+     const opcode: u8 = switch (endian) {
+         .Big => 0xdc,
+         .Little => 0xd4,
+     };
+     return Insn{
+         .code = opcode,
+         .dst = @enumToInt(dst),
+         .src = 0,
+         .off = 0,
+         .imm = bit_width,
+     };
+ }
+
+ /// Little-endian byte-swap instruction for `dst` with the given operand
+ /// size (`.byte` is a compile error).
+ pub fn le(comptime size: Size, dst: Reg) Insn {
+     const target: std.builtin.Endian = .Little;
+     return endian_swap(target, size, dst);
+ }
+
+ /// Big-endian byte-swap instruction for `dst` with the given operand size
+ /// (`.byte` is a compile error).
+ pub fn be(comptime size: Size, dst: Reg) Insn {
+     const target: std.builtin.Endian = .Big;
+     return endian_swap(target, size, dst);
+ }
+
pub fn call(helper: Helper) Insn {
return Insn{
.code = JMP | CALL,
@@ -508,95 +736,242 @@ pub const Insn = packed struct {
}
};
-fn expect_insn(insn: Insn, val: u64) void {
- expectEqual(@bitCast(u64, insn), val);
-}
-
test "insn bitsize" {
    // An encoded instruction must occupy exactly 64 bits.
    const bits = @bitSizeOf(Insn);
    expectEqual(bits, 64);
}
-// mov instructions
-test "mov imm" {
- expect_insn(Insn.mov(.r1, 1), 0x00000001000001b7);
-}
-
-test "mov reg" {
- expect_insn(Insn.mov(.r6, .r1), 0x00000000000016bf);
-}
-
-// alu instructions
-test "add imm" {
- expect_insn(Insn.add(.r2, -4), 0xfffffffc00000207);
+/// Test helper: asserts that the opcode byte of `insn` equals `code`.
+fn expect_opcode(code: u8, insn: Insn) void {
+    const actual = insn.code;
+    expectEqual(code, actual);
}
-// ld instructions
-test "ld_abs" {
- expect_insn(Insn.ld_abs(.byte, 42), 0x0000002a00000030);
-}
-
-test "ld_map_fd" {
- expect_insn(Insn.ld_map_fd1(.r1, 42), 0x0000002a00001118);
- expect_insn(Insn.ld_map_fd2(42), 0x0000000000000000);
-}
-
-// st instructions
-test "stx_mem" {
- expect_insn(Insn.stx_mem(.word, .r10, .r0, -4), 0x00000000fffc0a63);
-}
-
-test "xadd" {
- expect_insn(Insn.xadd(.r0, .r1), 0x00000000000010db);
-}
-
-// jmp instructions
-test "jeq imm" {
- expect_insn(Insn.jeq(.r0, 0, 2), 0x0000000000020015);
-}
-
-// other instructions
-test "call" {
- expect_insn(Insn.call(.map_lookup_elem), 0x0000000100000085);
-}
-
-test "exit" {
- expect_insn(Insn.exit(), 0x0000000000000095);
+// The opcodes were grabbed from https://github.com/iovisor/bpf-docs/blob/master/eBPF.md
+test "opcodes" {
+ // instructions that have a name that end with 1 or 2 are consecutive for
+ // loading 64-bit immediates (imm is only 32 bits wide)
+
+ // alu instructions
+ expect_opcode(0x07, Insn.add(.r1, 0));
+ expect_opcode(0x0f, Insn.add(.r1, .r2));
+ expect_opcode(0x17, Insn.sub(.r1, 0));
+ expect_opcode(0x1f, Insn.sub(.r1, .r2));
+ expect_opcode(0x27, Insn.mul(.r1, 0));
+ expect_opcode(0x2f, Insn.mul(.r1, .r2));
+ expect_opcode(0x37, Insn.div(.r1, 0));
+ expect_opcode(0x3f, Insn.div(.r1, .r2));
+ expect_opcode(0x47, Insn.alu_or(.r1, 0));
+ expect_opcode(0x4f, Insn.alu_or(.r1, .r2));
+ expect_opcode(0x57, Insn.alu_and(.r1, 0));
+ expect_opcode(0x5f, Insn.alu_and(.r1, .r2));
+ expect_opcode(0x67, Insn.lsh(.r1, 0));
+ expect_opcode(0x6f, Insn.lsh(.r1, .r2));
+ expect_opcode(0x77, Insn.rsh(.r1, 0));
+ expect_opcode(0x7f, Insn.rsh(.r1, .r2));
+ expect_opcode(0x87, Insn.neg(.r1));
+ expect_opcode(0x97, Insn.mod(.r1, 0));
+ expect_opcode(0x9f, Insn.mod(.r1, .r2));
+ expect_opcode(0xa7, Insn.xor(.r1, 0));
+ expect_opcode(0xaf, Insn.xor(.r1, .r2));
+ expect_opcode(0xb7, Insn.mov(.r1, 0));
+ expect_opcode(0xbf, Insn.mov(.r1, .r2));
+ expect_opcode(0xc7, Insn.arsh(.r1, 0));
+ expect_opcode(0xcf, Insn.arsh(.r1, .r2));
+
+ // atomic instructions: might be more of these not documented in the wild
+ expect_opcode(0xdb, Insn.xadd(.r1, .r2));
+
+ // byteswap instructions: the opcode selects the target endianness and the
+ // immediate carries the operand width in bits
+ expect_opcode(0xd4, Insn.le(.half_word, .r1));
+ expectEqual(@intCast(i32, 16), Insn.le(.half_word, .r1).imm);
+ expect_opcode(0xd4, Insn.le(.word, .r1));
+ expectEqual(@intCast(i32, 32), Insn.le(.word, .r1).imm);
+ expect_opcode(0xd4, Insn.le(.double_word, .r1));
+ expectEqual(@intCast(i32, 64), Insn.le(.double_word, .r1).imm);
+ expect_opcode(0xdc, Insn.be(.half_word, .r1));
+ expectEqual(@intCast(i32, 16), Insn.be(.half_word, .r1).imm);
+ expect_opcode(0xdc, Insn.be(.word, .r1));
+ expectEqual(@intCast(i32, 32), Insn.be(.word, .r1).imm);
+ expect_opcode(0xdc, Insn.be(.double_word, .r1));
+ expectEqual(@intCast(i32, 64), Insn.be(.double_word, .r1).imm);
+
+ // memory instructions
+ expect_opcode(0x18, Insn.ld_dw1(.r1, 0));
+ expect_opcode(0x00, Insn.ld_dw2(0));
+
+ // loading a map fd
+ expect_opcode(0x18, Insn.ld_map_fd1(.r1, 0));
+ expectEqual(@intCast(u4, PSEUDO_MAP_FD), Insn.ld_map_fd1(.r1, 0).src);
+ expect_opcode(0x00, Insn.ld_map_fd2(0));
+
+ expect_opcode(0x38, Insn.ld_abs(.double_word, .r1, .r2, 0));
+ expect_opcode(0x20, Insn.ld_abs(.word, .r1, .r2, 0));
+ expect_opcode(0x28, Insn.ld_abs(.half_word, .r1, .r2, 0));
+ expect_opcode(0x30, Insn.ld_abs(.byte, .r1, .r2, 0));
+
+ expect_opcode(0x58, Insn.ld_ind(.double_word, .r1, .r2, 0));
+ expect_opcode(0x40, Insn.ld_ind(.word, .r1, .r2, 0));
+ expect_opcode(0x48, Insn.ld_ind(.half_word, .r1, .r2, 0));
+ expect_opcode(0x50, Insn.ld_ind(.byte, .r1, .r2, 0));
+
+ expect_opcode(0x79, Insn.ldx(.double_word, .r1, .r2, 0));
+ expect_opcode(0x61, Insn.ldx(.word, .r1, .r2, 0));
+ expect_opcode(0x69, Insn.ldx(.half_word, .r1, .r2, 0));
+ expect_opcode(0x71, Insn.ldx(.byte, .r1, .r2, 0));
+
+ expect_opcode(0x62, Insn.st(.word, .r1, 0, 0));
+ expect_opcode(0x6a, Insn.st(.half_word, .r1, 0, 0));
+ expect_opcode(0x72, Insn.st(.byte, .r1, 0, 0));
+
+ expect_opcode(0x63, Insn.stx(.word, .r1, 0, .r2));
+ expect_opcode(0x6b, Insn.stx(.half_word, .r1, 0, .r2));
+ expect_opcode(0x73, Insn.stx(.byte, .r1, 0, .r2));
+ expect_opcode(0x7b, Insn.stx(.double_word, .r1, 0, .r2));
+
+ // branch instructions
+ expect_opcode(0x05, Insn.ja(0));
+ expect_opcode(0x15, Insn.jeq(.r1, 0, 0));
+ expect_opcode(0x1d, Insn.jeq(.r1, .r2, 0));
+ expect_opcode(0x25, Insn.jgt(.r1, 0, 0));
+ expect_opcode(0x2d, Insn.jgt(.r1, .r2, 0));
+ expect_opcode(0x35, Insn.jge(.r1, 0, 0));
+ expect_opcode(0x3d, Insn.jge(.r1, .r2, 0));
+ expect_opcode(0xa5, Insn.jlt(.r1, 0, 0));
+ expect_opcode(0xad, Insn.jlt(.r1, .r2, 0));
+ expect_opcode(0xb5, Insn.jle(.r1, 0, 0));
+ expect_opcode(0xbd, Insn.jle(.r1, .r2, 0));
+ expect_opcode(0x45, Insn.jset(.r1, 0, 0));
+ expect_opcode(0x4d, Insn.jset(.r1, .r2, 0));
+ expect_opcode(0x55, Insn.jne(.r1, 0, 0));
+ expect_opcode(0x5d, Insn.jne(.r1, .r2, 0));
+ expect_opcode(0x65, Insn.jsgt(.r1, 0, 0));
+ expect_opcode(0x6d, Insn.jsgt(.r1, .r2, 0));
+ expect_opcode(0x75, Insn.jsge(.r1, 0, 0));
+ expect_opcode(0x7d, Insn.jsge(.r1, .r2, 0));
+ expect_opcode(0xc5, Insn.jslt(.r1, 0, 0));
+ expect_opcode(0xcd, Insn.jslt(.r1, .r2, 0));
+ expect_opcode(0xd5, Insn.jsle(.r1, 0, 0));
+ expect_opcode(0xdd, Insn.jsle(.r1, .r2, 0));
+ expect_opcode(0x85, Insn.call(.unspec));
+ expect_opcode(0x95, Insn.exit());
}
pub const Cmd = extern enum(usize) {
+ /// Create a map and return a file descriptor that refers to the map. The
+ /// close-on-exec file descriptor flag is automatically enabled for the new
+ /// file descriptor.
+ ///
+ /// uses MapCreateAttr
map_create,
+
+ /// Look up an element by key in a specified map and return its value.
+ ///
+ /// uses MapElemAttr
map_lookup_elem,
+
+ /// Create or update an element (key/value pair) in a specified map.
+ ///
+ /// uses MapElemAttr
map_update_elem,
+
+ /// Look up and delete an element by key in a specified map.
+ ///
+ /// uses MapElemAttr
map_delete_elem,
+
+ /// Look up an element by key in a specified map and return the key of the
+ /// next element.
map_get_next_key,
+
+ /// Verify and load an eBPF program, returning a new file descriptor
+ /// associated with the program. The close-on-exec file descriptor flag
+ /// is automatically enabled for the new file descriptor.
+ ///
+ /// uses ProgLoadAttr
prog_load,
+
+ /// Pin a map or eBPF program to a path within the minimal BPF filesystem
+ ///
+ /// uses ObjAttr
obj_pin,
+
+ /// Get the file descriptor of a BPF object pinned to a certain path
+ ///
+ /// uses ObjAttr
obj_get,
+
+ /// uses ProgAttachAttr
prog_attach,
+
+ /// uses ProgAttachAttr
prog_detach,
+
+ /// uses TestRunAttr
prog_test_run,
+
+ /// uses GetIdAttr
prog_get_next_id,
+
+ /// uses GetIdAttr
map_get_next_id,
+
+ /// uses GetIdAttr
prog_get_fd_by_id,
+
+ /// uses GetIdAttr
map_get_fd_by_id,
+
+ /// uses InfoAttr
obj_get_info_by_fd,
+
+ /// uses QueryAttr
prog_query,
+
+ /// uses RawTracepointAttr
raw_tracepoint_open,
+
+ /// uses BtfLoadAttr
btf_load,
+
+ /// uses GetIdAttr
btf_get_fd_by_id,
+
+ /// uses TaskFdQueryAttr
task_fd_query,
+
+ /// uses MapElemAttr
map_lookup_and_delete_elem,
map_freeze,
+
+ /// uses GetIdAttr
btf_get_next_id,
+
+ /// uses MapBatchAttr
map_lookup_batch,
+
+ /// uses MapBatchAttr
map_lookup_and_delete_batch,
+
+ /// uses MapBatchAttr
map_update_batch,
+
+ /// uses MapBatchAttr
map_delete_batch,
+
+ /// uses LinkCreateAttr
link_create,
+
+ /// uses LinkUpdateAttr
link_update,
+
+ /// uses GetIdAttr
link_get_fd_by_id,
+
+ /// uses GetIdAttr
link_get_next_id,
+
+ /// uses EnableStatsAttr
enable_stats,
+
+ /// uses IterCreateAttr
iter_create,
link_detach,
_,
@@ -630,42 +1005,138 @@ pub const MapType = extern enum(u32) {
sk_storage,
devmap_hash,
struct_ops,
+
+ /// An ordered and shared CPU version of perf_event_array. They have
+ /// similar semantics:
+ /// - variable length records
+ /// - no blocking: when full, reservation fails
+ /// - memory mappable for ease and speed
+ /// - epoll notifications for new data, but can busy poll
+ ///
+ /// Ringbufs give BPF programs two sets of APIs:
+ /// - ringbuf_output() allows copying data from one place to a ring
+ /// buffer, similar to bpf_perf_event_output()
+ /// - ringbuf_reserve()/ringbuf_submit()/ringbuf_discard() split the
+ /// process into two steps. First a fixed amount of space is reserved;
+ /// if that is successful then the program gets a pointer to a chunk of
+ /// memory, which can then be submitted with ringbuf_submit() or
+ /// discarded with ringbuf_discard()
+ ///
+ /// ringbuf_output() will incur an extra memory copy, but allows submitting
+ /// records whose length is not known beforehand, and is an easy
+ /// replacement for perf_event_output().
+ ///
+ /// ringbuf_reserve() avoids the extra memory copy but requires a known size
+ /// of memory beforehand.
+ ///
+ /// ringbuf_query() allows to query properties of the map, 4 are currently
+ /// supported:
+ /// - BPF_RB_AVAIL_DATA: amount of unconsumed data in ringbuf
+ /// - BPF_RB_RING_SIZE: returns size of ringbuf
+ /// - BPF_RB_CONS_POS/BPF_RB_PROD_POS returns current logical position
+ /// of consumer and producer respectively
+ ///
+ /// key size: 0
+ /// value size: 0
+ /// max entries: size of ringbuf, must be power of 2
ringbuf,
+
_,
};
/// Program type passed to `prog_load`. Each tag notes the context type the
/// program receives. The trailing `_` makes the enum non-exhaustive.
pub const ProgType = extern enum(u32) {
unspec,
+
+ /// context type: __sk_buff
socket_filter,
+
+ /// context type: bpf_user_pt_regs_t
kprobe,
+
+ /// context type: __sk_buff
sched_cls,
+
+ /// context type: __sk_buff
sched_act,
+
+ /// context type: u64
tracepoint,
+
+ /// context type: xdp_md
xdp,
+
+ /// context type: bpf_perf_event_data
perf_event,
+
+ /// context type: __sk_buff
cgroup_skb,
+
+ /// context type: bpf_sock
cgroup_sock,
+
+ /// context type: __sk_buff
lwt_in,
+
+ /// context type: __sk_buff
lwt_out,
+
+ /// context type: __sk_buff
lwt_xmit,
+
+ /// context type: bpf_sock_ops
sock_ops,
+
+ /// context type: __sk_buff
sk_skb,
+
+ /// context type: bpf_cgroup_dev_ctx
cgroup_device,
+
+ /// context type: sk_msg_md
sk_msg,
+
+ /// context type: bpf_raw_tracepoint_args
raw_tracepoint,
+
+ /// context type: bpf_sock_addr
cgroup_sock_addr,
+
+ /// context type: __sk_buff
lwt_seg6local,
+
+ /// context type: u32
lirc_mode2,
+
+ /// context type: sk_reuseport_md
sk_reuseport,
+
+ /// context type: __sk_buff
flow_dissector,
+
+ /// context type: bpf_sysctl
cgroup_sysctl,
+
+ /// context type: bpf_raw_tracepoint_args
raw_tracepoint_writable,
+
+ /// context type: bpf_sockopt
cgroup_sockopt,
+
+ /// context type: void *
tracing,
+
+ /// context type: void *
struct_ops,
+
+ /// context type: void *
ext,
+
+ /// context type: void *
lsm,
+
+ /// context type: bpf_sk_lookup
sk_lookup,
+ _,
};
pub const AttachType = extern enum(u32) {
@@ -715,27 +1186,38 @@ const obj_name_len = 16;
pub const MapCreateAttr = extern struct {
/// one of MapType
map_type: u32,
+
/// size of key in bytes
key_size: u32,
+
/// size of value in bytes
value_size: u32,
+
/// max number of entries in a map
max_entries: u32,
+
/// .map_create related flags
map_flags: u32,
+
/// fd pointing to the inner map
inner_map_fd: fd_t,
+
/// numa node (effective only if BPF_F_NUMA_NODE is set in map_flags)
numa_node: u32,
/// name of the map, at most obj_name_len bytes
map_name: [obj_name_len]u8,
+
/// ifindex of netdev to create on
map_ifindex: u32,
+
/// fd pointing to a BTF type data
btf_fd: fd_t,
+
/// BTF type_id of the key
btf_key_type_id: u32,
+
// NOTE(review): the neighbouring fields use a `btf_` prefix, so
// `bpf_value_type_id` looks like a typo for `btf_value_type_id`. Confirm
// before renaming; callers may already depend on this name.
/// BTF type_id of the value
bpf_value_type_id: u32,
+
/// BTF type_id of a kernel struct stored as the map value
btf_vmlinux_value_type_id: u32,
};
@@ -755,10 +1237,12 @@ pub const MapElemAttr = extern struct {
pub const MapBatchAttr = extern struct {
/// start batch, NULL to start from beginning
in_batch: u64,
+
/// output: next start batch
out_batch: u64,
keys: u64,
values: u64,
+
/// input/output:
/// input: # of key/value elements
/// output: # of filled elements
@@ -775,35 +1259,49 @@ pub const ProgLoadAttr = extern struct {
insn_cnt: u32,
insns: u64,
license: u64,
+
/// verbosity level of verifier
log_level: u32,
+
/// size of user buffer
log_size: u32,
+
/// user supplied buffer
log_buf: u64,
+
/// not used
kern_version: u32,
prog_flags: u32,
prog_name: [obj_name_len]u8,
- /// ifindex of netdev to prep for. For some prog types expected attach
- /// type must be known at load time to verify attach type specific parts
- /// of prog (context accesses, allowed helpers, etc).
+
+ /// ifindex of netdev to prep for.
prog_ifindex: u32,
+
+ /// For some prog types expected attach type must be known at load time to
+ /// verify attach type specific parts of prog (context accesses, allowed
+ /// helpers, etc).
expected_attach_type: u32,
+
/// fd pointing to BTF type data
prog_btf_fd: fd_t,
+
/// userspace bpf_func_info size
func_info_rec_size: u32,
func_info: u64,
+
/// number of bpf_func_info records
func_info_cnt: u32,
+
/// userspace bpf_line_info size
line_info_rec_size: u32,
line_info: u64,
+
/// number of bpf_line_info records
line_info_cnt: u32,
+
/// in-kernel BTF type id to attach to
attact_btf_id: u32,
+
/// 0 to attach to vmlinux
attach_prog_id: u32,
};
@@ -819,29 +1317,36 @@ pub const ObjAttr = extern struct {
pub const ProgAttachAttr = extern struct {
/// container object to attach to
target_fd: fd_t,
+
/// eBPF program to attach
attach_bpf_fd: fd_t,
+
attach_type: u32,
attach_flags: u32,
+
// TODO: BPF_F_REPLACE flags
/// previously attached eBPF program to replace; presumably only consulted
/// when F_REPLACE is set in attach_flags (TODO confirm)
replace_bpf_fd: fd_t,
};
/// struct used by Cmd.prog_test_run command
-pub const TestAttr = extern struct {
+pub const TestRunAttr = extern struct {
prog_fd: fd_t,
retval: u32,
+
/// input: len of data_in
data_size_in: u32,
+
/// input/output: len of data_out. returns ENOSPC if data_out is too small.
data_size_out: u32,
data_in: u64,
data_out: u64,
repeat: u32,
duration: u32,
+
/// input: len of ctx_in
ctx_size_in: u32,
+
/// input/output: len of ctx_out. returns ENOSPC if ctx_out is too small.
ctx_size_out: u32,
ctx_in: u64,
@@ -894,26 +1399,35 @@ pub const BtfLoadAttr = extern struct {
btf_log_level: u32,
};
+/// struct used by Cmd.task_fd_query
pub const TaskFdQueryAttr = extern struct {
/// input: pid
pid: pid_t,
+
/// input: fd
fd: fd_t,
+
/// input: flags
flags: u32,
+
/// input/output: buf len
buf_len: u32,
+
/// input/output:
/// tp_name for tracepoint
/// symbol for kprobe
/// filename for uprobe
buf: u64,
+
/// output: prog_id
prog_id: u32,
+
/// output: BPF_FD_TYPE
fd_type: u32,
+
/// output: probe_offset
probe_offset: u64,
+
/// output: probe_addr
probe_addr: u64,
};
@@ -922,9 +1436,11 @@ pub const TaskFdQueryAttr = extern struct {
pub const LinkCreateAttr = extern struct {
/// eBPF program to attach
prog_fd: fd_t,
+
/// object to attach to
target_fd: fd_t,
/// presumably one of AttachType (TODO confirm)
attach_type: u32,
+
/// extra flags
flags: u32,
};
@@ -932,10 +1448,13 @@ pub const LinkCreateAttr = extern struct {
/// struct used by Cmd.link_update command
pub const LinkUpdateAttr = extern struct {
link_fd: fd_t,
+
/// new program to update link with
new_prog_fd: fd_t,
+
/// extra flags
flags: u32,
+
/// expected link's program fd, it is specified only if BPF_F_REPLACE is
/// set in flags
old_prog_fd: fd_t,
@@ -952,6 +1471,7 @@ pub const IterCreateAttr = extern struct {
flags: u32,
};
+/// Mega struct that is passed to the bpf() syscall
pub const Attr = extern union {
map_create: MapCreateAttr,
map_elem: MapElemAttr,
@@ -971,3 +1491,176 @@ pub const Attr = extern union {
enable_stats: EnableStatsAttr,
iter_create: IterCreateAttr,
};
+
+/// Optional verifier log settings for `prog_load`.
+pub const Log = struct {
+ /// copied into ProgLoadAttr.log_level
+ level: u32,
+ /// caller-provided buffer; its pointer and length are copied into
+ /// ProgLoadAttr.log_buf and ProgLoadAttr.log_size
+ buf: []u8,
+};
+
+/// Create a BPF map with the `.map_create` command and return its file
+/// descriptor. All other MapCreateAttr fields are left zeroed.
+///
+/// Errors:
+/// - error.MapTypeOrAttrInvalid (EINVAL)
+/// - error.SystemResources (ENOMEM)
+/// - error.AccessDenied (EPERM)
+/// - any other errno is reported through `unexpectedErrno`
+pub fn map_create(map_type: MapType, key_size: u32, value_size: u32, max_entries: u32) !fd_t {
+    var attr = Attr{
+        .map_create = std.mem.zeroes(MapCreateAttr),
+    };
+
+    attr.map_create.map_type = @enumToInt(map_type);
+    attr.map_create.key_size = key_size;
+    attr.map_create.value_size = value_size;
+    attr.map_create.max_entries = max_entries;
+
+    const rc = bpf(.map_create, &attr, @sizeOf(MapCreateAttr));
+    return switch (errno(rc)) {
+        0 => @intCast(fd_t, rc),
+        EINVAL => error.MapTypeOrAttrInvalid,
+        ENOMEM => error.SystemResources,
+        EPERM => error.AccessDenied,
+        // Report the decoded errno, not the raw syscall return value.
+        else => |err| unexpectedErrno(err),
+    };
+}
+
+test "map_create" {
+    // A hash map with 4-byte keys and values and room for 32 entries.
+    const fd = try map_create(.hash, 4, 4, 32);
+    defer std.os.close(fd);
+}
+
+/// Look up `key` in the map referred to by `fd`; the kernel writes the
+/// element's value into `value`. Only the slice pointers are passed on, so
+/// both buffers presumably have to match the map's key and value sizes
+/// (TODO confirm).
+///
+/// Errors:
+/// - error.BadFd (EBADF)
+/// - error.FieldInAttrNeedsZeroing (EINVAL)
+/// - error.NotFound (ENOENT)
+/// - error.AccessDenied (EPERM)
+/// - any other errno is reported through `unexpectedErrno`
+pub fn map_lookup_elem(fd: fd_t, key: []const u8, value: []u8) !void {
+    var attr = Attr{
+        .map_elem = std.mem.zeroes(MapElemAttr),
+    };
+
+    attr.map_elem.map_fd = fd;
+    attr.map_elem.key = @ptrToInt(key.ptr);
+    attr.map_elem.result.value = @ptrToInt(value.ptr);
+
+    const rc = bpf(.map_lookup_elem, &attr, @sizeOf(MapElemAttr));
+    switch (errno(rc)) {
+        0 => return,
+        EBADF => return error.BadFd,
+        EFAULT => unreachable,
+        EINVAL => return error.FieldInAttrNeedsZeroing,
+        ENOENT => return error.NotFound,
+        EPERM => return error.AccessDenied,
+        // Report the decoded errno, not the raw syscall return value.
+        else => |err| return unexpectedErrno(err),
+    }
+}
+
+/// Create or update the element stored under `key` in the map referred to
+/// by `fd`. `flags` is passed to the kernel unchanged. Only the slice
+/// pointers are passed on, so both buffers presumably have to match the
+/// map's key and value sizes (TODO confirm).
+///
+/// Errors:
+/// - error.ReachedMaxEntries (E2BIG)
+/// - error.BadFd (EBADF)
+/// - error.FieldInAttrNeedsZeroing (EINVAL)
+/// - error.SystemResources (ENOMEM)
+/// - error.AccessDenied (EPERM)
+/// - any other errno is reported through `unexpectedErrno`
+pub fn map_update_elem(fd: fd_t, key: []const u8, value: []const u8, flags: u64) !void {
+    var attr = Attr{
+        .map_elem = std.mem.zeroes(MapElemAttr),
+    };
+
+    const elem = &attr.map_elem;
+    elem.map_fd = fd;
+    elem.key = @ptrToInt(key.ptr);
+    elem.result = .{ .value = @ptrToInt(value.ptr) };
+    elem.flags = flags;
+
+    const rc = bpf(.map_update_elem, &attr, @sizeOf(MapElemAttr));
+    const code = errno(rc);
+    switch (code) {
+        0 => return,
+        E2BIG => return error.ReachedMaxEntries,
+        EBADF => return error.BadFd,
+        EFAULT => unreachable,
+        EINVAL => return error.FieldInAttrNeedsZeroing,
+        ENOMEM => return error.SystemResources,
+        EPERM => return error.AccessDenied,
+        else => return unexpectedErrno(code),
+    }
+}
+
+/// Delete the element stored under `key` from the map referred to by `fd`.
+///
+/// Errors:
+/// - error.BadFd (EBADF)
+/// - error.FieldInAttrNeedsZeroing (EINVAL)
+/// - error.NotFound (ENOENT)
+/// - error.AccessDenied (EPERM)
+/// - any other errno is reported through `unexpectedErrno`
+pub fn map_delete_elem(fd: fd_t, key: []const u8) !void {
+    var attr = Attr{
+        .map_elem = std.mem.zeroes(MapElemAttr),
+    };
+
+    const elem = &attr.map_elem;
+    elem.map_fd = fd;
+    elem.key = @ptrToInt(key.ptr);
+
+    const rc = bpf(.map_delete_elem, &attr, @sizeOf(MapElemAttr));
+    const code = errno(rc);
+    switch (code) {
+        0 => return,
+        EBADF => return error.BadFd,
+        EFAULT => unreachable,
+        EINVAL => return error.FieldInAttrNeedsZeroing,
+        ENOENT => return error.NotFound,
+        EPERM => return error.AccessDenied,
+        else => return unexpectedErrno(code),
+    }
+}
+
+test "map lookup, update, and delete" {
+    const key_size = 4;
+    const value_size = 4;
+
+    // A single-entry hash map, so the second insert must hit the limit.
+    const fd = try map_create(.hash, key_size, value_size, 1);
+    defer std.os.close(fd);
+
+    const first_key = std.mem.zeroes([key_size]u8);
+    var buf = std.mem.zeroes([value_size]u8);
+
+    // looking up a missing key fails
+    expectError(error.NotFound, map_lookup_elem(fd, &first_key, &buf));
+
+    // insert, then read the element back
+    try map_update_elem(fd, &first_key, &buf, 0);
+    try map_lookup_elem(fd, &first_key, &buf);
+
+    // a second key exceeds max_entries
+    const other_key = [key_size]u8{ 0, 0, 0, 1 };
+    expectError(error.ReachedMaxEntries, map_update_elem(fd, &other_key, &buf, 0));
+
+    // deleting the existing element works and makes it unreachable
+    try map_delete_elem(fd, &first_key);
+    expectError(error.NotFound, map_lookup_elem(fd, &first_key, &buf));
+
+    // deleting it again fails
+    expectError(error.NotFound, map_delete_elem(fd, &first_key));
+}
+
+/// Load the program `insns` of type `prog_type` with the `.prog_load`
+/// command and return its file descriptor. When `log` is given, its buffer
+/// and level are handed to the kernel as the verifier log settings.
+///
+/// NOTE(review): only `license.ptr` is passed on, without a length. The
+/// kernel presumably expects a NUL-terminated string, so confirm that
+/// callers pass one (a string literal qualifies).
+///
+/// Errors:
+/// - error.UnsafeProgram (EACCES)
+/// - error.InvalidProgram (EINVAL)
+/// - error.AccessDenied (EPERM)
+/// - any other errno is reported through `unexpectedErrno`
+pub fn prog_load(
+    prog_type: ProgType,
+    insns: []const Insn,
+    log: ?*Log,
+    license: []const u8,
+    kern_version: u32,
+) !fd_t {
+    var attr = Attr{
+        .prog_load = std.mem.zeroes(ProgLoadAttr),
+    };
+
+    const load = &attr.prog_load;
+    load.prog_type = @enumToInt(prog_type);
+    load.insns = @ptrToInt(insns.ptr);
+    load.insn_cnt = @intCast(u32, insns.len);
+    load.license = @ptrToInt(license.ptr);
+    load.kern_version = kern_version;
+
+    if (log) |verifier_log| {
+        load.log_buf = @ptrToInt(verifier_log.buf.ptr);
+        load.log_size = @intCast(u32, verifier_log.buf.len);
+        load.log_level = verifier_log.level;
+    }
+
+    const rc = bpf(.prog_load, &attr, @sizeOf(ProgLoadAttr));
+    const code = errno(rc);
+    return switch (code) {
+        0 => @intCast(fd_t, rc),
+        EACCES => error.UnsafeProgram,
+        EFAULT => unreachable,
+        EINVAL => error.InvalidProgram,
+        EPERM => error.AccessDenied,
+        else => unexpectedErrno(code),
+    };
+}
+
+test "prog_load" {
+    // exits without setting r0, so loading it is expected to fail
+    const rejected = [_]Insn{
+        Insn.exit(),
+    };
+
+    // sets r0 before exiting
+    const accepted = [_]Insn{
+        Insn.mov(.r0, 0),
+        Insn.exit(),
+    };
+
+    const fd = try prog_load(.socket_filter, &accepted, null, "MIT", 0);
+    defer std.os.close(fd);
+
+    expectError(error.UnsafeProgram, prog_load(.socket_filter, &rejected, null, "MIT", 0));
+}