static bool type_is_pkt_pointer(enum bpf_reg_type type)
{
+ type = base_type(type);
return type == PTR_TO_PACKET ||
type == PTR_TO_PACKET_META;
}
static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
{
- return base_type(type) == PTR_TO_SOCKET ||
- base_type(type) == PTR_TO_TCP_SOCK ||
- base_type(type) == PTR_TO_MEM ||
- base_type(type) == PTR_TO_BTF_ID;
+ type = base_type(type);
+ return type == PTR_TO_SOCKET || type == PTR_TO_TCP_SOCK ||
+ type == PTR_TO_MEM || type == PTR_TO_BTF_ID;
}
static bool type_is_rdonly_mem(u32 type)
return type & MEM_RDONLY;
}
-static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
-{
- return type == ARG_PTR_TO_SOCK_COMMON;
-}
-
static bool type_may_be_null(u32 type)
{
return type & PTR_MAYBE_NULL;
}
-static bool may_be_acquire_function(enum bpf_func_id func_id)
-{
- return func_id == BPF_FUNC_sk_lookup_tcp ||
- func_id == BPF_FUNC_sk_lookup_udp ||
- func_id == BPF_FUNC_skc_lookup_tcp ||
- func_id == BPF_FUNC_map_lookup_elem ||
- func_id == BPF_FUNC_ringbuf_reserve;
-}
-
static bool is_acquire_function(enum bpf_func_id func_id,
const struct bpf_map *map)
{
func_id == BPF_FUNC_skc_to_tcp_request_sock;
}
+static bool is_dynptr_ref_function(enum bpf_func_id func_id)
+{
+ return func_id == BPF_FUNC_dynptr_data;
+}
+
+static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
+ const struct bpf_map *map)
+{
+ int ref_obj_uses = 0;
+
+ if (is_ptr_cast_function(func_id))
+ ref_obj_uses++;
+ if (is_acquire_function(func_id, map))
+ ref_obj_uses++;
+ if (is_dynptr_ref_function(func_id))
+ ref_obj_uses++;
+
+ return ref_obj_uses > 1;
+}
+
static bool is_cmpxchg_insn(const struct bpf_insn *insn)
{
return BPF_CLASS(insn->code) == BPF_STX &&
id = ++env->id_gen;
state->refs[new_ofs].id = id;
state->refs[new_ofs].insn_idx = insn_idx;
+ state->refs[new_ofs].callback_ref = state->in_callback_fn ? state->frameno : 0;
return id;
}
last_idx = state->acquired_refs - 1;
for (i = 0; i < state->acquired_refs; i++) {
if (state->refs[i].id == ptr_id) {
+ /* Cannot release caller references in callbacks */
+ if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
+ return -EINVAL;
if (last_idx && i != last_idx)
memcpy(&state->refs[i], &state->refs[last_idx],
sizeof(*state->refs));
type == ARG_CONST_SIZE_OR_ZERO;
}
-static bool arg_type_is_alloc_size(enum bpf_arg_type type)
-{
- return type == ARG_CONST_ALLOC_SIZE_OR_ZERO;
-}
-
-static bool arg_type_is_int_ptr(enum bpf_arg_type type)
-{
- return type == ARG_PTR_TO_INT ||
- type == ARG_PTR_TO_LONG;
-}
-
static bool arg_type_is_release(enum bpf_arg_type type)
{
return type & OBJ_RELEASE;
struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno];
enum bpf_arg_type arg_type = fn->arg_type[arg];
enum bpf_reg_type type = reg->type;
+ u32 *arg_btf_id = NULL;
int err = 0;
if (arg_type == ARG_DONTCARE)
*/
goto skip_type_check;
- err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg], meta);
+ /* arg_btf_id and arg_size are in a union. */
+ if (base_type(arg_type) == ARG_PTR_TO_BTF_ID)
+ arg_btf_id = fn->arg_btf_id[arg];
+
+ err = check_reg_type(env, regno, arg_type, arg_btf_id, meta);
if (err)
return err;
meta->ref_obj_id = reg->ref_obj_id;
}
- if (arg_type == ARG_CONST_MAP_PTR) {
+ switch (base_type(arg_type)) {
+ case ARG_CONST_MAP_PTR:
/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
if (meta->map_ptr) {
/* Use map_uid (which is unique id of inner map) to reject:
}
meta->map_ptr = reg->map_ptr;
meta->map_uid = reg->map_uid;
- } else if (arg_type == ARG_PTR_TO_MAP_KEY) {
+ break;
+ case ARG_PTR_TO_MAP_KEY:
/* bpf_map_xxx(..., map_ptr, ..., key) call:
* check that [key, key + map->key_size) are within
* stack limits and initialized
err = check_helper_mem_access(env, regno,
meta->map_ptr->key_size, false,
NULL);
- } else if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) {
+ break;
+ case ARG_PTR_TO_MAP_VALUE:
if (type_may_be_null(arg_type) && register_is_null(reg))
return 0;
err = check_helper_mem_access(env, regno,
meta->map_ptr->value_size, false,
meta);
- } else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
+ break;
+ case ARG_PTR_TO_PERCPU_BTF_ID:
if (!reg->btf_id) {
verbose(env, "Helper has invalid btf_id in R%d\n", regno);
return -EACCES;
}
meta->ret_btf = reg->btf;
meta->ret_btf_id = reg->btf_id;
- } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
+ break;
+ case ARG_PTR_TO_SPIN_LOCK:
if (meta->func_id == BPF_FUNC_spin_lock) {
if (process_spin_lock(env, regno, true))
return -EACCES;
verbose(env, "verifier internal error\n");
return -EFAULT;
}
- } else if (arg_type == ARG_PTR_TO_TIMER) {
+ break;
+ case ARG_PTR_TO_TIMER:
if (process_timer_func(env, regno, meta))
return -EACCES;
- } else if (arg_type == ARG_PTR_TO_FUNC) {
+ break;
+ case ARG_PTR_TO_FUNC:
meta->subprogno = reg->subprogno;
- } else if (base_type(arg_type) == ARG_PTR_TO_MEM) {
+ break;
+ case ARG_PTR_TO_MEM:
/* The access to this pointer is only checked when we hit the
* next is_mem_size argument below.
*/
meta->raw_mode = arg_type & MEM_UNINIT;
- } else if (arg_type_is_mem_size(arg_type)) {
- bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
-
- err = check_mem_size_reg(env, reg, regno, zero_size_allowed, meta);
- } else if (arg_type_is_dynptr(arg_type)) {
+ if (arg_type & MEM_FIXED_SIZE) {
+ err = check_helper_mem_access(env, regno,
+ fn->arg_size[arg], false,
+ meta);
+ }
+ break;
+ case ARG_CONST_SIZE:
+ err = check_mem_size_reg(env, reg, regno, false, meta);
+ break;
+ case ARG_CONST_SIZE_OR_ZERO:
+ err = check_mem_size_reg(env, reg, regno, true, meta);
+ break;
+ case ARG_PTR_TO_DYNPTR:
if (arg_type & MEM_UNINIT) {
if (!is_dynptr_reg_valid_uninit(env, reg)) {
verbose(env, "Dynptr has to be an uninitialized dynptr\n");
err_extra, arg + 1);
return -EINVAL;
}
- } else if (arg_type_is_alloc_size(arg_type)) {
+ break;
+ case ARG_CONST_ALLOC_SIZE_OR_ZERO:
if (!tnum_is_const(reg->var_off)) {
verbose(env, "R%d is not a known constant'\n",
regno);
return -EACCES;
}
meta->mem_size = reg->var_off.value;
- } else if (arg_type_is_int_ptr(arg_type)) {
+ err = mark_chain_precision(env, regno);
+ if (err)
+ return err;
+ break;
+ case ARG_PTR_TO_INT:
+ case ARG_PTR_TO_LONG:
+ {
int size = int_ptr_type_to_size(arg_type);
err = check_helper_mem_access(env, regno, size, false, meta);
if (err)
return err;
err = check_ptr_alignment(env, reg, 0, size, true);
- } else if (arg_type == ARG_PTR_TO_CONST_STR) {
+ break;
+ }
+ case ARG_PTR_TO_CONST_STR:
+ {
struct bpf_map *map = reg->map_ptr;
int map_off;
u64 map_addr;
verbose(env, "string is not zero-terminated\n");
return -EINVAL;
}
- } else if (arg_type == ARG_PTR_TO_KPTR) {
+ break;
+ }
+ case ARG_PTR_TO_KPTR:
if (process_kptr_func(env, regno, meta))
return -EACCES;
+ break;
}
return err;
static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
{
- return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64);
+ return env->prog->jit_requested &&
+ bpf_jit_supports_subprog_tailcalls();
}
static int check_map_func_compatibility(struct bpf_verifier_env *env,
return count <= 1;
}
-static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
- enum bpf_arg_type arg_next)
+static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg)
{
- return (base_type(arg_curr) == ARG_PTR_TO_MEM) !=
- arg_type_is_mem_size(arg_next);
+ bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE;
+ bool has_size = fn->arg_size[arg] != 0;
+ bool is_next_size = false;
+
+ if (arg + 1 < ARRAY_SIZE(fn->arg_type))
+ is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]);
+
+ if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM)
+ return is_next_size;
+
+ return has_size == is_next_size || is_next_size == is_fixed;
}
static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
* helper function specification.
*/
if (arg_type_is_mem_size(fn->arg1_type) ||
- base_type(fn->arg5_type) == ARG_PTR_TO_MEM ||
- check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
- check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
- check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
- check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
+ check_args_pair_invalid(fn, 0) ||
+ check_args_pair_invalid(fn, 1) ||
+ check_args_pair_invalid(fn, 2) ||
+ check_args_pair_invalid(fn, 3) ||
+ check_args_pair_invalid(fn, 4))
return false;
return true;
}
-static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
-{
- int count = 0;
-
- if (arg_type_may_be_refcounted(fn->arg1_type))
- count++;
- if (arg_type_may_be_refcounted(fn->arg2_type))
- count++;
- if (arg_type_may_be_refcounted(fn->arg3_type))
- count++;
- if (arg_type_may_be_refcounted(fn->arg4_type))
- count++;
- if (arg_type_may_be_refcounted(fn->arg5_type))
- count++;
-
- /* A reference acquiring function cannot acquire
- * another refcounted ptr.
- */
- if (may_be_acquire_function(func_id) && count)
- return false;
-
- /* We only support one arg being unreferenced at the moment,
- * which is sufficient for the helper functions we have right now.
- */
- return count <= 1;
-}
-
static bool check_btf_id_ok(const struct bpf_func_proto *fn)
{
int i;
if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
return false;
- if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i])
+ if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] &&
+ /* arg_btf_id and arg_size are in a union. */
+ (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM ||
+ !(fn->arg_type[i] & MEM_FIXED_SIZE)))
return false;
}
return true;
}
-static int check_func_proto(const struct bpf_func_proto *fn, int func_id,
- struct bpf_call_arg_meta *meta)
+static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
{
return check_raw_mode_ok(fn) &&
check_arg_pair_ok(fn) &&
- check_btf_id_ok(fn) &&
- check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
+ check_btf_id_ok(fn) ? 0 : -EINVAL;
}
/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
caller->regs[BPF_REG_0] = *r0;
}
- /* Transfer references to the caller */
- err = copy_reference_state(caller, callee);
- if (err)
- return err;
+ /* callback_fn frame should have released its own additions to parent's
+ * reference state at this point, or check_reference_leak would
+ * complain, hence it must be the same as the caller. There is no need
+ * to copy it back.
+ */
+ if (!callee->in_callback_fn) {
+ /* Transfer references to the caller */
+ err = copy_reference_state(caller, callee);
+ if (err)
+ return err;
+ }
*insn_idx = callee->callsite + 1;
if (env->log.level & BPF_LOG_LEVEL) {
struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
struct bpf_reg_state *regs = cur_regs(env), *reg;
struct bpf_map *map = meta->map_ptr;
- struct tnum range;
- u64 val;
+ u64 val, max;
int err;
if (func_id != BPF_FUNC_tail_call)
return -EINVAL;
}
- range = tnum_range(0, map->max_entries - 1);
reg = ®s[BPF_REG_3];
+ val = reg->var_off.value;
+ max = map->max_entries;
- if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) {
+ if (!(register_is_const(reg) && val < max)) {
bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
return 0;
}
err = mark_chain_precision(env, BPF_REG_3);
if (err)
return err;
-
- val = reg->var_off.value;
if (bpf_map_key_unseen(aux))
bpf_map_key_store(aux, val);
else if (!bpf_map_key_poisoned(aux) &&
static int check_reference_leak(struct bpf_verifier_env *env)
{
struct bpf_func_state *state = cur_func(env);
+ bool refs_lingering = false;
int i;
+ if (state->frameno && !state->in_callback_fn)
+ return 0;
+
for (i = 0; i < state->acquired_refs; i++) {
+ if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
+ continue;
verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
state->refs[i].id, state->refs[i].insn_idx);
+ refs_lingering = true;
}
- return state->acquired_refs ? -EINVAL : 0;
+ return refs_lingering ? -EINVAL : 0;
}
static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
return -ENOTSUPP;
}
+static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
+{
+ return &env->insn_aux_data[env->insn_idx];
+}
+
+static bool loop_flag_is_zero(struct bpf_verifier_env *env)
+{
+ struct bpf_reg_state *regs = cur_regs(env);
+ struct bpf_reg_state *reg = ®s[BPF_REG_4];
+ bool reg_is_null = register_is_null(reg);
+
+ if (reg_is_null)
+ mark_chain_precision(env, BPF_REG_4);
+
+ return reg_is_null;
+}
+
+static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
+{
+ struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;
+
+ if (!state->initialized) {
+ state->initialized = 1;
+ state->fit_for_inline = loop_flag_is_zero(env);
+ state->callback_subprogno = subprogno;
+ return;
+ }
+
+ if (!state->fit_for_inline)
+ return;
+
+ state->fit_for_inline = (loop_flag_is_zero(env) &&
+ state->callback_subprogno == subprogno);
+}
+
static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx_p)
{
+ enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
const struct bpf_func_proto *fn = NULL;
enum bpf_return_type ret_type;
enum bpf_type_flag ret_flag;
memset(&meta, 0, sizeof(meta));
meta.pkt_access = fn->pkt_access;
- err = check_func_proto(fn, func_id, &meta);
+ err = check_func_proto(fn, func_id);
if (err) {
verbose(env, "kernel subsystem misconfigured func %s#%d\n",
func_id_name(func_id), func_id);
err = check_bpf_snprintf_call(env, regs);
break;
case BPF_FUNC_loop:
+ update_loop_inline_state(env, meta.subprogno);
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
set_loop_callback_state);
break;
reg_type_str(env, regs[BPF_REG_1].type));
return -EACCES;
}
+ break;
+ case BPF_FUNC_set_retval:
+ if (prog_type == BPF_PROG_TYPE_LSM &&
+ env->prog->expected_attach_type == BPF_LSM_CGROUP) {
+ if (!env->prog->aux->attach_func_proto->type) {
+ /* Make sure programs that attach to void
+ * hooks don't try to modify return value.
+ */
+ verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
+ return -EINVAL;
+ }
+ }
+ break;
+ case BPF_FUNC_dynptr_data:
+ for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
+ if (arg_type_is_dynptr(fn->arg_type[i])) {
+ if (meta.ref_obj_id) {
+ verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
+ return -EFAULT;
+ }
+ /* Find the id of the dynptr we're tracking the reference of */
+ meta.ref_obj_id = stack_slot_get_id(env, ®s[BPF_REG_1 + i]);
+ break;
+ }
+ }
+ if (i == MAX_BPF_FUNC_REG_ARGS) {
+ verbose(env, "verifier internal error: no dynptr in bpf_dynptr_data()\n");
+ return -EFAULT;
+ }
+ break;
}
if (err)
/* update return register (already marked as written above) */
ret_type = fn->ret_type;
- ret_flag = type_flag(fn->ret_type);
- if (ret_type == RET_INTEGER) {
+ ret_flag = type_flag(ret_type);
+
+ switch (base_type(ret_type)) {
+ case RET_INTEGER:
/* sets type to SCALAR_VALUE */
mark_reg_unknown(env, regs, BPF_REG_0);
- } else if (ret_type == RET_VOID) {
+ break;
+ case RET_VOID:
regs[BPF_REG_0].type = NOT_INIT;
- } else if (base_type(ret_type) == RET_PTR_TO_MAP_VALUE) {
+ break;
+ case RET_PTR_TO_MAP_VALUE:
/* There is no offset yet applied, variable or fixed */
mark_reg_known_zero(env, regs, BPF_REG_0);
/* remember map_ptr, so that check_map_access()
map_value_has_spin_lock(meta.map_ptr)) {
regs[BPF_REG_0].id = ++env->id_gen;
}
- } else if (base_type(ret_type) == RET_PTR_TO_SOCKET) {
+ break;
+ case RET_PTR_TO_SOCKET:
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
- } else if (base_type(ret_type) == RET_PTR_TO_SOCK_COMMON) {
+ break;
+ case RET_PTR_TO_SOCK_COMMON:
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
- } else if (base_type(ret_type) == RET_PTR_TO_TCP_SOCK) {
+ break;
+ case RET_PTR_TO_TCP_SOCK:
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
- } else if (base_type(ret_type) == RET_PTR_TO_ALLOC_MEM) {
+ break;
+ case RET_PTR_TO_ALLOC_MEM:
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
regs[BPF_REG_0].mem_size = meta.mem_size;
- } else if (base_type(ret_type) == RET_PTR_TO_MEM_OR_BTF_ID) {
+ break;
+ case RET_PTR_TO_MEM_OR_BTF_ID:
+ {
const struct btf_type *t;
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].btf = meta.ret_btf;
regs[BPF_REG_0].btf_id = meta.ret_btf_id;
}
- } else if (base_type(ret_type) == RET_PTR_TO_BTF_ID) {
+ break;
+ }
+ case RET_PTR_TO_BTF_ID:
+ {
struct btf *ret_btf;
int ret_btf_id;
}
regs[BPF_REG_0].btf = ret_btf;
regs[BPF_REG_0].btf_id = ret_btf_id;
- } else {
+ break;
+ }
+ default:
verbose(env, "unknown return type %u of func %s#%d\n",
base_type(ret_type), func_id_name(func_id), func_id);
return -EINVAL;
if (type_may_be_null(regs[BPF_REG_0].type))
regs[BPF_REG_0].id = ++env->id_gen;
- if (is_ptr_cast_function(func_id)) {
+ if (helper_multiple_ref_obj_use(func_id, meta.map_ptr)) {
+ verbose(env, "verifier internal error: func %s#%d sets ref_obj_id more than once\n",
+ func_id_name(func_id), func_id);
+ return -EFAULT;
+ }
+
+ if (is_ptr_cast_function(func_id) || is_dynptr_ref_function(func_id)) {
/* For release_reference() */
regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
} else if (is_acquire_function(func_id, meta.map_ptr)) {
regs[BPF_REG_0].id = id;
/* For release_reference() */
regs[BPF_REG_0].ref_obj_id = id;
- } else if (func_id == BPF_FUNC_dynptr_data) {
- int dynptr_id = 0, i;
-
- /* Find the id of the dynptr we're acquiring a reference to */
- for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
- if (arg_type_is_dynptr(fn->arg_type[i])) {
- if (dynptr_id) {
- verbose(env, "verifier internal error: multiple dynptr args in func\n");
- return -EFAULT;
- }
- dynptr_id = stack_slot_get_id(env, ®s[BPF_REG_1 + i]);
- }
- }
- /* For release_reference() */
- regs[BPF_REG_0].ref_obj_id = dynptr_id;
}
do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
int err, insn_idx = *insn_idx_p;
const struct btf_param *args;
struct btf *desc_btf;
+ u32 *kfunc_flags;
bool acq;
/* skip for now, but return error when we find this in fixup_kfunc_call */
func_name = btf_name_by_offset(desc_btf, func->name_off);
func_proto = btf_type_by_id(desc_btf, func->type);
- if (!btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
- BTF_KFUNC_TYPE_CHECK, func_id)) {
+ kfunc_flags = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), func_id);
+ if (!kfunc_flags) {
verbose(env, "calling kernel function %s is not allowed\n",
func_name);
return -EACCES;
}
+ if (*kfunc_flags & KF_DESTRUCTIVE && !capable(CAP_SYS_BOOT)) {
+ verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capabilities\n");
+ return -EACCES;
+ }
- acq = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
- BTF_KFUNC_TYPE_ACQUIRE, func_id);
+ acq = *kfunc_flags & KF_ACQUIRE;
/* Check the arguments */
- err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs);
+ err = btf_check_kfunc_arg_match(env, desc_btf, func_id, regs, *kfunc_flags);
if (err < 0)
return err;
/* In case of release function, we get register number of refcounted
regs[BPF_REG_0].btf = desc_btf;
regs[BPF_REG_0].type = PTR_TO_BTF_ID;
regs[BPF_REG_0].btf_id = ptr_type_id;
- if (btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog),
- BTF_KFUNC_TYPE_RET_NULL, func_id)) {
+ if (*kfunc_flags & KF_RET_NULL) {
regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
/* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
regs[BPF_REG_0].id = ++env->id_gen;
return true;
}
-static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
-{
- return &env->insn_aux_data[env->insn_idx];
-}
-
enum {
REASON_BOUNDS = -1,
REASON_TYPE = -2,
if (opcode == BPF_END || opcode == BPF_NEG) {
if (opcode == BPF_NEG) {
- if (BPF_SRC(insn->code) != 0 ||
+ if (BPF_SRC(insn->code) != BPF_K ||
insn->src_reg != BPF_REG_0 ||
insn->off != 0 || insn->imm != 0) {
verbose(env, "BPF_NEG uses reserved fields\n");
const bool is_subprog = frame->subprogno;
/* LSM and struct_ops func-ptr's return type could be "void" */
- if (!is_subprog &&
- (prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
- prog_type == BPF_PROG_TYPE_LSM) &&
- !prog->aux->attach_func_proto->type)
- return 0;
+ if (!is_subprog) {
+ switch (prog_type) {
+ case BPF_PROG_TYPE_LSM:
+ if (prog->expected_attach_type == BPF_LSM_CGROUP)
+ /* See below, can be 0 or 0-1 depending on hook. */
+ break;
+ fallthrough;
+ case BPF_PROG_TYPE_STRUCT_OPS:
+ if (!prog->aux->attach_func_proto->type)
+ return 0;
+ break;
+ default:
+ break;
+ }
+ }
/* eBPF calling convention is such that R0 is used
* to return the value from eBPF program.
case BPF_PROG_TYPE_SK_LOOKUP:
range = tnum_range(SK_DROP, SK_PASS);
break;
+
+ case BPF_PROG_TYPE_LSM:
+ if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
+ /* Regular BPF_PROG_TYPE_LSM programs can return
+ * any value.
+ */
+ return 0;
+ }
+ if (!env->prog->aux->attach_func_proto->type) {
+ /* Make sure programs that attach to void
+ * hooks don't try to modify return value.
+ */
+ range = tnum_range(1, 1);
+ }
+ break;
+
case BPF_PROG_TYPE_EXT:
/* freplace program can return anything as its return value
* depends on the to-be-replaced kernel func or bpf program.
if (!tnum_in(range, reg->var_off)) {
verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
+ if (prog->expected_attach_type == BPF_LSM_CGROUP &&
+ prog_type == BPF_PROG_TYPE_LSM &&
+ !prog->aux->attach_func_proto->type)
+ verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
return -EINVAL;
}
goto err_free;
ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
scalar_return =
- btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type);
+ btf_type_is_small_int(ret_type) || btf_is_any_enum(ret_type);
if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
goto err_free;
return -EINVAL;
}
+ /* We must do check_reference_leak here before
+ * prepare_func_exit to handle the case when
+ * state->curframe > 0, it may be a callback
+ * function, for which reference_state must
+ * match caller reference state when it exits.
+ */
+ err = check_reference_leak(env);
+ if (err)
+ return err;
+
if (state->curframe) {
/* exit from nested function */
err = prepare_func_exit(env, &env->insn_idx);
continue;
}
- err = check_reference_leak(env);
- if (err)
- return err;
-
err = check_return_code(env);
if (err)
return err;
return err;
}
-static int check_map_prealloc(struct bpf_map *map)
-{
- return (map->map_type != BPF_MAP_TYPE_HASH &&
- map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
- map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
- !(map->map_flags & BPF_F_NO_PREALLOC);
-}
-
static bool is_tracing_prog_type(enum bpf_prog_type type)
{
switch (type) {
case BPF_PROG_TYPE_TRACEPOINT:
case BPF_PROG_TYPE_PERF_EVENT:
case BPF_PROG_TYPE_RAW_TRACEPOINT:
+ case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
return true;
default:
return false;
}
}
-static bool is_preallocated_map(struct bpf_map *map)
-{
- if (!check_map_prealloc(map))
- return false;
- if (map->inner_map_meta && !check_map_prealloc(map->inner_map_meta))
- return false;
- return true;
-}
-
static int check_map_prog_compatibility(struct bpf_verifier_env *env,
struct bpf_map *map,
struct bpf_prog *prog)
{
enum bpf_prog_type prog_type = resolve_prog_type(prog);
- /*
- * Validate that trace type programs use preallocated hash maps.
- *
- * For programs attached to PERF events this is mandatory as the
- * perf NMI can hit any arbitrary code sequence.
- *
- * All other trace types using preallocated hash maps are unsafe as
- * well because tracepoint or kprobes can be inside locked regions
- * of the memory allocator or at a place where a recursion into the
- * memory allocator would see inconsistent state.
- *
- * On RT enabled kernels run-time allocation of all trace type
- * programs is strictly prohibited due to lock type constraints. On
- * !RT kernels it is allowed for backwards compatibility reasons for
- * now, but warnings are emitted so developers are made aware of
- * the unsafety and can fix their programs before this is enforced.
- */
- if (is_tracing_prog_type(prog_type) && !is_preallocated_map(map)) {
- if (prog_type == BPF_PROG_TYPE_PERF_EVENT) {
- verbose(env, "perf_event programs can only use preallocated hash map\n");
- return -EINVAL;
- }
- if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
- verbose(env, "trace type programs can only use preallocated hash map\n");
- return -EINVAL;
- }
- WARN_ONCE(1, "trace type BPF program uses run-time allocation\n");
- verbose(env, "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n");
- }
if (map_value_has_spin_lock(map)) {
if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
case BPF_MAP_TYPE_LRU_PERCPU_HASH:
case BPF_MAP_TYPE_ARRAY_OF_MAPS:
case BPF_MAP_TYPE_HASH_OF_MAPS:
- if (!is_preallocated_map(map)) {
- verbose(env,
- "Sleepable programs can only use preallocated maps\n");
- return -EINVAL;
- }
- break;
case BPF_MAP_TYPE_RINGBUF:
case BPF_MAP_TYPE_INODE_STORAGE:
case BPF_MAP_TYPE_SK_STORAGE:
/* Below members will be freed only at prog->aux */
func[i]->aux->btf = prog->aux->btf;
func[i]->aux->func_info = prog->aux->func_info;
+ func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
func[i]->aux->poke_tab = prog->aux->poke_tab;
func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
poke->aux = func[i]->aux;
}
- /* Use bpf_prog_F_tag to indicate functions in stack traces.
- * Long term would need debug info to populate names
- */
func[i]->aux->name[0] = 'F';
func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
func[i]->jit_requested = 1;
return 0;
}
+static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
+ int position,
+ s32 stack_base,
+ u32 callback_subprogno,
+ u32 *cnt)
+{
+ s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
+ s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
+ s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
+ int reg_loop_max = BPF_REG_6;
+ int reg_loop_cnt = BPF_REG_7;
+ int reg_loop_ctx = BPF_REG_8;
+
+ struct bpf_prog *new_prog;
+ u32 callback_start;
+ u32 call_insn_offset;
+ s32 callback_offset;
+
+ /* This represents an inlined version of bpf_iter.c:bpf_loop,
+ * be careful to modify this code in sync.
+ */
+ struct bpf_insn insn_buf[] = {
+ /* Return error and jump to the end of the patch if
+ * expected number of iterations is too big.
+ */
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2),
+ BPF_MOV32_IMM(BPF_REG_0, -E2BIG),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 16),
+ /* spill R6, R7, R8 to use these as loop vars */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset),
+ /* initialize loop vars */
+ BPF_MOV64_REG(reg_loop_max, BPF_REG_1),
+ BPF_MOV32_IMM(reg_loop_cnt, 0),
+ BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3),
+ /* loop header,
+ * if reg_loop_cnt >= reg_loop_max skip the loop body
+ */
+ BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5),
+ /* callback call,
+ * correct callback offset would be set after patching
+ */
+ BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt),
+ BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx),
+ BPF_CALL_REL(0),
+ /* increment loop counter */
+ BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1),
+ /* jump to loop header if callback returned 0 */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6),
+ /* return value of bpf_loop,
+ * set R0 to the number of iterations
+ */
+ BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt),
+ /* restore original values of R6, R7, R8 */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset),
+ };
+
+ *cnt = ARRAY_SIZE(insn_buf);
+ new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt);
+ if (!new_prog)
+ return new_prog;
+
+ /* callback start is known only after patching */
+ callback_start = env->subprog_info[callback_subprogno].start;
+ /* Note: insn_buf[12] is an offset of BPF_CALL_REL instruction */
+ call_insn_offset = position + 12;
+ callback_offset = callback_start - call_insn_offset - 1;
+ new_prog->insnsi[call_insn_offset].imm = callback_offset;
+
+ return new_prog;
+}
+
+static bool is_bpf_loop_call(struct bpf_insn *insn)
+{
+ return insn->code == (BPF_JMP | BPF_CALL) &&
+ insn->src_reg == 0 &&
+ insn->imm == BPF_FUNC_loop;
+}
+
+/* For all sub-programs in the program (including main) check
+ * insn_aux_data to see if there are bpf_loop calls that require
+ * inlining. If such calls are found the calls are replaced with a
+ * sequence of instructions produced by `inline_bpf_loop` function and
+ * subprog stack_depth is increased by the size of 3 registers.
+ * This stack space is used to spill values of the R6, R7, R8. These
+ * registers are used to store the loop bound, counter and context
+ * variables.
+ */
+static int optimize_bpf_loop(struct bpf_verifier_env *env)
+{
+ struct bpf_subprog_info *subprogs = env->subprog_info;
+ int i, cur_subprog = 0, cnt, delta = 0;
+ struct bpf_insn *insn = env->prog->insnsi;
+ int insn_cnt = env->prog->len;
+ u16 stack_depth = subprogs[cur_subprog].stack_depth;
+ u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
+ u16 stack_depth_extra = 0;
+
+ for (i = 0; i < insn_cnt; i++, insn++) {
+ struct bpf_loop_inline_state *inline_state =
+ &env->insn_aux_data[i + delta].loop_inline_state;
+
+ if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
+ struct bpf_prog *new_prog;
+
+ stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
+ new_prog = inline_bpf_loop(env,
+ i + delta,
+ -(stack_depth + stack_depth_extra),
+ inline_state->callback_subprogno,
+ &cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ }
+
+ if (subprogs[cur_subprog + 1].start == i + delta + 1) {
+ subprogs[cur_subprog].stack_depth += stack_depth_extra;
+ cur_subprog++;
+ stack_depth = subprogs[cur_subprog].stack_depth;
+ stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
+ stack_depth_extra = 0;
+ }
+ }
+
+ env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
+
+ return 0;
+}
+
static void free_states(struct bpf_verifier_env *env)
{
struct bpf_verifier_state_list *sl, *sln;
fallthrough;
case BPF_MODIFY_RETURN:
case BPF_LSM_MAC:
+ case BPF_LSM_CGROUP:
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
if (!btf_type_is_func(t)) {
}
if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
- prog->type != BPF_PROG_TYPE_LSM) {
- verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n");
+ prog->type != BPF_PROG_TYPE_LSM && prog->type != BPF_PROG_TYPE_KPROBE) {
+ verbose(env, "Only fentry/fexit/fmod_ret, lsm, and kprobe/uprobe programs can be sleepable\n");
return -EINVAL;
}
ret = check_max_stack_depth(env);
/* instruction rewrites happen after this point */
+ if (ret == 0)
+ ret = optimize_bpf_loop(env);
+
if (is_priv) {
if (ret == 0)
opt_hard_wire_dead_code_branches(env);