mirror of
https://github.com/torvalds/linux.git
synced 2025-04-09 14:45:27 +00:00

After blamed commit, rtm_to_fib_config() now calls
lwtunnel_valid_encap_type{_attr}() without RTNL held,
triggering an unlock balance in __rtnl_unlock,
as reported by syzbot [1].

IPv6 and rtm_to_nh_config() are not yet converted.

Add a temporary @rtnl_is_held parameter to lwtunnel_valid_encap_type()
and lwtunnel_valid_encap_type_attr().

While we are at it, replace the two rcu_dereference()
in lwtunnel_valid_encap_type() with more appropriate
rcu_access_pointer().

[1]
syz-executor245/5836 is trying to release lock (rtnl_mutex) at:
[<ffffffff89d0e38c>] __rtnl_unlock+0x6c/0xf0 net/core/rtnetlink.c:142
but there are no more locks to release!

other info that might help us debug this:
no locks held by syz-executor245/5836.

stack backtrace:
CPU: 0 UID: 0 PID: 5836 Comm: syz-executor245 Not tainted 6.14.0-rc4-syzkaller-00873-g3424291dd242 #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025
Call Trace:
 <TASK>
  __dump_stack lib/dump_stack.c:94 [inline]
  dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120
  print_unlock_imbalance_bug+0x25b/0x2d0 kernel/locking/lockdep.c:5289
  __lock_release kernel/locking/lockdep.c:5518 [inline]
  lock_release+0x47e/0xa30 kernel/locking/lockdep.c:5872
  __mutex_unlock_slowpath+0xec/0x800 kernel/locking/mutex.c:891
  __rtnl_unlock+0x6c/0xf0 net/core/rtnetlink.c:142
  lwtunnel_valid_encap_type+0x38a/0x5f0 net/core/lwtunnel.c:169
  lwtunnel_valid_encap_type_attr+0x113/0x270 net/core/lwtunnel.c:209
  rtm_to_fib_config+0x949/0x14e0 net/ipv4/fib_frontend.c:808
  inet_rtm_newroute+0xf6/0x2a0 net/ipv4/fib_frontend.c:917
  rtnetlink_rcv_msg+0x791/0xcf0 net/core/rtnetlink.c:6919
  netlink_rcv_skb+0x206/0x480 net/netlink/af_netlink.c:2534
  netlink_unicast_kernel net/netlink/af_netlink.c:1313 [inline]
  netlink_unicast+0x7f6/0x990 net/netlink/af_netlink.c:1339
  netlink_sendmsg+0x8de/0xcb0 net/netlink/af_netlink.c:1883
  sock_sendmsg_nosec net/socket.c:709 [inline]

Fixes: 1dd2af7963e9 ("ipv4: fib: Convert RTM_NEWROUTE and RTM_DELROUTE to per-netns RTNL.")
Reported-by: syzbot+3f18ef0f7df107a3f6a0@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/netdev/67c6f87a.050a0220.38b91b.0147.GAE@google.com/T/#u
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20250304125918.2763514-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
274 lines
6.8 KiB
C
274 lines
6.8 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef __NET_LWTUNNEL_H
|
|
#define __NET_LWTUNNEL_H 1
|
|
|
|
#include <linux/lwtunnel.h>
|
|
#include <linux/netdevice.h>
|
|
#include <linux/skbuff.h>
|
|
#include <linux/types.h>
|
|
#include <net/route.h>
|
|
|
|
/*
 * Hash sizing constants: LWTUNNEL_HASH_SIZE is 2^LWTUNNEL_HASH_BITS (128).
 * NOTE(review): no user of these constants is visible in this header.
 */
#define LWTUNNEL_HASH_BITS   7
#define LWTUNNEL_HASH_SIZE   (1 << LWTUNNEL_HASH_BITS)
|
|
|
|
/*
 * lw tunnel state flags, kept in lwtunnel_state.flags.
 * Each bit is tested by the corresponding lwtunnel_*_redirect() helper.
 */
#define LWTUNNEL_STATE_OUTPUT_REDIRECT	BIT(0)
#define LWTUNNEL_STATE_INPUT_REDIRECT	BIT(1)
#define LWTUNNEL_STATE_XMIT_REDIRECT	BIT(2)
|
|
|
|
/* LWTUNNEL_XMIT_CONTINUE should be distinguishable from dst_output return
 * values (NET_XMIT_xxx and NETDEV_TX_xxx in linux/netdevice.h) for safety.
 */
enum {
	LWTUNNEL_XMIT_DONE,		/* implicit value 0 */
	LWTUNNEL_XMIT_CONTINUE = 0x100,
};
|
|
|
|
|
|
struct lwtunnel_state {
|
|
__u16 type;
|
|
__u16 flags;
|
|
__u16 headroom;
|
|
atomic_t refcnt;
|
|
int (*orig_output)(struct net *net, struct sock *sk, struct sk_buff *skb);
|
|
int (*orig_input)(struct sk_buff *);
|
|
struct rcu_head rcu;
|
|
__u8 data[];
|
|
};
|
|
|
|
/*
 * Table of callbacks supplied by an encapsulation implementation and
 * registered/unregistered through lwtunnel_encap_add_ops() /
 * lwtunnel_encap_del_ops().
 *
 * NOTE(review): the callers of these hooks are not visible in this header;
 * presumably each one backs the similarly named lwtunnel_*() function
 * declared below - confirm against net/core/lwtunnel.c.
 */
struct lwtunnel_encap_ops {
	int (*build_state)(struct net *net, struct nlattr *encap,
			   unsigned int family, const void *cfg,
			   struct lwtunnel_state **ts,
			   struct netlink_ext_ack *extack);
	void (*destroy_state)(struct lwtunnel_state *lws);
	int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb);
	int (*input)(struct sk_buff *skb);
	int (*fill_encap)(struct sk_buff *skb,
			  struct lwtunnel_state *lwtstate);
	int (*get_encap_size)(struct lwtunnel_state *lwtstate);
	int (*cmp_encap)(struct lwtunnel_state *a, struct lwtunnel_state *b);
	int (*xmit)(struct sk_buff *skb);

	/* module providing these ops */
	struct module *owner;
};
|
|
|
|
#ifdef CONFIG_LWTUNNEL
|
|
|
|
/* Static key declaration; its users are not visible in this header. */
DECLARE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);

/* Releases @lws; called by lwtstate_put() when the last reference is dropped. */
void lwtstate_free(struct lwtunnel_state *lws);
|
|
|
|
static inline struct lwtunnel_state *
|
|
lwtstate_get(struct lwtunnel_state *lws)
|
|
{
|
|
if (lws)
|
|
atomic_inc(&lws->refcnt);
|
|
|
|
return lws;
|
|
}
|
|
|
|
static inline void lwtstate_put(struct lwtunnel_state *lws)
|
|
{
|
|
if (!lws)
|
|
return;
|
|
|
|
if (atomic_dec_and_test(&lws->refcnt))
|
|
lwtstate_free(lws);
|
|
}
|
|
|
|
static inline bool lwtunnel_output_redirect(struct lwtunnel_state *lwtstate)
|
|
{
|
|
if (lwtstate && (lwtstate->flags & LWTUNNEL_STATE_OUTPUT_REDIRECT))
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate)
|
|
{
|
|
if (lwtstate && (lwtstate->flags & LWTUNNEL_STATE_INPUT_REDIRECT))
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate)
|
|
{
|
|
if (lwtstate && (lwtstate->flags & LWTUNNEL_STATE_XMIT_REDIRECT))
|
|
return true;
|
|
|
|
return false;
|
|
}
|
|
|
|
static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate,
|
|
unsigned int mtu)
|
|
{
|
|
if ((lwtunnel_xmit_redirect(lwtstate) ||
|
|
lwtunnel_output_redirect(lwtstate)) && lwtstate->headroom < mtu)
|
|
return lwtstate->headroom;
|
|
|
|
return 0;
|
|
}
|
|
|
|
int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op,
|
|
unsigned int num);
|
|
int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op,
|
|
unsigned int num);
|
|
int lwtunnel_valid_encap_type(u16 encap_type,
|
|
struct netlink_ext_ack *extack,
|
|
bool rtnl_is_held);
|
|
int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len,
|
|
struct netlink_ext_ack *extack,
|
|
bool rtnl_is_held);
|
|
int lwtunnel_build_state(struct net *net, u16 encap_type,
|
|
struct nlattr *encap,
|
|
unsigned int family, const void *cfg,
|
|
struct lwtunnel_state **lws,
|
|
struct netlink_ext_ack *extack);
|
|
int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate,
|
|
int encap_attr, int encap_type_attr);
|
|
int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate);
|
|
struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len);
|
|
int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b);
|
|
int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb);
|
|
int lwtunnel_input(struct sk_buff *skb);
|
|
int lwtunnel_xmit(struct sk_buff *skb);
|
|
int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len,
|
|
bool ingress);
|
|
|
|
static inline void lwtunnel_set_redirect(struct dst_entry *dst)
|
|
{
|
|
if (lwtunnel_output_redirect(dst->lwtstate)) {
|
|
dst->lwtstate->orig_output = dst->output;
|
|
dst->output = lwtunnel_output;
|
|
}
|
|
if (lwtunnel_input_redirect(dst->lwtstate)) {
|
|
dst->lwtstate->orig_input = dst->input;
|
|
dst->input = lwtunnel_input;
|
|
}
|
|
}
|
|
#else
|
|
|
|
/* !CONFIG_LWTUNNEL stub: nothing to free, does nothing. */
static inline void lwtstate_free(struct lwtunnel_state *lws)
{
}
|
|
|
|
/* !CONFIG_LWTUNNEL stub: no reference counting, returns @lws unchanged. */
static inline struct lwtunnel_state *
lwtstate_get(struct lwtunnel_state *lws)
{
	return lws;
}
|
|
|
|
/* !CONFIG_LWTUNNEL stub: no reference counting, does nothing. */
static inline void lwtstate_put(struct lwtunnel_state *lws)
{
}
|
|
|
|
/* !CONFIG_LWTUNNEL stub: output is never redirected. */
static inline bool lwtunnel_output_redirect(struct lwtunnel_state *lwtstate)
{
	return false;
}
|
|
|
|
/* !CONFIG_LWTUNNEL stub: input is never redirected. */
static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate)
{
	return false;
}
|
|
|
|
/* !CONFIG_LWTUNNEL stub: xmit is never redirected. */
static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate)
{
	return false;
}
|
|
|
|
/* !CONFIG_LWTUNNEL stub: leaves @dst untouched. */
static inline void lwtunnel_set_redirect(struct dst_entry *dst)
{
}
|
|
|
|
/* !CONFIG_LWTUNNEL stub: no tunnel headroom is ever needed, returns 0. */
static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate,
					     unsigned int mtu)
{
	return 0;
}
|
|
|
|
/* !CONFIG_LWTUNNEL stub: registration is unsupported, returns -EOPNOTSUPP. */
static inline int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op,
					 unsigned int num)
{
	return -EOPNOTSUPP;
}
|
|
|
|
/* !CONFIG_LWTUNNEL stub: unregistration is unsupported, returns -EOPNOTSUPP. */
static inline int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op,
					 unsigned int num)
{
	return -EOPNOTSUPP;
}
|
|
|
|
/*
 * !CONFIG_LWTUNNEL stub: every encap type is rejected.
 *
 * Records an explanatory message in @extack and returns -EOPNOTSUPP;
 * @encap_type and @rtnl_is_held are ignored.
 */
static inline int lwtunnel_valid_encap_type(u16 encap_type,
					    struct netlink_ext_ack *extack,
					    bool rtnl_is_held)
{
	NL_SET_ERR_MSG(extack, "CONFIG_LWTUNNEL is not enabled in this kernel");
	return -EOPNOTSUPP;
}
|
|
/* !CONFIG_LWTUNNEL stub: accepts any attribute stream, all arguments ignored. */
static inline int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len,
						 struct netlink_ext_ack *extack,
						 bool rtnl_is_held)
{
	/* return 0 since we are not walking attr looking for
	 * RTA_ENCAP_TYPE attribute on nexthops.
	 */
	return 0;
}
|
|
|
|
/*
 * !CONFIG_LWTUNNEL stub: no state can be built, returns -EOPNOTSUPP.
 * @lws and @extack are left untouched.
 */
static inline int lwtunnel_build_state(struct net *net, u16 encap_type,
				       struct nlattr *encap,
				       unsigned int family, const void *cfg,
				       struct lwtunnel_state **lws,
				       struct netlink_ext_ack *extack)
{
	return -EOPNOTSUPP;
}
|
|
|
|
/* !CONFIG_LWTUNNEL stub: writes nothing to @skb and returns 0. */
static inline int lwtunnel_fill_encap(struct sk_buff *skb,
				      struct lwtunnel_state *lwtstate,
				      int encap_attr, int encap_type_attr)
{
	return 0;
}
|
|
|
|
/* !CONFIG_LWTUNNEL stub: encap data occupies no space, returns 0. */
static inline int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
{
	return 0;
}
|
|
|
|
/* !CONFIG_LWTUNNEL stub: never allocates, always returns NULL. */
static inline struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len)
{
	return NULL;
}
|
|
|
|
/* !CONFIG_LWTUNNEL stub: returns 0 for any pair of states. */
static inline int lwtunnel_cmp_encap(struct lwtunnel_state *a,
				     struct lwtunnel_state *b)
{
	return 0;
}
|
|
|
|
/* !CONFIG_LWTUNNEL stub: output path is unsupported, returns -EOPNOTSUPP. */
static inline int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	return -EOPNOTSUPP;
}
|
|
|
|
/* !CONFIG_LWTUNNEL stub: input path is unsupported, returns -EOPNOTSUPP. */
static inline int lwtunnel_input(struct sk_buff *skb)
{
	return -EOPNOTSUPP;
}
|
|
|
|
/* !CONFIG_LWTUNNEL stub: xmit path is unsupported, returns -EOPNOTSUPP. */
static inline int lwtunnel_xmit(struct sk_buff *skb)
{
	return -EOPNOTSUPP;
}
|
|
|
|
#endif /* CONFIG_LWTUNNEL */
|
|
|
|
/*
 * Emit a module alias of the form "rtnl-lwt-<encap_type>", where
 * <encap_type> is the stringified macro argument.
 */
#define MODULE_ALIAS_RTNL_LWT(encap_type) MODULE_ALIAS("rtnl-lwt-" __stringify(encap_type))
|
|
|
|
#endif /* __NET_LWTUNNEL_H */
|