1
0
mirror of https://github.com/torvalds/linux.git synced 2025-04-09 14:45:27 +00:00
Eric Dumazet f130a0cc1b inet: fix lwtunnel_valid_encap_type() lock imbalance
After the blamed commit, rtm_to_fib_config() now calls
lwtunnel_valid_encap_type{_attr}() without RTNL held,
triggering an unlock imbalance in __rtnl_unlock,
as reported by syzbot [1].

IPv6 and rtm_to_nh_config() are not yet converted.

Add a temporary @rtnl_is_held parameter to lwtunnel_valid_encap_type()
and lwtunnel_valid_encap_type_attr().

While we are at it, replace the two rcu_dereference() calls
in lwtunnel_valid_encap_type() with the more appropriate
rcu_access_pointer().

[1]
syz-executor245/5836 is trying to release lock (rtnl_mutex) at:
 [<ffffffff89d0e38c>] __rtnl_unlock+0x6c/0xf0 net/core/rtnetlink.c:142
but there are no more locks to release!

other info that might help us debug this:
no locks held by syz-executor245/5836.

stack backtrace:
CPU: 0 UID: 0 PID: 5836 Comm: syz-executor245 Not tainted 6.14.0-rc4-syzkaller-00873-g3424291dd242 
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025
Call Trace:
 <TASK>
  __dump_stack lib/dump_stack.c:94 [inline]
  dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120
  print_unlock_imbalance_bug+0x25b/0x2d0 kernel/locking/lockdep.c:5289
  __lock_release kernel/locking/lockdep.c:5518 [inline]
  lock_release+0x47e/0xa30 kernel/locking/lockdep.c:5872
  __mutex_unlock_slowpath+0xec/0x800 kernel/locking/mutex.c:891
  __rtnl_unlock+0x6c/0xf0 net/core/rtnetlink.c:142
  lwtunnel_valid_encap_type+0x38a/0x5f0 net/core/lwtunnel.c:169
  lwtunnel_valid_encap_type_attr+0x113/0x270 net/core/lwtunnel.c:209
  rtm_to_fib_config+0x949/0x14e0 net/ipv4/fib_frontend.c:808
  inet_rtm_newroute+0xf6/0x2a0 net/ipv4/fib_frontend.c:917
  rtnetlink_rcv_msg+0x791/0xcf0 net/core/rtnetlink.c:6919
  netlink_rcv_skb+0x206/0x480 net/netlink/af_netlink.c:2534
  netlink_unicast_kernel net/netlink/af_netlink.c:1313 [inline]
  netlink_unicast+0x7f6/0x990 net/netlink/af_netlink.c:1339
  netlink_sendmsg+0x8de/0xcb0 net/netlink/af_netlink.c:1883
  sock_sendmsg_nosec net/socket.c:709 [inline]

Fixes: 1dd2af7963e9 ("ipv4: fib: Convert RTM_NEWROUTE and RTM_DELROUTE to per-netns RTNL.")
Reported-by: syzbot+3f18ef0f7df107a3f6a0@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/netdev/67c6f87a.050a0220.38b91b.0147.GAE@google.com/T/#u
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20250304125918.2763514-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-03-05 19:16:56 -08:00

274 lines
6.8 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __NET_LWTUNNEL_H
#define __NET_LWTUNNEL_H 1
#include <linux/lwtunnel.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/types.h>
#include <net/route.h>
/*
 * Hash sizing: LWTUNNEL_HASH_SIZE is 2^LWTUNNEL_HASH_BITS, i.e. 128.
 * NOTE(review): no user of these two constants is visible in this header.
 */
#define LWTUNNEL_HASH_BITS		7
#define LWTUNNEL_HASH_SIZE		(1 << LWTUNNEL_HASH_BITS)

/*
 * Bits stored in lwtunnel_state.flags.  Each bit is tested by the matching
 * lwtunnel_{output,input,xmit}_redirect() helper.
 */
#define LWTUNNEL_STATE_OUTPUT_REDIRECT	BIT(0)
#define LWTUNNEL_STATE_INPUT_REDIRECT	BIT(1)
#define LWTUNNEL_STATE_XMIT_REDIRECT	BIT(2)
/*
 * LWTUNNEL_XMIT_* result codes.
 *
 * LWTUNNEL_XMIT_CONTINUE is deliberately placed outside the dst_output
 * return values (NET_XMIT_xxx and NETDEV_TX_xxx in linux/netdevice.h) so
 * that it can always be told apart from them.
 */
enum {
	LWTUNNEL_XMIT_DONE	= 0,
	LWTUNNEL_XMIT_CONTINUE	= 0x100,
};
/*
 * Reference-counted lightweight tunnel state, reachable from a dst_entry
 * through dst->lwtstate (see lwtunnel_set_redirect()).
 *
 * Field order and types are unchanged; only layout comments were added.
 */
struct lwtunnel_state {
	/* NOTE(review): presumably the encap type passed to lwtunnel_build_state() - confirm. */
	__u16		type;
	/* LWTUNNEL_STATE_*_REDIRECT bits, tested by lwtunnel_*_redirect(). */
	__u16		flags;
	/* Value reported by lwtunnel_headroom() when it is below the MTU. */
	__u16		headroom;
	/* Taken by lwtstate_get(); lwtstate_put() frees on the final drop. */
	atomic_t	refcnt;
	/* dst->output / dst->input saved by lwtunnel_set_redirect(). */
	int		(*orig_output)(struct net *net, struct sock *sk,
				       struct sk_buff *skb);
	int		(*orig_input)(struct sk_buff *);
	/* NOTE(review): presumably used for RCU-deferred freeing - confirm. */
	struct rcu_head	rcu;
	/* Flexible trailing storage; its use is not visible in this header. */
	__u8		data[];
};
/*
 * Callback table describing one encapsulation type.  Tables of this kind
 * are handed to lwtunnel_encap_add_ops() / lwtunnel_encap_del_ops().
 *
 * NOTE(review): which callbacks are optional and the exact contract of
 * each one are defined by the implementation, which is not part of this
 * header - confirm there before relying on any of them being non-NULL.
 */
struct lwtunnel_encap_ops {
	int	(*build_state)(struct net *net, struct nlattr *encap,
			       unsigned int family, const void *cfg,
			       struct lwtunnel_state **ts,
			       struct netlink_ext_ack *extack);
	void	(*destroy_state)(struct lwtunnel_state *lws);

	/* Same signatures as lwtunnel_output() / lwtunnel_input() / lwtunnel_xmit(). */
	int	(*output)(struct net *net, struct sock *sk,
			  struct sk_buff *skb);
	int	(*input)(struct sk_buff *skb);

	int	(*fill_encap)(struct sk_buff *skb,
			      struct lwtunnel_state *lwtstate);
	int	(*get_encap_size)(struct lwtunnel_state *lwtstate);
	int	(*cmp_encap)(struct lwtunnel_state *a,
			     struct lwtunnel_state *b);
	int	(*xmit)(struct sk_buff *skb);

	struct module *owner;
};
#ifdef CONFIG_LWTUNNEL
/* Static key declared here; NOTE(review): its users are not visible in this header. */
DECLARE_STATIC_KEY_FALSE(nf_hooks_lwtunnel_enabled);
/* Called by lwtstate_put() when the last reference to @lws is dropped. */
void lwtstate_free(struct lwtunnel_state *lws);
/*
 * Take a reference on @lws.
 *
 * A NULL @lws is accepted and passed straight through, so callers may
 * chain this without checking first.  Returns @lws.
 */
static inline struct lwtunnel_state *
lwtstate_get(struct lwtunnel_state *lws)
{
	if (!lws)
		return NULL;

	atomic_inc(&lws->refcnt);
	return lws;
}
/*
 * Drop a reference on @lws; the state is handed to lwtstate_free() when
 * the count reaches zero.  A NULL @lws is a no-op.
 */
static inline void lwtstate_put(struct lwtunnel_state *lws)
{
	/* Short-circuit keeps the NULL case away from the refcount. */
	if (lws && atomic_dec_and_test(&lws->refcnt))
		lwtstate_free(lws);
}
/*
 * True when @lwtstate is non-NULL and has LWTUNNEL_STATE_OUTPUT_REDIRECT
 * set in its flags; false otherwise (including for a NULL state).
 */
static inline bool lwtunnel_output_redirect(struct lwtunnel_state *lwtstate)
{
	return lwtstate &&
	       (lwtstate->flags & LWTUNNEL_STATE_OUTPUT_REDIRECT);
}
/*
 * True when @lwtstate is non-NULL and has LWTUNNEL_STATE_INPUT_REDIRECT
 * set in its flags; false otherwise (including for a NULL state).
 */
static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate)
{
	return lwtstate &&
	       (lwtstate->flags & LWTUNNEL_STATE_INPUT_REDIRECT);
}
/*
 * True when @lwtstate is non-NULL and has LWTUNNEL_STATE_XMIT_REDIRECT
 * set in its flags; false otherwise (including for a NULL state).
 */
static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate)
{
	return lwtstate &&
	       (lwtstate->flags & LWTUNNEL_STATE_XMIT_REDIRECT);
}
/*
 * Headroom to account for on a route using @lwtstate.
 *
 * Returns lwtstate->headroom only when the state redirects xmit or output
 * and that headroom is strictly smaller than @mtu; returns 0 in every
 * other case (NULL state, no such redirect, or headroom >= @mtu).
 */
static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate,
					     unsigned int mtu)
{
	/* Neither redirect flag set (or NULL state): nothing to reserve. */
	if (!lwtunnel_xmit_redirect(lwtstate) &&
	    !lwtunnel_output_redirect(lwtstate))
		return 0;

	if (lwtstate->headroom >= mtu)
		return 0;

	return lwtstate->headroom;
}
/*
 * Out-of-line lwtunnel API (CONFIG_LWTUNNEL enabled).  Only prototypes are
 * given here; the definitions are not part of this header.
 */
/* NOTE(review): presumably add/remove the ops table @op for encap type @num - confirm in the implementation. */
int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op,
unsigned int num);
int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op,
unsigned int num);
/*
 * @rtnl_is_held is a temporary parameter: the caller states whether it
 * holds RTNL, because not every caller does any more.
 * NOTE(review): presumably this lets the implementation avoid unlocking
 * RTNL it does not own - confirm in net/core/lwtunnel.c.
 */
int lwtunnel_valid_encap_type(u16 encap_type,
struct netlink_ext_ack *extack,
bool rtnl_is_held);
int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len,
struct netlink_ext_ack *extack,
bool rtnl_is_held);
/* Parameters mirror lwtunnel_encap_ops.build_state, plus @encap_type. */
int lwtunnel_build_state(struct net *net, u16 encap_type,
struct nlattr *encap,
unsigned int family, const void *cfg,
struct lwtunnel_state **lws,
struct netlink_ext_ack *extack);
int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate,
int encap_attr, int encap_type_attr);
int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate);
struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len);
int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b);
/* Installed as dst->output / dst->input by lwtunnel_set_redirect(). */
int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb);
int lwtunnel_input(struct sk_buff *skb);
int lwtunnel_xmit(struct sk_buff *skb);
/* NOTE(review): BPF helper; semantics of @hdr/@len/@ingress are not visible here. */
int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len,
bool ingress);
static inline void lwtunnel_set_redirect(struct dst_entry *dst)
{
if (lwtunnel_output_redirect(dst->lwtstate)) {
dst->lwtstate->orig_output = dst->output;
dst->output = lwtunnel_output;
}
if (lwtunnel_input_redirect(dst->lwtstate)) {
dst->lwtstate->orig_input = dst->input;
dst->input = lwtunnel_input;
}
}
#else
/* CONFIG_LWTUNNEL=n stub: there is no tunnel state to free, so do nothing. */
static inline void lwtstate_free(struct lwtunnel_state *lws)
{
}
/*
 * CONFIG_LWTUNNEL=n stub: no reference counting is done; @lws is returned
 * unchanged so callers can be written the same way in both configurations.
 */
static inline struct lwtunnel_state *
lwtstate_get(struct lwtunnel_state *lws)
{
	return lws;
}
/* CONFIG_LWTUNNEL=n stub: no reference was taken, so nothing is dropped. */
static inline void lwtstate_put(struct lwtunnel_state *lws)
{
}
/* CONFIG_LWTUNNEL=n stub: output is never redirected, whatever @lwtstate is. */
static inline bool
lwtunnel_output_redirect(struct lwtunnel_state *lwtstate)
{
	return false;
}
/* CONFIG_LWTUNNEL=n stub: input is never redirected, whatever @lwtstate is. */
static inline bool
lwtunnel_input_redirect(struct lwtunnel_state *lwtstate)
{
	return false;
}
/* CONFIG_LWTUNNEL=n stub: xmit is never redirected, whatever @lwtstate is. */
static inline bool
lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate)
{
	return false;
}
/* CONFIG_LWTUNNEL=n stub: @dst keeps its own input/output handlers. */
static inline void lwtunnel_set_redirect(struct dst_entry *dst)
{
}
/* CONFIG_LWTUNNEL=n stub: no encapsulation, so no extra headroom (always 0). */
static inline unsigned int
lwtunnel_headroom(struct lwtunnel_state *lwtstate, unsigned int mtu)
{
	return 0;
}
/* CONFIG_LWTUNNEL=n stub: encap ops cannot be added; always -EOPNOTSUPP. */
static inline int
lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, unsigned int num)
{
	return -EOPNOTSUPP;
}
/* CONFIG_LWTUNNEL=n stub: there are no encap ops to delete; always -EOPNOTSUPP. */
static inline int
lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, unsigned int num)
{
	return -EOPNOTSUPP;
}
/*
 * CONFIG_LWTUNNEL=n stub: no encap type is valid.  Records an explanatory
 * message in @extack and returns -EOPNOTSUPP; @encap_type and
 * @rtnl_is_held are ignored.
 */
static inline int
lwtunnel_valid_encap_type(u16 encap_type,
			  struct netlink_ext_ack *extack,
			  bool rtnl_is_held)
{
	NL_SET_ERR_MSG(extack, "CONFIG_LWTUNNEL is not enabled in this kernel");

	return -EOPNOTSUPP;
}
/*
 * CONFIG_LWTUNNEL=n stub: always succeeds.
 *
 * The attributes are not walked in search of an RTA_ENCAP_TYPE attribute
 * on nexthops, so there is nothing here that could be rejected; every
 * argument is ignored.
 */
static inline int
lwtunnel_valid_encap_type_attr(struct nlattr *attr, int len,
			       struct netlink_ext_ack *extack,
			       bool rtnl_is_held)
{
	return 0;
}
/*
 * CONFIG_LWTUNNEL=n stub: no state can be built.  Always returns
 * -EOPNOTSUPP and leaves *@lws untouched.
 */
static inline int
lwtunnel_build_state(struct net *net, u16 encap_type,
		     struct nlattr *encap,
		     unsigned int family, const void *cfg,
		     struct lwtunnel_state **lws,
		     struct netlink_ext_ack *extack)
{
	return -EOPNOTSUPP;
}
/* CONFIG_LWTUNNEL=n stub: writes nothing into @skb and reports success (0). */
static inline int
lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate,
		    int encap_attr, int encap_type_attr)
{
	return 0;
}
/* CONFIG_LWTUNNEL=n stub: no encap attributes are emitted, so the size is 0. */
static inline int
lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
{
	return 0;
}
/* CONFIG_LWTUNNEL=n stub: nothing is allocated; always returns NULL. */
static inline struct lwtunnel_state *
lwtunnel_state_alloc(int hdr_len)
{
	return NULL;
}
/* CONFIG_LWTUNNEL=n stub: the two states are not inspected; always returns 0. */
static inline int
lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
{
	return 0;
}
/* CONFIG_LWTUNNEL=n stub: tunnel output is unavailable; always -EOPNOTSUPP. */
static inline int
lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	return -EOPNOTSUPP;
}
/* CONFIG_LWTUNNEL=n stub: tunnel input is unavailable; always -EOPNOTSUPP. */
static inline int
lwtunnel_input(struct sk_buff *skb)
{
	return -EOPNOTSUPP;
}
/* CONFIG_LWTUNNEL=n stub: tunnel xmit is unavailable; always -EOPNOTSUPP. */
static inline int
lwtunnel_xmit(struct sk_buff *skb)
{
	return -EOPNOTSUPP;
}
#endif /* CONFIG_LWTUNNEL */
/*
 * Emit a module alias of the form "rtnl-lwt-<encap_type>", where
 * <encap_type> is the stringified macro argument.
 */
#define MODULE_ALIAS_RTNL_LWT(encap_type) \
	MODULE_ALIAS("rtnl-lwt-" __stringify(encap_type))
#endif /* __NET_LWTUNNEL_H */