vfs-6.15-rc1.pipe

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCZ90qUAAKCRCRxhvAZXjc
 orffAQDL5w+qzwD1QfJX/bj7skoiNYYyml3kWZx9t43t76OZ2QD8C03ORvKEe9ik
 7uwcFpcEHwoTzzZir5p4UbFz7y/ZrAI=
 =7Tcu
 -----END PGP SIGNATURE-----

Merge tag 'vfs-6.15-rc1.pipe' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs pipe updates from Christian Brauner:

 - Introduce struct file_operations pipeanon_fops

 - Don't update {a,c,m}time for anonymous pipes to avoid the performance
   costs associated with it

 - Change pipe_write() to never add a zero-sized buffer

 - Limit the slots in pipe_resize_ring()

 - Use pipe_buf() to retrieve the pipe buffer everywhere

 - Drop an always true check in anon_pipe_write()

 - Cache 2 pages instead of 1

 - Avoid spurious calls to prepare_to_wait_event() in ___wait_event()

* tag 'vfs-6.15-rc1.pipe' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  fs/splice: Use pipe_buf() helper to retrieve pipe buffer
  fs/pipe: Use pipe_buf() helper to retrieve pipe buffer
  kernel/watch_queue: Use pipe_buf() to retrieve the pipe buffer
  fs/pipe: Limit the slots in pipe_resize_ring()
  wait: avoid spurious calls to prepare_to_wait_event() in ___wait_event()
  pipe: cache 2 pages instead of 1
  pipe: drop an always true check in anon_pipe_write()
  pipe: change pipe_write() to never add a zero-sized buffer
  pipe: don't update {a,c,m}time for anonymous pipes
  pipe: introduce struct file_operations pipeanon_fops
This commit is contained in:
Linus Torvalds 2025-03-24 09:52:37 -07:00
commit 71ee2fde57
5 changed files with 128 additions and 113 deletions

189
fs/pipe.c
View File

@ -112,20 +112,40 @@ void pipe_double_lock(struct pipe_inode_info *pipe1,
pipe_lock(pipe2);
}
/*
 * Obtain a page for a new anonymous pipe buffer.
 *
 * The first non-NULL entry of pipe->tmp_page[] is handed out and its slot
 * is cleared.  If every slot is empty, a page is requested from
 * alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT) and that result is returned
 * unchanged.
 */
static struct page *anon_pipe_get_page(struct pipe_inode_info *pipe)
{
	struct page **slot = pipe->tmp_page;
	struct page **end = slot + ARRAY_SIZE(pipe->tmp_page);

	for (; slot < end; slot++) {
		struct page *cached = *slot;

		if (!cached)
			continue;

		/* Take ownership of the cached page and free up its slot. */
		*slot = NULL;
		return cached;
	}

	/* Nothing cached: fall back to a fresh allocation. */
	return alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
}
/*
 * Give back a page that was used for an anonymous pipe buffer.
 *
 * When page_count() reports exactly one reference, the page is parked in
 * the first empty slot of pipe->tmp_page[] so a later
 * anon_pipe_get_page() can reuse it.  In every other case (more than one
 * reference, or no empty slot) the reference is dropped with put_page().
 */
static void anon_pipe_put_page(struct pipe_inode_info *pipe,
			       struct page *page)
{
	struct page **slot;
	struct page **end;

	if (page_count(page) != 1)
		goto drop;

	end = pipe->tmp_page + ARRAY_SIZE(pipe->tmp_page);
	for (slot = pipe->tmp_page; slot < end; slot++) {
		if (*slot)
			continue;

		/* Empty slot found: keep the page for reuse. */
		*slot = page;
		return;
	}

drop:
	put_page(page);
}
static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
struct page *page = buf->page;
/*
* If nobody else uses this page, and we don't already have a
* temporary page, let's keep track of it as a one-deep
* allocation cache. (Otherwise just release our reference to it)
*/
if (page_count(page) == 1 && !pipe->tmp_page)
pipe->tmp_page = page;
else
put_page(page);
anon_pipe_put_page(pipe, page);
}
static bool anon_pipe_buf_try_steal(struct pipe_inode_info *pipe,
@ -247,7 +267,7 @@ static inline unsigned int pipe_update_tail(struct pipe_inode_info *pipe,
}
static ssize_t
pipe_read(struct kiocb *iocb, struct iov_iter *to)
anon_pipe_read(struct kiocb *iocb, struct iov_iter *to)
{
size_t total_len = iov_iter_count(to);
struct file *filp = iocb->ki_filp;
@ -274,7 +294,6 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
/* Read ->head with a barrier vs post_one_notification() */
unsigned int head = smp_load_acquire(&pipe->head);
unsigned int tail = pipe->tail;
unsigned int mask = pipe->ring_size - 1;
#ifdef CONFIG_WATCH_QUEUE
if (pipe->note_loss) {
@ -301,7 +320,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
#endif
if (!pipe_empty(head, tail)) {
struct pipe_buffer *buf = &pipe->bufs[tail & mask];
struct pipe_buffer *buf = pipe_buf(pipe, tail);
size_t chars = buf->len;
size_t written;
int error;
@ -359,29 +378,9 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
break;
}
mutex_unlock(&pipe->mutex);
/*
* We only get here if we didn't actually read anything.
*
* However, we could have seen (and removed) a zero-sized
* pipe buffer, and might have made space in the buffers
* that way.
*
* You can't make zero-sized pipe buffers by doing an empty
* write (not even in packet mode), but they can happen if
* the writer gets an EFAULT when trying to fill a buffer
* that already got allocated and inserted in the buffer
* array.
*
* So we still need to wake up any pending writers in the
* _very_ unlikely case that the pipe was full, but we got
* no data.
*/
if (unlikely(wake_writer))
wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
/*
* But because we didn't read anything, at this point we can
* just return directly with -ERESTARTSYS if we're interrupted,
* since we've done any required wakeups and there's no need
@ -390,7 +389,6 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
return -ERESTARTSYS;
wake_writer = false;
wake_next_reader = true;
mutex_lock(&pipe->mutex);
}
@ -403,8 +401,15 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
if (wake_next_reader)
wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
return ret;
}
static ssize_t
fifo_pipe_read(struct kiocb *iocb, struct iov_iter *to)
{
int ret = anon_pipe_read(iocb, to);
if (ret > 0)
file_accessed(filp);
file_accessed(iocb->ki_filp);
return ret;
}
@ -424,7 +429,7 @@ static inline bool pipe_writable(const struct pipe_inode_info *pipe)
}
static ssize_t
pipe_write(struct kiocb *iocb, struct iov_iter *from)
anon_pipe_write(struct kiocb *iocb, struct iov_iter *from)
{
struct file *filp = iocb->ki_filp;
struct pipe_inode_info *pipe = filp->private_data;
@ -471,8 +476,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
was_empty = pipe_empty(head, pipe->tail);
chars = total_len & (PAGE_SIZE-1);
if (chars && !was_empty) {
unsigned int mask = pipe->ring_size - 1;
struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
struct pipe_buffer *buf = pipe_buf(pipe, head - 1);
int offset = buf->offset + buf->len;
if ((buf->flags & PIPE_BUF_FLAG_CAN_MERGE) &&
@ -503,54 +507,44 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
head = pipe->head;
if (!pipe_full(head, pipe->tail, pipe->max_usage)) {
unsigned int mask = pipe->ring_size - 1;
struct pipe_buffer *buf;
struct page *page = pipe->tmp_page;
struct page *page;
int copied;
if (!page) {
page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
if (unlikely(!page)) {
ret = ret ? : -ENOMEM;
break;
}
pipe->tmp_page = page;
page = anon_pipe_get_page(pipe);
if (unlikely(!page)) {
if (!ret)
ret = -ENOMEM;
break;
}
/* Allocate a slot in the ring in advance and attach an
* empty buffer. If we fault or otherwise fail to use
* it, either the reader will consume it or it'll still
* be there for the next write.
*/
pipe->head = head + 1;
/* Insert it into the buffer array */
buf = &pipe->bufs[head & mask];
buf->page = page;
buf->ops = &anon_pipe_buf_ops;
buf->offset = 0;
buf->len = 0;
if (is_packetized(filp))
buf->flags = PIPE_BUF_FLAG_PACKET;
else
buf->flags = PIPE_BUF_FLAG_CAN_MERGE;
pipe->tmp_page = NULL;
copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
anon_pipe_put_page(pipe, page);
if (!ret)
ret = -EFAULT;
break;
}
ret += copied;
pipe->head = head + 1;
/* Insert it into the buffer array */
buf = pipe_buf(pipe, head);
buf->page = page;
buf->ops = &anon_pipe_buf_ops;
buf->offset = 0;
if (is_packetized(filp))
buf->flags = PIPE_BUF_FLAG_PACKET;
else
buf->flags = PIPE_BUF_FLAG_CAN_MERGE;
buf->len = copied;
ret += copied;
if (!iov_iter_count(from))
break;
}
if (!pipe_full(head, pipe->tail, pipe->max_usage))
continue;
}
/* Wait for buffer space to become available. */
if ((filp->f_flags & O_NONBLOCK) ||
@ -602,11 +596,21 @@ out:
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
if (wake_next_writer)
wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
int err = file_update_time(filp);
if (err)
ret = err;
sb_end_write(file_inode(filp)->i_sb);
return ret;
}
/*
 * write_iter handler that adds a file time update on top of
 * anon_pipe_write().
 *
 * The actual write is delegated to anon_pipe_write().  Only when that
 * call reports a positive byte count do we attempt file_update_time(),
 * and only if sb_start_write_trylock() succeeds on the inode's
 * superblock; if the trylock fails the update is skipped.
 *
 * Returns the value from anon_pipe_write(), except that a non-zero error
 * from file_update_time() replaces the byte count.
 */
static ssize_t
fifo_pipe_write(struct kiocb *iocb, struct iov_iter *from)
{
	/*
	 * Keep the result in ssize_t: both anon_pipe_write() and this
	 * function return ssize_t, so holding it in an int would be a
	 * needless narrowing conversion.
	 */
	ssize_t ret = anon_pipe_write(iocb, from);

	if (ret > 0) {
		struct file *filp = iocb->ki_filp;

		if (sb_start_write_trylock(file_inode(filp)->i_sb)) {
			int err = file_update_time(filp);

			if (err)
				ret = err;
			sb_end_write(file_inode(filp)->i_sb);
		}
	}
	return ret;
}
@ -853,8 +857,10 @@ void free_pipe_info(struct pipe_inode_info *pipe)
if (pipe->watch_queue)
put_watch_queue(pipe->watch_queue);
#endif
if (pipe->tmp_page)
__free_page(pipe->tmp_page);
for (i = 0; i < ARRAY_SIZE(pipe->tmp_page); i++) {
if (pipe->tmp_page[i])
__free_page(pipe->tmp_page[i]);
}
kfree(pipe->bufs);
kfree(pipe);
}
@ -874,6 +880,8 @@ static const struct dentry_operations pipefs_dentry_operations = {
.d_dname = pipefs_dname,
};
static const struct file_operations pipeanon_fops;
static struct inode * get_pipe_inode(void)
{
struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);
@ -891,7 +899,7 @@ static struct inode * get_pipe_inode(void)
inode->i_pipe = pipe;
pipe->files = 2;
pipe->readers = pipe->writers = 1;
inode->i_fop = &pipefifo_fops;
inode->i_fop = &pipeanon_fops;
/*
* Mark the inode dirty from the very beginning,
@ -934,7 +942,7 @@ int create_pipe_files(struct file **res, int flags)
f = alloc_file_pseudo(inode, pipe_mnt, "",
O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)),
&pipefifo_fops);
&pipeanon_fops);
if (IS_ERR(f)) {
free_pipe_info(inode->i_pipe);
iput(inode);
@ -945,7 +953,7 @@ int create_pipe_files(struct file **res, int flags)
f->f_pipe = 0;
res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK),
&pipefifo_fops);
&pipeanon_fops);
if (IS_ERR(res[0])) {
put_pipe_info(inode, inode->i_pipe);
fput(f);
@ -1109,8 +1117,8 @@ static void wake_up_partner(struct pipe_inode_info *pipe)
static int fifo_open(struct inode *inode, struct file *filp)
{
bool is_pipe = inode->i_fop == &pipeanon_fops;
struct pipe_inode_info *pipe;
bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
int ret;
filp->f_pipe = 0;
@ -1234,8 +1242,19 @@ err:
const struct file_operations pipefifo_fops = {
.open = fifo_open,
.read_iter = pipe_read,
.write_iter = pipe_write,
.read_iter = fifo_pipe_read,
.write_iter = fifo_pipe_write,
.poll = pipe_poll,
.unlocked_ioctl = pipe_ioctl,
.release = pipe_release,
.fasync = pipe_fasync,
.splice_write = iter_file_splice_write,
};
static const struct file_operations pipeanon_fops = {
.open = fifo_open,
.read_iter = anon_pipe_read,
.write_iter = anon_pipe_write,
.poll = pipe_poll,
.unlocked_ioctl = pipe_ioctl,
.release = pipe_release,
@ -1271,6 +1290,10 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
struct pipe_buffer *bufs;
unsigned int head, tail, mask, n;
/* nr_slots larger than limits of pipe->{head,tail} */
if (unlikely(nr_slots > (pipe_index_t)-1u))
return -EINVAL;
bufs = kcalloc(nr_slots, sizeof(*bufs),
GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
if (unlikely(!bufs))
@ -1390,7 +1413,9 @@ struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice)
{
struct pipe_inode_info *pipe = file->private_data;
if (file->f_op != &pipefifo_fops || !pipe)
if (!pipe)
return NULL;
if (file->f_op != &pipefifo_fops && file->f_op != &pipeanon_fops)
return NULL;
if (for_splice && pipe_has_watch_queue(pipe))
return NULL;

View File

@ -200,7 +200,6 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
unsigned int spd_pages = spd->nr_pages;
unsigned int tail = pipe->tail;
unsigned int head = pipe->head;
unsigned int mask = pipe->ring_size - 1;
ssize_t ret = 0;
int page_nr = 0;
@ -214,7 +213,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
}
while (!pipe_full(head, tail, pipe->max_usage)) {
struct pipe_buffer *buf = &pipe->bufs[head & mask];
struct pipe_buffer *buf = pipe_buf(pipe, head);
buf->page = spd->pages[page_nr];
buf->offset = spd->partial[page_nr].offset;
@ -247,7 +246,6 @@ ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
{
unsigned int head = pipe->head;
unsigned int tail = pipe->tail;
unsigned int mask = pipe->ring_size - 1;
int ret;
if (unlikely(!pipe->readers)) {
@ -256,7 +254,7 @@ ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
} else if (pipe_full(head, tail, pipe->max_usage)) {
ret = -EAGAIN;
} else {
pipe->bufs[head & mask] = *buf;
*pipe_buf(pipe, head) = *buf;
pipe->head = head + 1;
return buf->len;
}
@ -447,11 +445,10 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des
{
unsigned int head = pipe->head;
unsigned int tail = pipe->tail;
unsigned int mask = pipe->ring_size - 1;
int ret;
while (!pipe_empty(head, tail)) {
struct pipe_buffer *buf = &pipe->bufs[tail & mask];
struct pipe_buffer *buf = pipe_buf(pipe, tail);
sd->len = buf->len;
if (sd->len > sd->total_len)
@ -495,8 +492,7 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des
static inline bool eat_empty_buffer(struct pipe_inode_info *pipe)
{
unsigned int tail = pipe->tail;
unsigned int mask = pipe->ring_size - 1;
struct pipe_buffer *buf = &pipe->bufs[tail & mask];
struct pipe_buffer *buf = pipe_buf(pipe, tail);
if (unlikely(!buf->len)) {
pipe_buf_release(pipe, buf);
@ -690,7 +686,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
while (sd.total_len) {
struct kiocb kiocb;
struct iov_iter from;
unsigned int head, tail, mask;
unsigned int head, tail;
size_t left;
int n;
@ -711,12 +707,11 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
head = pipe->head;
tail = pipe->tail;
mask = pipe->ring_size - 1;
/* build the vector */
left = sd.total_len;
for (n = 0; !pipe_empty(head, tail) && left && n < nbufs; tail++) {
struct pipe_buffer *buf = &pipe->bufs[tail & mask];
struct pipe_buffer *buf = pipe_buf(pipe, tail);
size_t this_len = buf->len;
/* zero-length bvecs are not supported, skip them */
@ -752,7 +747,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
/* dismiss the fully eaten buffers, adjust the partial one */
tail = pipe->tail;
while (ret) {
struct pipe_buffer *buf = &pipe->bufs[tail & mask];
struct pipe_buffer *buf = pipe_buf(pipe, tail);
if (ret >= buf->len) {
ret -= buf->len;
buf->len = 0;
@ -809,7 +804,7 @@ ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out,
pipe_lock(pipe);
while (len > 0) {
unsigned int head, tail, mask, bc = 0;
unsigned int head, tail, bc = 0;
size_t remain = len;
/*
@ -846,10 +841,9 @@ ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out,
head = pipe->head;
tail = pipe->tail;
mask = pipe->ring_size - 1;
while (!pipe_empty(head, tail)) {
struct pipe_buffer *buf = &pipe->bufs[tail & mask];
struct pipe_buffer *buf = pipe_buf(pipe, tail);
size_t seg;
if (!buf->len) {
@ -894,7 +888,7 @@ ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out,
len -= ret;
tail = pipe->tail;
while (ret > 0) {
struct pipe_buffer *buf = &pipe->bufs[tail & mask];
struct pipe_buffer *buf = pipe_buf(pipe, tail);
size_t seg = min_t(size_t, ret, buf->len);
buf->offset += seg;
@ -1725,7 +1719,6 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
struct pipe_buffer *ibuf, *obuf;
unsigned int i_head, o_head;
unsigned int i_tail, o_tail;
unsigned int i_mask, o_mask;
int ret = 0;
bool input_wakeup = false;
@ -1747,9 +1740,7 @@ retry:
pipe_double_lock(ipipe, opipe);
i_tail = ipipe->tail;
i_mask = ipipe->ring_size - 1;
o_head = opipe->head;
o_mask = opipe->ring_size - 1;
do {
size_t o_len;
@ -1792,8 +1783,8 @@ retry:
goto retry;
}
ibuf = &ipipe->bufs[i_tail & i_mask];
obuf = &opipe->bufs[o_head & o_mask];
ibuf = pipe_buf(ipipe, i_tail);
obuf = pipe_buf(opipe, o_head);
if (len >= ibuf->len) {
/*
@ -1862,7 +1853,6 @@ static ssize_t link_pipe(struct pipe_inode_info *ipipe,
struct pipe_buffer *ibuf, *obuf;
unsigned int i_head, o_head;
unsigned int i_tail, o_tail;
unsigned int i_mask, o_mask;
ssize_t ret = 0;
/*
@ -1873,9 +1863,7 @@ static ssize_t link_pipe(struct pipe_inode_info *ipipe,
pipe_double_lock(ipipe, opipe);
i_tail = ipipe->tail;
i_mask = ipipe->ring_size - 1;
o_head = opipe->head;
o_mask = opipe->ring_size - 1;
do {
if (!opipe->readers) {
@ -1896,8 +1884,8 @@ static ssize_t link_pipe(struct pipe_inode_info *ipipe,
pipe_full(o_head, o_tail, opipe->max_usage))
break;
ibuf = &ipipe->bufs[i_tail & i_mask];
obuf = &opipe->bufs[o_head & o_mask];
ibuf = pipe_buf(ipipe, i_tail);
obuf = pipe_buf(opipe, o_head);
/*
* Get a reference to this pipe buffer,

View File

@ -108,7 +108,7 @@ struct pipe_inode_info {
#ifdef CONFIG_WATCH_QUEUE
bool note_loss;
#endif
struct page *tmp_page;
struct page *tmp_page[2];
struct fasync_struct *fasync_readers;
struct fasync_struct *fasync_writers;
struct pipe_buffer *bufs;

View File

@ -316,6 +316,9 @@ extern void init_wait_entry(struct wait_queue_entry *wq_entry, int flags);
} \
\
cmd; \
\
if (condition) \
break; \
} \
finish_wait(&wq_head, &__wq_entry); \
__out: __ret; \

View File

@ -101,12 +101,11 @@ static bool post_one_notification(struct watch_queue *wqueue,
struct pipe_inode_info *pipe = wqueue->pipe;
struct pipe_buffer *buf;
struct page *page;
unsigned int head, tail, mask, note, offset, len;
unsigned int head, tail, note, offset, len;
bool done = false;
spin_lock_irq(&pipe->rd_wait.lock);
mask = pipe->ring_size - 1;
head = pipe->head;
tail = pipe->tail;
if (pipe_full(head, tail, pipe->ring_size))
@ -124,7 +123,7 @@ static bool post_one_notification(struct watch_queue *wqueue,
memcpy(p + offset, n, len);
kunmap_atomic(p);
buf = &pipe->bufs[head & mask];
buf = pipe_buf(pipe, head);
buf->page = page;
buf->private = (unsigned long)wqueue;
buf->ops = &watch_queue_pipe_buf_ops;
@ -147,7 +146,7 @@ out:
return done;
lost:
buf = &pipe->bufs[(head - 1) & mask];
buf = pipe_buf(pipe, head - 1);
buf->flags |= PIPE_BUF_FLAG_LOSS;
goto out;
}