9883035ae7
The actual internal pipe implementation is already really about individual packets (called "pipe buffers"), and this simply exposes that as a special packetized mode. When we are in the packetized mode (marked by O_DIRECT as suggested by Alan Cox), a write() on a pipe will not merge the new data with previous writes, so each write will get a pipe buffer of its own. The pipe buffer is then marked with the PIPE_BUF_FLAG_PACKET flag, which in turn will tell the reader side to break the read at that boundary (and throw away any partial packet contents that do not fit in the read buffer). End result: as long as you do writes less than PIPE_BUF in size (so that the pipe doesn't have to split them up), you can now treat the pipe as a packet interface, where each read() system call will read one packet at a time. You can just use a sufficiently big read buffer (PIPE_BUF is sufficient, since bigger than that doesn't guarantee atomicity anyway), and the return value of the read() will naturally give you the size of the packet. NOTE! We do not support zero-sized packets, and zero-sized reads and writes to a pipe continue to be no-ops. Also note that big packets will currently be split at write time, but that the size at which that happens is not really specified (except that it's bigger than PIPE_BUF). Currently that limit is the system page size, but we might want to explicitly support bigger packets some day. The main user for this is going to be the autofs packet interface, allowing us to stop having to care so deeply about exact packet sizes (which have had bugs with 32/64-bit compatibility modes). But user space can create packetized pipes with "pipe2(fd, O_DIRECT)", which will fail with an EINVAL on kernels that do not support this interface. 
Tested-by: Michael Tokarev <mjt@tls.msk.ru>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: David Miller <davem@davemloft.net>
Cc: Ian Kent <raven@themaw.net>
Cc: Thomas Meyer <thomas@m3y3r.de>
Cc: stable@kernel.org # needed for systemd/autofs interaction fix
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
165 lines
5.8 KiB
C
165 lines
5.8 KiB
C
#ifndef _LINUX_PIPE_FS_I_H
|
|
#define _LINUX_PIPE_FS_I_H
|
|
|
|
/*
 * NOTE(review): presumably the number of pipe_buffer slots a pipe starts
 * out with (see pipe_inode_info.buffers) -- confirm against fs/pipe.c.
 */
#define PIPE_DEF_BUFFERS	16

/* Bit flags stored in pipe_buffer.flags; each flag occupies its own bit. */
#define PIPE_BUF_FLAG_LRU	0x01	/* page is on the LRU */
#define PIPE_BUF_FLAG_ATOMIC	0x02	/* was atomically mapped */
#define PIPE_BUF_FLAG_GIFT	0x04	/* page is a gift */
#define PIPE_BUF_FLAG_PACKET	0x08	/* read() as a packet */
|
|
|
|
/**
 *	struct pipe_buffer - a linux kernel pipe buffer
 *	@page: the page containing the data for the pipe buffer
 *	@offset: offset of data inside the @page
 *	@len: length of data inside the @page
 *	@ops: operations associated with this buffer. See @pipe_buf_operations.
 *	@flags: pipe buffer flags (the PIPE_BUF_FLAG_* bits). See above.
 *	@private: private data owned by the ops.
 **/
struct pipe_buffer {
	struct page *page;
	unsigned int offset, len;
	const struct pipe_buf_operations *ops;
	unsigned int flags;
	unsigned long private;
};
|
|
|
|
/**
|
|
* struct pipe_inode_info - a linux kernel pipe
|
|
* @wait: reader/writer wait point in case of empty/full pipe
|
|
* @nrbufs: the number of non-empty pipe buffers in this pipe
|
|
* @buffers: total number of buffers (should be a power of 2)
|
|
* @curbuf: the current pipe buffer entry
|
|
* @tmp_page: cached released page
|
|
* @readers: number of current readers of this pipe
|
|
* @writers: number of current writers of this pipe
|
|
* @waiting_writers: number of writers blocked waiting for room
|
|
* @r_counter: reader counter
|
|
* @w_counter: writer counter
|
|
* @fasync_readers: reader side fasync
|
|
* @fasync_writers: writer side fasync
|
|
* @inode: inode this pipe is attached to
|
|
* @bufs: the circular array of pipe buffers
|
|
**/
|
|
struct pipe_inode_info {
|
|
wait_queue_head_t wait;
|
|
unsigned int nrbufs, curbuf, buffers;
|
|
unsigned int readers;
|
|
unsigned int writers;
|
|
unsigned int waiting_writers;
|
|
unsigned int r_counter;
|
|
unsigned int w_counter;
|
|
struct page *tmp_page;
|
|
struct fasync_struct *fasync_readers;
|
|
struct fasync_struct *fasync_writers;
|
|
struct inode *inode;
|
|
struct pipe_buffer *bufs;
|
|
};
|
|
|
|
/*
 * Note on the nesting of these functions:
 *
 * ->confirm()
 *	->steal()
 *	...
 *	->map()
 *	...
 *	->unmap()
 *
 * That is, ->map() must be called on a confirmed buffer,
 * same goes for ->steal(). See below for the meaning of each
 * operation. Also see kerneldoc in fs/pipe.c for the pipe
 * and generic variants of these hooks.
 */
struct pipe_buf_operations {
	/*
	 * This is set to 1, if the generic pipe read/write may coalesce
	 * data into an existing buffer. If this is set to 0, a new pipe
	 * page segment is always used for new data.
	 */
	int can_merge;

	/*
	 * ->map() returns a virtual address mapping of the pipe buffer.
	 * The last integer flag reflects whether this should be an atomic
	 * mapping or not. The atomic map is faster, however you can't take
	 * page faults before calling ->unmap() again. So if you need to eg
	 * access user data through copy_to/from_user(), then you must get
	 * a non-atomic map. ->map() uses the KM_USER0 atomic slot for
	 * atomic maps, so you can't map more than one pipe_buffer at once
	 * and you have to be careful if mapping another page as source
	 * or destination for a copy (IOW, it has to use something else
	 * than KM_USER0).
	 */
	void * (*map)(struct pipe_inode_info *, struct pipe_buffer *, int);

	/*
	 * Undoes ->map(), finishes the virtual mapping of the pipe buffer.
	 */
	void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *, void *);

	/*
	 * ->confirm() verifies that the data in the pipe buffer is there
	 * and that the contents are good. If the pages in the pipe belong
	 * to a file system, we may need to wait for IO completion in this
	 * hook. Returns 0 for good, or a negative error value in case of
	 * error.
	 */
	int (*confirm)(struct pipe_inode_info *, struct pipe_buffer *);

	/*
	 * When the contents of this pipe buffer has been completely
	 * consumed by a reader, ->release() is called.
	 */
	void (*release)(struct pipe_inode_info *, struct pipe_buffer *);

	/*
	 * Attempt to take ownership of the pipe buffer and its contents.
	 * ->steal() returns 0 for success, in which case the contents
	 * of the pipe (the buf->page) is locked and now completely owned
	 * by the caller. The page may then be transferred to a different
	 * mapping, the most often used case is insertion into different
	 * file address space cache.
	 */
	int (*steal)(struct pipe_inode_info *, struct pipe_buffer *);

	/*
	 * Get a reference to the pipe buffer.
	 */
	void (*get)(struct pipe_inode_info *, struct pipe_buffer *);
};
|
|
|
|
/*
 * Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual
 * memory allocation, whereas PIPE_BUF makes atomicity guarantees.
 */
#define PIPE_SIZE		PAGE_SIZE
|
|
|
|
/*
 * Pipe lock and unlock operations.
 *
 * pipe_lock()/pipe_unlock() operate on a single pipe; pipe_double_lock()
 * takes two pipes.
 * NOTE(review): the locking order used by pipe_double_lock() is not visible
 * from this header -- see the implementation in fs/pipe.c.
 */
void pipe_lock(struct pipe_inode_info *pipe);
void pipe_unlock(struct pipe_inode_info *pipe);
void pipe_double_lock(struct pipe_inode_info *pipe1, struct pipe_inode_info *pipe2);
|
|
|
|
extern unsigned int pipe_max_size, pipe_min_size;
|
|
int pipe_proc_fn(struct ctl_table *, int, void __user *, size_t *, loff_t *);
|
|
|
|
|
|
/* Drop the inode semaphore and wait for a pipe event, atomically */
void pipe_wait(struct pipe_inode_info *pipe);

/*
 * Allocation and teardown of a pipe_inode_info.
 *
 * alloc_pipe_info() returns a pipe for @inode.
 * NOTE(review): presumably returns NULL on allocation failure, and
 * free_pipe_info() presumably releases the pipe attached to @inode while
 * __free_pipe_info() releases a pipe given directly -- confirm in fs/pipe.c.
 */
struct pipe_inode_info *alloc_pipe_info(struct inode *inode);
void free_pipe_info(struct inode *inode);
void __free_pipe_info(struct pipe_inode_info *);
|
|
|
|
/*
 * Generic pipe buffer ops functions.
 *
 * Each prototype matches the signature of the same-named hook in
 * struct pipe_buf_operations (->map, ->unmap, ->get, ->confirm, ->steal,
 * ->release), so these can be used directly as the hook implementations.
 */
void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *, int);
void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *, void *);
void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
|
|
|
|
/* for F_SETPIPE_SZ and F_GETPIPE_SZ */
long pipe_fcntl(struct file *, unsigned int, unsigned long arg);

/*
 * NOTE(review): presumably returns the pipe behind @file, or NULL when
 * @file is not a pipe -- confirm in fs/pipe.c.
 */
struct pipe_inode_info *get_pipe_info(struct file *file);
|
|
|
|
#endif
|