值得注意的是,本文中的poll并非system call,而是内核中file_operations的poll函数。
强烈建议先阅读select usage and implementation in kernel。
注:代码中的细节可参见Advanced Char Driver Operations。
For every file descriptor, it calls that fd’s
poll()
method, which will add the caller to that fd’s wait queue, and return which events (readable, writeable, exception) currently apply to that fd.
1. How to add poll function to the kernel module code?
Include needed headers:
12
#include <linux/poll.h>
Declare waitqueue variable:
1
static DECLARE_WAIT_QUEUE_HEAD(fortune_wait);
Add
fortune_poll()
function and add it (as .poll
callback) to your file operations structure:
static unsigned int fortune_poll(struct file *file, poll_table *wait){ poll_wait(file, &fortune_wait, wait); if (new-data-is-ready) return POLLIN | POLLRDNORM; return 0;}static const struct file_operations proc_test_fops = { .... .poll = fortune_poll,};
Note that you should return
POLLIN | POLLRDNORM
if you have some new data to read, and 0 in case there is no new data to read.
Notify your waitqueue once you have new data:
1
wake_up_interruptible(&fortune_wait);
2. scull
驱动实例
由于Linux设备驱动的耦合设计,对设备的操作函数都是驱动程序自定义的,我们必须要结合一个具体的实例来看看,才能知道f_op->poll
里面弄的是什么鬼。
在这里我们以Linux Device Drivers, Third Edition一书中的例子——scull
设备的驱动程序为例。
scull
(Simple Character Utility for Loading Localities). scull is a char driver that acts on a memory area as though it were a device.
scull
设备不同于硬件设备,它是模拟出来的一块内存,因此对它的读写更快速更自由,内存支持你顺着读倒着读点着读怎么读都可以。 我们以书中“管道”(pipe)式,即FIFO的读写驱动程序为例。
首先是scull_pipe
的结构体,注意wait_queue_head_t
这个队列类型,它就是用来记录等待设备I/O事件的进程的。
12345678910 | struct scull_pipe { wait_queue_head_t inq, outq; char *buffer, *end; /* begin of buf, end of buf*/ int buffersize; /* used in pointer arithmetic*/ char *rp, *wp; /* where to read, where to write */ int nreaders, nwriters; /* number of openings for r/w */ struct fasync_struct *async_queue; /* asynchronous readers */ struct mutex mutex; /* mutual exclusion semaphore */ struct cdev cdev; /* Char device structure */}; |
scull
设备的轮询操作函数scull_p_poll
,驱动模块加载后,这个函数就被挂到(*poll)
函数指针上去了。
我们可以看到它的确是返回了当前设备的I/O状态,并且调用了内核的poll_wait()
函数,这里注意,它把自己的wait_queue_head_t
队列也当作参数传进去了。
123456789101112131415 | static unsigned int scull_p_poll(struct file *filp, poll_table *wait){ struct scull_pipe *dev = filp->private_data; unsigned int mask = 0; mutex_lock(&dev->mutex); poll_wait(filp, &dev->inq, wait); poll_wait(filp, &dev->outq, wait); if (dev->rp != dev->wp) mask |= POLLIN | POLLRDNORM; /* readable */ if (spacefree(dev)) mask |= POLLOUT | POLLWRNORM; /* writable */ mutex_unlock(&dev->mutex); return mask;} |
当scull
有数据写入时,它会把wait_queue_head_t
队列里等待的进程给唤醒。
123456789 | static ssize_t scull_p_write(struct file *filp, const char __user *buf, size_t count, loff_t *f_pos){ … /* finally, awake any reader */ wake_up_interruptible(&dev->inq); /* blocked in read() and select() */ …} |
可是wait_queue_head_t
队列里的进程是什么时候装进去的? 肯定是poll_wait
搞的鬼!
3. poll_wait
与设备的等待队列
12345678910111213141516171819 | static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p){ if (p && p->_qproc && wait_address) p->_qproc(filp, wait_address, p);}/* * Do not touch the structure directly, use the access functions * poll_does_not_wait() and poll_requested_events() instead. */typedef struct poll_table_struct { poll_queue_proc _qproc; unsigned long _key;} poll_table;/* * structures and helpers for f_op->poll implementations */typedef void (*poll_queue_proc)(struct file *, wait_queue_head_t *, struct poll_table_struct *); |
可以看到,poll_wait()
其实只是直接调用了struct poll_table_struct
结构里绑定的函数指针。 我们需要找到struct poll_table_struct
初始化的地方。
The
poll_table
structure is just a wrapper around a function that builds the actual data structure. That structure, for poll
and select
, is a linked list of memory pages containing poll_table_entry
structures.
struct poll_table_struct
里的函数指针,是在do_select()
初始化的。
1234567891011121314151617181920212223242526 | int do_select(int n, fd_set_bits *fds, struct timespec *end_time){ struct poll_wqueues table; poll_table *wait; poll_initwait(&table); wait = &table.pt; // …}void poll_initwait(struct poll_wqueues *pwq){ // 初始化poll_table里的函数指针 init_poll_funcptr(&pwq->pt, __pollwait); pwq->polling_task = current; pwq->triggered = 0; pwq->error = 0; pwq->table = NULL; pwq->inline_index = 0;}EXPORT_SYMBOL(poll_initwait);static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc){ pt->_qproc = qproc; pt->_key = ~0UL; /* all events enabled */} |
我们现在终于知道,__pollwait()
函数,就是poll_wait()
幕后的真凶。
它最终通过add_wait_queue()
把当前进程添加到设备的等待队列wait_queue_head_t
中去。
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253 | /* Add a new entry */static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p){ struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt); struct poll_table_entry *entry = poll_get_entry(pwq); if (!entry) return; entry->filp = get_file(filp); entry->wait_address = wait_address; entry->key = p->_key; init_waitqueue_func_entry(&entry->wait, pollwake); entry->wait.private = pwq; // 把当前进程装到设备的等待队列 add_wait_queue(wait_address, &entry->wait);}void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait){ unsigned long flags; wait->flags &= ~WQ_FLAG_EXCLUSIVE; spin_lock_irqsave(&q->lock, flags); __add_wait_queue(q, wait); spin_unlock_irqrestore(&q->lock, flags);}EXPORT_SYMBOL(add_wait_queue);static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new){ list_add(&new->task_list, &head->task_list);}/** * Insert a new element after the given list head. The new element does not * need to be initialised as empty list. * The list changes from: * head → some element → ... * to * head → new element → older element → ... * * Example: * struct foo *newfoo = malloc(...); * list_add(&newfoo->entry, &bar->list_of_foos); * * @param entry The new element to prepend to the list. * @param head The existing list. */static inline voidlist_add(struct list_head *entry, struct list_head *head){ __list_add(entry, head, head->next);} |
参考资料: