linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] New EPOLL flag: EPOLLHEAD
@ 2010-08-02  5:31 Li Yu
  2010-08-03  7:12 ` Davide Libenzi
  0 siblings, 1 reply; 4+ messages in thread
From: Li Yu @ 2010-08-02  5:31 UTC (permalink / raw)
  To: viro; +Cc: linux-fsdevel, linux-kernel


This patch introduces a new epoll flag, EPOLLHEAD. When this flag is set, triggered events are inserted at the head of the ready-events linked list, so they are likely to be processed earlier in user space. In effect, this is the simplest form of event priority; is that right?

BTW: I am not subscribed to the linux-fsdevel mailing list, so please also send replies to my Gmail address, thanks!

Signed-off-by: Li Yu <raise.sail@gmail.com>
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 3817149..ddd1500 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -72,7 +72,7 @@
  */
 
 /* Epoll private bits inside the event mask */
-#define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET)
+#define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET | EPOLLHEAD)
 
 /* Maximum number of nesting allowed inside epoll sets */
 #define EP_MAX_NESTS 4
@@ -281,6 +281,15 @@ static inline int ep_is_linked(struct list_head *p)
 	return !list_empty(p);
 }
 
+/* The events with EPOLLHEAD can be detected first by user space. */
+static inline void ep_event_ready(struct eventpoll *ep, struct epitem *epi)
+{
+	if (epi->event.events & EPOLLHEAD)
+		list_add(&epi->rdllink, &ep->rdllist);
+	else
+		list_add_tail(&epi->rdllink, &ep->rdllist);
+}
+
 /* Get the "struct epitem" from a wait queue pointer */
 static inline struct epitem *ep_item_from_wait(wait_queue_t *p)
 {
@@ -494,7 +503,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
 		 * contain them, and the list_splice() below takes care of them.
 		 */
 		if (!ep_is_linked(&epi->rdllink))
-			list_add_tail(&epi->rdllink, &ep->rdllist);
+			ep_event_ready(ep, epi);
 	}
 	/*
 	 * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after
@@ -829,7 +838,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
 
 	/* If this file is already in the ready list we exit soon */
 	if (!ep_is_linked(&epi->rdllink))
-		list_add_tail(&epi->rdllink, &ep->rdllist);
+		ep_event_ready(ep, epi);
 
 	/*
 	 * Wake up ( if active ) both the eventpoll wait list and the ->poll()
@@ -957,7 +966,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
 
 	/* If the file is already "ready" we drop it inside the ready list */
 	if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
-		list_add_tail(&epi->rdllink, &ep->rdllist);
+		ep_event_ready(ep, epi);
 
 		/* Notify waiting tasks that events are available */
 		if (waitqueue_active(&ep->wq))
@@ -1025,7 +1034,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
 	if (revents & event->events) {
 		spin_lock_irq(&ep->lock);
 		if (!ep_is_linked(&epi->rdllink)) {
-			list_add_tail(&epi->rdllink, &ep->rdllist);
+			ep_event_ready(ep, epi);
 
 			/* Notify waiting tasks that events are available */
 			if (waitqueue_active(&ep->wq))
@@ -1094,7 +1103,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
 				 * ep_scan_ready_list() holding "mtx" and the
 				 * poll callback will queue them in ep->ovflist.
 				 */
-				list_add_tail(&epi->rdllink, &ep->rdllist);
+				ep_event_ready(ep, epi);
 			}
 		}
 	}
diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index f6856a5..b7658e1 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -26,6 +26,9 @@
 #define EPOLL_CTL_DEL 2
 #define EPOLL_CTL_MOD 3
 
+/* When target file descriptor is ready, insert it into head of ready list */
+#define EPOLLHEAD (1 << 29)
+
 /* Set the One Shot behaviour for the target file descriptor */
 #define EPOLLONESHOT (1 << 30)
 

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] New EPOLL flag: EPOLLHEAD
  2010-08-02  5:31 [PATCH] New EPOLL flag: EPOLLHEAD Li Yu
@ 2010-08-03  7:12 ` Davide Libenzi
  2010-08-03 13:09   ` Li Yu
  0 siblings, 1 reply; 4+ messages in thread
From: Davide Libenzi @ 2010-08-03  7:12 UTC (permalink / raw)
  To: Li Yu; +Cc: viro, linux-fsdevel, Linux Kernel Mailing List

On Mon, 2 Aug 2010, Li Yu wrote:

> This patch introduces to new epoll flag EPOLLHEAD. Using this flag, the trigged events will be insert at head of ready events linked list, so they likely can be processed earlier in user space. In fact, this is the most simplest events priority, is it right?

If you have such a high number of ready events in a continuous manner, that
resolving priorities at ready-time (O(Nready)) is a burden for you, you 
can simply create M epoll fds (one per priority), add the fds into the 
proper epoll fd, and use another epoll fd (or even poll(2) - if M is 
small) to gather them.



> 
> BTW: I did not subscribe linux-fsdevel mail list, so please also send reply to my gmail, thanks!
> 
> Signed-off-by: Li Yu <raise.sail@gmail.com>
> diff --git a/fs/eventpoll.c b/fs/eventpoll.c
> index 3817149..ddd1500 100644
> --- a/fs/eventpoll.c
> +++ b/fs/eventpoll.c
> @@ -72,7 +72,7 @@
>   */
>  
>  /* Epoll private bits inside the event mask */
> -#define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET)
> +#define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET | EPOLLHEAD)
>  
>  /* Maximum number of nesting allowed inside epoll sets */
>  #define EP_MAX_NESTS 4
> @@ -281,6 +281,15 @@ static inline int ep_is_linked(struct list_head *p)
>  	return !list_empty(p);
>  }
>  
> +/* The events with EPOLLHEAD can be detected first by user space. */
> +static inline void ep_event_ready(struct eventpoll *ep, struct epitem *epi)
> +{
> +	if (epi->event.events & EPOLLHEAD)
> +		list_add(&epi->rdllink, &ep->rdllist);
> +	else
> +		list_add_tail(&epi->rdllink, &ep->rdllist);
> +}
> +
>  /* Get the "struct epitem" from a wait queue pointer */
>  static inline struct epitem *ep_item_from_wait(wait_queue_t *p)
>  {
> @@ -494,7 +503,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
>  		 * contain them, and the list_splice() below takes care of them.
>  		 */
>  		if (!ep_is_linked(&epi->rdllink))
> -			list_add_tail(&epi->rdllink, &ep->rdllist);
> +			ep_event_ready(ep, epi);
>  	}
>  	/*
>  	 * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after
> @@ -829,7 +838,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
>  
>  	/* If this file is already in the ready list we exit soon */
>  	if (!ep_is_linked(&epi->rdllink))
> -		list_add_tail(&epi->rdllink, &ep->rdllist);
> +		ep_event_ready(ep, epi);
>  
>  	/*
>  	 * Wake up ( if active ) both the eventpoll wait list and the ->poll()
> @@ -957,7 +966,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
>  
>  	/* If the file is already "ready" we drop it inside the ready list */
>  	if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
> -		list_add_tail(&epi->rdllink, &ep->rdllist);
> +		ep_event_ready(ep, epi);
>  
>  		/* Notify waiting tasks that events are available */
>  		if (waitqueue_active(&ep->wq))
> @@ -1025,7 +1034,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
>  	if (revents & event->events) {
>  		spin_lock_irq(&ep->lock);
>  		if (!ep_is_linked(&epi->rdllink)) {
> -			list_add_tail(&epi->rdllink, &ep->rdllist);
> +			ep_event_ready(ep, epi);
>  
>  			/* Notify waiting tasks that events are available */
>  			if (waitqueue_active(&ep->wq))
> @@ -1094,7 +1103,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
>  				 * ep_scan_ready_list() holding "mtx" and the
>  				 * poll callback will queue them in ep->ovflist.
>  				 */
> -				list_add_tail(&epi->rdllink, &ep->rdllist);
> +				ep_event_ready(ep, epi);
>  			}
>  		}
>  	}
> diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
> index f6856a5..b7658e1 100644
> --- a/include/linux/eventpoll.h
> +++ b/include/linux/eventpoll.h
> @@ -26,6 +26,9 @@
>  #define EPOLL_CTL_DEL 2
>  #define EPOLL_CTL_MOD 3
>  
> +/* When target file descriptor is ready, insert it into head of ready list */
> +#define EPOLLHEAD (1 << 29)
> +
>  /* Set the One Shot behaviour for the target file descriptor */
>  #define EPOLLONESHOT (1 << 30)
>  
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] New EPOLL flag: EPOLLHEAD
  2010-08-03  7:12 ` Davide Libenzi
@ 2010-08-03 13:09   ` Li Yu
  2010-08-04  6:20     ` Li Yu
  0 siblings, 1 reply; 4+ messages in thread
From: Li Yu @ 2010-08-03 13:09 UTC (permalink / raw)
  To: Davide Libenzi; +Cc: viro, linux-fsdevel, Linux Kernel Mailing List

;), I know that epoll is stackable, i.e. we can use epoll_wait() on an fd
returned from epoll_create(). However, if we have just a few
priorities, for example fewer than 5 (one control connection, the
rest being data connections), then stacked epoll usage seems too
expensive here: receiving each packet always requires extra
switches into the kernel in that case. So, in my opinion,
head-inserting ready events into ep->rdllist is an excellent
solution for this case. Is that right? Or is there a better
solution here? Thanks!

Yu

2010/8/3 Davide Libenzi <davidel@xmailserver.org>:
> On Mon, 2 Aug 2010, Li Yu wrote:
>
>> This patch introduces to new epoll flag EPOLLHEAD. Using this flag, the trigged events will be insert at head of ready events linked list, so they likely can be processed earlier in user space. In fact, this is the most simplest events priority, is it right?
>
> If you have such a high number of ready events in a continous manner, that
> resolving priorities at ready-time (O(Nready)) is a burden for you, you
> can simply create M epoll fds (one per priority), add the fds into the
> proper epoll fd, and use another epoll fd (or even poll(2) - if M is
> small) to gather them.
>
>
>
>>
>> BTW: I did not subscribe linux-fsdevel mail list, so please also send reply to my gmail, thanks!
>>
>> Signed-off-by: Li Yu <raise.sail@gmail.com>
>> diff --git a/fs/eventpoll.c b/fs/eventpoll.c
>> index 3817149..ddd1500 100644
>> --- a/fs/eventpoll.c
>> +++ b/fs/eventpoll.c
>> @@ -72,7 +72,7 @@
>>   */
>>
>>  /* Epoll private bits inside the event mask */
>> -#define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET)
>> +#define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET | EPOLLHEAD)
>>
>>  /* Maximum number of nesting allowed inside epoll sets */
>>  #define EP_MAX_NESTS 4
>> @@ -281,6 +281,15 @@ static inline int ep_is_linked(struct list_head *p)
>>       return !list_empty(p);
>>  }
>>
>> +/* The events with EPOLLHEAD can be detected first by user space. */
>> +static inline void ep_event_ready(struct eventpoll *ep, struct epitem *epi)
>> +{
>> +     if (epi->event.events & EPOLLHEAD)
>> +             list_add(&epi->rdllink, &ep->rdllist);
>> +     else
>> +             list_add_tail(&epi->rdllink, &ep->rdllist);
>> +}
>> +
>>  /* Get the "struct epitem" from a wait queue pointer */
>>  static inline struct epitem *ep_item_from_wait(wait_queue_t *p)
>>  {
>> @@ -494,7 +503,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
>>                * contain them, and the list_splice() below takes care of them.
>>                */
>>               if (!ep_is_linked(&epi->rdllink))
>> -                     list_add_tail(&epi->rdllink, &ep->rdllist);
>> +                     ep_event_ready(ep, epi);
>>       }
>>       /*
>>        * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after
>> @@ -829,7 +838,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
>>
>>       /* If this file is already in the ready list we exit soon */
>>       if (!ep_is_linked(&epi->rdllink))
>> -             list_add_tail(&epi->rdllink, &ep->rdllist);
>> +             ep_event_ready(ep, epi);
>>
>>       /*
>>        * Wake up ( if active ) both the eventpoll wait list and the ->poll()
>> @@ -957,7 +966,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
>>
>>       /* If the file is already "ready" we drop it inside the ready list */
>>       if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
>> -             list_add_tail(&epi->rdllink, &ep->rdllist);
>> +             ep_event_ready(ep, epi);
>>
>>               /* Notify waiting tasks that events are available */
>>               if (waitqueue_active(&ep->wq))
>> @@ -1025,7 +1034,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
>>       if (revents & event->events) {
>>               spin_lock_irq(&ep->lock);
>>               if (!ep_is_linked(&epi->rdllink)) {
>> -                     list_add_tail(&epi->rdllink, &ep->rdllist);
>> +                     ep_event_ready(ep, epi);
>>
>>                       /* Notify waiting tasks that events are available */
>>                       if (waitqueue_active(&ep->wq))
>> @@ -1094,7 +1103,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
>>                                * ep_scan_ready_list() holding "mtx" and the
>>                                * poll callback will queue them in ep->ovflist.
>>                                */
>> -                             list_add_tail(&epi->rdllink, &ep->rdllist);
>> +                             ep_event_ready(ep, epi);
>>                       }
>>               }
>>       }
>> diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
>> index f6856a5..b7658e1 100644
>> --- a/include/linux/eventpoll.h
>> +++ b/include/linux/eventpoll.h
>> @@ -26,6 +26,9 @@
>>  #define EPOLL_CTL_DEL 2
>>  #define EPOLL_CTL_MOD 3
>>
>> +/* When target file descriptor is ready, insert it into head of ready list */
>> +#define EPOLLHEAD (1 << 29)
>> +
>>  /* Set the One Shot behaviour for the target file descriptor */
>>  #define EPOLLONESHOT (1 << 30)
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] New EPOLL flag: EPOLLHEAD
  2010-08-03 13:09   ` Li Yu
@ 2010-08-04  6:20     ` Li Yu
  0 siblings, 0 replies; 4+ messages in thread
From: Li Yu @ 2010-08-04  6:20 UTC (permalink / raw)
  To: Davide Libenzi; +Cc: linux-fsdevel, Linux Kernel Mailing List

Well, I have a new idea about this stacked event poll usage: if the
"eventpoll filesystem" could support memory mapping, then applications
would be able to gather ready events from a mapped "event area" once
the upper-layer epoll_wait() has returned. Applications could then gather
ready events even though they use two-layered event polling; in fact,
they could even use more layers, and the only runtime cost would be
indirect memory accesses instead of switching into the kernel multiple
times.

Thanks!

Yu

2010/8/3 Li Yu <raise.sail@gmail.com>:
> ;), I know that epoll is stackable, i.e. we can use epoll_wait() on fd
> that return from epoll_create() However, If we just have a few of
> priorities, for example, which are less than 5 (one is controlling
> connection, otherwise are some data connections), thus it seem that
> stacked epoll() usage is too expensive here, receiving each packet
> always requires extra times that switching into kernel in such case,
> thus I think that head-inserting ready-events into ep->rdllist is an
> excellence solution for this case, in my word, is it right? or have we
> more better solution here? Thanks!
>
> Yu
>
> 2010/8/3 Davide Libenzi <davidel@xmailserver.org>:
>> On Mon, 2 Aug 2010, Li Yu wrote:
>>
>>> This patch introduces to new epoll flag EPOLLHEAD. Using this flag, the trigged events will be insert at head of ready events linked list, so they likely can be processed earlier in user space. In fact, this is the most simplest events priority, is it right?
>>
>> If you have such a high number of ready events in a continous manner, that
>> resolving priorities at ready-time (O(Nready)) is a burden for you, you
>> can simply create M epoll fds (one per priority), add the fds into the
>> proper epoll fd, and use another epoll fd (or even poll(2) - if M is
>> small) to gather them.
>>
>>
>>
>>>
>>> BTW: I did not subscribe linux-fsdevel mail list, so please also send reply to my gmail, thanks!
>>>
>>> Signed-off-by: Li Yu <raise.sail@gmail.com>
>>> diff --git a/fs/eventpoll.c b/fs/eventpoll.c
>>> index 3817149..ddd1500 100644
>>> --- a/fs/eventpoll.c
>>> +++ b/fs/eventpoll.c
>>> @@ -72,7 +72,7 @@
>>>   */
>>>
>>>  /* Epoll private bits inside the event mask */
>>> -#define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET)
>>> +#define EP_PRIVATE_BITS (EPOLLONESHOT | EPOLLET | EPOLLHEAD)
>>>
>>>  /* Maximum number of nesting allowed inside epoll sets */
>>>  #define EP_MAX_NESTS 4
>>> @@ -281,6 +281,15 @@ static inline int ep_is_linked(struct list_head *p)
>>>       return !list_empty(p);
>>>  }
>>>
>>> +/* The events with EPOLLHEAD can be detected first by user space. */
>>> +static inline void ep_event_ready(struct eventpoll *ep, struct epitem *epi)
>>> +{
>>> +     if (epi->event.events & EPOLLHEAD)
>>> +             list_add(&epi->rdllink, &ep->rdllist);
>>> +     else
>>> +             list_add_tail(&epi->rdllink, &ep->rdllist);
>>> +}
>>> +
>>>  /* Get the "struct epitem" from a wait queue pointer */
>>>  static inline struct epitem *ep_item_from_wait(wait_queue_t *p)
>>>  {
>>> @@ -494,7 +503,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
>>>                * contain them, and the list_splice() below takes care of them.
>>>                */
>>>               if (!ep_is_linked(&epi->rdllink))
>>> -                     list_add_tail(&epi->rdllink, &ep->rdllist);
>>> +                     ep_event_ready(ep, epi);
>>>       }
>>>       /*
>>>        * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after
>>> @@ -829,7 +838,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
>>>
>>>       /* If this file is already in the ready list we exit soon */
>>>       if (!ep_is_linked(&epi->rdllink))
>>> -             list_add_tail(&epi->rdllink, &ep->rdllist);
>>> +             ep_event_ready(ep, epi);
>>>
>>>       /*
>>>        * Wake up ( if active ) both the eventpoll wait list and the ->poll()
>>> @@ -957,7 +966,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
>>>
>>>       /* If the file is already "ready" we drop it inside the ready list */
>>>       if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
>>> -             list_add_tail(&epi->rdllink, &ep->rdllist);
>>> +             ep_event_ready(ep, epi);
>>>
>>>               /* Notify waiting tasks that events are available */
>>>               if (waitqueue_active(&ep->wq))
>>> @@ -1025,7 +1034,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
>>>       if (revents & event->events) {
>>>               spin_lock_irq(&ep->lock);
>>>               if (!ep_is_linked(&epi->rdllink)) {
>>> -                     list_add_tail(&epi->rdllink, &ep->rdllist);
>>> +                     ep_event_ready(ep, epi);
>>>
>>>                       /* Notify waiting tasks that events are available */
>>>                       if (waitqueue_active(&ep->wq))
>>> @@ -1094,7 +1103,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
>>>                                * ep_scan_ready_list() holding "mtx" and the
>>>                                * poll callback will queue them in ep->ovflist.
>>>                                */
>>> -                             list_add_tail(&epi->rdllink, &ep->rdllist);
>>> +                             ep_event_ready(ep, epi);
>>>                       }
>>>               }
>>>       }
>>> diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
>>> index f6856a5..b7658e1 100644
>>> --- a/include/linux/eventpoll.h
>>> +++ b/include/linux/eventpoll.h
>>> @@ -26,6 +26,9 @@
>>>  #define EPOLL_CTL_DEL 2
>>>  #define EPOLL_CTL_MOD 3
>>>
>>> +/* When target file descriptor is ready, insert it into head of ready list */
>>> +#define EPOLLHEAD (1 << 29)
>>> +
>>>  /* Set the One Shot behaviour for the target file descriptor */
>>>  #define EPOLLONESHOT (1 << 30)
>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>
>>
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2010-08-04  6:20 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-08-02  5:31 [PATCH] New EPOLL flag: EPOLLHEAD Li Yu
2010-08-03  7:12 ` Davide Libenzi
2010-08-03 13:09   ` Li Yu
2010-08-04  6:20     ` Li Yu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).