Commit 02f670b7 authored by Zebediah Figura, committed by Vitaly Lipatov

ntdll, server: Try to avoid poll() for signaled events.

parent 4751c52d
@@ -457,18 +457,103 @@ NTSTATUS esync_open_event( HANDLE *handle, ACCESS_MASK access,
return open_esync( ESYNC_AUTO_EVENT, handle, access, attr ); /* doesn't matter which */
}
static inline void small_pause(void)
{
#ifdef __i386__
/* "rep;nop" encodes the x86 PAUSE instruction, a hint for spin-wait loops. */
__asm__ __volatile__( "rep;nop" : : : "memory" );
#else
__asm__ __volatile__( "" : : : "memory" );
#endif
}
/* Manual-reset events are actually racier than other objects in terms of shm
* state. With other objects, races don't matter, because we only treat the shm
* state as a hint that lets us skip poll()—we still have to read(). But with
* manual-reset events we don't, which means that the shm state can be out of
* sync with the actual state.
*
* In general we shouldn't have to worry about races between modifying the
* event and waiting on it. If the state changes while we're waiting, it's
* equally plausible that we caught it before or after the state changed.
* However, we can have races between SetEvent() and ResetEvent(), so that the
* event has inconsistent internal state.
*
* To solve this we have to use the other field to lock the event. Currently
* this is implemented as a spinlock, but I'm not sure if a futex might be
* better. I'm also not sure if it's possible to obviate locking by arranging
* writes and reads in a certain way.
*
* Note that we don't have to worry about locking in esync_wait_objects().
* There are only two general patterns:
*
* WaitFor()                    SetEvent()
* -------------------------
* read()
* signaled = 0
*                              signaled = 1
*                              write()
* -------------------------
* read()
*                              signaled = 1
* signaled = 0
*                              <no write(), because it was already signaled>
* -------------------------
*
* That is, if SetEvent() tries to signal the event before WaitFor() resets its
* signaled state, it won't bother trying to write(), and then the signaled
* state will be reset, so the result is a consistent non-signaled event.
* There are several variations on this pattern, but all of them are protected
* in the same way. Note, however, that this is why we have to use
* interlocked_xchg() even inside of the lock.
*/
/* Removing this spinlock is harder than it looks. esync_wait_objects() can
* deal with inconsistent state well enough, and a race between SetEvent() and
* ResetEvent() gives us license to yield either result as long as we act
* consistently, but that's not enough. Notably, esync_wait_objects() should
* probably act like a fence, so that the second half of esync_set_event() does
* not seep past a subsequent reset. That's one problem, but no guarantee there
* aren't others. */
NTSTATUS esync_set_event( HANDLE handle )
{
static const uint64_t value = 1;
struct esync *obj;
struct event *event;
NTSTATUS ret;
TRACE("%p.\n", handle);
if ((ret = get_object( handle, &obj))) return ret;
if ((ret = get_object( handle, &obj ))) return ret;
event = obj->shm;
if (obj->type == ESYNC_MANUAL_EVENT)
{
/* Acquire the spinlock. */
while (InterlockedCompareExchange( &event->locked, 1, 0 ))
small_pause();
}
/* For manual-reset events, as long as we're in a lock, we can take the
* optimization of only calling write() if the event wasn't already
* signaled.
*
* For auto-reset events, esync_wait_objects() must grab the kernel object.
* Thus if we got into a race so that the shm state is signaled but the
* eventfd is unsignaled (i.e. reset shm, set shm, set fd, reset fd), we
* *must* signal the fd now, or any waiting threads will never wake up. */
if (!InterlockedExchange( &event->signaled, 1 ) || obj->type == ESYNC_AUTO_EVENT)
{
if (write( obj->fd, &value, sizeof(value) ) == -1)
ERR("write: %s\n", strerror(errno));
}
if (write( obj->fd, &value, sizeof(value) ) == -1)
ERR("write: %s\n", strerror(errno));
if (obj->type == ESYNC_MANUAL_EVENT)
{
/* Release the spinlock. */
event->locked = 0;
}
return STATUS_SUCCESS;
}
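
To make the comments above concrete, here is a minimal standalone sketch, not part of the patch, of the manual-reset set path over a plain eventfd: take the spinlock, flip the shared "signaled" hint, and only write() on a genuine unsignaled-to-signaled transition. The names (demo_event, demo_set_event) are illustrative only, and the GCC builtins stand in for the Interlocked* calls used on the ntdll side.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/eventfd.h>
#include <unistd.h>

/* Illustrative stand-in for the shared-memory struct event. */
struct demo_event
{
    int signaled;
    int locked;
};

/* Manual-reset set: take the spinlock, flip the hint, and only touch the fd
 * on a real 0 -> 1 transition, as esync_set_event() does above. */
static void demo_set_event( struct demo_event *ev, int fd )
{
    static const uint64_t value = 1;

    while (__sync_val_compare_and_swap( &ev->locked, 0, 1 ))
        ; /* small_pause() in the real code */

    if (!__atomic_exchange_n( &ev->signaled, 1, __ATOMIC_SEQ_CST ))
    {
        if (write( fd, &value, sizeof(value) ) == -1)
            fprintf( stderr, "write: %s\n", strerror(errno) );
    }

    ev->locked = 0;
}

int main(void)
{
    struct demo_event ev = { 0, 0 };
    int fd = eventfd( 0, EFD_NONBLOCK );

    demo_set_event( &ev, fd ); /* unsignaled -> signaled: one write() */
    demo_set_event( &ev, fd ); /* already signaled: no write() at all */
    return 0;
}
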
@@ -477,14 +562,40 @@ NTSTATUS esync_reset_event( HANDLE handle )
{
uint64_t value;
struct esync *obj;
struct event *event;
NTSTATUS ret;
TRACE("%p.\n", handle);
if ((ret = get_object( handle, &obj))) return ret;
if ((ret = get_object( handle, &obj ))) return ret;
event = obj->shm;
if (read( obj->fd, &value, sizeof(value) ) == -1 && errno != EWOULDBLOCK && errno != EAGAIN)
ERR("read: %s\n", strerror(errno));
if (obj->type == ESYNC_MANUAL_EVENT)
{
/* Acquire the spinlock. */
while (InterlockedCompareExchange( &event->locked, 1, 0 ))
small_pause();
}
/* For manual-reset events, as long as we're in a lock, we can take the
* optimization of only calling read() if the event was already signaled.
*
* For auto-reset events, we have no guarantee that the previous "signaled"
* state is actually correct. We need to leave both states unsignaled after
* leaving this function, so we always have to read(). */
if (InterlockedExchange( &event->signaled, 0 ) || obj->type == ESYNC_AUTO_EVENT)
{
if (read( obj->fd, &value, sizeof(value) ) == -1 && errno != EWOULDBLOCK && errno != EAGAIN)
{
ERR("read: %s\n", strerror(errno));
}
}
if (obj->type == ESYNC_MANUAL_EVENT)
{
/* Release the spinlock. */
event->locked = 0;
}
return STATUS_SUCCESS;
}
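
The reset path is the mirror image: under the same spinlock, only drain the eventfd when the hint really flips from signaled to unsignaled, whereas an auto-reset event cannot trust the hint and always read()s. Another small sketch, again with illustrative names (the struct repeats the one from the previous sketch so this compiles on its own):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/eventfd.h>
#include <unistd.h>

struct demo_event { int signaled; int locked; }; /* as in the previous sketch */

/* Manual-reset reset: only drain the fd when the hint really flips 1 -> 0.
 * An auto-reset event would skip the hint check and always read(). */
static void demo_reset_event( struct demo_event *ev, int fd )
{
    uint64_t value;

    while (__sync_val_compare_and_swap( &ev->locked, 0, 1 ))
        ; /* spin */

    if (__atomic_exchange_n( &ev->signaled, 0, __ATOMIC_SEQ_CST ))
    {
        if (read( fd, &value, sizeof(value) ) == -1 && errno != EAGAIN)
            fprintf( stderr, "read: %s\n", strerror(errno) );
    }

    ev->locked = 0;
}

int main(void)
{
    struct demo_event ev = { 1, 0 };      /* pretend the event is signaled */
    int fd = eventfd( 1, EFD_NONBLOCK );  /* eventfd counter starts at 1 */

    demo_reset_event( &ev, fd ); /* signaled -> unsignaled: one read() */
    demo_reset_event( &ev, fd ); /* already unsignaled: no read() at all */
    return 0;
}
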
@@ -795,9 +906,31 @@ static NTSTATUS __esync_wait_objects( DWORD count, const HANDLE *handles, BOOLEAN
break;
}
case ESYNC_AUTO_EVENT:
{
struct event *event = obj->shm;
if (event->signaled)
{
if ((size = read( obj->fd, &value, sizeof(value) )) == sizeof(value))
{
TRACE("Woken up by handle %p [%d].\n", handles[i], i);
event->signaled = 0;
return i;
}
}
break;
}
case ESYNC_MANUAL_EVENT:
/* TODO */
{
struct event *event = obj->shm;
if (event->signaled)
{
TRACE("Woken up by handle %p [%d].\n", handles[i], i);
return i;
}
break;
}
case ESYNC_AUTO_SERVER:
case ESYNC_MANUAL_SERVER:
case ESYNC_QUEUE:
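
The wait-side hunk above is what the commit title refers to: before falling back to poll(), the waiter consults the shared-memory hint, and for an auto-reset event it still has to consume the eventfd because the hint may be stale. A rough standalone sketch of that fast path, with illustrative names and the assumption of one nonblocking eventfd per object; the caller would fall back to the existing poll() loop when this returns -1:

#include <stdint.h>
#include <unistd.h>

enum demo_type { DEMO_AUTO_EVENT, DEMO_MANUAL_EVENT };

struct demo_obj
{
    enum demo_type type;
    int fd;                 /* nonblocking eventfd */
    volatile int *signaled; /* hint living in shared memory */
};

/* Returns the index of an object we managed to take without poll(),
 * or -1 if the caller should fall back to the normal poll() path. */
static int demo_try_fast_path( struct demo_obj *objs, int count )
{
    uint64_t value;
    int i;

    for (i = 0; i < count; i++)
    {
        struct demo_obj *obj = &objs[i];

        if (!*obj->signaled) continue; /* hint says unsignaled: skip the syscall */

        if (obj->type == DEMO_MANUAL_EVENT)
            return i; /* manual-reset: nothing to consume, the hint is the state */

        /* Auto-reset: the hint may be stale, so actually consume the fd. */
        if (read( obj->fd, &value, sizeof(value) ) == sizeof(value))
        {
            *obj->signaled = 0;
            return i;
        }
        /* EAGAIN: someone else consumed it first; try the next object. */
    }
    return -1;
}
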
@@ -376,24 +376,77 @@ void esync_clear( int fd )
read( fd, &value, sizeof(value) );
}
static inline void small_pause(void)
{
#ifdef __i386__
__asm__ __volatile__( "rep;nop" : : : "memory" );
#else
__asm__ __volatile__( "" : : : "memory" );
#endif
}
/* Server-side event support. */
void esync_set_event( struct esync *esync )
{
static const uint64_t value = 1;
struct event *event = get_shm( esync->shm_idx );
assert( esync->obj.ops == &esync_ops );
if (write( esync->fd, &value, sizeof(value) ) == -1)
perror( "esync: write" );
assert( event != NULL );
if (debug_level)
fprintf( stderr, "esync_set_event() fd=%d\n", esync->fd );
if (esync->type == ESYNC_MANUAL_EVENT)
{
/* Acquire the spinlock. */
while (__sync_val_compare_and_swap( &event->locked, 0, 1 ))
small_pause();
}
if (!__atomic_exchange_n( &event->signaled, 1, __ATOMIC_SEQ_CST ))
{
if (write( esync->fd, &value, sizeof(value) ) == -1)
perror( "esync: write" );
}
if (esync->type == ESYNC_MANUAL_EVENT)
{
/* Release the spinlock. */
event->locked = 0;
}
}
void esync_reset_event( struct esync *esync )
{
static uint64_t value = 1;
struct event *event = get_shm( esync->shm_idx );
assert( esync->obj.ops == &esync_ops );
assert( event != NULL );
/* we don't care about the return value */
read( esync->fd, &value, sizeof(value) );
if (debug_level)
fprintf( stderr, "esync_reset_event() fd=%d\n", esync->fd );
if (esync->type == ESYNC_MANUAL_EVENT)
{
/* Acquire the spinlock. */
while (__sync_val_compare_and_swap( &event->locked, 0, 1 ))
small_pause();
}
/* Only bother signaling the fd if we weren't already signaled. */
if (__atomic_exchange_n( &event->signaled, 0, __ATOMIC_SEQ_CST ))
{
/* we don't care about the return value */
read( esync->fd, &value, sizeof(value) );
}
if (esync->type == ESYNC_MANUAL_EVENT)
{
/* Release the spinlock. */
event->locked = 0;
}
}
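
Both sides lean on eventfd counter semantics: each write() adds to a 64-bit counter, and a read() (without EFD_SEMAPHORE) returns the whole counter and resets it to zero, so a single write is enough to mark the object signaled and a single read is enough to drain any number of earlier writes. A tiny standalone illustration, independent of the patch:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/eventfd.h>
#include <unistd.h>

int main(void)
{
    int fd = eventfd( 0, EFD_NONBLOCK );
    uint64_t value = 1, out = 0;

    write( fd, &value, sizeof(value) ); /* counter = 1: the event is signaled */
    write( fd, &value, sizeof(value) ); /* counter = 2: still just "signaled" */

    read( fd, &out, sizeof(out) );      /* returns 2 and resets the counter */
    printf( "drained %" PRIu64 "\n", out );

    if (read( fd, &out, sizeof(out) ) == -1) /* counter is 0: EAGAIN */
        perror( "eventfd read" );
    return 0;
}
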
DECL_HANDLER(create_esync)