From c9122964436b16dd44a5fb8dfd92f0768ad6fef3 Mon Sep 17 00:00:00 2001
From: kprotty
Date: Tue, 17 Dec 2019 08:57:07 -0600
Subject: [PATCH] SpinLock: loopHint & yield distinction

---
 lib/std/mutex.zig       | 16 ++++++----------
 lib/std/reset_event.zig |  7 ++-----
 lib/std/spinlock.zig    | 28 +++++++++++++++++-----------
 3 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/lib/std/mutex.zig b/lib/std/mutex.zig
index 4ade4c3ef4..26f8e29dac 100644
--- a/lib/std/mutex.zig
+++ b/lib/std/mutex.zig
@@ -75,7 +75,7 @@ else if (builtin.os == .windows)
 
         fn acquireSlow(self: *Mutex) Held {
             @setCold(true);
-            while (true) : (SpinLock.yield(1)) {
+            while (true) : (SpinLock.loopHint(1)) {
                 const waiters = @atomicLoad(u32, &self.waiters, .Monotonic);
 
                 // try and take lock if unlocked
@@ -99,7 +99,7 @@ else if (builtin.os == .windows)
                 // unlock without a rmw/cmpxchg instruction
                 @atomicStore(u8, @ptrCast(*u8, &self.mutex.locked), 0, .Release);
 
-                while (true) : (SpinLock.yield(1)) {
+                while (true) : (SpinLock.loopHint(1)) {
                     const waiters = @atomicLoad(u32, &self.mutex.waiters, .Monotonic);
 
                     // no one is waiting
@@ -142,10 +142,6 @@ else if (builtin.link_libc or builtin.os == .linux)
             self.* = undefined;
         }
 
-        fn yield() void {
-            os.sched_yield() catch SpinLock.yield(30);
-        }
-
         pub fn tryAcquire(self: *Mutex) ?Held {
             if (@cmpxchgWeak(usize, &self.state, 0, MUTEX_LOCK, .Acquire, .Monotonic) != null)
                 return null;
@@ -175,7 +171,7 @@ else if (builtin.link_libc or builtin.os == .linux)
                     } else if (state & QUEUE_MASK == 0) {
                         break;
                     }
-                    yield();
+                    SpinLock.yield();
                     state = @atomicLoad(usize, &self.state, .Monotonic);
                 }
 
@@ -198,7 +194,7 @@ else if (builtin.link_libc or builtin.os == .linux)
                             break;
                         };
                     }
-                    yield();
+                    SpinLock.yield();
                     state = @atomicLoad(usize, &self.state, .Monotonic);
                 }
             }
@@ -225,7 +221,7 @@ else if (builtin.link_libc or builtin.os == .linux)
             // try and lock the LFIO queue to pop a node off,
             // stopping altogether if its already locked or the queue is empty
             var state = @atomicLoad(usize, &self.state, .Monotonic);
-            while (true) : (std.SpinLock.yield(1)) {
+            while (true) : (SpinLock.loopHint(1)) {
                 if (state & QUEUE_LOCK != 0 or state & QUEUE_MASK == 0)
                     return;
                 state = @cmpxchgWeak(usize, &self.state, state, state | QUEUE_LOCK, .Acquire, .Monotonic) orelse break;
@@ -234,7 +230,7 @@ else if (builtin.link_libc or builtin.os == .linux)
             // acquired the QUEUE_LOCK, try and pop a node to wake it.
             // if the mutex is locked, then unset QUEUE_LOCK and let
             // the thread who holds the mutex do the wake-up on unlock()
-            while (true) : (std.SpinLock.yield(1)) {
+            while (true) : (SpinLock.loopHint(1)) {
                 if ((state & MUTEX_LOCK) != 0) {
                     state = @cmpxchgWeak(usize, &self.state, state, state & ~QUEUE_LOCK, .Release, .Acquire) orelse return;
                 } else {
diff --git a/lib/std/reset_event.zig b/lib/std/reset_event.zig
index da26034e1a..30e90641a2 100644
--- a/lib/std/reset_event.zig
+++ b/lib/std/reset_event.zig
@@ -234,10 +234,7 @@ const AtomicEvent = struct {
                 timer = time.Timer.start() catch unreachable;
 
             while (@atomicLoad(i32, ptr, .Acquire) == expected) {
-                switch (builtin.os) {
-                    .windows => SpinLock.yield(400),
-                    else => os.sched_yield() catch SpinLock.yield(1),
-                }
+                SpinLock.yield();
                 if (timeout) |timeout_ns| {
                     if (timer.read() >= timeout_ns)
                         return error.TimedOut;
@@ -320,7 +317,7 @@ const AtomicEvent = struct {
                         return @intToPtr(?windows.HANDLE, handle);
                     },
                     LOADING => {
-                        SpinLock.yield(1000);
+                        SpinLock.yield();
                         handle = @atomicLoad(usize, &event_handle, .Monotonic);
                     },
                     else => {
diff --git a/lib/std/spinlock.zig b/lib/std/spinlock.zig
index ef6ac482b2..4efd244367 100644
--- a/lib/std/spinlock.zig
+++ b/lib/std/spinlock.zig
@@ -35,27 +35,33 @@ pub const SpinLock = struct {
     pub fn acquire(self: *SpinLock) Held {
         while (true) {
             return self.tryAcquire() orelse {
-                // On native windows, SwitchToThread is too expensive,
-                // and yielding for 380-410 iterations was found to be
-                // a nice sweet spot. Posix systems on the other hand,
-                // especially linux, perform better by yielding the thread.
-                switch (builtin.os) {
-                    .windows => yield(400),
-                    else => std.os.sched_yield() catch yield(1),
-                }
+                yield();
                 continue;
             };
         }
     }
 
+    pub fn yield() void {
+        // On native windows, SwitchToThread is too expensive,
+        // and yielding for 380-410 iterations was found to be
+        // a nice sweet spot. Posix systems on the other hand,
+        // especially linux, perform better by yielding the thread.
+        switch (builtin.os) {
+            .windows => loopHint(400),
+            else => std.os.sched_yield() catch loopHint(1),
+        }
+    }
+
     /// Hint to the cpu that execution is spinning
     /// for the given amount of iterations.
-    pub fn yield(iterations: usize) void {
+    pub fn loopHint(iterations: usize) void {
         var i = iterations;
         while (i != 0) : (i -= 1) {
             switch (builtin.arch) {
-                .i386, .x86_64 => asm volatile ("pause"),
-                .arm, .aarch64 => asm volatile ("yield"),
+                // these instructions use a memory clobber as they
+                // flush the pipeline of any speculated reads/writes.
+                .i386, .x86_64 => asm volatile ("pause" ::: "memory"),
+                .arm, .aarch64 => asm volatile ("yield" ::: "memory"),
                 else => std.os.sched_yield() catch {},
             }
         }