From c9122964436b16dd44a5fb8dfd92f0768ad6fef3 Mon Sep 17 00:00:00 2001
From: kprotty <kbutcher6200@gmail.com>
Date: Tue, 17 Dec 2019 08:57:07 -0600
Subject: [PATCH] SpinLock: loopHint & yield distinction

---
 lib/std/mutex.zig       | 16 ++++++----------
 lib/std/reset_event.zig |  7 ++-----
 lib/std/spinlock.zig    | 28 +++++++++++++++++-----------
 3 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/lib/std/mutex.zig b/lib/std/mutex.zig
index 4ade4c3ef4..26f8e29dac 100644
--- a/lib/std/mutex.zig
+++ b/lib/std/mutex.zig
@@ -75,7 +75,7 @@ else if (builtin.os == .windows)
 
         fn acquireSlow(self: *Mutex) Held {
             @setCold(true);
-            while (true) : (SpinLock.yield(1)) {
+            while (true) : (SpinLock.loopHint(1)) {
                 const waiters = @atomicLoad(u32, &self.waiters, .Monotonic);
 
                 // try and take lock if unlocked
@@ -99,7 +99,7 @@ else if (builtin.os == .windows)
                 // unlock without a rmw/cmpxchg instruction
                 @atomicStore(u8, @ptrCast(*u8, &self.mutex.locked), 0, .Release);
 
-                while (true) : (SpinLock.yield(1)) {
+                while (true) : (SpinLock.loopHint(1)) {
                     const waiters = @atomicLoad(u32, &self.mutex.waiters, .Monotonic);
                 
                     // no one is waiting
@@ -142,10 +142,6 @@ else if (builtin.link_libc or builtin.os == .linux)
             self.* = undefined;
         }
 
-        fn yield() void {
-            os.sched_yield() catch SpinLock.yield(30);
-        }
-
         pub fn tryAcquire(self: *Mutex) ?Held {
             if (@cmpxchgWeak(usize, &self.state, 0, MUTEX_LOCK, .Acquire, .Monotonic) != null)
                 return null;
@@ -175,7 +171,7 @@ else if (builtin.link_libc or builtin.os == .linux)
                     } else if (state & QUEUE_MASK == 0) {
                         break;
                     }
-                    yield();
+                    SpinLock.yield();
                     state = @atomicLoad(usize, &self.state, .Monotonic);
                 }
 
@@ -198,7 +194,7 @@ else if (builtin.link_libc or builtin.os == .linux)
                             break;
                         };
                     }
-                    yield();
+                    SpinLock.yield();
                     state = @atomicLoad(usize, &self.state, .Monotonic);
                 }
             }
@@ -225,7 +221,7 @@ else if (builtin.link_libc or builtin.os == .linux)
             // try and lock the LFIO queue to pop a node off,
             // stopping altogether if its already locked or the queue is empty
             var state = @atomicLoad(usize, &self.state, .Monotonic);
-            while (true) : (std.SpinLock.yield(1)) {
+            while (true) : (SpinLock.loopHint(1)) {
                 if (state & QUEUE_LOCK != 0 or state & QUEUE_MASK == 0)
                     return;
                 state = @cmpxchgWeak(usize, &self.state, state, state | QUEUE_LOCK, .Acquire, .Monotonic) orelse break;
@@ -234,7 +230,7 @@ else if (builtin.link_libc or builtin.os == .linux)
             // acquired the QUEUE_LOCK, try and pop a node to wake it.
             // if the mutex is locked, then unset QUEUE_LOCK and let
             // the thread who holds the mutex do the wake-up on unlock()
-            while (true) : (std.SpinLock.yield(1)) {
+            while (true) : (SpinLock.loopHint(1)) {
                 if ((state & MUTEX_LOCK) != 0) {
                     state = @cmpxchgWeak(usize, &self.state, state, state & ~QUEUE_LOCK, .Release, .Acquire) orelse return;
                 } else {
diff --git a/lib/std/reset_event.zig b/lib/std/reset_event.zig
index da26034e1a..30e90641a2 100644
--- a/lib/std/reset_event.zig
+++ b/lib/std/reset_event.zig
@@ -234,10 +234,7 @@ const AtomicEvent = struct {
                 timer = time.Timer.start() catch unreachable;
 
             while (@atomicLoad(i32, ptr, .Acquire) == expected) {
-                switch (builtin.os) {
-                    .windows => SpinLock.yield(400),
-                    else => os.sched_yield() catch SpinLock.yield(1),
-                }
+                SpinLock.yield();
                 if (timeout) |timeout_ns| {
                     if (timer.read() >= timeout_ns)
                         return error.TimedOut;
@@ -320,7 +317,7 @@ const AtomicEvent = struct {
                         return @intToPtr(?windows.HANDLE, handle);
                     },
                     LOADING => {
-                        SpinLock.yield(1000);
+                        SpinLock.yield();
                         handle = @atomicLoad(usize, &event_handle, .Monotonic);
                     },
                     else => {
diff --git a/lib/std/spinlock.zig b/lib/std/spinlock.zig
index ef6ac482b2..4efd244367 100644
--- a/lib/std/spinlock.zig
+++ b/lib/std/spinlock.zig
@@ -35,27 +35,33 @@ pub const SpinLock = struct {
     pub fn acquire(self: *SpinLock) Held {
         while (true) {
             return self.tryAcquire() orelse {
-                // On native windows, SwitchToThread is too expensive,
-                // and yielding for 380-410 iterations was found to be
-                // a nice sweet spot. Posix systems on the other hand,
-                // especially linux, perform better by yielding the thread.
-                switch (builtin.os) {
-                    .windows => yield(400),
-                    else => std.os.sched_yield() catch yield(1),
-                }
+                yield();
                 continue;
             };
         }
     }
 
+    pub fn yield() void {
+        // On native windows, SwitchToThread is too expensive, and
+        // spinning via loopHint for 380-410 iterations was found to be
+        // a nice sweet spot. Posix systems, especially linux, perform
+        // better by yielding the thread (one loopHint if that fails).
+        switch (builtin.os) {
+            .windows => loopHint(400),
+            else => std.os.sched_yield() catch loopHint(1),
+        }
+    }
+
     /// Hint to the cpu that execution is spinning
     /// for the given amount of iterations.
-    pub fn yield(iterations: usize) void {
+    pub fn loopHint(iterations: usize) void {
         var i = iterations;
         while (i != 0) : (i -= 1) {
             switch (builtin.arch) {
-                .i386, .x86_64 => asm volatile ("pause"),
-                .arm, .aarch64 => asm volatile ("yield"),
+                // these instructions use a memory clobber as they
+                // flush the pipeline of any speculated reads/writes.
+                .i386, .x86_64 => asm volatile ("pause" ::: "memory"),
+                .arm, .aarch64 => asm volatile ("yield" ::: "memory"),
                 else => std.os.sched_yield() catch {},
             }
         }