replace ThreadParker with ResetEvent + WordLock mutex

This commit is contained in:
kprotty 2019-11-23 16:24:01 -06:00 committed by Andrew Kelley
parent a0955990dc
commit ca2d566ec8
No known key found for this signature in database
GPG Key ID: 7C5F548F728501A9
3 changed files with 88 additions and 230 deletions

View File

@ -1,13 +1,12 @@
const std = @import("std.zig");
const builtin = @import("builtin");
const testing = std.testing;
const SpinLock = std.SpinLock;
const ThreadParker = std.ThreadParker;
const ResetEvent = std.ResetEvent;
/// Lock may be held only once. If the same thread
/// tries to acquire the same mutex twice, it deadlocks.
/// This type supports static initialization and is based off of Golang 1.13 runtime.lock_futex:
/// https://github.com/golang/go/blob/master/src/runtime/lock_futex.go
/// This type supports static initialization and is based off of Webkit's WTF Lock (via rust parking_lot)
/// https://github.com/Amanieu/parking_lot/blob/master/core/src/word_lock.rs
/// When an application is built in single threaded release mode, all the functions are
/// no-ops. In single threaded debug mode, there is deadlock detection.
pub const Mutex = if (builtin.single_threaded)
@ -39,80 +38,119 @@ pub const Mutex = if (builtin.single_threaded)
}
else
struct {
state: State, // TODO: make this an enum
parker: ThreadParker,
state: usize,
const State = enum(u32) {
Unlocked,
Sleeping,
Locked,
};
const MUTEX_LOCK: usize = 1 << 0;
const QUEUE_LOCK: usize = 1 << 1;
const QUEUE_MASK: usize = ~(MUTEX_LOCK | QUEUE_LOCK);
const QueueNode = std.atomic.Stack(ResetEvent).Node;
/// number of iterations to spin yielding the cpu
const SPIN_CPU = 4;
/// number of iterations to perform in the cpu yield loop
/// number of iterations to spin in the cpu yield loop
const SPIN_CPU_COUNT = 30;
/// number of iterations to spin yielding the thread
const SPIN_THREAD = 1;
pub fn init() Mutex {
return Mutex{
.state = .Unlocked,
.parker = ThreadParker.init(),
};
return Mutex{ .state = 0 };
}
pub fn deinit(self: *Mutex) void {
self.parker.deinit();
self.* = undefined;
}
pub const Held = struct {
mutex: *Mutex,
pub fn release(self: Held) void {
switch (@atomicRmw(State, &self.mutex.state, .Xchg, .Unlocked, .Release)) {
.Locked => {},
.Sleeping => self.mutex.parker.unpark(@ptrCast(*const u32, &self.mutex.state)),
.Unlocked => unreachable, // unlocking an unlocked mutex
else => unreachable, // should never be anything else
// since MUTEX_LOCK is the first bit, we can use (.Sub) instead of (.And, ~MUTEX_LOCK).
// this is because .Sub may be implemented more efficiently than the latter
// (e.g. `lock xadd` vs `cmpxchg` loop on x86)
const state = @atomicRmw(usize, &self.mutex.state, .Sub, MUTEX_LOCK, .Release);
if ((state & QUEUE_MASK) != 0 and (state & QUEUE_LOCK) == 0) {
self.mutex.releaseSlow(state);
}
}
};
pub fn acquire(self: *Mutex) Held {
// Try and speculatively grab the lock.
// If it fails, the state is either Locked or Sleeping
// depending on if theres a thread stuck sleeping below.
var state = @atomicRmw(State, &self.state, .Xchg, .Locked, .Acquire);
if (state == .Unlocked)
return Held{ .mutex = self };
// fast path close to SpinLock fast path
if (@cmpxchgWeak(usize, &self.state, 0, MUTEX_LOCK, .Acquire, .Monotonic)) |current_state| {
self.acquireSlow(current_state);
}
return Held{ .mutex = self };
}
fn acquireSlow(self: *Mutex, current_state: usize) void {
var spin: usize = 0;
var state = current_state;
while (true) {
// try and acquire the lock if unlocked
if ((state & MUTEX_LOCK) == 0) {
state = @cmpxchgWeak(usize, &self.state, state, state | MUTEX_LOCK, .Acquire, .Monotonic) orelse return;
continue;
}
// spin only if the waiting queue isn't empty and when it hasn't spun too much already
if ((state & QUEUE_MASK) == 0 and spin < SPIN_CPU + SPIN_THREAD) {
if (spin < SPIN_CPU) {
std.SpinLock.yield(SPIN_CPU_COUNT);
} else {
std.os.sched_yield() catch std.time.sleep(0);
}
state = @atomicLoad(usize, &self.state, .Monotonic);
continue;
}
// thread should block, try and add this event to the waiting queue
var node = QueueNode{
.next = @intToPtr(?*QueueNode, state & QUEUE_MASK),
.data = ResetEvent.init(),
};
defer node.data.deinit();
const new_state = @ptrToInt(&node) | (state & ~QUEUE_MASK);
state = @cmpxchgWeak(usize, &self.state, state, new_state, .Release, .Monotonic) orelse {
// node is in the queue, wait until a `held.release()` wakes us up.
_ = node.data.wait(null) catch unreachable;
spin = 0;
state = @atomicLoad(usize, &self.state, .Monotonic);
continue;
};
}
}
fn releaseSlow(self: *Mutex, current_state: usize) void {
// grab the QUEUE_LOCK in order to signal a waiting queue node's event.
var state = current_state;
while (true) {
if ((state & QUEUE_LOCK) != 0 or (state & QUEUE_MASK) == 0)
return;
state = @cmpxchgWeak(usize, &self.state, state, state | QUEUE_LOCK, .Acquire, .Monotonic) orelse break;
}
while (true) {
// try and acquire the lock using cpu spinning on failure
var spin: usize = 0;
while (spin < SPIN_CPU) : (spin += 1) {
var value = @atomicLoad(State, &self.state, .Monotonic);
while (value == .Unlocked)
value = @cmpxchgWeak(State, &self.state, .Unlocked, state, .Acquire, .Monotonic) orelse return Held{ .mutex = self };
SpinLock.yield(SPIN_CPU_COUNT);
// barrier needed to observe incoming state changes
defer @fence(.Acquire);
// the mutex is currently locked. try to unset the QUEUE_LOCK and let the locker wake up the next node.
// avoids waking up multiple sleeping threads which try to acquire the lock again which increases contention.
if ((state & MUTEX_LOCK) != 0) {
state = @cmpxchgWeak(usize, &self.state, state, state & ~QUEUE_LOCK, .Release, .Monotonic) orelse return;
continue;
}
// try and acquire the lock using thread rescheduling on failure
spin = 0;
while (spin < SPIN_THREAD) : (spin += 1) {
var value = @atomicLoad(State, &self.state, .Monotonic);
while (value == .Unlocked)
value = @cmpxchgWeak(State, &self.state, .Unlocked, state, .Acquire, .Monotonic) orelse return Held{ .mutex = self };
std.os.sched_yield() catch std.time.sleep(1);
}
// failed to acquire the lock, go to sleep until woken up by `Held.release()`
if (@atomicRmw(State, &self.state, .Xchg, .Sleeping, .Acquire) == .Unlocked)
return Held{ .mutex = self };
state = .Sleeping;
self.parker.park(@ptrCast(*const u32, &self.state), @enumToInt(State.Sleeping));
// try to pop the top node on the waiting queue stack to wake it up
// while at the same time unsetting the QUEUE_LOCK.
const node = @intToPtr(*QueueNode, state & QUEUE_MASK);
const new_state = @ptrToInt(node.next) | (state & MUTEX_LOCK);
state = @cmpxchgWeak(usize, &self.state, state, new_state, .Release, .Monotonic) orelse {
_ = node.data.set(false);
return;
};
}
}
};

View File

@ -1,180 +0,0 @@
const std = @import("std.zig");
const builtin = @import("builtin");
const time = std.time;
const testing = std.testing;
const assert = std.debug.assert;
const SpinLock = std.SpinLock;
const linux = std.os.linux;
const windows = std.os.windows;
pub const ThreadParker = switch (builtin.os) {
.linux => if (builtin.link_libc) PosixParker else LinuxParker,
.windows => WindowsParker,
else => if (builtin.link_libc) PosixParker else SpinParker,
};
const SpinParker = struct {
pub fn init() SpinParker {
return SpinParker{};
}
pub fn deinit(self: *SpinParker) void {}
pub fn unpark(self: *SpinParker, ptr: *const u32) void {}
pub fn park(self: *SpinParker, ptr: *const u32, expected: u32) void {
var backoff = SpinLock.Backoff.init();
while (@atomicLoad(u32, ptr, .Acquire) == expected)
backoff.yield();
}
};
const LinuxParker = struct {
pub fn init() LinuxParker {
return LinuxParker{};
}
pub fn deinit(self: *LinuxParker) void {}
pub fn unpark(self: *LinuxParker, ptr: *const u32) void {
const rc = linux.futex_wake(@ptrCast(*const i32, ptr), linux.FUTEX_WAKE | linux.FUTEX_PRIVATE_FLAG, 1);
assert(linux.getErrno(rc) == 0);
}
pub fn park(self: *LinuxParker, ptr: *const u32, expected: u32) void {
const value = @intCast(i32, expected);
while (@atomicLoad(u32, ptr, .Acquire) == expected) {
const rc = linux.futex_wait(@ptrCast(*const i32, ptr), linux.FUTEX_WAIT | linux.FUTEX_PRIVATE_FLAG, value, null);
switch (linux.getErrno(rc)) {
0, linux.EAGAIN => return,
linux.EINTR => continue,
linux.EINVAL => unreachable,
else => continue,
}
}
}
};
const WindowsParker = struct {
waiters: u32,
pub fn init() WindowsParker {
return WindowsParker{ .waiters = 0 };
}
pub fn deinit(self: *WindowsParker) void {}
pub fn unpark(self: *WindowsParker, ptr: *const u32) void {
const key = @ptrCast(*const c_void, ptr);
const handle = getEventHandle() orelse return;
var waiting = @atomicLoad(u32, &self.waiters, .Monotonic);
while (waiting != 0) {
waiting = @cmpxchgWeak(u32, &self.waiters, waiting, waiting - 1, .Acquire, .Monotonic) orelse {
const rc = windows.ntdll.NtReleaseKeyedEvent(handle, key, windows.FALSE, null);
assert(rc == 0);
return;
};
}
}
pub fn park(self: *WindowsParker, ptr: *const u32, expected: u32) void {
var spin = SpinLock.Backoff.init();
const ev_handle = getEventHandle();
const key = @ptrCast(*const c_void, ptr);
while (@atomicLoad(u32, ptr, .Monotonic) == expected) {
if (ev_handle) |handle| {
_ = @atomicRmw(u32, &self.waiters, .Add, 1, .Release);
const rc = windows.ntdll.NtWaitForKeyedEvent(handle, key, windows.FALSE, null);
assert(rc == 0);
} else {
spin.yield();
}
}
}
var event_handle = std.lazyInit(windows.HANDLE);
fn getEventHandle() ?windows.HANDLE {
if (event_handle.get()) |handle_ptr|
return handle_ptr.*;
defer event_handle.resolve();
const access_mask = windows.GENERIC_READ | windows.GENERIC_WRITE;
if (windows.ntdll.NtCreateKeyedEvent(&event_handle.data, access_mask, null, 0) != 0)
return null;
return event_handle.data;
}
};
const PosixParker = struct {
cond: c.pthread_cond_t,
mutex: c.pthread_mutex_t,
const c = std.c;
pub fn init() PosixParker {
return PosixParker{
.cond = c.PTHREAD_COND_INITIALIZER,
.mutex = c.PTHREAD_MUTEX_INITIALIZER,
};
}
pub fn deinit(self: *PosixParker) void {
// On dragonfly, the destroy functions return EINVAL if they were initialized statically.
const retm = c.pthread_mutex_destroy(&self.mutex);
assert(retm == 0 or retm == (if (builtin.os == .dragonfly) os.EINVAL else 0));
const retc = c.pthread_cond_destroy(&self.cond);
assert(retc == 0 or retc == (if (builtin.os == .dragonfly) os.EINVAL else 0));
}
pub fn unpark(self: *PosixParker, ptr: *const u32) void {
assert(c.pthread_mutex_lock(&self.mutex) == 0);
defer assert(c.pthread_mutex_unlock(&self.mutex) == 0);
assert(c.pthread_cond_signal(&self.cond) == 0);
}
pub fn park(self: *PosixParker, ptr: *const u32, expected: u32) void {
assert(c.pthread_mutex_lock(&self.mutex) == 0);
defer assert(c.pthread_mutex_unlock(&self.mutex) == 0);
while (@atomicLoad(u32, ptr, .Acquire) == expected)
assert(c.pthread_cond_wait(&self.cond, &self.mutex) == 0);
}
};
test "std.ThreadParker" {
if (builtin.single_threaded)
return error.SkipZigTest;
const Context = struct {
parker: ThreadParker,
data: u32,
fn receiver(self: *@This()) void {
self.parker.park(&self.data, 0); // receives 1
assert(@atomicRmw(u32, &self.data, .Xchg, 2, .SeqCst) == 1); // sends 2
self.parker.unpark(&self.data); // wakes up waiters on 2
self.parker.park(&self.data, 2); // receives 3
assert(@atomicRmw(u32, &self.data, .Xchg, 4, .SeqCst) == 3); // sends 4
self.parker.unpark(&self.data); // wakes up waiters on 4
}
fn sender(self: *@This()) void {
assert(@atomicRmw(u32, &self.data, .Xchg, 1, .SeqCst) == 0); // sends 1
self.parker.unpark(&self.data); // wakes up waiters on 1
self.parker.park(&self.data, 1); // receives 2
assert(@atomicRmw(u32, &self.data, .Xchg, 3, .SeqCst) == 2); // sends 3
self.parker.unpark(&self.data); // wakes up waiters on 3
self.parker.park(&self.data, 3); // receives 4
}
};
var context = Context{
.parker = ThreadParker.init(),
.data = 0,
};
defer context.parker.deinit();
var receiver = try std.Thread.spawn(&context, Context.receiver);
defer receiver.wait();
context.sender();
}

View File

@ -16,6 +16,7 @@ pub const PackedIntSlice = @import("packed_int_array.zig").PackedIntSlice;
pub const PackedIntSliceEndian = @import("packed_int_array.zig").PackedIntSliceEndian;
pub const PriorityQueue = @import("priority_queue.zig").PriorityQueue;
pub const Progress = @import("progress.zig").Progress;
pub const ResetEvent = @import("reset_event.zig").ResetEvent;
pub const SegmentedList = @import("segmented_list.zig").SegmentedList;
pub const SinglyLinkedList = @import("linked_list.zig").SinglyLinkedList;
pub const SpinLock = @import("spinlock.zig").SpinLock;
@ -23,7 +24,6 @@ pub const StringHashMap = @import("hash_map.zig").StringHashMap;
pub const TailQueue = @import("linked_list.zig").TailQueue;
pub const Target = @import("target.zig").Target;
pub const Thread = @import("thread.zig").Thread;
pub const ThreadParker = @import("parker.zig").ThreadParker;
pub const atomic = @import("atomic.zig");
pub const base64 = @import("base64.zig");