Merge pull request #13513 from ziglang/faster-wasm-gpa

WebAssembly-only fast allocator
2024-12-05 03:19:08 +00:00 · 2022-11-30 01:46:37 -05:00 · 2022-11-30 01:46:37 -05:00 · 71038c42f5
commit 71038c42f5
parent e35f297aeb 7f063b2c52
4 changed files with 679 additions and 340 deletions
--- a/lib/std/heap.zig
+++ b/lib/std/heap.zig
@ -1,13 +1,13 @@
 const std = @import("std.zig");
 const builtin = @import("builtin");
 const root = @import("root");
-const debug = std.debug;
-const assert = debug.assert;
+const assert = std.debug.assert;
 const testing = std.testing;
 const mem = std.mem;
 const os = std.os;
 const c = std.c;
 const maxInt = std.math.maxInt;
+const Allocator = std.mem.Allocator;

 pub const LoggingAllocator = @import("heap/logging_allocator.zig").LoggingAllocator;
 pub const loggingAllocator = @import("heap/logging_allocator.zig").loggingAllocator;
@ -16,8 +16,12 @@ pub const LogToWriterAllocator = @import("heap/log_to_writer_allocator.zig").Log
 pub const logToWriterAllocator = @import("heap/log_to_writer_allocator.zig").logToWriterAllocator;
 pub const ArenaAllocator = @import("heap/arena_allocator.zig").ArenaAllocator;
 pub const GeneralPurposeAllocator = @import("heap/general_purpose_allocator.zig").GeneralPurposeAllocator;
+pub const WasmAllocator = @import("heap/WasmAllocator.zig");
+pub const WasmPageAllocator = @import("heap/WasmPageAllocator.zig");
+pub const PageAllocator = @import("heap/PageAllocator.zig");

-const Allocator = mem.Allocator;
+/// TODO Utilize this on Windows.
+pub var next_mmap_addr_hint: ?[*]align(mem.page_size) u8 = null;

 const CAllocator = struct {
    comptime {
@ -227,303 +231,6 @@ pub fn alignPageAllocLen(full_len: usize, len: usize) usize {
    return aligned_len;
 }

-/// TODO Utilize this on Windows.
-pub var next_mmap_addr_hint: ?[*]align(mem.page_size) u8 = null;
-
-const PageAllocator = struct {
-    const vtable = Allocator.VTable{
-        .alloc = alloc,
-        .resize = resize,
-        .free = free,
-    };
-
-    fn alloc(_: *anyopaque, n: usize, log2_align: u8, ra: usize) ?[*]u8 {
-        _ = ra;
-        _ = log2_align;
-        assert(n > 0);
-        if (n > maxInt(usize) - (mem.page_size - 1)) return null;
-        const aligned_len = mem.alignForward(n, mem.page_size);
-
-        if (builtin.os.tag == .windows) {
-            const w = os.windows;
-            const addr = w.VirtualAlloc(
-                null,
-                aligned_len,
-                w.MEM_COMMIT | w.MEM_RESERVE,
-                w.PAGE_READWRITE,
-            ) catch return null;
-            return @ptrCast([*]align(mem.page_size) u8, @alignCast(mem.page_size, addr));
-        }
-
-        const hint = @atomicLoad(@TypeOf(next_mmap_addr_hint), &next_mmap_addr_hint, .Unordered);
-        const slice = os.mmap(
-            hint,
-            aligned_len,
-            os.PROT.READ | os.PROT.WRITE,
-            os.MAP.PRIVATE | os.MAP.ANONYMOUS,
-            -1,
-            0,
-        ) catch return null;
-        assert(mem.isAligned(@ptrToInt(slice.ptr), mem.page_size));
-        const new_hint = @alignCast(mem.page_size, slice.ptr + aligned_len);
-        _ = @cmpxchgStrong(@TypeOf(next_mmap_addr_hint), &next_mmap_addr_hint, hint, new_hint, .Monotonic, .Monotonic);
-        return slice.ptr;
-    }
-
-    fn resize(
-        _: *anyopaque,
-        buf_unaligned: []u8,
-        log2_buf_align: u8,
-        new_size: usize,
-        return_address: usize,
-    ) bool {
-        _ = log2_buf_align;
-        _ = return_address;
-        const new_size_aligned = mem.alignForward(new_size, mem.page_size);
-
-        if (builtin.os.tag == .windows) {
-            const w = os.windows;
-            if (new_size <= buf_unaligned.len) {
-                const base_addr = @ptrToInt(buf_unaligned.ptr);
-                const old_addr_end = base_addr + buf_unaligned.len;
-                const new_addr_end = mem.alignForward(base_addr + new_size, mem.page_size);
-                if (old_addr_end > new_addr_end) {
-                    // For shrinking that is not releasing, we will only
-                    // decommit the pages not needed anymore.
-                    w.VirtualFree(
-                        @intToPtr(*anyopaque, new_addr_end),
-                        old_addr_end - new_addr_end,
-                        w.MEM_DECOMMIT,
-                    );
-                }
-                return true;
-            }
-            const old_size_aligned = mem.alignForward(buf_unaligned.len, mem.page_size);
-            if (new_size_aligned <= old_size_aligned) {
-                return true;
-            }
-            return false;
-        }
-
-        const buf_aligned_len = mem.alignForward(buf_unaligned.len, mem.page_size);
-        if (new_size_aligned == buf_aligned_len)
-            return true;
-
-        if (new_size_aligned < buf_aligned_len) {
-            const ptr = @alignCast(mem.page_size, buf_unaligned.ptr + new_size_aligned);
-            // TODO: if the next_mmap_addr_hint is within the unmapped range, update it
-            os.munmap(ptr[0 .. buf_aligned_len - new_size_aligned]);
-            return true;
-        }
-
-        // TODO: call mremap
-        // TODO: if the next_mmap_addr_hint is within the remapped range, update it
-        return false;
-    }
-
-    fn free(_: *anyopaque, slice: []u8, log2_buf_align: u8, return_address: usize) void {
-        _ = log2_buf_align;
-        _ = return_address;
-
-        if (builtin.os.tag == .windows) {
-            os.windows.VirtualFree(slice.ptr, 0, os.windows.MEM_RELEASE);
-        } else {
-            const buf_aligned_len = mem.alignForward(slice.len, mem.page_size);
-            const ptr = @alignCast(mem.page_size, slice.ptr);
-            os.munmap(ptr[0..buf_aligned_len]);
-        }
-    }
-};
-
-const WasmPageAllocator = struct {
-    comptime {
-        if (!builtin.target.isWasm()) {
-            @compileError("WasmPageAllocator is only available for wasm32 arch");
-        }
-    }
-
-    const vtable = Allocator.VTable{
-        .alloc = alloc,
-        .resize = resize,
-        .free = free,
-    };
-
-    const PageStatus = enum(u1) {
-        used = 0,
-        free = 1,
-
-        pub const none_free: u8 = 0;
-    };
-
-    const FreeBlock = struct {
-        data: []u128,
-
-        const Io = std.packed_int_array.PackedIntIo(u1, .Little);
-
-        fn totalPages(self: FreeBlock) usize {
-            return self.data.len * 128;
-        }
-
-        fn isInitialized(self: FreeBlock) bool {
-            return self.data.len > 0;
-        }
-
-        fn getBit(self: FreeBlock, idx: usize) PageStatus {
-            const bit_offset = 0;
-            return @intToEnum(PageStatus, Io.get(mem.sliceAsBytes(self.data), idx, bit_offset));
-        }
-
-        fn setBits(self: FreeBlock, start_idx: usize, len: usize, val: PageStatus) void {
-            const bit_offset = 0;
-            var i: usize = 0;
-            while (i < len) : (i += 1) {
-                Io.set(mem.sliceAsBytes(self.data), start_idx + i, bit_offset, @enumToInt(val));
-            }
-        }
-
-        // Use '0xFFFFFFFF' as a _missing_ sentinel
-        // This saves ~50 bytes compared to returning a nullable
-
-        // We can guarantee that conventional memory never gets this big,
-        // and wasm32 would not be able to address this memory (32 GB > usize).
-
-        // Revisit if this is settled: https://github.com/ziglang/zig/issues/3806
-        const not_found = std.math.maxInt(usize);
-
-        fn useRecycled(self: FreeBlock, num_pages: usize, log2_align: u8) usize {
-            @setCold(true);
-            for (self.data) |segment, i| {
-                const spills_into_next = @bitCast(i128, segment) < 0;
-                const has_enough_bits = @popCount(segment) >= num_pages;
-
-                if (!spills_into_next and !has_enough_bits) continue;
-
-                var j: usize = i * 128;
-                while (j < (i + 1) * 128) : (j += 1) {
-                    var count: usize = 0;
-                    while (j + count < self.totalPages() and self.getBit(j + count) == .free) {
-                        count += 1;
-                        const addr = j * mem.page_size;
-                        if (count >= num_pages and mem.isAlignedLog2(addr, log2_align)) {
-                            self.setBits(j, num_pages, .used);
-                            return j;
-                        }
-                    }
-                    j += count;
-                }
-            }
-            return not_found;
-        }
-
-        fn recycle(self: FreeBlock, start_idx: usize, len: usize) void {
-            self.setBits(start_idx, len, .free);
-        }
-    };
-
-    var _conventional_data = [_]u128{0} ** 16;
-    // Marking `conventional` as const saves ~40 bytes
-    const conventional = FreeBlock{ .data = &_conventional_data };
-    var extended = FreeBlock{ .data = &[_]u128{} };
-
-    fn extendedOffset() usize {
-        return conventional.totalPages();
-    }
-
-    fn nPages(memsize: usize) usize {
-        return mem.alignForward(memsize, mem.page_size) / mem.page_size;
-    }
-
-    fn alloc(_: *anyopaque, len: usize, log2_align: u8, ra: usize) ?[*]u8 {
-        _ = ra;
-        if (len > maxInt(usize) - (mem.page_size - 1)) return null;
-        const page_count = nPages(len);
-        const page_idx = allocPages(page_count, log2_align) catch return null;
-        return @intToPtr([*]u8, page_idx * mem.page_size);
-    }
-
-    fn allocPages(page_count: usize, log2_align: u8) !usize {
-        {
-            const idx = conventional.useRecycled(page_count, log2_align);
-            if (idx != FreeBlock.not_found) {
-                return idx;
-            }
-        }
-
-        const idx = extended.useRecycled(page_count, log2_align);
-        if (idx != FreeBlock.not_found) {
-            return idx + extendedOffset();
-        }
-
-        const next_page_idx = @wasmMemorySize(0);
-        const next_page_addr = next_page_idx * mem.page_size;
-        const aligned_addr = mem.alignForwardLog2(next_page_addr, log2_align);
-        const drop_page_count = @divExact(aligned_addr - next_page_addr, mem.page_size);
-        const result = @wasmMemoryGrow(0, @intCast(u32, drop_page_count + page_count));
-        if (result <= 0)
-            return error.OutOfMemory;
-        assert(result == next_page_idx);
-        const aligned_page_idx = next_page_idx + drop_page_count;
-        if (drop_page_count > 0) {
-            freePages(next_page_idx, aligned_page_idx);
-        }
-        return @intCast(usize, aligned_page_idx);
-    }
-
-    fn freePages(start: usize, end: usize) void {
-        if (start < extendedOffset()) {
-            conventional.recycle(start, @min(extendedOffset(), end) - start);
-        }
-        if (end > extendedOffset()) {
-            var new_end = end;
-            if (!extended.isInitialized()) {
-                // Steal the last page from the memory currently being recycled
-                // TODO: would it be better if we use the first page instead?
-                new_end -= 1;
-
-                extended.data = @intToPtr([*]u128, new_end * mem.page_size)[0 .. mem.page_size / @sizeOf(u128)];
-                // Since this is the first page being freed and we consume it, assume *nothing* is free.
-                mem.set(u128, extended.data, PageStatus.none_free);
-            }
-            const clamped_start = @max(extendedOffset(), start);
-            extended.recycle(clamped_start - extendedOffset(), new_end - clamped_start);
-        }
-    }
-
-    fn resize(
-        _: *anyopaque,
-        buf: []u8,
-        log2_buf_align: u8,
-        new_len: usize,
-        return_address: usize,
-    ) bool {
-        _ = log2_buf_align;
-        _ = return_address;
-        const aligned_len = mem.alignForward(buf.len, mem.page_size);
-        if (new_len > aligned_len) return false;
-        const current_n = nPages(aligned_len);
-        const new_n = nPages(new_len);
-        if (new_n != current_n) {
-            const base = nPages(@ptrToInt(buf.ptr));
-            freePages(base + new_n, base + current_n);
-        }
-        return true;
-    }
-
-    fn free(
-        _: *anyopaque,
-        buf: []u8,
-        log2_buf_align: u8,
-        return_address: usize,
-    ) void {
-        _ = log2_buf_align;
-        _ = return_address;
-        const aligned_len = mem.alignForward(buf.len, mem.page_size);
-        const current_n = nPages(aligned_len);
-        const base = nPages(@ptrToInt(buf.ptr));
-        freePages(base, base + current_n);
-    }
-};
-
 pub const HeapAllocator = switch (builtin.os.tag) {
    .windows => struct {
        heap_handle: ?HeapHandle,
@ -859,43 +566,6 @@ test "raw_c_allocator" {
    }
 }

-test "WasmPageAllocator internals" {
-    if (comptime builtin.target.isWasm()) {
-        const conventional_memsize = WasmPageAllocator.conventional.totalPages() * mem.page_size;
-        const initial = try page_allocator.alloc(u8, mem.page_size);
-        try testing.expect(@ptrToInt(initial.ptr) < conventional_memsize); // If this isn't conventional, the rest of these tests don't make sense. Also we have a serious memory leak in the test suite.
-
-        var inplace = try page_allocator.realloc(initial, 1);
-        try testing.expectEqual(initial.ptr, inplace.ptr);
-        inplace = try page_allocator.realloc(inplace, 4);
-        try testing.expectEqual(initial.ptr, inplace.ptr);
-        page_allocator.free(inplace);
-
-        const reuse = try page_allocator.alloc(u8, 1);
-        try testing.expectEqual(initial.ptr, reuse.ptr);
-        page_allocator.free(reuse);
-
-        // This segment may span conventional and extended which has really complex rules so we're just ignoring it for now.
-        const padding = try page_allocator.alloc(u8, conventional_memsize);
-        page_allocator.free(padding);
-
-        const extended = try page_allocator.alloc(u8, conventional_memsize);
-        try testing.expect(@ptrToInt(extended.ptr) >= conventional_memsize);
-
-        const use_small = try page_allocator.alloc(u8, 1);
-        try testing.expectEqual(initial.ptr, use_small.ptr);
-        page_allocator.free(use_small);
-
-        inplace = try page_allocator.realloc(extended, 1);
-        try testing.expectEqual(extended.ptr, inplace.ptr);
-        page_allocator.free(inplace);
-
-        const reuse_extended = try page_allocator.alloc(u8, conventional_memsize);
-        try testing.expectEqual(extended.ptr, reuse_extended.ptr);
-        page_allocator.free(reuse_extended);
-    }
-}
-
 test "PageAllocator" {
    const allocator = page_allocator;
    try testAllocator(allocator);
@ -1163,7 +833,14 @@ pub fn testAllocatorAlignedShrink(base_allocator: mem.Allocator) !void {
    try testing.expect(slice[60] == 0x34);
 }

-test "heap" {
-    _ = @import("heap/logging_allocator.zig");
-    _ = @import("heap/log_to_writer_allocator.zig");
+// Discarding each allocator declaration references it from this test block.
+// The wasm-only allocators are referenced only when targeting wasm, because
+// their files raise a compile error on every other target.
+test {
+    _ = LoggingAllocator;
+    _ = LogToWriterAllocator;
+    _ = ScopedLoggingAllocator;
+    _ = ArenaAllocator;
+    _ = GeneralPurposeAllocator;
+    if (comptime builtin.target.isWasm()) {
+        _ = WasmAllocator;
+        _ = WasmPageAllocator;
+    }
 }
--- a/lib/std/heap/PageAllocator.zig
+++ b/lib/std/heap/PageAllocator.zig
@ -0,0 +1,110 @@
+const std = @import("../std.zig");
+const builtin = @import("builtin");
+const Allocator = std.mem.Allocator;
+const mem = std.mem;
+const os = std.os;
+const maxInt = std.math.maxInt;
+const assert = std.debug.assert;
+
+/// Function table that plugs this file's `alloc`, `resize` and `free` into
+/// the `Allocator` interface.
+pub const vtable = Allocator.VTable{
+    .alloc = alloc,
+    .resize = resize,
+    .free = free,
+};
+
+/// Maps `n` bytes, rounded up to a whole number of pages, and returns a
+/// pointer to the mapping. Returns null when the rounded length would
+/// overflow `usize` or when the OS call fails.
+/// `log2_align` and `ra` are ignored. `n` must be greater than zero.
+fn alloc(_: *anyopaque, n: usize, log2_align: u8, ra: usize) ?[*]u8 {
+    _ = ra;
+    _ = log2_align;
+    assert(n > 0);
+    // Rounding `n` up to the next page boundary must not wrap around.
+    if (n > maxInt(usize) - (mem.page_size - 1)) return null;
+    const aligned_len = mem.alignForward(n, mem.page_size);
+
+    if (builtin.os.tag == .windows) {
+        const w = os.windows;
+        // Reserve and commit in a single call; no base address is requested.
+        const addr = w.VirtualAlloc(
+            null,
+            aligned_len,
+            w.MEM_COMMIT | w.MEM_RESERVE,
+            w.PAGE_READWRITE,
+        ) catch return null;
+        return @ptrCast([*]align(mem.page_size) u8, @alignCast(mem.page_size, addr));
+    }
+
+    // Pass the shared address hint to mmap as the requested location.
+    const hint = @atomicLoad(@TypeOf(std.heap.next_mmap_addr_hint), &std.heap.next_mmap_addr_hint, .Unordered);
+    const slice = os.mmap(
+        hint,
+        aligned_len,
+        os.PROT.READ | os.PROT.WRITE,
+        os.MAP.PRIVATE | os.MAP.ANONYMOUS,
+        -1,
+        0,
+    ) catch return null;
+    assert(mem.isAligned(@ptrToInt(slice.ptr), mem.page_size));
+    // Move the hint to just past the new mapping, but only if it still holds
+    // the value read above. The outcome of the exchange is discarded, so a
+    // concurrent update by someone else simply wins.
+    const new_hint = @alignCast(mem.page_size, slice.ptr + aligned_len);
+    _ = @cmpxchgStrong(@TypeOf(std.heap.next_mmap_addr_hint), &std.heap.next_mmap_addr_hint, hint, new_hint, .Monotonic, .Monotonic);
+    return slice.ptr;
+}
+
+/// Attempts to resize `buf_unaligned` in place to `new_size` bytes.
+/// Returns true when the request fits in the pages the buffer already
+/// occupies (releasing or decommitting whole pages that are no longer needed)
+/// and false when more pages would be required.
+/// `log2_buf_align` and `return_address` are ignored.
+fn resize(
+    _: *anyopaque,
+    buf_unaligned: []u8,
+    log2_buf_align: u8,
+    new_size: usize,
+    return_address: usize,
+) bool {
+    _ = log2_buf_align;
+    _ = return_address;
+    const new_size_aligned = mem.alignForward(new_size, mem.page_size);
+
+    if (builtin.os.tag == .windows) {
+        const w = os.windows;
+        if (new_size <= buf_unaligned.len) {
+            const base_addr = @ptrToInt(buf_unaligned.ptr);
+            const old_addr_end = base_addr + buf_unaligned.len;
+            // First page boundary at or after the new end of the buffer.
+            const new_addr_end = mem.alignForward(base_addr + new_size, mem.page_size);
+            if (old_addr_end > new_addr_end) {
+                // For shrinking that is not releasing, we will only
+                // decommit the pages not needed anymore.
+                w.VirtualFree(
+                    @intToPtr(*anyopaque, new_addr_end),
+                    old_addr_end - new_addr_end,
+                    w.MEM_DECOMMIT,
+                );
+            }
+            return true;
+        }
+        // Growing succeeds only while the new size stays within the pages
+        // already covered by the old length.
+        const old_size_aligned = mem.alignForward(buf_unaligned.len, mem.page_size);
+        if (new_size_aligned <= old_size_aligned) {
+            return true;
+        }
+        return false;
+    }
+
+    const buf_aligned_len = mem.alignForward(buf_unaligned.len, mem.page_size);
+    // Same number of pages before and after: nothing to do.
+    if (new_size_aligned == buf_aligned_len)
+        return true;
+
+    if (new_size_aligned < buf_aligned_len) {
+        // Unmap the whole pages past the new page-aligned end.
+        const ptr = @alignCast(mem.page_size, buf_unaligned.ptr + new_size_aligned);
+        // TODO: if the next_mmap_addr_hint is within the unmapped range, update it
+        os.munmap(ptr[0 .. buf_aligned_len - new_size_aligned]);
+        return true;
+    }
+
+    // Growing past the current pages is not supported in place.
+    // TODO: call mremap
+    // TODO: if the next_mmap_addr_hint is within the remapped range, update it
+    return false;
+}
+
+/// Returns the pages backing `slice` to the operating system.
+/// `log2_buf_align` and `return_address` are ignored.
+fn free(_: *anyopaque, slice: []u8, log2_buf_align: u8, return_address: usize) void {
+    _ = return_address;
+    _ = log2_buf_align;
+
+    if (builtin.os.tag == .windows) {
+        const w = os.windows;
+        // Size 0 with MEM_RELEASE is passed, together with the base pointer.
+        w.VirtualFree(slice.ptr, 0, w.MEM_RELEASE);
+        return;
+    }
+
+    // Unmap the page-rounded extent of the buffer.
+    const page_aligned_ptr = @alignCast(mem.page_size, slice.ptr);
+    const mapped_len = mem.alignForward(slice.len, mem.page_size);
+    os.munmap(page_aligned_ptr[0..mapped_len]);
+}
--- a/lib/std/heap/WasmAllocator.zig
+++ b/lib/std/heap/WasmAllocator.zig
@ -0,0 +1,317 @@
+//! This is intended to be merged into GeneralPurposeAllocator at some point.
+
+const std = @import("../std.zig");
+const builtin = @import("builtin");
+const Allocator = std.mem.Allocator;
+const mem = std.mem;
+const assert = std.debug.assert;
+const wasm = std.wasm;
+const math = std.math;
+
+// Compile-time guard: this file uses wasm-only builtins, so referencing it on
+// a target for which `builtin.target.isWasm()` is false is a compile error.
+comptime {
+    if (!builtin.target.isWasm()) {
+        @compileError("WasmAllocator is only available for wasm32 arch");
+    }
+}
+
+/// Function table that plugs this file's `alloc`, `resize` and `free` into
+/// the `Allocator` interface.
+pub const vtable = Allocator.VTable{
+    .alloc = alloc,
+    .resize = resize,
+    .free = free,
+};
+
+/// Alias of `Allocator.Error`.
+pub const Error = Allocator.Error;
+
+const max_usize = math.maxInt(usize);
+const ushift = math.Log2Int(usize);
+/// Size in bytes of a "bigpage", the unit in which memory is requested via
+/// `allocBigPages`.
+const bigpage_size = 64 * 1024;
+/// Number of wasm pages making up one bigpage.
+const pages_per_bigpage = bigpage_size / wasm.page_size;
+/// Number of whole bigpages that fit below `max_usize`.
+const bigpage_count = max_usize / bigpage_size;
+
+/// Because of storing free list pointers, the minimum size class is 3.
+/// (Computed as log2 of the smallest power of two holding `1 + @sizeOf(usize)`
+/// bytes; the value 3 corresponds to a 4-byte `usize`.)
+const min_class = math.log2(math.ceilPowerOfTwoAssert(usize, 1 + @sizeOf(usize)));
+/// Number of small size classes: power-of-two slot sizes from
+/// `1 << min_class` up to, but not including, `bigpage_size`.
+const size_class_count = math.log2(bigpage_size) - min_class;
+/// 0 - 1 bigpage
+/// 1 - 2 bigpages
+/// 2 - 4 bigpages
+/// etc.
+const big_size_class_count = math.log2(bigpage_count);
+
+/// For each size class, the address of the next never-used slot. A value that
+/// is a multiple of `wasm.page_size` (including the initial 0) makes `alloc`
+/// fetch a fresh bigpage for that class.
+var next_addrs = [1]usize{0} ** size_class_count;
+/// For each size class, points to the freed pointer.
+/// 0 means the free list for that class is empty.
+var frees = [1]usize{0} ** size_class_count;
+/// For each big size class, points to the freed pointer.
+/// 0 means the free list for that class is empty.
+var big_frees = [1]usize{0} ** big_size_class_count;
+
+/// Allocates `len` bytes aligned to `1 << log2_align` and returns a pointer to
+/// them, or null when the request cannot be satisfied.
+///
+/// Every allocation reserves an extra `@sizeOf(usize)` bytes so that a freed
+/// slot can hold the free-list link in its last word. The padded length is
+/// rounded up to a power of two; sizes below `bigpage_size` are served from
+/// per-class slots carved out of bigpages, larger ones from whole bigpages.
+/// `ctx` and `return_address` are ignored.
+fn alloc(ctx: *anyopaque, len: usize, log2_align: u8, return_address: usize) ?[*]u8 {
+    _ = ctx;
+    _ = return_address;
+    // Make room for the freelist next pointer.
+    const alignment = @as(usize, 1) << @intCast(Allocator.Log2Align, log2_align);
+    const actual_len = @max(len +| @sizeOf(usize), alignment);
+    const slot_size = math.ceilPowerOfTwo(usize, actual_len) catch return null;
+    const class = math.log2(slot_size) - min_class;
+    if (class < size_class_count) {
+        const addr = a: {
+            // Prefer reusing the most recently freed slot of this class.
+            const top_free_ptr = frees[class];
+            if (top_free_ptr != 0) {
+                const node = @intToPtr(*usize, top_free_ptr + (slot_size - @sizeOf(usize)));
+                frees[class] = node.*;
+                break :a top_free_ptr;
+            }
+
+            // Otherwise bump-allocate; a page-aligned cursor means the
+            // current bigpage is exhausted (or none was obtained yet).
+            const next_addr = next_addrs[class];
+            if (next_addr % wasm.page_size == 0) {
+                const addr = allocBigPages(1);
+                if (addr == 0) return null;
+                next_addrs[class] = addr + slot_size;
+                break :a addr;
+            } else {
+                next_addrs[class] = next_addr + slot_size;
+                break :a next_addr;
+            }
+        };
+        return @intToPtr([*]u8, addr);
+    }
+    const bigpages_needed = bigPagesNeeded(actual_len);
+    const addr = allocBigPages(bigpages_needed);
+    // allocBigPages reports failure as address 0. Casting 0 to the
+    // non-optional `[*]u8` is illegal, so turn it into null explicitly, the
+    // same way the small-allocation path above does.
+    if (addr == 0) return null;
+    return @intToPtr([*]u8, addr);
+}
+
+/// Reports whether `buf` can be resized in place to `new_len` bytes.
+/// Memory is never moved or released here: the answer is true exactly when
+/// the old and new lengths (each padded by `@sizeOf(usize)` and raised to at
+/// least the alignment) round to the same slot size, or for big allocations
+/// to the same power-of-two number of bigpages.
+/// `ctx` and `return_address` are ignored.
+fn resize(
+    ctx: *anyopaque,
+    buf: []u8,
+    log2_buf_align: u8,
+    new_len: usize,
+    return_address: usize,
+) bool {
+    _ = ctx;
+    _ = return_address;
+    // We don't want to move anything from one size class to another, but we
+    // can recover bytes in between powers of two.
+    const buf_align = @as(usize, 1) << @intCast(Allocator.Log2Align, log2_buf_align);
+    const old_actual_len = @max(buf.len + @sizeOf(usize), buf_align);
+    const new_actual_len = @max(new_len +| @sizeOf(usize), buf_align);
+    const old_small_slot_size = math.ceilPowerOfTwoAssert(usize, old_actual_len);
+    const old_small_class = math.log2(old_small_slot_size) - min_class;
+    if (old_small_class < size_class_count) {
+        const new_small_slot_size = math.ceilPowerOfTwo(usize, new_actual_len) catch return false;
+        return old_small_slot_size == new_small_slot_size;
+    } else {
+        // bigPagesNeeded adds `bigpage_size + @sizeOf(usize) - 1` to its
+        // argument; a requested length that close to the top of the address
+        // space would overflow there, and can never fit the existing slot.
+        if (new_actual_len > max_usize - (bigpage_size + (@sizeOf(usize) - 1))) return false;
+        const old_bigpages_needed = bigPagesNeeded(old_actual_len);
+        const old_big_slot_pages = math.ceilPowerOfTwoAssert(usize, old_bigpages_needed);
+        const new_bigpages_needed = bigPagesNeeded(new_actual_len);
+        const new_big_slot_pages = math.ceilPowerOfTwo(usize, new_bigpages_needed) catch return false;
+        return old_big_slot_pages == new_big_slot_pages;
+    }
+}
+
+/// Puts the slot backing `buf` on the free list of its size class.
+/// The class is recomputed from `buf.len` and the alignment; the previous
+/// list head is stored in the last `usize` of the slot and the slot's
+/// address becomes the new head. `ctx` and `return_address` are ignored.
+fn free(
+    ctx: *anyopaque,
+    buf: []u8,
+    log2_buf_align: u8,
+    return_address: usize,
+) void {
+    _ = return_address;
+    _ = ctx;
+    const alignment = @as(usize, 1) << @intCast(Allocator.Log2Align, log2_buf_align);
+    const padded_len = @max(buf.len + @sizeOf(usize), alignment);
+    const small_slot_bytes = math.ceilPowerOfTwoAssert(usize, padded_len);
+    const small_class = math.log2(small_slot_bytes) - min_class;
+    const base = @ptrToInt(buf.ptr);
+
+    if (small_class < size_class_count) {
+        // Small slot: the link lives in the final word of the slot.
+        const link = @intToPtr(*usize, base + (small_slot_bytes - @sizeOf(usize)));
+        link.* = frees[small_class];
+        frees[small_class] = base;
+        return;
+    }
+
+    // Big slot: a power-of-two number of bigpages, link in its final word.
+    const slot_pages = math.ceilPowerOfTwoAssert(usize, bigPagesNeeded(padded_len));
+    const slot_bytes = slot_pages * bigpage_size;
+    const link = @intToPtr(*usize, base + (slot_bytes - @sizeOf(usize)));
+    const big_class = math.log2(slot_pages);
+    link.* = big_frees[big_class];
+    big_frees[big_class] = base;
+}
+
+/// Converts a byte count into a number of bigpages by adding
+/// `bigpage_size + @sizeOf(usize) - 1` and dividing by `bigpage_size`
+/// (integer division), so the result is always at least 1.
+inline fn bigPagesNeeded(byte_count: usize) usize {
+    const padding = bigpage_size + (@sizeOf(usize) - 1);
+    const padded_bytes = byte_count + padding;
+    return padded_bytes / bigpage_size;
+}
+
+/// Returns the address of a block of at least `n` bigpages, or 0 on failure.
+/// `n` is rounded up to a power of two. A previously freed block of that
+/// class is reused when available; otherwise wasm memory is grown.
+fn allocBigPages(n: usize) usize {
+    const pow2_pages = math.ceilPowerOfTwoAssert(usize, n);
+    const class = math.log2(pow2_pages);
+    // `big_frees` only has `big_size_class_count` entries. A larger request
+    // would index past the end of the array, so report it as a failure.
+    if (class >= big_size_class_count) return 0;
+    const slot_size_bytes = pow2_pages * bigpage_size;
+
+    // Pop the head of this class's free list; the link to the next free
+    // block is stored in the last word of the block.
+    const top_free_ptr = big_frees[class];
+    if (top_free_ptr != 0) {
+        const node = @intToPtr(*usize, top_free_ptr + (slot_size_bytes - @sizeOf(usize)));
+        big_frees[class] = node.*;
+        return top_free_ptr;
+    }
+
+    // Nothing to reuse: grow memory. A non-positive result is treated as
+    // failure.
+    const page_index = @wasmMemoryGrow(0, pow2_pages * pages_per_bigpage);
+    if (page_index <= 0) return 0;
+    const addr = @intCast(u32, page_index) * wasm.page_size;
+    return addr;
+}
+
+/// Allocator handle used by the tests in this file. `ptr` is left undefined
+/// because `alloc`, `resize` and `free` all discard their `ctx` argument.
+const test_ally = Allocator{
+    .ptr = undefined,
+    .vtable = &vtable,
+};
+
+// Creates 513 `u64` objects, then destroys them in creation order.
+test "small allocations - free in same order" {
+    var list: [513]*u64 = undefined;
+
+    for (list) |*slot| {
+        slot.* = try test_ally.create(u64);
+    }
+
+    for (list) |item| {
+        test_ally.destroy(item);
+    }
+}
+
+// Creates 513 `u64` objects, then destroys them last-created first.
+test "small allocations - free in reverse order" {
+    var list: [513]*u64 = undefined;
+
+    for (list) |*slot| {
+        slot.* = try test_ally.create(u64);
+    }
+
+    var remaining: usize = list.len;
+    while (remaining > 0) : (remaining -= 1) {
+        test_ally.destroy(list[remaining - 1]);
+    }
+}
+
+// Each request is several hundred KiB of `u64`, i.e. larger than
+// `bigpage_size`. Frees are interleaved with allocations so that a later
+// request may be served from a previously freed block.
+test "large allocations" {
+    const ptr1 = try test_ally.alloc(u64, 42768);
+    const ptr2 = try test_ally.alloc(u64, 52768);
+    test_ally.free(ptr1);
+    const ptr3 = try test_ally.alloc(u64, 62768);
+    test_ally.free(ptr3);
+    test_ally.free(ptr2);
+}
+
+// A request of `maxInt(usize)` bytes cannot be rounded up to a power of two,
+// so it must fail with OutOfMemory rather than crash.
+test "very large allocation" {
+    try std.testing.expectError(error.OutOfMemory, test_ally.alloc(u8, math.maxInt(usize)));
+}
+
+// Growing within the same slot must keep the address; growing beyond it must
+// preserve the contents.
+test "realloc" {
+    var slice = try test_ally.alignedAlloc(u8, @alignOf(u32), 1);
+    // `slice` is reassigned below; the deferred free runs at scope exit.
+    defer test_ally.free(slice);
+    slice[0] = 0x12;
+
+    // This reallocation should keep its pointer address.
+    const old_slice = slice;
+    slice = try test_ally.realloc(slice, 2);
+    try std.testing.expect(old_slice.ptr == slice.ptr);
+    try std.testing.expect(slice[0] == 0x12);
+    slice[1] = 0x34;
+
+    // This requires upgrading to a larger size class
+    slice = try test_ally.realloc(slice, 17);
+    try std.testing.expect(slice[0] == 0x12);
+    try std.testing.expect(slice[1] == 0x34);
+}
+
+// Shrinks a 20-byte allocation to 17 and then 16 bytes via `resize` and
+// checks that the remaining bytes keep their fill value.
+test "shrink" {
+    var slice = try test_ally.alloc(u8, 20);
+    defer test_ally.free(slice);
+
+    mem.set(u8, slice, 0x11);
+
+    try std.testing.expect(test_ally.resize(slice, 17));
+    slice = slice[0..17];
+
+    for (slice) |b| {
+        try std.testing.expect(b == 0x11);
+    }
+
+    try std.testing.expect(test_ally.resize(slice, 16));
+    slice = slice[0..16];
+
+    for (slice) |b| {
+        try std.testing.expect(b == 0x11);
+    }
+}
+
+// Grows an allocation of just under two bigpages in small steps. Only the
+// first step is required to stay at the same address; the later ones merely
+// have to succeed.
+test "large object - grow" {
+    var slice1 = try test_ally.alloc(u8, bigpage_size * 2 - 20);
+    defer test_ally.free(slice1);
+
+    const old = slice1;
+    slice1 = try test_ally.realloc(slice1, bigpage_size * 2 - 10);
+    try std.testing.expect(slice1.ptr == old.ptr);
+
+    slice1 = try test_ally.realloc(slice1, bigpage_size * 2);
+    slice1 = try test_ally.realloc(slice1, bigpage_size * 2 + 1);
+}
+
+// Reallocating from 70 bytes to more than two bigpages must carry the
+// existing contents over.
+test "realloc small object to large object" {
+    var slice = try test_ally.alloc(u8, 70);
+    defer test_ally.free(slice);
+    slice[0] = 0x12;
+    slice[60] = 0x34;
+
+    // This requires upgrading to a large object
+    const large_object_size = bigpage_size * 2 + 50;
+    slice = try test_ally.realloc(slice, large_object_size);
+    try std.testing.expect(slice[0] == 0x12);
+    try std.testing.expect(slice[60] == 0x34);
+}
+
+// Shrinks an allocation of slightly more than two bigpages while it remains
+// larger than (or equal to) two bigpages, checking the contents after each
+// step.
+test "shrink large object to large object" {
+    var slice = try test_ally.alloc(u8, bigpage_size * 2 + 50);
+    defer test_ally.free(slice);
+    slice[0] = 0x12;
+    slice[60] = 0x34;
+
+    try std.testing.expect(test_ally.resize(slice, bigpage_size * 2 + 1));
+    slice = slice[0 .. bigpage_size * 2 + 1];
+    try std.testing.expect(slice[0] == 0x12);
+    try std.testing.expect(slice[60] == 0x34);
+
+    // Resizing to the current length must also succeed.
+    try std.testing.expect(test_ally.resize(slice, bigpage_size * 2 + 1));
+    try std.testing.expect(slice[0] == 0x12);
+    try std.testing.expect(slice[60] == 0x34);
+
+    slice = try test_ally.realloc(slice, bigpage_size * 2);
+    try std.testing.expect(slice[0] == 0x12);
+    try std.testing.expect(slice[60] == 0x34);
+}
+
+// Reallocating from more than two bigpages down to 19 bytes must keep the
+// bytes that are still in range.
+test "realloc large object to small object" {
+    var slice = try test_ally.alloc(u8, bigpage_size * 2 + 50);
+    defer test_ally.free(slice);
+    slice[0] = 0x12;
+    slice[16] = 0x34;
+
+    slice = try test_ally.realloc(slice, 19);
+    try std.testing.expect(slice[0] == 0x12);
+    try std.testing.expect(slice[16] == 0x34);
+}
+
+// Allocates and frees two mid-sized objects.
+// NOTE(review): the requests are 1025 and 3000 bytes. Once the free-list word
+// is added and the size is rounded up to a power of two, these appear to land
+// in the 2048- and 4096-byte slots rather than the sizes in the test name;
+// confirm which classes were intended.
+test "objects of size 1024 and 2048" {
+    const slice = try test_ally.alloc(u8, 1025);
+    const slice2 = try test_ally.alloc(u8, 3000);
+
+    test_ally.free(slice);
+    test_ally.free(slice2);
+}
+
+// Runs the shared allocator test helpers from `std.heap` against this
+// allocator.
+test "standard allocator tests" {
+    try std.heap.testAllocator(test_ally);
+    try std.heap.testAllocatorAligned(test_ally);
+}
--- a/lib/std/heap/WasmPageAllocator.zig
+++ b/lib/std/heap/WasmPageAllocator.zig
@ -0,0 +1,235 @@
+const WasmPageAllocator = @This();
+const std = @import("../std.zig");
+const builtin = @import("builtin");
+const Allocator = std.mem.Allocator;
+const mem = std.mem;
+const maxInt = std.math.maxInt;
+const assert = std.debug.assert;
+
+// Reject non-WebAssembly targets at compile time: the rest of this file uses
+// the `@wasmMemorySize` / `@wasmMemoryGrow` builtins. The guard is
+// `isWasm()`, so the message names WebAssembly in general rather than a
+// single architecture.
+comptime {
+    if (!builtin.target.isWasm()) {
+        @compileError("WasmPageAllocator is only available for WebAssembly targets");
+    }
+}
+
+/// Function table that plugs this file's `alloc`, `resize`, and `free`
+/// implementations into the `Allocator` interface.
+pub const vtable = Allocator.VTable{
+    .alloc = alloc,
+    .resize = resize,
+    .free = free,
+};
+
+/// State of a single page, stored as one bit in a `FreeBlock` bitmap.
+const PageStatus = enum(u1) {
+    used = 0,
+    free = 1,
+
+    /// Fill value whose bits all read as `used`; `freePages` uses it to
+    /// initialize a freshly claimed bitmap.
+    pub const none_free: u8 = 0;
+};
+
+/// A bitmap of page statuses: one bit per page, `PageStatus.free` (1) or
+/// `PageStatus.used` (0). Page indices passed to its methods are relative to
+/// the start of the block, not absolute page numbers.
+const FreeBlock = struct {
+    /// Backing storage for the bitmap; every `u128` element tracks 128 pages.
+    data: []u128,
+
+    /// Accessor for 1-bit entries over `data` viewed as a byte slice.
+    const Io = std.packed_int_array.PackedIntIo(u1, .Little);
+
+    /// Number of pages this bitmap can track (128 per backing word).
+    fn totalPages(self: FreeBlock) usize {
+        return self.data.len * 128;
+    }
+
+    /// Whether any backing storage has been attached to this bitmap.
+    fn isInitialized(self: FreeBlock) bool {
+        return self.data.len > 0;
+    }
+
+    /// Returns the status recorded for page `idx`.
+    fn getBit(self: FreeBlock, idx: usize) PageStatus {
+        const bit_offset = 0;
+        return @intToEnum(PageStatus, Io.get(mem.sliceAsBytes(self.data), idx, bit_offset));
+    }
+
+    /// Sets the status of `len` consecutive pages, starting at `start_idx`,
+    /// to `val`. Bits are written one at a time.
+    fn setBits(self: FreeBlock, start_idx: usize, len: usize, val: PageStatus) void {
+        const bit_offset = 0;
+        var i: usize = 0;
+        while (i < len) : (i += 1) {
+            Io.set(mem.sliceAsBytes(self.data), start_idx + i, bit_offset, @enumToInt(val));
+        }
+    }
+
+    // `maxInt(usize)` (0xFFFFFFFF on wasm32) serves as the "nothing found"
+    // sentinel returned by `useRecycled`.
+    // This saves ~50 bytes compared to returning a nullable.
+
+    // We can guarantee that conventional memory never gets this big,
+    // and wasm32 would not be able to address this memory (32 GB > usize).
+
+    // Revisit if this is settled: https://github.com/ziglang/zig/issues/3806
+    const not_found = maxInt(usize);
+
+    /// Searches the bitmap for a run of at least `num_pages` free pages whose
+    /// first page, converted to a byte offset via `mem.page_size`, satisfies
+    /// `mem.isAlignedLog2(..., log2_align)`. On success the first `num_pages`
+    /// pages of the run are marked used and the block-relative index of the
+    /// first page is returned; otherwise `not_found` is returned.
+    fn useRecycled(self: FreeBlock, num_pages: usize, log2_align: u8) usize {
+        @setCold(true);
+        for (self.data) |segment, i| {
+            // True when the most significant bit of this word is set (the
+            // value is negative when viewed as i128) — presumably the last
+            // page tracked by the word, in which case a free run may continue
+            // into the next word.
+            const spills_into_next = @bitCast(i128, segment) < 0;
+            // Total free bits in this word; they are not necessarily contiguous.
+            const has_enough_bits = @popCount(segment) >= num_pages;
+
+            // Skip words that can neither hold the run nor start one that
+            // continues into the following word.
+            if (!spills_into_next and !has_enough_bits) continue;
+
+            var j: usize = i * 128;
+            while (j < (i + 1) * 128) : (j += 1) {
+                // Count consecutive free pages starting at `j`; the run is
+                // allowed to extend past the end of the current word.
+                var count: usize = 0;
+                while (j + count < self.totalPages() and self.getBit(j + count) == .free) {
+                    count += 1;
+                    // NOTE(review): the address is derived from the
+                    // block-relative index `j`; `allocPages` adds
+                    // `extendedOffset()` to the result only afterwards.
+                    const addr = j * mem.page_size;
+                    if (count >= num_pages and mem.isAlignedLog2(addr, log2_align)) {
+                        self.setBits(j, num_pages, .used);
+                        return j;
+                    }
+                }
+                // Jump over the run just examined; together with the loop's
+                // own `j += 1` this also steps past the page that ended it.
+                j += count;
+            }
+        }
+        return not_found;
+    }
+
+    /// Marks `len` pages starting at `start_idx` as free.
+    fn recycle(self: FreeBlock, start_idx: usize, len: usize) void {
+        self.setBits(start_idx, len, .free);
+    }
+};
+
+// Statically allocated bitmap storage for `conventional`: 16 words of 128
+// bits each, all initially zero (every page reads as `PageStatus.used`).
+var _conventional_data = [_]u128{0} ** 16;
+// Marking `conventional` as const saves ~40 bytes
+const conventional = FreeBlock{ .data = &_conventional_data };
+// Bitmap for pages at or beyond `extendedOffset()`. It starts with no backing
+// storage; `freePages` attaches a page to it the first time it is needed.
+var extended = FreeBlock{ .data = &[_]u128{} };
+
+/// Index of the first page tracked by `extended`, i.e. the number of pages
+/// covered by the `conventional` bitmap.
+fn extendedOffset() usize {
+    const conventional_page_count = conventional.totalPages();
+    return conventional_page_count;
+}
+
+/// Number of whole pages needed to hold `memsize` bytes (rounds up to a
+/// multiple of `mem.page_size` before dividing).
+fn nPages(memsize: usize) usize {
+    const rounded_up = mem.alignForward(memsize, mem.page_size);
+    return rounded_up / mem.page_size;
+}
+
+/// `Allocator.VTable.alloc` implementation. Reserves enough whole pages to
+/// hold `len` bytes and returns a pointer to the first one.
+/// Returns null when rounding `len` up to a page multiple would overflow
+/// `usize`, or when `allocPages` fails. `ctx` and `ra` are unused.
+fn alloc(ctx: *anyopaque, len: usize, log2_align: u8, ra: usize) ?[*]u8 {
+    _ = ra;
+    _ = ctx;
+
+    // Largest length that can still be rounded up to a page boundary.
+    const max_len = maxInt(usize) - (mem.page_size - 1);
+    if (len > max_len) return null;
+
+    const first_page = allocPages(nPages(len), log2_align) catch return null;
+    return @intToPtr([*]u8, first_page * mem.page_size);
+}
+
+/// Finds or creates `page_count` contiguous pages and returns the absolute
+/// index of the first one.
+///
+/// Previously freed pages are tried first (the `conventional` bitmap, then
+/// `extended`); if neither has a suitable run, linear memory is grown.
+/// Returns `error.OutOfMemory` when `@wasmMemoryGrow` reports a result <= 0.
+fn allocPages(page_count: usize, log2_align: u8) !usize {
+    const conventional_idx = conventional.useRecycled(page_count, log2_align);
+    if (conventional_idx != FreeBlock.not_found) return conventional_idx;
+
+    // Indices from `extended` are block-relative; convert to absolute.
+    const extended_idx = extended.useRecycled(page_count, log2_align);
+    if (extended_idx != FreeBlock.not_found) return extended_idx + extendedOffset();
+
+    // Nothing recyclable: grow memory. Extra leading pages may be requested
+    // so that the returned region starts at an aligned address.
+    const old_page_count = @wasmMemorySize(0);
+    const old_end_addr = old_page_count * mem.page_size;
+    const padding_pages = @divExact(
+        mem.alignForwardLog2(old_end_addr, log2_align) - old_end_addr,
+        mem.page_size,
+    );
+    const grow_result = @wasmMemoryGrow(0, @intCast(u32, padding_pages + page_count));
+    if (grow_result <= 0) return error.OutOfMemory;
+    assert(grow_result == old_page_count);
+
+    const first_page = old_page_count + padding_pages;
+    // The alignment padding is not handed to the caller, so make it
+    // available for later allocations.
+    if (padding_pages > 0) freePages(old_page_count, first_page);
+    return @intCast(usize, first_page);
+}
+
+/// Marks the absolute page range [`start`, `end`) as free, splitting it
+/// between the `conventional` and `extended` bitmaps at `extendedOffset()`.
+///
+/// If `extended` has no storage yet, the last page of the range is taken to
+/// hold its bitmap and is therefore not recorded as free.
+fn freePages(start: usize, end: usize) void {
+    const boundary = extendedOffset();
+
+    if (start < boundary) {
+        const conventional_end = @min(boundary, end);
+        conventional.recycle(start, conventional_end - start);
+    }
+
+    // Nothing reaches into the extended range.
+    if (end <= boundary) return;
+
+    var usable_end = end;
+    if (!extended.isInitialized()) {
+        // Steal the last page from the memory currently being recycled
+        // TODO: would it be better if we use the first page instead?
+        usable_end -= 1;
+
+        const bitmap_addr = usable_end * mem.page_size;
+        const bitmap_len = mem.page_size / @sizeOf(u128);
+        extended.data = @intToPtr([*]u128, bitmap_addr)[0..bitmap_len];
+        // This is the first page being freed and it is consumed by the
+        // bitmap itself, so start from the assumption that *nothing* is free.
+        mem.set(u128, extended.data, PageStatus.none_free);
+    }
+
+    const first_extended = @max(boundary, start);
+    extended.recycle(first_extended - boundary, usable_end - first_extended);
+}
+
+/// `Allocator.VTable.resize` implementation. Succeeds only when `new_len`
+/// fits in the pages already backing `buf`; trailing pages that are no longer
+/// needed are returned via `freePages`. Never moves the allocation.
+/// `ctx`, `log2_buf_align`, and `return_address` are unused.
+fn resize(
+    ctx: *anyopaque,
+    buf: []u8,
+    log2_buf_align: u8,
+    new_len: usize,
+    return_address: usize,
+) bool {
+    _ = return_address;
+    _ = log2_buf_align;
+    _ = ctx;
+
+    // Bytes actually backing `buf`, since allocations are whole pages.
+    const capacity = mem.alignForward(buf.len, mem.page_size);
+    if (new_len > capacity) return false;
+
+    const old_pages = nPages(capacity);
+    const kept_pages = nPages(new_len);
+    if (kept_pages == old_pages) return true;
+
+    const first_page = nPages(@ptrToInt(buf.ptr));
+    freePages(first_page + kept_pages, first_page + old_pages);
+    return true;
+}
+
+/// `Allocator.VTable.free` implementation. Returns every page backing `buf`
+/// to the free bitmaps. `ctx`, `log2_buf_align`, and `return_address` are
+/// unused.
+fn free(
+    ctx: *anyopaque,
+    buf: []u8,
+    log2_buf_align: u8,
+    return_address: usize,
+) void {
+    _ = return_address;
+    _ = log2_buf_align;
+    _ = ctx;
+
+    const page_span = nPages(mem.alignForward(buf.len, mem.page_size));
+    const first_page = nPages(@ptrToInt(buf.ptr));
+    freePages(first_page, first_page + page_span);
+}
+
+// Exercises page recycling through `std.heap.page_allocator`, first for
+// addresses below the conventional limit and then for addresses above it.
+// NOTE(review): assumes `std.heap.page_allocator` is backed by this file on
+// wasm targets — confirm in lib/std/heap.zig.
+test "internals" {
+    const page_allocator = std.heap.page_allocator;
+    const testing = std.testing;
+
+    // Number of bytes covered by the `conventional` bitmap.
+    const conventional_memsize = WasmPageAllocator.conventional.totalPages() * mem.page_size;
+    const initial = try page_allocator.alloc(u8, mem.page_size);
+    // If this isn't conventional, the rest of these tests don't make sense.
+    // Also we have a serious memory leak in the test suite.
+    try testing.expect(@ptrToInt(initial.ptr) < conventional_memsize);
+
+    // Reallocating within the same page is expected to keep the address.
+    var inplace = try page_allocator.realloc(initial, 1);
+    try testing.expectEqual(initial.ptr, inplace.ptr);
+    inplace = try page_allocator.realloc(inplace, 4);
+    try testing.expectEqual(initial.ptr, inplace.ptr);
+    page_allocator.free(inplace);
+
+    // A freed page is expected to be handed out again for the next request.
+    const reuse = try page_allocator.alloc(u8, 1);
+    try testing.expectEqual(initial.ptr, reuse.ptr);
+    page_allocator.free(reuse);
+
+    // This segment may span conventional and extended which has really complex rules so we're just ignoring it for now.
+    const padding = try page_allocator.alloc(u8, conventional_memsize);
+    page_allocator.free(padding);
+
+    // This allocation is expected to land at or beyond the conventional limit.
+    const ext = try page_allocator.alloc(u8, conventional_memsize);
+    try testing.expect(@ptrToInt(ext.ptr) >= conventional_memsize);
+
+    // Small requests are still expected to reuse the first conventional page.
+    const use_small = try page_allocator.alloc(u8, 1);
+    try testing.expectEqual(initial.ptr, use_small.ptr);
+    page_allocator.free(use_small);
+
+    // Shrinking the extended allocation keeps its address; then release it.
+    inplace = try page_allocator.realloc(ext, 1);
+    try testing.expectEqual(ext.ptr, inplace.ptr);
+    page_allocator.free(inplace);
+
+    // The freed extended pages are expected to be reused for a request of the
+    // same size.
+    const reuse_extended = try page_allocator.alloc(u8, conventional_memsize);
+    try testing.expectEqual(ext.ptr, reuse_extended.ptr);
+    page_allocator.free(reuse_extended);
+}