diff --git a/lib/std/fmt.zig b/lib/std/fmt.zig index df6aac2e21..6bb7e26da8 100644 --- a/lib/std/fmt.zig +++ b/lib/std/fmt.zig @@ -23,7 +23,7 @@ pub const FormatOptions = struct { precision: ?usize = null, width: ?usize = null, alignment: Alignment = .right, - fill: u8 = ' ', + fill: u21 = ' ', }; /// Renders fmt string with args, calling `writer` with slices of bytes. @@ -211,14 +211,18 @@ fn cacheString(str: anytype) []const u8 { pub const Placeholder = struct { specifier_arg: []const u8, - fill: u8, + fill: u21, alignment: Alignment, arg: Specifier, width: Specifier, precision: Specifier, pub fn parse(comptime str: anytype) Placeholder { - comptime var parser = Parser{ .buf = &str }; + const view = std.unicode.Utf8View.initComptime(&str); + comptime var parser = Parser{ + .buf = &str, + .iter = view.iterator(), + }; // Parse the positional argument number const arg = comptime parser.specifier() catch |err| @@ -230,7 +234,7 @@ pub const Placeholder = struct { // Skip the colon, if present if (comptime parser.char()) |ch| { if (ch != ':') { - @compileError("expected : or }, found '" ++ [1]u8{ch} ++ "'"); + @compileError("expected : or }, found '" ++ unicode.utf8EncodeComptime(ch) ++ "'"); } } @@ -265,7 +269,7 @@ pub const Placeholder = struct { // Skip the dot, if present if (comptime parser.char()) |ch| { if (ch != '.') { - @compileError("expected . or }, found '" ++ [1]u8{ch} ++ "'"); + @compileError("expected . or }, found '" ++ unicode.utf8EncodeComptime(ch) ++ "'"); } } @@ -274,7 +278,7 @@ pub const Placeholder = struct { @compileError(@errorName(err)); if (comptime parser.char()) |ch| { - @compileError("extraneous trailing character '" ++ [1]u8{ch} ++ "'"); + @compileError("extraneous trailing character '" ++ unicode.utf8EncodeComptime(ch) ++ "'"); } return Placeholder{ @@ -297,21 +301,23 @@ pub const Specifier = union(enum) { pub const Parser = struct { buf: []const u8, pos: usize = 0, + iter: std.unicode.Utf8Iterator = undefined, // Returns a decimal number or null if the current character is not a // digit pub fn number(self: *@This()) ?usize { var r: ?usize = null; - while (self.pos < self.buf.len) : (self.pos += 1) { - switch (self.buf[self.pos]) { + while (self.peek(0)) |code_point| { + switch (code_point) { '0'...'9' => { if (r == null) r = 0; r.? *= 10; - r.? += self.buf[self.pos] - '0'; + r.? += code_point - '0'; }, else => break, } + _ = self.iter.nextCodepoint(); } return r; @@ -319,31 +325,27 @@ pub const Parser = struct { // Returns a substring of the input starting from the current position // and ending where `ch` is found or until the end if not found - pub fn until(self: *@This(), ch: u8) []const u8 { - const start = self.pos; - - if (start >= self.buf.len) - return &[_]u8{}; - - while (self.pos < self.buf.len) : (self.pos += 1) { - if (self.buf[self.pos] == ch) break; + pub fn until(self: *@This(), ch: u21) []const u8 { + var result: []const u8 = &[_]u8{}; + while (self.peek(0)) |code_point| { + if (code_point == ch) + break; + result = result ++ (self.iter.nextCodepointSlice() orelse &[_]u8{}); } - return self.buf[start..self.pos]; + return result; } // Returns one character, if available - pub fn char(self: *@This()) ?u8 { - if (self.pos < self.buf.len) { - const ch = self.buf[self.pos]; - self.pos += 1; - return ch; + pub fn char(self: *@This()) ?u21 { + if (self.iter.nextCodepoint()) |code_point| { + return code_point; } return null; } - pub fn maybe(self: *@This(), val: u8) bool { - if (self.pos < self.buf.len and self.buf[self.pos] == val) { - self.pos += 1; + pub fn maybe(self: *@This(), val: u21) bool { + if (self.peek(0) == val) { + _ = self.iter.nextCodepoint(); return true; } return false; @@ -367,8 +369,17 @@ pub const Parser = struct { } // Returns the n-th next character or null if that's past the end - pub fn peek(self: *@This(), n: usize) ?u8 { - return if (self.pos + n < self.buf.len) self.buf[self.pos + n] else null; + pub fn peek(self: *@This(), n: usize) ?u21 { + const original_i = self.iter.i; + defer self.iter.i = original_i; + + var i = 0; + var code_point: ?u21 = null; + while (i <= n) : (i += 1) { + code_point = self.iter.nextCodepoint(); + if (code_point == null) return null; + } + return code_point; } }; @@ -965,8 +976,7 @@ pub fn formatUnicodeCodepoint( var buf: [4]u8 = undefined; const len = unicode.utf8Encode(c, &buf) catch |err| switch (err) { error.Utf8CannotEncodeSurrogateHalf, error.CodepointTooLarge => { - const len = unicode.utf8Encode(unicode.replacement_character, &buf) catch unreachable; - return formatBuf(buf[0..len], options, writer); + return formatBuf(&unicode.utf8EncodeComptime(unicode.replacement_character), options, writer); }, }; return formatBuf(buf[0..len], options, writer); @@ -985,20 +995,28 @@ pub fn formatBuf( if (padding == 0) return writer.writeAll(buf); + var fill_buffer: [4]u8 = undefined; + const fill_utf8 = if (unicode.utf8Encode(options.fill, &fill_buffer)) |len| + fill_buffer[0..len] + else |err| switch (err) { + error.Utf8CannotEncodeSurrogateHalf, + error.CodepointTooLarge, + => &unicode.utf8EncodeComptime(unicode.replacement_character), + }; switch (options.alignment) { .left => { try writer.writeAll(buf); - try writer.writeByteNTimes(options.fill, padding); + try writer.writeBytesNTimes(fill_utf8, padding); }, .center => { const left_padding = padding / 2; const right_padding = (padding + 1) / 2; - try writer.writeByteNTimes(options.fill, left_padding); + try writer.writeBytesNTimes(fill_utf8, left_padding); try writer.writeAll(buf); - try writer.writeByteNTimes(options.fill, right_padding); + try writer.writeBytesNTimes(fill_utf8, right_padding); }, .right => { - try writer.writeByteNTimes(options.fill, padding); + try writer.writeBytesNTimes(fill_utf8, padding); try writer.writeAll(buf); }, } @@ -2793,6 +2811,15 @@ test "padding" { try expectFmt("a====", "{c:=<5}", .{'a'}); } +test "padding fill char utf" { + try expectFmt("──crêpe───", "{s:─^10}", .{"crêpe"}); + try expectFmt("─────crêpe", "{s:─>10}", .{"crêpe"}); + try expectFmt("crêpe─────", "{s:─<10}", .{"crêpe"}); + try expectFmt("────a", "{c:─>5}", .{'a'}); + try expectFmt("──a──", "{c:─^5}", .{'a'}); + try expectFmt("a────", "{c:─<5}", .{'a'}); +} + test "decimal float padding" { const number: f32 = 3.1415; try expectFmt("left-pad: **3.141\n", "left-pad: {d:*>7.3}\n", .{number}); diff --git a/lib/std/io/writer.zig b/lib/std/io/writer.zig index f1c0efda90..5fe06120cd 100644 --- a/lib/std/io/writer.zig +++ b/lib/std/io/writer.zig @@ -45,6 +45,13 @@ pub fn Writer( } } + pub fn writeBytesNTimes(self: Self, bytes: []const u8, n: usize) Error!void { + var i: usize = 0; + while (i < n) : (i += 1) { + try self.writeAll(bytes); + } + } + pub inline fn writeInt(self: Self, comptime T: type, value: T, endian: std.builtin.Endian) Error!void { var bytes: [@divExact(@typeInfo(T).Int.bits, 8)]u8 = undefined; mem.writeInt(std.math.ByteAlignedInt(@TypeOf(value)), &bytes, value, endian); diff --git a/lib/std/unicode.zig b/lib/std/unicode.zig index eacb62613a..8aae6a1b5f 100644 --- a/lib/std/unicode.zig +++ b/lib/std/unicode.zig @@ -69,6 +69,19 @@ pub fn utf8Encode(c: u21, out: []u8) !u3 { return length; } +pub inline fn utf8EncodeComptime(comptime c: u21) [ + utf8CodepointSequenceLength(c) catch |err| + @compileError(@errorName(err)) +]u8 { + comptime var result: [ + utf8CodepointSequenceLength(c) catch + unreachable + ]u8 = undefined; + comptime assert((utf8Encode(c, &result) catch |err| + @compileError(@errorName(err))) == result.len); + return result; +} + const Utf8DecodeError = Utf8Decode2Error || Utf8Decode3Error || Utf8Decode4Error; /// Decodes the UTF-8 codepoint encoded in the given slice of bytes. @@ -525,6 +538,13 @@ fn testUtf8Encode() !void { try testing.expect(array[3] == 0b10001000); } +test "utf8 encode comptime" { + try testing.expectEqualSlices(u8, "€", &utf8EncodeComptime('€')); + try testing.expectEqualSlices(u8, "$", &utf8EncodeComptime('$')); + try testing.expectEqualSlices(u8, "¢", &utf8EncodeComptime('¢')); + try testing.expectEqualSlices(u8, "𐍈", &utf8EncodeComptime('𐍈')); +} + test "utf8 encode error" { try comptime testUtf8EncodeError(); try testUtf8EncodeError();