mirror of
https://github.com/ziglang/zig.git
synced 2024-11-28 08:02:32 +00:00
Fix fmt UTF-8 characters as fill (#18533)
Co-authored-by: Jacob Young <jacobly0@users.noreply.github.com>
This commit is contained in:
parent
b723296e1f
commit
279607cae5
@ -23,7 +23,7 @@ pub const FormatOptions = struct {
|
||||
precision: ?usize = null,
|
||||
width: ?usize = null,
|
||||
alignment: Alignment = .right,
|
||||
fill: u8 = ' ',
|
||||
fill: u21 = ' ',
|
||||
};
|
||||
|
||||
/// Renders fmt string with args, calling `writer` with slices of bytes.
|
||||
@ -211,14 +211,18 @@ fn cacheString(str: anytype) []const u8 {
|
||||
|
||||
pub const Placeholder = struct {
|
||||
specifier_arg: []const u8,
|
||||
fill: u8,
|
||||
fill: u21,
|
||||
alignment: Alignment,
|
||||
arg: Specifier,
|
||||
width: Specifier,
|
||||
precision: Specifier,
|
||||
|
||||
pub fn parse(comptime str: anytype) Placeholder {
|
||||
comptime var parser = Parser{ .buf = &str };
|
||||
const view = std.unicode.Utf8View.initComptime(&str);
|
||||
comptime var parser = Parser{
|
||||
.buf = &str,
|
||||
.iter = view.iterator(),
|
||||
};
|
||||
|
||||
// Parse the positional argument number
|
||||
const arg = comptime parser.specifier() catch |err|
|
||||
@ -230,7 +234,7 @@ pub const Placeholder = struct {
|
||||
// Skip the colon, if present
|
||||
if (comptime parser.char()) |ch| {
|
||||
if (ch != ':') {
|
||||
@compileError("expected : or }, found '" ++ [1]u8{ch} ++ "'");
|
||||
@compileError("expected : or }, found '" ++ unicode.utf8EncodeComptime(ch) ++ "'");
|
||||
}
|
||||
}
|
||||
|
||||
@ -265,7 +269,7 @@ pub const Placeholder = struct {
|
||||
// Skip the dot, if present
|
||||
if (comptime parser.char()) |ch| {
|
||||
if (ch != '.') {
|
||||
@compileError("expected . or }, found '" ++ [1]u8{ch} ++ "'");
|
||||
@compileError("expected . or }, found '" ++ unicode.utf8EncodeComptime(ch) ++ "'");
|
||||
}
|
||||
}
|
||||
|
||||
@ -274,7 +278,7 @@ pub const Placeholder = struct {
|
||||
@compileError(@errorName(err));
|
||||
|
||||
if (comptime parser.char()) |ch| {
|
||||
@compileError("extraneous trailing character '" ++ [1]u8{ch} ++ "'");
|
||||
@compileError("extraneous trailing character '" ++ unicode.utf8EncodeComptime(ch) ++ "'");
|
||||
}
|
||||
|
||||
return Placeholder{
|
||||
@ -297,21 +301,23 @@ pub const Specifier = union(enum) {
|
||||
pub const Parser = struct {
|
||||
buf: []const u8,
|
||||
pos: usize = 0,
|
||||
iter: std.unicode.Utf8Iterator = undefined,
|
||||
|
||||
// Returns a decimal number or null if the current character is not a
|
||||
// digit
|
||||
pub fn number(self: *@This()) ?usize {
|
||||
var r: ?usize = null;
|
||||
|
||||
while (self.pos < self.buf.len) : (self.pos += 1) {
|
||||
switch (self.buf[self.pos]) {
|
||||
while (self.peek(0)) |code_point| {
|
||||
switch (code_point) {
|
||||
'0'...'9' => {
|
||||
if (r == null) r = 0;
|
||||
r.? *= 10;
|
||||
r.? += self.buf[self.pos] - '0';
|
||||
r.? += code_point - '0';
|
||||
},
|
||||
else => break,
|
||||
}
|
||||
_ = self.iter.nextCodepoint();
|
||||
}
|
||||
|
||||
return r;
|
||||
@ -319,31 +325,27 @@ pub const Parser = struct {
|
||||
|
||||
// Returns a substring of the input starting from the current position
// and ending where `ch` is found or until the end if not found.
// `ch` itself is not consumed: afterwards the iterator is positioned on it,
// or at the end of the input when `ch` never occurs.
pub fn until(self: *@This(), ch: u21) []const u8 {
    // Remember the starting byte offset and slice the iterator's bytes at
    // the end instead of concatenating code point slices with `++`, which
    // only works at comptime and builds a copy of the input.
    const start = self.iter.i;
    while (self.peek(0)) |code_point| {
        if (code_point == ch)
            break;
        _ = self.iter.nextCodepoint();
    }
    return self.iter.bytes[start..self.iter.i];
}
|
||||
|
||||
// Returns one character (a whole code point, not a byte), if available,
// and advances the iterator past it. Returns null at the end of the input.
pub fn char(self: *@This()) ?u21 {
    return self.iter.nextCodepoint();
}
|
||||
|
||||
pub fn maybe(self: *@This(), val: u8) bool {
|
||||
if (self.pos < self.buf.len and self.buf[self.pos] == val) {
|
||||
self.pos += 1;
|
||||
pub fn maybe(self: *@This(), val: u21) bool {
|
||||
if (self.peek(0) == val) {
|
||||
_ = self.iter.nextCodepoint();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@ -367,8 +369,17 @@ pub const Parser = struct {
|
||||
}
|
||||
|
||||
// Returns the n-th next character or null if that's past the end.
// `n == 0` is the character the next call to `char` would return.
// Peeking never consumes input: the iterator position is restored on return.
pub fn peek(self: *@This(), n: usize) ?u21 {
    const original_i = self.iter.i;
    defer self.iter.i = original_i;

    // Typed as `usize` (not an untyped `comptime_int`) so this function is
    // not restricted to comptime evaluation.
    var i: usize = 0;
    var code_point: ?u21 = null;
    while (i <= n) : (i += 1) {
        code_point = self.iter.nextCodepoint();
        if (code_point == null) return null;
    }
    return code_point;
}
|
||||
};
|
||||
|
||||
@ -965,8 +976,7 @@ pub fn formatUnicodeCodepoint(
|
||||
var buf: [4]u8 = undefined;
|
||||
const len = unicode.utf8Encode(c, &buf) catch |err| switch (err) {
|
||||
error.Utf8CannotEncodeSurrogateHalf, error.CodepointTooLarge => {
|
||||
const len = unicode.utf8Encode(unicode.replacement_character, &buf) catch unreachable;
|
||||
return formatBuf(buf[0..len], options, writer);
|
||||
return formatBuf(&unicode.utf8EncodeComptime(unicode.replacement_character), options, writer);
|
||||
},
|
||||
};
|
||||
return formatBuf(buf[0..len], options, writer);
|
||||
@ -985,20 +995,28 @@ pub fn formatBuf(
|
||||
if (padding == 0)
|
||||
return writer.writeAll(buf);
|
||||
|
||||
var fill_buffer: [4]u8 = undefined;
|
||||
const fill_utf8 = if (unicode.utf8Encode(options.fill, &fill_buffer)) |len|
|
||||
fill_buffer[0..len]
|
||||
else |err| switch (err) {
|
||||
error.Utf8CannotEncodeSurrogateHalf,
|
||||
error.CodepointTooLarge,
|
||||
=> &unicode.utf8EncodeComptime(unicode.replacement_character),
|
||||
};
|
||||
switch (options.alignment) {
|
||||
.left => {
|
||||
try writer.writeAll(buf);
|
||||
try writer.writeByteNTimes(options.fill, padding);
|
||||
try writer.writeBytesNTimes(fill_utf8, padding);
|
||||
},
|
||||
.center => {
|
||||
const left_padding = padding / 2;
|
||||
const right_padding = (padding + 1) / 2;
|
||||
try writer.writeByteNTimes(options.fill, left_padding);
|
||||
try writer.writeBytesNTimes(fill_utf8, left_padding);
|
||||
try writer.writeAll(buf);
|
||||
try writer.writeByteNTimes(options.fill, right_padding);
|
||||
try writer.writeBytesNTimes(fill_utf8, right_padding);
|
||||
},
|
||||
.right => {
|
||||
try writer.writeByteNTimes(options.fill, padding);
|
||||
try writer.writeBytesNTimes(fill_utf8, padding);
|
||||
try writer.writeAll(buf);
|
||||
},
|
||||
}
|
||||
@ -2793,6 +2811,15 @@ test "padding" {
|
||||
try expectFmt("a====", "{c:=<5}", .{'a'});
|
||||
}
|
||||
|
||||
test "padding fill char utf" {
    // Single-character values padded with a multi-byte fill code point.
    try expectFmt("a────", "{c:─<5}", .{'a'});
    try expectFmt("──a──", "{c:─^5}", .{'a'});
    try expectFmt("────a", "{c:─>5}", .{'a'});

    // String values padded with the same multi-byte fill code point.
    try expectFmt("crêpe─────", "{s:─<10}", .{"crêpe"});
    try expectFmt("──crêpe───", "{s:─^10}", .{"crêpe"});
    try expectFmt("─────crêpe", "{s:─>10}", .{"crêpe"});
}
|
||||
|
||||
test "decimal float padding" {
|
||||
const number: f32 = 3.1415;
|
||||
try expectFmt("left-pad: **3.141\n", "left-pad: {d:*>7.3}\n", .{number});
|
||||
|
@ -45,6 +45,13 @@ pub fn Writer(
|
||||
}
|
||||
}
|
||||
|
||||
/// Writes the whole of `bytes` to the stream `n` times in a row.
/// Stops at, and returns, the first error reported by `writeAll`.
pub fn writeBytesNTimes(self: Self, bytes: []const u8, n: usize) Error!void {
    for (0..n) |_| {
        try self.writeAll(bytes);
    }
}
|
||||
|
||||
pub inline fn writeInt(self: Self, comptime T: type, value: T, endian: std.builtin.Endian) Error!void {
|
||||
var bytes: [@divExact(@typeInfo(T).Int.bits, 8)]u8 = undefined;
|
||||
mem.writeInt(std.math.ByteAlignedInt(@TypeOf(value)), &bytes, value, endian);
|
||||
|
@ -69,6 +69,19 @@ pub fn utf8Encode(c: u21, out: []u8) !u3 {
|
||||
return length;
|
||||
}
|
||||
|
||||
/// Encodes the comptime-known code point `c` as UTF-8 and returns the bytes
/// as an array whose length is the sequence length of `c`.
/// A code point that cannot be encoded is reported as a compile error named
/// after the underlying error.
pub inline fn utf8EncodeComptime(comptime c: u21) [
    utf8CodepointSequenceLength(c) catch |err|
        @compileError(@errorName(err))
]u8 {
    return comptime blk: {
        // The return type above has already rejected code points with no
        // valid sequence length, so this lookup cannot fail here.
        const len = utf8CodepointSequenceLength(c) catch unreachable;
        var encoded: [len]u8 = undefined;
        const written = utf8Encode(c, &encoded) catch |err|
            @compileError(@errorName(err));
        assert(written == encoded.len);
        break :blk encoded;
    };
}
|
||||
|
||||
const Utf8DecodeError = Utf8Decode2Error || Utf8Decode3Error || Utf8Decode4Error;
|
||||
|
||||
/// Decodes the UTF-8 codepoint encoded in the given slice of bytes.
|
||||
@ -525,6 +538,13 @@ fn testUtf8Encode() !void {
|
||||
try testing.expect(array[3] == 0b10001000);
|
||||
}
|
||||
|
||||
test "utf8 encode comptime" {
    // One sample per encoded length, from one byte up to four bytes.
    try testing.expectEqualSlices(u8, "$", &utf8EncodeComptime('$'));
    try testing.expectEqualSlices(u8, "¢", &utf8EncodeComptime('¢'));
    try testing.expectEqualSlices(u8, "€", &utf8EncodeComptime('€'));
    try testing.expectEqualSlices(u8, "𐍈", &utf8EncodeComptime('𐍈'));
}
|
||||
|
||||
test "utf8 encode error" {
|
||||
try comptime testUtf8EncodeError();
|
||||
try testUtf8EncodeError();
|
||||
|
Loading…
Reference in New Issue
Block a user