mirror of
https://github.com/ziglang/zig.git
synced 2024-11-30 17:12:31 +00:00
637 lines
22 KiB
Zig
637 lines
22 KiB
Zig
//! The 7-bit [ASCII](https://en.wikipedia.org/wiki/ASCII) character encoding standard.
|
|
//!
|
|
//! This is not to be confused with the 8-bit [extended ASCII](https://en.wikipedia.org/wiki/Extended_ASCII) character encoding.
|
|
//!
|
|
//! Even though this module concerns itself with 7-bit ASCII,
|
|
//! functions use `u8` as the type instead of `u7` for convenience and compatibility.
|
|
//! Characters outside of the 7-bit range are gracefully handled (e.g. by returning `false`).
|
|
//!
|
|
//! See also: https://en.wikipedia.org/wiki/ASCII#Character_set
|
|
|
|
const std = @import("std");
|
|
|
|
// TODO: remove all decls marked as DEPRECATED after 0.10.0's release
|
|
|
|
/// The C0 control codes of the ASCII encoding.
|
|
///
|
|
/// See also: https://en.wikipedia.org/wiki/C0_and_C1_control_codes and `isControl`.
|
|
pub const control_code = struct {
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const NUL = 0x00;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const SOH = 0x01;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const STX = 0x02;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const ETX = 0x03;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const EOT = 0x04;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const ENQ = 0x05;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const ACK = 0x06;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const BEL = 0x07;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const BS = 0x08;
|
|
// DEPRECATED: use `ht`
|
|
pub const TAB = 0x09;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const LF = 0x0A;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const VT = 0x0B;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const FF = 0x0C;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const CR = 0x0D;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const SO = 0x0E;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const SI = 0x0F;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const DLE = 0x10;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const DC1 = 0x11;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const DC2 = 0x12;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const DC3 = 0x13;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const DC4 = 0x14;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const NAK = 0x15;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const SYN = 0x16;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const ETB = 0x17;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const CAN = 0x18;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const EM = 0x19;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const SUB = 0x1A;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const ESC = 0x1B;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const FS = 0x1C;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const GS = 0x1D;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const RS = 0x1E;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const US = 0x1F;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const DEL = 0x7F;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const XON = 0x11;
|
|
// DEPRECATED: use the lowercase variant
|
|
pub const XOFF = 0x13;
|
|
|
|
/// Null.
|
|
pub const nul = 0x00;
|
|
/// Start of Heading.
|
|
pub const soh = 0x01;
|
|
/// Start of Text.
|
|
pub const stx = 0x02;
|
|
/// End of Text.
|
|
pub const etx = 0x03;
|
|
/// End of Transmission.
|
|
pub const eot = 0x04;
|
|
/// Enquiry.
|
|
pub const enq = 0x05;
|
|
/// Acknowledge.
|
|
pub const ack = 0x06;
|
|
/// Bell, Alert.
|
|
pub const bel = 0x07;
|
|
/// Backspace.
|
|
pub const bs = 0x08;
|
|
/// Horizontal Tab, Tab ('\t').
|
|
pub const ht = 0x09;
|
|
/// Line Feed, Newline ('\n').
|
|
pub const lf = 0x0A;
|
|
/// Vertical Tab.
|
|
pub const vt = 0x0B;
|
|
/// Form Feed.
|
|
pub const ff = 0x0C;
|
|
/// Carriage Return ('\r').
|
|
pub const cr = 0x0D;
|
|
/// Shift Out.
|
|
pub const so = 0x0E;
|
|
/// Shift In.
|
|
pub const si = 0x0F;
|
|
/// Data Link Escape.
|
|
pub const dle = 0x10;
|
|
/// Device Control One (XON).
|
|
pub const dc1 = 0x11;
|
|
/// Device Control Two.
|
|
pub const dc2 = 0x12;
|
|
/// Device Control Three (XOFF).
|
|
pub const dc3 = 0x13;
|
|
/// Device Control Four.
|
|
pub const dc4 = 0x14;
|
|
/// Negative Acknowledge.
|
|
pub const nak = 0x15;
|
|
/// Synchronous Idle.
|
|
pub const syn = 0x16;
|
|
/// End of Transmission Block
|
|
pub const etb = 0x17;
|
|
/// Cancel.
|
|
pub const can = 0x18;
|
|
/// End of Medium.
|
|
pub const em = 0x19;
|
|
/// Substitute.
|
|
pub const sub = 0x1A;
|
|
/// Escape.
|
|
pub const esc = 0x1B;
|
|
/// File Separator.
|
|
pub const fs = 0x1C;
|
|
/// Group Separator.
|
|
pub const gs = 0x1D;
|
|
/// Record Separator.
|
|
pub const rs = 0x1E;
|
|
/// Unit Separator.
|
|
pub const us = 0x1F;
|
|
|
|
/// Delete.
|
|
pub const del = 0x7F;
|
|
|
|
/// An alias to `dc1`.
|
|
pub const xon = dc1;
|
|
/// An alias to `dc3`.
|
|
pub const xoff = dc3;
|
|
};
|
|
|
|
const tIndex = enum(u3) {
|
|
Alpha,
|
|
Hex,
|
|
Space,
|
|
Digit,
|
|
Lower,
|
|
Upper,
|
|
// Ctrl, < 0x20 || == DEL
|
|
// Print, = Graph || == ' '. NOT '\t' et cetera
|
|
Punct,
|
|
Graph,
|
|
//ASCII, | ~0b01111111
|
|
//isBlank, == ' ' || == '\x09'
|
|
};
|
|
|
|
const combinedTable = init: {
|
|
comptime var table: [256]u8 = undefined;
|
|
|
|
const mem = std.mem;
|
|
|
|
const alpha = [_]u1{
|
|
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
|
|
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
|
|
};
|
|
const lower = [_]u1{
|
|
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
|
|
};
|
|
const upper = [_]u1{
|
|
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
};
|
|
const digit = [_]u1{
|
|
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
};
|
|
const hex = [_]u1{
|
|
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
|
|
|
|
0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
};
|
|
const space = [_]u1{
|
|
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
};
|
|
const punct = [_]u1{
|
|
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
|
|
|
|
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
|
|
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
|
|
};
|
|
const graph = [_]u1{
|
|
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
|
|
};
|
|
|
|
comptime var i = 0;
|
|
inline while (i < 128) : (i += 1) {
|
|
table[i] =
|
|
@as(u8, alpha[i]) << @enumToInt(tIndex.Alpha) |
|
|
@as(u8, hex[i]) << @enumToInt(tIndex.Hex) |
|
|
@as(u8, space[i]) << @enumToInt(tIndex.Space) |
|
|
@as(u8, digit[i]) << @enumToInt(tIndex.Digit) |
|
|
@as(u8, lower[i]) << @enumToInt(tIndex.Lower) |
|
|
@as(u8, upper[i]) << @enumToInt(tIndex.Upper) |
|
|
@as(u8, punct[i]) << @enumToInt(tIndex.Punct) |
|
|
@as(u8, graph[i]) << @enumToInt(tIndex.Graph);
|
|
}
|
|
mem.set(u8, table[128..256], 0);
|
|
break :init table;
|
|
};
|
|
|
|
fn inTable(c: u8, t: tIndex) bool {
|
|
return (combinedTable[c] & (@as(u8, 1) << @enumToInt(t))) != 0;
|
|
}
|
|
|
|
/// DEPRECATED: use `isAlphanumeric`
|
|
pub const isAlNum = isAlphanumeric;
|
|
/// DEPRECATED: use `isAlphabetic`
|
|
pub const isAlpha = isAlphabetic;
|
|
/// DEPRECATED: use `isControl`
|
|
pub const isCntrl = isControl;
|
|
/// DEPRECATED: use `isWhitespace`.
|
|
pub const isSpace = isWhitespace;
|
|
/// DEPRECATED: use `whitespace`.
|
|
pub const spaces = whitespace;
|
|
/// DEPRECATED: use `isHex`.
|
|
pub const isXDigit = isHex;
|
|
|
|
/// Returns whether the character is alphanumeric.
|
|
pub fn isAlphanumeric(c: u8) bool {
|
|
return (combinedTable[c] & ((@as(u8, 1) << @enumToInt(tIndex.Alpha)) |
|
|
@as(u8, 1) << @enumToInt(tIndex.Digit))) != 0;
|
|
}
|
|
|
|
/// Returns whether the character is alphabetic.
|
|
pub fn isAlphabetic(c: u8) bool {
|
|
return inTable(c, tIndex.Alpha);
|
|
}
|
|
|
|
/// Returns whether the character is a control character.
|
|
/// This is the same as `!isPrint(c)`.
|
|
///
|
|
/// See also: `control_code`.
|
|
pub fn isControl(c: u8) bool {
|
|
return c <= control_code.us or c == control_code.del;
|
|
}
|
|
|
|
/// Returns whether the character is a digit.
|
|
pub fn isDigit(c: u8) bool {
|
|
return inTable(c, tIndex.Digit);
|
|
}
|
|
|
|
/// DEPRECATED: use `isPrint(c) and c != ' '` instead
|
|
pub fn isGraph(c: u8) bool {
|
|
return inTable(c, tIndex.Graph);
|
|
}
|
|
|
|
/// Returns whether the character is a lowercased letter.
|
|
pub fn isLower(c: u8) bool {
|
|
return inTable(c, tIndex.Lower);
|
|
}
|
|
|
|
/// Returns whether the character has some graphical representation and can be printed.
|
|
/// This also returns `true` for the space character.
|
|
/// This is the same as `!isControl(c)`.
|
|
pub fn isPrint(c: u8) bool {
|
|
return inTable(c, tIndex.Graph) or c == ' ';
|
|
}
|
|
|
|
/// DEPRECATED: create your own function based on your needs and what you want to do.
|
|
pub fn isPunct(c: u8) bool {
|
|
return inTable(c, tIndex.Punct);
|
|
}
|
|
|
|
/// Returns whether this character is included in `whitespace`.
|
|
pub fn isWhitespace(c: u8) bool {
|
|
return inTable(c, tIndex.Space);
|
|
}
|
|
|
|
/// Whitespace for general use.
|
|
/// This may be used with e.g. `std.mem.trim` to trim whitespace.
|
|
///
|
|
/// See also: `isWhitespace`.
|
|
pub const whitespace = [_]u8{ ' ', '\t', '\n', '\r', control_code.vt, control_code.ff };
|
|
|
|
test "whitespace" {
|
|
for (whitespace) |char| try std.testing.expect(isWhitespace(char));
|
|
|
|
var i: u8 = 0;
|
|
while (isASCII(i)) : (i += 1) {
|
|
if (isWhitespace(i)) try std.testing.expect(std.mem.indexOfScalar(u8, &whitespace, i) != null);
|
|
}
|
|
}
|
|
|
|
/// Returns whether the character is an uppercased letter.
|
|
pub fn isUpper(c: u8) bool {
|
|
return inTable(c, tIndex.Upper);
|
|
}
|
|
|
|
/// Returns whether the character is a hexadecimal digit. This is case-insensitive.
|
|
pub fn isHex(c: u8) bool {
|
|
return inTable(c, tIndex.Hex);
|
|
}
|
|
|
|
/// Returns whether the character is a 7-bit ASCII character.
|
|
pub fn isASCII(c: u8) bool {
|
|
return c < 128;
|
|
}
|
|
|
|
/// DEPRECATED: use `c == ' ' or c == '\t'` or try `isWhitespace`
|
|
pub fn isBlank(c: u8) bool {
|
|
return (c == ' ') or (c == '\x09');
|
|
}
|
|
|
|
/// Uppercases the character and returns it as-is if it's already uppercased or not a letter.
|
|
pub fn toUpper(c: u8) u8 {
|
|
if (isLower(c)) {
|
|
return c & 0b11011111;
|
|
} else {
|
|
return c;
|
|
}
|
|
}
|
|
|
|
/// Lowercases the character and returns it as-is if it's already lowercased or not a letter.
|
|
pub fn toLower(c: u8) u8 {
|
|
if (isUpper(c)) {
|
|
return c | 0b00100000;
|
|
} else {
|
|
return c;
|
|
}
|
|
}
|
|
|
|
test "ascii character classes" {
|
|
const testing = std.testing;
|
|
|
|
try testing.expect(!isControl('a'));
|
|
try testing.expect(!isControl('z'));
|
|
try testing.expect(isControl(control_code.nul));
|
|
try testing.expect(isControl(control_code.ff));
|
|
try testing.expect(isControl(control_code.us));
|
|
|
|
try testing.expect('C' == toUpper('c'));
|
|
try testing.expect(':' == toUpper(':'));
|
|
try testing.expect('\xab' == toUpper('\xab'));
|
|
try testing.expect(!isUpper('z'));
|
|
|
|
try testing.expect('c' == toLower('C'));
|
|
try testing.expect(':' == toLower(':'));
|
|
try testing.expect('\xab' == toLower('\xab'));
|
|
try testing.expect(!isLower('Z'));
|
|
|
|
try testing.expect(isAlphanumeric('Z'));
|
|
try testing.expect(isAlphanumeric('z'));
|
|
try testing.expect(isAlphanumeric('5'));
|
|
try testing.expect(isAlphanumeric('5'));
|
|
try testing.expect(!isAlphanumeric('!'));
|
|
|
|
try testing.expect(!isAlpha('5'));
|
|
try testing.expect(isAlpha('c'));
|
|
try testing.expect(!isAlpha('5'));
|
|
|
|
try testing.expect(isWhitespace(' '));
|
|
try testing.expect(isWhitespace('\t'));
|
|
try testing.expect(isWhitespace('\r'));
|
|
try testing.expect(isWhitespace('\n'));
|
|
try testing.expect(!isWhitespace('.'));
|
|
|
|
try testing.expect(!isHex('g'));
|
|
try testing.expect(isHex('b'));
|
|
try testing.expect(isHex('9'));
|
|
|
|
try testing.expect(!isDigit('~'));
|
|
try testing.expect(isDigit('0'));
|
|
try testing.expect(isDigit('9'));
|
|
|
|
try testing.expect(isPrint(' '));
|
|
try testing.expect(isPrint('@'));
|
|
try testing.expect(isPrint('~'));
|
|
try testing.expect(!isPrint(control_code.esc));
|
|
}
|
|
|
|
/// Writes a lower case copy of `ascii_string` to `output`.
|
|
/// Asserts `output.len >= ascii_string.len`.
|
|
pub fn lowerString(output: []u8, ascii_string: []const u8) []u8 {
|
|
std.debug.assert(output.len >= ascii_string.len);
|
|
for (ascii_string) |c, i| {
|
|
output[i] = toLower(c);
|
|
}
|
|
return output[0..ascii_string.len];
|
|
}
|
|
|
|
test "lowerString" {
|
|
var buf: [1024]u8 = undefined;
|
|
const result = lowerString(&buf, "aBcDeFgHiJkLmNOPqrst0234+💩!");
|
|
try std.testing.expectEqualStrings("abcdefghijklmnopqrst0234+💩!", result);
|
|
}
|
|
|
|
/// Allocates a lower case copy of `ascii_string`.
|
|
/// Caller owns returned string and must free with `allocator`.
|
|
pub fn allocLowerString(allocator: std.mem.Allocator, ascii_string: []const u8) ![]u8 {
|
|
const result = try allocator.alloc(u8, ascii_string.len);
|
|
return lowerString(result, ascii_string);
|
|
}
|
|
|
|
test "allocLowerString" {
|
|
const result = try allocLowerString(std.testing.allocator, "aBcDeFgHiJkLmNOPqrst0234+💩!");
|
|
defer std.testing.allocator.free(result);
|
|
try std.testing.expectEqualStrings("abcdefghijklmnopqrst0234+💩!", result);
|
|
}
|
|
|
|
/// Writes an upper case copy of `ascii_string` to `output`.
|
|
/// Asserts `output.len >= ascii_string.len`.
|
|
pub fn upperString(output: []u8, ascii_string: []const u8) []u8 {
|
|
std.debug.assert(output.len >= ascii_string.len);
|
|
for (ascii_string) |c, i| {
|
|
output[i] = toUpper(c);
|
|
}
|
|
return output[0..ascii_string.len];
|
|
}
|
|
|
|
test "upperString" {
|
|
var buf: [1024]u8 = undefined;
|
|
const result = upperString(&buf, "aBcDeFgHiJkLmNOPqrst0234+💩!");
|
|
try std.testing.expectEqualStrings("ABCDEFGHIJKLMNOPQRST0234+💩!", result);
|
|
}
|
|
|
|
/// Allocates an upper case copy of `ascii_string`.
|
|
/// Caller owns returned string and must free with `allocator`.
|
|
pub fn allocUpperString(allocator: std.mem.Allocator, ascii_string: []const u8) ![]u8 {
|
|
const result = try allocator.alloc(u8, ascii_string.len);
|
|
return upperString(result, ascii_string);
|
|
}
|
|
|
|
test "allocUpperString" {
|
|
const result = try allocUpperString(std.testing.allocator, "aBcDeFgHiJkLmNOPqrst0234+💩!");
|
|
defer std.testing.allocator.free(result);
|
|
try std.testing.expectEqualStrings("ABCDEFGHIJKLMNOPQRST0234+💩!", result);
|
|
}
|
|
|
|
/// Compares strings `a` and `b` case-insensitively and returns whether they are equal.
|
|
pub fn eqlIgnoreCase(a: []const u8, b: []const u8) bool {
|
|
if (a.len != b.len) return false;
|
|
for (a) |a_c, i| {
|
|
if (toLower(a_c) != toLower(b[i])) return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
test "eqlIgnoreCase" {
|
|
try std.testing.expect(eqlIgnoreCase("HEl💩Lo!", "hel💩lo!"));
|
|
try std.testing.expect(!eqlIgnoreCase("hElLo!", "hello! "));
|
|
try std.testing.expect(!eqlIgnoreCase("hElLo!", "helro!"));
|
|
}
|
|
|
|
pub fn startsWithIgnoreCase(haystack: []const u8, needle: []const u8) bool {
|
|
return if (needle.len > haystack.len) false else eqlIgnoreCase(haystack[0..needle.len], needle);
|
|
}
|
|
|
|
test "ascii.startsWithIgnoreCase" {
|
|
try std.testing.expect(startsWithIgnoreCase("boB", "Bo"));
|
|
try std.testing.expect(!startsWithIgnoreCase("Needle in hAyStAcK", "haystack"));
|
|
}
|
|
|
|
pub fn endsWithIgnoreCase(haystack: []const u8, needle: []const u8) bool {
|
|
return if (needle.len > haystack.len) false else eqlIgnoreCase(haystack[haystack.len - needle.len ..], needle);
|
|
}
|
|
|
|
test "ascii.endsWithIgnoreCase" {
|
|
try std.testing.expect(endsWithIgnoreCase("Needle in HaYsTaCk", "haystack"));
|
|
try std.testing.expect(!endsWithIgnoreCase("BoB", "Bo"));
|
|
}
|
|
|
|
/// Finds `needle` in `haystack`, ignoring case, starting at index 0.
|
|
pub fn indexOfIgnoreCase(haystack: []const u8, needle: []const u8) ?usize {
|
|
return indexOfIgnoreCasePos(haystack, 0, needle);
|
|
}
|
|
|
|
/// Finds `needle` in `haystack`, ignoring case, starting at `start_index`.
|
|
/// Uses Boyer-Moore-Horspool algorithm on large inputs; `indexOfIgnoreCasePosLinear` on small inputs.
|
|
pub fn indexOfIgnoreCasePos(haystack: []const u8, start_index: usize, needle: []const u8) ?usize {
|
|
if (needle.len > haystack.len) return null;
|
|
if (needle.len == 0) return start_index;
|
|
|
|
if (haystack.len < 52 or needle.len <= 4)
|
|
return indexOfIgnoreCasePosLinear(haystack, start_index, needle);
|
|
|
|
var skip_table: [256]usize = undefined;
|
|
boyerMooreHorspoolPreprocessIgnoreCase(needle, skip_table[0..]);
|
|
|
|
var i: usize = start_index;
|
|
while (i <= haystack.len - needle.len) {
|
|
if (eqlIgnoreCase(haystack[i .. i + needle.len], needle)) return i;
|
|
i += skip_table[toLower(haystack[i + needle.len - 1])];
|
|
}
|
|
|
|
return null;
|
|
}
|
|
|
|
/// Consider using `indexOfIgnoreCasePos` instead of this, which will automatically use a
|
|
/// more sophisticated algorithm on larger inputs.
|
|
pub fn indexOfIgnoreCasePosLinear(haystack: []const u8, start_index: usize, needle: []const u8) ?usize {
|
|
var i: usize = start_index;
|
|
const end = haystack.len - needle.len;
|
|
while (i <= end) : (i += 1) {
|
|
if (eqlIgnoreCase(haystack[i .. i + needle.len], needle)) return i;
|
|
}
|
|
return null;
|
|
}
|
|
|
|
fn boyerMooreHorspoolPreprocessIgnoreCase(pattern: []const u8, table: *[256]usize) void {
|
|
for (table) |*c| {
|
|
c.* = pattern.len;
|
|
}
|
|
|
|
var i: usize = 0;
|
|
// The last item is intentionally ignored and the skip size will be pattern.len.
|
|
// This is the standard way Boyer-Moore-Horspool is implemented.
|
|
while (i < pattern.len - 1) : (i += 1) {
|
|
table[toLower(pattern[i])] = pattern.len - 1 - i;
|
|
}
|
|
}
|
|
|
|
test "indexOfIgnoreCase" {
|
|
try std.testing.expect(indexOfIgnoreCase("one Two Three Four", "foUr").? == 14);
|
|
try std.testing.expect(indexOfIgnoreCase("one two three FouR", "gOur") == null);
|
|
try std.testing.expect(indexOfIgnoreCase("foO", "Foo").? == 0);
|
|
try std.testing.expect(indexOfIgnoreCase("foo", "fool") == null);
|
|
try std.testing.expect(indexOfIgnoreCase("FOO foo", "fOo").? == 0);
|
|
|
|
try std.testing.expect(indexOfIgnoreCase("one two three four five six seven eight nine ten eleven", "ThReE fOUr").? == 8);
|
|
try std.testing.expect(indexOfIgnoreCase("one two three four five six seven eight nine ten eleven", "Two tWo") == null);
|
|
}
|
|
|
|
/// Returns the lexicographical order of two slices. O(n).
|
|
pub fn orderIgnoreCase(lhs: []const u8, rhs: []const u8) std.math.Order {
|
|
const n = std.math.min(lhs.len, rhs.len);
|
|
var i: usize = 0;
|
|
while (i < n) : (i += 1) {
|
|
switch (std.math.order(toLower(lhs[i]), toLower(rhs[i]))) {
|
|
.eq => continue,
|
|
.lt => return .lt,
|
|
.gt => return .gt,
|
|
}
|
|
}
|
|
return std.math.order(lhs.len, rhs.len);
|
|
}
|
|
|
|
/// Returns whether the lexicographical order of `lhs` is lower than `rhs`.
|
|
pub fn lessThanIgnoreCase(lhs: []const u8, rhs: []const u8) bool {
|
|
return orderIgnoreCase(lhs, rhs) == .lt;
|
|
}
|