zig/lib/std/ascii.zig

284 lines
9.2 KiB
Zig
Raw Normal View History

std: add ascii with C ASCII character classes Does NOT look at the locale the way the C functions do. int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); int toupper(int c); int tolower(int c); Tested to match glibc (when using C locale) with this program: const c = @cImport({ // See https://github.com/ziglang/zig/issues/515 @cDefine("_NO_CRT_STDIO_INLINE", "1"); @cInclude("stdio.h"); @cInclude("string.h"); @cInclude("ctype.h"); }); const std = @import("std"); const ascii = std.ascii; const abort = std.os.abort; export fn main(argc: c_int, argv: **u8) c_int { var i: u8 = undefined; i = 0; while (true) { if (ascii.isAlNum(i) != (c.isalnum(i) > 0)) { abort(); } if (ascii.isAlpha(i) != (c.isalpha(i) > 0)) { abort(); } if (ascii.isCtrl(i) != (c.iscntrl(i) > 0)) { abort(); } if (ascii.isDigit(i) != (c.isdigit(i) > 0)) { abort(); } if (ascii.isGraph(i) != (c.isgraph(i) > 0)) { abort(); } if (ascii.isLower(i) != (c.islower(i) > 0)) { abort(); } if (ascii.isPrint(i) != (c.isprint(i) > 0)) { abort(); } if (ascii.isPunct(i) != (c.ispunct(i) > 0)) { abort(); } if (ascii.isSpace(i) != (c.isspace(i) > 0)) { abort(); } if (ascii.isUpper(i) != (c.isupper(i) > 0)) { abort(); } if (ascii.isXDigit(i) != (c.isxdigit(i) > 0)) { abort(); } if (i == 255) { break; } i += 1; } _ = c.printf(c"Success!\n"); return 0; }
2019-03-22 02:33:37 +00:00
// Does NOT look at the locale the way C89's toupper(3), isspace() et cetera does.
// I could have taken only a u7 to make this clear, but it would be slower
// It is my opinion that encodings other than UTF-8 should not be supported.
//
// (and 128 bytes is not much to pay).
// Also does not handle Unicode character classes.
//
// https://upload.wikimedia.org/wikipedia/commons/thumb/c/cf/USASCII_code_chart.png/1200px-USASCII_code_chart.png
const std = @import("std");
std: add ascii with C ASCII character classes Does NOT look at the locale the way the C functions do. int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); int toupper(int c); int tolower(int c); Tested to match glibc (when using C locale) with this program: const c = @cImport({ // See https://github.com/ziglang/zig/issues/515 @cDefine("_NO_CRT_STDIO_INLINE", "1"); @cInclude("stdio.h"); @cInclude("string.h"); @cInclude("ctype.h"); }); const std = @import("std"); const ascii = std.ascii; const abort = std.os.abort; export fn main(argc: c_int, argv: **u8) c_int { var i: u8 = undefined; i = 0; while (true) { if (ascii.isAlNum(i) != (c.isalnum(i) > 0)) { abort(); } if (ascii.isAlpha(i) != (c.isalpha(i) > 0)) { abort(); } if (ascii.isCtrl(i) != (c.iscntrl(i) > 0)) { abort(); } if (ascii.isDigit(i) != (c.isdigit(i) > 0)) { abort(); } if (ascii.isGraph(i) != (c.isgraph(i) > 0)) { abort(); } if (ascii.isLower(i) != (c.islower(i) > 0)) { abort(); } if (ascii.isPrint(i) != (c.isprint(i) > 0)) { abort(); } if (ascii.isPunct(i) != (c.ispunct(i) > 0)) { abort(); } if (ascii.isSpace(i) != (c.isspace(i) > 0)) { abort(); } if (ascii.isUpper(i) != (c.isupper(i) > 0)) { abort(); } if (ascii.isXDigit(i) != (c.isxdigit(i) > 0)) { abort(); } if (i == 255) { break; } i += 1; } _ = c.printf(c"Success!\n"); return 0; }
2019-03-22 02:33:37 +00:00
const tIndex = enum(u3) {
Alpha,
Hex,
Space,
Digit,
Lower,
Upper,
// Ctrl, < 0x20 || == DEL
// Print, = Graph || == ' '. NOT '\t' et cetera
Punct,
Graph,
//ASCII, | ~0b01111111
//isBlank, == ' ' || == '\x09'
};
const combinedTable = init: {
comptime var table: [256]u8 = undefined;
const mem = std.mem;
const alpha = [_]u1{
2019-05-12 17:56:01 +01:00
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
std: add ascii with C ASCII character classes Does NOT look at the locale the way the C functions do. int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); int toupper(int c); int tolower(int c); Tested to match glibc (when using C locale) with this program: const c = @cImport({ // See https://github.com/ziglang/zig/issues/515 @cDefine("_NO_CRT_STDIO_INLINE", "1"); @cInclude("stdio.h"); @cInclude("string.h"); @cInclude("ctype.h"); }); const std = @import("std"); const ascii = std.ascii; const abort = std.os.abort; export fn main(argc: c_int, argv: **u8) c_int { var i: u8 = undefined; i = 0; while (true) { if (ascii.isAlNum(i) != (c.isalnum(i) > 0)) { abort(); } if (ascii.isAlpha(i) != (c.isalpha(i) > 0)) { abort(); } if (ascii.isCtrl(i) != (c.iscntrl(i) > 0)) { abort(); } if (ascii.isDigit(i) != (c.isdigit(i) > 0)) { abort(); } if (ascii.isGraph(i) != (c.isgraph(i) > 0)) { abort(); } if (ascii.isLower(i) != (c.islower(i) > 0)) { abort(); } if (ascii.isPrint(i) != (c.isprint(i) > 0)) { abort(); } if (ascii.isPunct(i) != (c.ispunct(i) > 0)) { abort(); } if (ascii.isSpace(i) != (c.isspace(i) > 0)) { abort(); } if (ascii.isUpper(i) != (c.isupper(i) > 0)) { abort(); } if (ascii.isXDigit(i) != (c.isxdigit(i) > 0)) { abort(); } if (i == 255) { break; } i += 1; } _ = c.printf(c"Success!\n"); return 0; }
2019-03-22 02:33:37 +00:00
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
};
const lower = [_]u1{
2019-05-12 17:56:01 +01:00
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
std: add ascii with C ASCII character classes Does NOT look at the locale the way the C functions do. int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); int toupper(int c); int tolower(int c); Tested to match glibc (when using C locale) with this program: const c = @cImport({ // See https://github.com/ziglang/zig/issues/515 @cDefine("_NO_CRT_STDIO_INLINE", "1"); @cInclude("stdio.h"); @cInclude("string.h"); @cInclude("ctype.h"); }); const std = @import("std"); const ascii = std.ascii; const abort = std.os.abort; export fn main(argc: c_int, argv: **u8) c_int { var i: u8 = undefined; i = 0; while (true) { if (ascii.isAlNum(i) != (c.isalnum(i) > 0)) { abort(); } if (ascii.isAlpha(i) != (c.isalpha(i) > 0)) { abort(); } if (ascii.isCtrl(i) != (c.iscntrl(i) > 0)) { abort(); } if (ascii.isDigit(i) != (c.isdigit(i) > 0)) { abort(); } if (ascii.isGraph(i) != (c.isgraph(i) > 0)) { abort(); } if (ascii.isLower(i) != (c.islower(i) > 0)) { abort(); } if (ascii.isPrint(i) != (c.isprint(i) > 0)) { abort(); } if (ascii.isPunct(i) != (c.ispunct(i) > 0)) { abort(); } if (ascii.isSpace(i) != (c.isspace(i) > 0)) { abort(); } if (ascii.isUpper(i) != (c.isupper(i) > 0)) { abort(); } if (ascii.isXDigit(i) != (c.isxdigit(i) > 0)) { abort(); } if (i == 255) { break; } i += 1; } _ = c.printf(c"Success!\n"); return 0; }
2019-03-22 02:33:37 +00:00
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
};
const upper = [_]u1{
2019-05-12 17:56:01 +01:00
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
std: add ascii with C ASCII character classes Does NOT look at the locale the way the C functions do. int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); int toupper(int c); int tolower(int c); Tested to match glibc (when using C locale) with this program: const c = @cImport({ // See https://github.com/ziglang/zig/issues/515 @cDefine("_NO_CRT_STDIO_INLINE", "1"); @cInclude("stdio.h"); @cInclude("string.h"); @cInclude("ctype.h"); }); const std = @import("std"); const ascii = std.ascii; const abort = std.os.abort; export fn main(argc: c_int, argv: **u8) c_int { var i: u8 = undefined; i = 0; while (true) { if (ascii.isAlNum(i) != (c.isalnum(i) > 0)) { abort(); } if (ascii.isAlpha(i) != (c.isalpha(i) > 0)) { abort(); } if (ascii.isCtrl(i) != (c.iscntrl(i) > 0)) { abort(); } if (ascii.isDigit(i) != (c.isdigit(i) > 0)) { abort(); } if (ascii.isGraph(i) != (c.isgraph(i) > 0)) { abort(); } if (ascii.isLower(i) != (c.islower(i) > 0)) { abort(); } if (ascii.isPrint(i) != (c.isprint(i) > 0)) { abort(); } if (ascii.isPunct(i) != (c.ispunct(i) > 0)) { abort(); } if (ascii.isSpace(i) != (c.isspace(i) > 0)) { abort(); } if (ascii.isUpper(i) != (c.isupper(i) > 0)) { abort(); } if (ascii.isXDigit(i) != (c.isxdigit(i) > 0)) { abort(); } if (i == 255) { break; } i += 1; } _ = c.printf(c"Success!\n"); return 0; }
2019-03-22 02:33:37 +00:00
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
const digit = [_]u1{
2019-05-12 17:56:01 +01:00
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
std: add ascii with C ASCII character classes Does NOT look at the locale the way the C functions do. int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); int toupper(int c); int tolower(int c); Tested to match glibc (when using C locale) with this program: const c = @cImport({ // See https://github.com/ziglang/zig/issues/515 @cDefine("_NO_CRT_STDIO_INLINE", "1"); @cInclude("stdio.h"); @cInclude("string.h"); @cInclude("ctype.h"); }); const std = @import("std"); const ascii = std.ascii; const abort = std.os.abort; export fn main(argc: c_int, argv: **u8) c_int { var i: u8 = undefined; i = 0; while (true) { if (ascii.isAlNum(i) != (c.isalnum(i) > 0)) { abort(); } if (ascii.isAlpha(i) != (c.isalpha(i) > 0)) { abort(); } if (ascii.isCtrl(i) != (c.iscntrl(i) > 0)) { abort(); } if (ascii.isDigit(i) != (c.isdigit(i) > 0)) { abort(); } if (ascii.isGraph(i) != (c.isgraph(i) > 0)) { abort(); } if (ascii.isLower(i) != (c.islower(i) > 0)) { abort(); } if (ascii.isPrint(i) != (c.isprint(i) > 0)) { abort(); } if (ascii.isPunct(i) != (c.ispunct(i) > 0)) { abort(); } if (ascii.isSpace(i) != (c.isspace(i) > 0)) { abort(); } if (ascii.isUpper(i) != (c.isupper(i) > 0)) { abort(); } if (ascii.isXDigit(i) != (c.isxdigit(i) > 0)) { abort(); } if (i == 255) { break; } i += 1; } _ = c.printf(c"Success!\n"); return 0; }
2019-03-22 02:33:37 +00:00
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
const hex = [_]u1{
2019-05-12 17:56:01 +01:00
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
std: add ascii with C ASCII character classes Does NOT look at the locale the way the C functions do. int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); int toupper(int c); int tolower(int c); Tested to match glibc (when using C locale) with this program: const c = @cImport({ // See https://github.com/ziglang/zig/issues/515 @cDefine("_NO_CRT_STDIO_INLINE", "1"); @cInclude("stdio.h"); @cInclude("string.h"); @cInclude("ctype.h"); }); const std = @import("std"); const ascii = std.ascii; const abort = std.os.abort; export fn main(argc: c_int, argv: **u8) c_int { var i: u8 = undefined; i = 0; while (true) { if (ascii.isAlNum(i) != (c.isalnum(i) > 0)) { abort(); } if (ascii.isAlpha(i) != (c.isalpha(i) > 0)) { abort(); } if (ascii.isCtrl(i) != (c.iscntrl(i) > 0)) { abort(); } if (ascii.isDigit(i) != (c.isdigit(i) > 0)) { abort(); } if (ascii.isGraph(i) != (c.isgraph(i) > 0)) { abort(); } if (ascii.isLower(i) != (c.islower(i) > 0)) { abort(); } if (ascii.isPrint(i) != (c.isprint(i) > 0)) { abort(); } if (ascii.isPunct(i) != (c.ispunct(i) > 0)) { abort(); } if (ascii.isSpace(i) != (c.isspace(i) > 0)) { abort(); } if (ascii.isUpper(i) != (c.isupper(i) > 0)) { abort(); } if (ascii.isXDigit(i) != (c.isxdigit(i) > 0)) { abort(); } if (i == 255) { break; } i += 1; } _ = c.printf(c"Success!\n"); return 0; }
2019-03-22 02:33:37 +00:00
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
const space = [_]u1{
2019-05-12 17:56:01 +01:00
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
std: add ascii with C ASCII character classes Does NOT look at the locale the way the C functions do. int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); int toupper(int c); int tolower(int c); Tested to match glibc (when using C locale) with this program: const c = @cImport({ // See https://github.com/ziglang/zig/issues/515 @cDefine("_NO_CRT_STDIO_INLINE", "1"); @cInclude("stdio.h"); @cInclude("string.h"); @cInclude("ctype.h"); }); const std = @import("std"); const ascii = std.ascii; const abort = std.os.abort; export fn main(argc: c_int, argv: **u8) c_int { var i: u8 = undefined; i = 0; while (true) { if (ascii.isAlNum(i) != (c.isalnum(i) > 0)) { abort(); } if (ascii.isAlpha(i) != (c.isalpha(i) > 0)) { abort(); } if (ascii.isCtrl(i) != (c.iscntrl(i) > 0)) { abort(); } if (ascii.isDigit(i) != (c.isdigit(i) > 0)) { abort(); } if (ascii.isGraph(i) != (c.isgraph(i) > 0)) { abort(); } if (ascii.isLower(i) != (c.islower(i) > 0)) { abort(); } if (ascii.isPrint(i) != (c.isprint(i) > 0)) { abort(); } if (ascii.isPunct(i) != (c.ispunct(i) > 0)) { abort(); } if (ascii.isSpace(i) != (c.isspace(i) > 0)) { abort(); } if (ascii.isUpper(i) != (c.isupper(i) > 0)) { abort(); } if (ascii.isXDigit(i) != (c.isxdigit(i) > 0)) { abort(); } if (i == 255) { break; } i += 1; } _ = c.printf(c"Success!\n"); return 0; }
2019-03-22 02:33:37 +00:00
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
const punct = [_]u1{
2019-05-12 17:56:01 +01:00
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
std: add ascii with C ASCII character classes Does NOT look at the locale the way the C functions do. int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); int toupper(int c); int tolower(int c); Tested to match glibc (when using C locale) with this program: const c = @cImport({ // See https://github.com/ziglang/zig/issues/515 @cDefine("_NO_CRT_STDIO_INLINE", "1"); @cInclude("stdio.h"); @cInclude("string.h"); @cInclude("ctype.h"); }); const std = @import("std"); const ascii = std.ascii; const abort = std.os.abort; export fn main(argc: c_int, argv: **u8) c_int { var i: u8 = undefined; i = 0; while (true) { if (ascii.isAlNum(i) != (c.isalnum(i) > 0)) { abort(); } if (ascii.isAlpha(i) != (c.isalpha(i) > 0)) { abort(); } if (ascii.isCtrl(i) != (c.iscntrl(i) > 0)) { abort(); } if (ascii.isDigit(i) != (c.isdigit(i) > 0)) { abort(); } if (ascii.isGraph(i) != (c.isgraph(i) > 0)) { abort(); } if (ascii.isLower(i) != (c.islower(i) > 0)) { abort(); } if (ascii.isPrint(i) != (c.isprint(i) > 0)) { abort(); } if (ascii.isPunct(i) != (c.ispunct(i) > 0)) { abort(); } if (ascii.isSpace(i) != (c.isspace(i) > 0)) { abort(); } if (ascii.isUpper(i) != (c.isupper(i) > 0)) { abort(); } if (ascii.isXDigit(i) != (c.isxdigit(i) > 0)) { abort(); } if (i == 255) { break; } i += 1; } _ = c.printf(c"Success!\n"); return 0; }
2019-03-22 02:33:37 +00:00
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
};
const graph = [_]u1{
2019-05-12 17:56:01 +01:00
// 0, 1, 2, 3, 4, 5, 6, 7 ,8, 9,10,11,12,13,14,15
std: add ascii with C ASCII character classes Does NOT look at the locale the way the C functions do. int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); int toupper(int c); int tolower(int c); Tested to match glibc (when using C locale) with this program: const c = @cImport({ // See https://github.com/ziglang/zig/issues/515 @cDefine("_NO_CRT_STDIO_INLINE", "1"); @cInclude("stdio.h"); @cInclude("string.h"); @cInclude("ctype.h"); }); const std = @import("std"); const ascii = std.ascii; const abort = std.os.abort; export fn main(argc: c_int, argv: **u8) c_int { var i: u8 = undefined; i = 0; while (true) { if (ascii.isAlNum(i) != (c.isalnum(i) > 0)) { abort(); } if (ascii.isAlpha(i) != (c.isalpha(i) > 0)) { abort(); } if (ascii.isCtrl(i) != (c.iscntrl(i) > 0)) { abort(); } if (ascii.isDigit(i) != (c.isdigit(i) > 0)) { abort(); } if (ascii.isGraph(i) != (c.isgraph(i) > 0)) { abort(); } if (ascii.isLower(i) != (c.islower(i) > 0)) { abort(); } if (ascii.isPrint(i) != (c.isprint(i) > 0)) { abort(); } if (ascii.isPunct(i) != (c.ispunct(i) > 0)) { abort(); } if (ascii.isSpace(i) != (c.isspace(i) > 0)) { abort(); } if (ascii.isUpper(i) != (c.isupper(i) > 0)) { abort(); } if (ascii.isXDigit(i) != (c.isxdigit(i) > 0)) { abort(); } if (i == 255) { break; } i += 1; } _ = c.printf(c"Success!\n"); return 0; }
2019-03-22 02:33:37 +00:00
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
};
comptime var i = 0;
inline while (i < 128) : (i += 1) {
table[i] =
2019-11-07 04:25:57 +00:00
@as(u8, alpha[i]) << @enumToInt(tIndex.Alpha) |
@as(u8, hex[i]) << @enumToInt(tIndex.Hex) |
@as(u8, space[i]) << @enumToInt(tIndex.Space) |
@as(u8, digit[i]) << @enumToInt(tIndex.Digit) |
@as(u8, lower[i]) << @enumToInt(tIndex.Lower) |
@as(u8, upper[i]) << @enumToInt(tIndex.Upper) |
@as(u8, punct[i]) << @enumToInt(tIndex.Punct) |
@as(u8, graph[i]) << @enumToInt(tIndex.Graph);
std: add ascii with C ASCII character classes Does NOT look at the locale the way the C functions do. int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); int toupper(int c); int tolower(int c); Tested to match glibc (when using C locale) with this program: const c = @cImport({ // See https://github.com/ziglang/zig/issues/515 @cDefine("_NO_CRT_STDIO_INLINE", "1"); @cInclude("stdio.h"); @cInclude("string.h"); @cInclude("ctype.h"); }); const std = @import("std"); const ascii = std.ascii; const abort = std.os.abort; export fn main(argc: c_int, argv: **u8) c_int { var i: u8 = undefined; i = 0; while (true) { if (ascii.isAlNum(i) != (c.isalnum(i) > 0)) { abort(); } if (ascii.isAlpha(i) != (c.isalpha(i) > 0)) { abort(); } if (ascii.isCtrl(i) != (c.iscntrl(i) > 0)) { abort(); } if (ascii.isDigit(i) != (c.isdigit(i) > 0)) { abort(); } if (ascii.isGraph(i) != (c.isgraph(i) > 0)) { abort(); } if (ascii.isLower(i) != (c.islower(i) > 0)) { abort(); } if (ascii.isPrint(i) != (c.isprint(i) > 0)) { abort(); } if (ascii.isPunct(i) != (c.ispunct(i) > 0)) { abort(); } if (ascii.isSpace(i) != (c.isspace(i) > 0)) { abort(); } if (ascii.isUpper(i) != (c.isupper(i) > 0)) { abort(); } if (ascii.isXDigit(i) != (c.isxdigit(i) > 0)) { abort(); } if (i == 255) { break; } i += 1; } _ = c.printf(c"Success!\n"); return 0; }
2019-03-22 02:33:37 +00:00
}
mem.set(u8, table[128..256], 0);
break :init table;
};
fn inTable(c: u8, t: tIndex) bool {
2019-11-07 04:25:57 +00:00
return (combinedTable[c] & (@as(u8, 1) << @enumToInt(t))) != 0;
std: add ascii with C ASCII character classes Does NOT look at the locale the way the C functions do. int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); int toupper(int c); int tolower(int c); Tested to match glibc (when using C locale) with this program: const c = @cImport({ // See https://github.com/ziglang/zig/issues/515 @cDefine("_NO_CRT_STDIO_INLINE", "1"); @cInclude("stdio.h"); @cInclude("string.h"); @cInclude("ctype.h"); }); const std = @import("std"); const ascii = std.ascii; const abort = std.os.abort; export fn main(argc: c_int, argv: **u8) c_int { var i: u8 = undefined; i = 0; while (true) { if (ascii.isAlNum(i) != (c.isalnum(i) > 0)) { abort(); } if (ascii.isAlpha(i) != (c.isalpha(i) > 0)) { abort(); } if (ascii.isCtrl(i) != (c.iscntrl(i) > 0)) { abort(); } if (ascii.isDigit(i) != (c.isdigit(i) > 0)) { abort(); } if (ascii.isGraph(i) != (c.isgraph(i) > 0)) { abort(); } if (ascii.isLower(i) != (c.islower(i) > 0)) { abort(); } if (ascii.isPrint(i) != (c.isprint(i) > 0)) { abort(); } if (ascii.isPunct(i) != (c.ispunct(i) > 0)) { abort(); } if (ascii.isSpace(i) != (c.isspace(i) > 0)) { abort(); } if (ascii.isUpper(i) != (c.isupper(i) > 0)) { abort(); } if (ascii.isXDigit(i) != (c.isxdigit(i) > 0)) { abort(); } if (i == 255) { break; } i += 1; } _ = c.printf(c"Success!\n"); return 0; }
2019-03-22 02:33:37 +00:00
}
pub fn isAlNum(c: u8) bool {
2019-11-07 04:25:57 +00:00
return (combinedTable[c] & ((@as(u8, 1) << @enumToInt(tIndex.Alpha)) |
@as(u8, 1) << @enumToInt(tIndex.Digit))) != 0;
std: add ascii with C ASCII character classes Does NOT look at the locale the way the C functions do. int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); int toupper(int c); int tolower(int c); Tested to match glibc (when using C locale) with this program: const c = @cImport({ // See https://github.com/ziglang/zig/issues/515 @cDefine("_NO_CRT_STDIO_INLINE", "1"); @cInclude("stdio.h"); @cInclude("string.h"); @cInclude("ctype.h"); }); const std = @import("std"); const ascii = std.ascii; const abort = std.os.abort; export fn main(argc: c_int, argv: **u8) c_int { var i: u8 = undefined; i = 0; while (true) { if (ascii.isAlNum(i) != (c.isalnum(i) > 0)) { abort(); } if (ascii.isAlpha(i) != (c.isalpha(i) > 0)) { abort(); } if (ascii.isCtrl(i) != (c.iscntrl(i) > 0)) { abort(); } if (ascii.isDigit(i) != (c.isdigit(i) > 0)) { abort(); } if (ascii.isGraph(i) != (c.isgraph(i) > 0)) { abort(); } if (ascii.isLower(i) != (c.islower(i) > 0)) { abort(); } if (ascii.isPrint(i) != (c.isprint(i) > 0)) { abort(); } if (ascii.isPunct(i) != (c.ispunct(i) > 0)) { abort(); } if (ascii.isSpace(i) != (c.isspace(i) > 0)) { abort(); } if (ascii.isUpper(i) != (c.isupper(i) > 0)) { abort(); } if (ascii.isXDigit(i) != (c.isxdigit(i) > 0)) { abort(); } if (i == 255) { break; } i += 1; } _ = c.printf(c"Success!\n"); return 0; }
2019-03-22 02:33:37 +00:00
}
pub fn isAlpha(c: u8) bool {
return inTable(c, tIndex.Alpha);
}
pub fn isCntrl(c: u8) bool {
return c < 0x20 or c == 127; //DEL
std: add ascii with C ASCII character classes Does NOT look at the locale the way the C functions do. int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); int toupper(int c); int tolower(int c); Tested to match glibc (when using C locale) with this program: const c = @cImport({ // See https://github.com/ziglang/zig/issues/515 @cDefine("_NO_CRT_STDIO_INLINE", "1"); @cInclude("stdio.h"); @cInclude("string.h"); @cInclude("ctype.h"); }); const std = @import("std"); const ascii = std.ascii; const abort = std.os.abort; export fn main(argc: c_int, argv: **u8) c_int { var i: u8 = undefined; i = 0; while (true) { if (ascii.isAlNum(i) != (c.isalnum(i) > 0)) { abort(); } if (ascii.isAlpha(i) != (c.isalpha(i) > 0)) { abort(); } if (ascii.isCtrl(i) != (c.iscntrl(i) > 0)) { abort(); } if (ascii.isDigit(i) != (c.isdigit(i) > 0)) { abort(); } if (ascii.isGraph(i) != (c.isgraph(i) > 0)) { abort(); } if (ascii.isLower(i) != (c.islower(i) > 0)) { abort(); } if (ascii.isPrint(i) != (c.isprint(i) > 0)) { abort(); } if (ascii.isPunct(i) != (c.ispunct(i) > 0)) { abort(); } if (ascii.isSpace(i) != (c.isspace(i) > 0)) { abort(); } if (ascii.isUpper(i) != (c.isupper(i) > 0)) { abort(); } if (ascii.isXDigit(i) != (c.isxdigit(i) > 0)) { abort(); } if (i == 255) { break; } i += 1; } _ = c.printf(c"Success!\n"); return 0; }
2019-03-22 02:33:37 +00:00
}
pub fn isDigit(c: u8) bool {
return inTable(c, tIndex.Digit);
}
pub fn isGraph(c: u8) bool {
return inTable(c, tIndex.Graph);
}
pub fn isLower(c: u8) bool {
return inTable(c, tIndex.Lower);
}
pub fn isPrint(c: u8) bool {
return inTable(c, tIndex.Graph) or c == ' ';
}
pub fn isPunct(c: u8) bool {
return inTable(c, tIndex.Punct);
}
pub fn isSpace(c: u8) bool {
return inTable(c, tIndex.Space);
}
pub fn isUpper(c: u8) bool {
return inTable(c, tIndex.Upper);
}
pub fn isXDigit(c: u8) bool {
return inTable(c, tIndex.Hex);
}
pub fn isASCII(c: u8) bool {
return c < 128;
}
pub fn isBlank(c: u8) bool {
return (c == ' ') or (c == '\x09');
}
pub fn toUpper(c: u8) u8 {
if (isLower(c)) {
return c & 0b11011111;
std: add ascii with C ASCII character classes Does NOT look at the locale the way the C functions do. int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); int toupper(int c); int tolower(int c); Tested to match glibc (when using C locale) with this program: const c = @cImport({ // See https://github.com/ziglang/zig/issues/515 @cDefine("_NO_CRT_STDIO_INLINE", "1"); @cInclude("stdio.h"); @cInclude("string.h"); @cInclude("ctype.h"); }); const std = @import("std"); const ascii = std.ascii; const abort = std.os.abort; export fn main(argc: c_int, argv: **u8) c_int { var i: u8 = undefined; i = 0; while (true) { if (ascii.isAlNum(i) != (c.isalnum(i) > 0)) { abort(); } if (ascii.isAlpha(i) != (c.isalpha(i) > 0)) { abort(); } if (ascii.isCtrl(i) != (c.iscntrl(i) > 0)) { abort(); } if (ascii.isDigit(i) != (c.isdigit(i) > 0)) { abort(); } if (ascii.isGraph(i) != (c.isgraph(i) > 0)) { abort(); } if (ascii.isLower(i) != (c.islower(i) > 0)) { abort(); } if (ascii.isPrint(i) != (c.isprint(i) > 0)) { abort(); } if (ascii.isPunct(i) != (c.ispunct(i) > 0)) { abort(); } if (ascii.isSpace(i) != (c.isspace(i) > 0)) { abort(); } if (ascii.isUpper(i) != (c.isupper(i) > 0)) { abort(); } if (ascii.isXDigit(i) != (c.isxdigit(i) > 0)) { abort(); } if (i == 255) { break; } i += 1; } _ = c.printf(c"Success!\n"); return 0; }
2019-03-22 02:33:37 +00:00
} else {
return c;
}
}
pub fn toLower(c: u8) u8 {
if (isUpper(c)) {
return c | 0b00100000;
std: add ascii with C ASCII character classes Does NOT look at the locale the way the C functions do. int isalnum(int c); int isalpha(int c); int iscntrl(int c); int isdigit(int c); int isgraph(int c); int islower(int c); int isprint(int c); int ispunct(int c); int isspace(int c); int isupper(int c); int isxdigit(int c); int isascii(int c); int isblank(int c); int toupper(int c); int tolower(int c); Tested to match glibc (when using C locale) with this program: const c = @cImport({ // See https://github.com/ziglang/zig/issues/515 @cDefine("_NO_CRT_STDIO_INLINE", "1"); @cInclude("stdio.h"); @cInclude("string.h"); @cInclude("ctype.h"); }); const std = @import("std"); const ascii = std.ascii; const abort = std.os.abort; export fn main(argc: c_int, argv: **u8) c_int { var i: u8 = undefined; i = 0; while (true) { if (ascii.isAlNum(i) != (c.isalnum(i) > 0)) { abort(); } if (ascii.isAlpha(i) != (c.isalpha(i) > 0)) { abort(); } if (ascii.isCtrl(i) != (c.iscntrl(i) > 0)) { abort(); } if (ascii.isDigit(i) != (c.isdigit(i) > 0)) { abort(); } if (ascii.isGraph(i) != (c.isgraph(i) > 0)) { abort(); } if (ascii.isLower(i) != (c.islower(i) > 0)) { abort(); } if (ascii.isPrint(i) != (c.isprint(i) > 0)) { abort(); } if (ascii.isPunct(i) != (c.ispunct(i) > 0)) { abort(); } if (ascii.isSpace(i) != (c.isspace(i) > 0)) { abort(); } if (ascii.isUpper(i) != (c.isupper(i) > 0)) { abort(); } if (ascii.isXDigit(i) != (c.isxdigit(i) > 0)) { abort(); } if (i == 255) { break; } i += 1; } _ = c.printf(c"Success!\n"); return 0; }
2019-03-22 02:33:37 +00:00
} else {
return c;
}
}
test "ascii character classes" {
const testing = std.testing;
testing.expect('C' == toUpper('c'));
testing.expect(':' == toUpper(':'));
testing.expect('\xab' == toUpper('\xab'));
testing.expect('c' == toLower('C'));
testing.expect(isAlpha('c'));
testing.expect(!isAlpha('5'));
testing.expect(isSpace(' '));
}
pub fn allocLowerString(allocator: *std.mem.Allocator, ascii_string: []const u8) ![]u8 {
const result = try allocator.alloc(u8, ascii_string.len);
for (result) |*c, i| {
c.* = toLower(ascii_string[i]);
}
return result;
}
test "allocLowerString" {
const result = try allocLowerString(std.testing.allocator, "aBcDeFgHiJkLmNOPqrst0234+💩!");
defer std.testing.allocator.free(result);
std.testing.expect(std.mem.eql(u8, "abcdefghijklmnopqrst0234+💩!", result));
}
pub fn eqlIgnoreCase(a: []const u8, b: []const u8) bool {
if (a.len != b.len) return false;
for (a) |a_c, i| {
if (toLower(a_c) != toLower(b[i])) return false;
}
return true;
}
test "eqlIgnoreCase" {
std.testing.expect(eqlIgnoreCase("HEl💩Lo!", "hel💩lo!"));
std.testing.expect(!eqlIgnoreCase("hElLo!", "hello! "));
std.testing.expect(!eqlIgnoreCase("hElLo!", "helro!"));
}
/// Finds `substr` in `container`, starting at `start_index`.
/// TODO boyer-moore algorithm
pub fn indexOfIgnoreCasePos(container: []const u8, start_index: usize, substr: []const u8) ?usize {
if (substr.len > container.len) return null;
var i: usize = start_index;
const end = container.len - substr.len;
while (i <= end) : (i += 1) {
if (eqlIgnoreCase(container[i .. i + substr.len], substr)) return i;
}
return null;
}
/// Finds `substr` in `container`, starting at `start_index`.
pub fn indexOfIgnoreCase(container: []const u8, substr: []const u8) ?usize {
return indexOfIgnoreCasePos(container, 0, substr);
}
test "indexOfIgnoreCase" {
std.testing.expect(indexOfIgnoreCase("one Two Three Four", "foUr").? == 14);
std.testing.expect(indexOfIgnoreCase("one two three FouR", "gOur") == null);
std.testing.expect(indexOfIgnoreCase("foO", "Foo").? == 0);
std.testing.expect(indexOfIgnoreCase("foo", "fool") == null);
std.testing.expect(indexOfIgnoreCase("FOO foo", "fOo").? == 0);
}