Initial commit

This commit is contained in:
Mid
2025-08-31 16:22:38 +03:00
commit 64c21ca43a
62 changed files with 13346 additions and 0 deletions

1340
stc/priv/cregex_prv.c Normal file

File diff suppressed because it is too large Load Diff

291
stc/priv/cstr_prv.c Normal file
View File

@@ -0,0 +1,291 @@
/* MIT License
*
* Copyright (c) 2025 Tyge Løvset
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// ------------------- STC_CSTR_CORE --------------------
#if !defined STC_CSTR_CORE_C_INCLUDED && \
(defined i_implement || defined STC_CSTR_CORE)
#define STC_CSTR_CORE_C_INCLUDED
/* Free the heap buffer of a long string; a short (inline) string has nothing to free. */
void cstr_drop(const cstr* self) {
    if (!cstr_is_long(self))
        return;
    cstr_l_drop(self);
}
/* Replace the contents of `self` with `s` (by value) and return `self`.
 * The old heap buffer is freed unless `s` refers to that same buffer. */
cstr* cstr_take(cstr* self, const cstr s) {
    if (cstr_is_long(self)) {
        if (self->lon.data != s.lon.data)
            cstr_l_drop(self);
    }
    *self = s;
    return self;
}
/* Hash the string's character data with c_hash_str(). */
size_t cstr_hash(const cstr *self) {
    const csview view = cstr_sv(self);
    const char* text = view.buf;
    return c_hash_str(text);
}
/* Return the byte offset of the first occurrence of `search` in `self`,
 * or c_NPOS when c_strnstrn() finds no match. */
isize cstr_find_sv(const cstr* self, csview search) {
    const csview hay = cstr_sv(self);
    const char* hit = c_strnstrn(hay.buf, hay.size, search.buf, search.size);
    if (hit == NULL)
        return c_NPOS;
    return hit - hay.buf;
}
/* Relocate the tail of the string that begins at byte offset pos1 so that it
 * begins at pos2 instead, and adjust the stored size by (pos2 - pos1).
 * Returns the data pointer, which may differ from the old one if the buffer grew.
 * NOTE(review): the result of cstr_reserve() is used without a NULL check. */
char* _cstr_internal_move(cstr* self, const isize pos1, const isize pos2) {
cstr_buf b = cstr_getbuf(self);
if (pos1 != pos2) {
const isize newlen = (b.size + pos2 - pos1);
if (newlen > b.cap)
/* request 1.5x the current size plus the size delta */
b.data = cstr_reserve(self, b.size*3/2 + pos2 - pos1);
/* moves b.size - pos1 bytes; _cstr_set_size() then writes the terminator */
c_memmove(&b.data[pos2], &b.data[pos1], b.size - pos1);
_cstr_set_size(self, newlen);
}
return b.data;
}
/* Initialise `self` with size `len` and room for at least `cap` bytes.
 * Uses the inline buffer when `cap` fits in cstr_s_cap, otherwise allocates
 * cap + 1 bytes. Returns the data pointer; contents are left for the caller to fill. */
char* _cstr_init(cstr* self, const isize len, const isize cap) {
    if (cap <= cstr_s_cap) {
        cstr_s_set_size(self, len);
        return self->sml.data;
    }
    self->lon.data = (char *)c_malloc(cap + 1);
    cstr_l_set_size(self, len);
    cstr_l_set_cap(self, cap);
    return self->lon.data;
}
/* Ensure `self` can hold at least `cap` bytes (plus terminator).
 * Returns the data pointer, or NULL if allocation failed. On failure the
 * string is left unchanged (old buffer, size and capacity remain valid). */
char* cstr_reserve(cstr* self, const isize cap) {
    if (cstr_is_long(self)) {
        if (cap > cstr_l_cap(self)) {
            /* Grow through a temporary: assigning the realloc result straight
               to lon.data would lose the old buffer when the allocator fails. */
            char* grown = (char *)c_realloc(self->lon.data, cstr_l_cap(self) + 1, cap + 1);
            if (grown == NULL)
                return NULL;
            self->lon.data = grown;
            cstr_l_set_cap(self, cap);
        }
        return self->lon.data;
    }
    /* from short to long: */
    if (cap > cstr_s_cap) {
        char* data = (char *)c_malloc(cap + 1);
        if (data == NULL)
            return NULL;
        const isize len = cstr_s_size(self);
        /* copy full short buffer to emulate realloc() */
        c_memcpy(data, self->sml.data, c_sizeof self->sml);
        self->lon.data = data;
        self->lon.size = (size_t)len;
        cstr_l_set_cap(self, cap);
        return data;
    }
    return self->sml.data;
}
/* Set the string's size to `size`. When growing, the new bytes are filled
 * with `value`. Returns the data pointer, or NULL if cstr_reserve() returned NULL. */
char* cstr_resize(cstr* self, const isize size, const char value) {
    cstr_buf buf = cstr_getbuf(self);
    if (size > buf.size) {
        if (size > buf.cap) {
            buf.data = cstr_reserve(self, size);
            if (buf.data == NULL)
                return NULL;
        }
        c_memset(buf.data + buf.size, value, size - buf.size);
    }
    _cstr_set_size(self, size);
    return buf.data;
}
/* Search for the C string `search` starting at byte offset `pos`.
 * Returns the offset of the match from the start of the string, or c_NPOS
 * when `pos` is past the end or strstr() finds nothing. */
isize cstr_find_at(const cstr* self, const isize pos, const char* search) {
    const csview hay = cstr_sv(self);
    if (pos > hay.size)
        return c_NPOS;
    const char* hit = strstr((char*)hay.buf + pos, search);
    if (hit == NULL)
        return c_NPOS;
    return hit - hay.buf;
}
/* Assign the `len` bytes at `str` to `self`.
 * Returns the data pointer, or NULL if cstr_reserve() returned NULL (self is
 * then not modified by this function). `str` may point into self's own buffer. */
char* cstr_assign_n(cstr* self, const char* str, const isize len) {
    char* d = cstr_reserve(self, len);
    if (d) {
        /* Move first, terminate afterwards: _cstr_set_size() writes d[len] = 0,
           which would clobber a source byte when `str` aliases self's buffer. */
        c_memmove(d, str, len);
        _cstr_set_size(self, len);
    }
    return d;
}
/* Append the `len` bytes at `str` to `self`, growing the buffer if needed.
 * Returns the data pointer, or NULL if cstr_reserve() returned NULL. */
char* cstr_append_n(cstr* self, const char* str, const isize len) {
cstr_buf b = cstr_getbuf(self);
if (b.size + len > b.cap) {
/* record where str lies relative to the current buffer before it may move */
const size_t off = (size_t)(str - b.data);
b.data = cstr_reserve(self, b.size*3/2 + len);
if (b.data == NULL) return NULL;
if (off <= (size_t)b.size) str = b.data + off; /* handle self append */
}
c_memcpy(b.data + b.size, str, len);
_cstr_set_size(self, b.size + len);
return b.data;
}
/* Build a new string from `in` with occurrences of `search` replaced by `repl`.
 * At most `count` replacements are made; count == 0 is treated as INT32_MAX.
 * An empty `search` performs no replacements. */
cstr cstr_from_replace(csview in, csview search, csview repl, int32_t count) {
    cstr out = cstr_init();
    isize from = 0;
    if (count == 0)
        count = INT32_MAX;
    if (search.size) {
        for (;;) {
            if (count-- == 0)
                break;
            const char* hit = c_strnstrn(in.buf + from, in.size - from, search.buf, search.size);
            if (hit == NULL)
                break;
            const isize pos = hit - in.buf;
            /* copy the unmatched stretch, then the replacement */
            cstr_append_n(&out, in.buf + from, pos - from);
            cstr_append_n(&out, repl.buf, repl.size);
            from = pos + search.size;
        }
    }
    /* copy whatever follows the last match */
    cstr_append_n(&out, in.buf + from, in.size - from);
    return out;
}
/* Remove `len` bytes starting at byte offset `pos`; `len` is clamped to the
 * number of bytes available after `pos`. */
void cstr_erase(cstr* self, const isize pos, isize len) {
    const cstr_buf buf = cstr_getbuf(self);
    const isize avail = buf.size - pos;
    if (len > avail)
        len = avail;
    const isize tail = pos + len;
    c_memmove(&buf.data[pos], &buf.data[tail], buf.size - tail);
    _cstr_set_size(self, buf.size - len);
}
/* Reduce the capacity to match the current size.
 * A long string whose content fits in the inline buffer is converted back to
 * the short representation. If the shrinking realloc fails, the string keeps
 * its current (larger) buffer. */
void cstr_shrink_to_fit(cstr* self) {
    cstr_buf b = cstr_getbuf(self);
    if (b.size == b.cap)
        return;
    if (b.size > cstr_s_cap) {
        /* Use a temporary so a failed realloc cannot lose the live buffer. */
        char* shrunk = (char *)c_realloc(self->lon.data, cstr_l_cap(self) + 1, b.size + 1);
        if (shrunk == NULL)
            return;
        self->lon.data = shrunk;
        cstr_l_set_cap(self, b.size);
    } else if (b.cap > cstr_s_cap) {
        /* long -> short: copy content (with terminator) inline, then free the heap block */
        c_memcpy(self->sml.data, b.data, b.size + 1);
        cstr_s_set_size(self, b.size);
        c_free(b.data, b.cap + 1);
    }
}
#endif // STC_CSTR_CORE_C_INCLUDED
// ------------------- STC_CSTR_IO --------------------
#if !defined STC_CSTR_IO_C_INCLUDED && \
(defined i_import || defined STC_CSTR_IO)
#define STC_CSTR_IO_C_INCLUDED
/* Extend the string by `len` bytes without initialising them.
 * Returns a pointer to the first new byte, or NULL if cstr_reserve() returned NULL. */
char* cstr_append_uninit(cstr *self, isize len) {
    cstr_buf buf = cstr_getbuf(self);
    if (buf.size + len > buf.cap) {
        buf.data = cstr_reserve(self, buf.size*3/2 + len);
        if (buf.data == NULL)
            return NULL;
    }
    _cstr_set_size(self, buf.size + len);
    return buf.data + buf.size;
}
/* Read bytes from `fp` into `self` (overwriting it from offset 0) until
 * `delim` or EOF; the delimiter is not stored.
 * Returns false if EOF is hit before any byte is read, or if the buffer
 * could not be grown; true otherwise. */
bool cstr_getdelim(cstr *self, const int delim, FILE *fp) {
    int c = fgetc(fp);
    if (c == EOF)
        return false;
    isize pos = 0;
    cstr_buf b = cstr_getbuf(self);
    for (;;) {
        if (c == delim || c == EOF) {
            _cstr_set_size(self, pos);
            return true;
        }
        if (pos == b.cap) {
            /* record the size before growing so the stored size matches what was read */
            _cstr_set_size(self, pos);
            char* data = cstr_reserve(self, (b.cap = b.cap*3/2 + 16));
            if (data == NULL)
                return false;  /* allocation failed: stop instead of writing through NULL */
            b.data = data;
        }
        b.data[pos++] = (char) c;
        c = fgetc(fp);
    }
}
/* Format into `self` starting at byte offset `start`, replacing whatever
 * followed that offset.
 * Returns the number of bytes produced by the format, or a negative value if
 * vsnprintf() reported an error or the buffer could not be grown; in that
 * case `self` is left unchanged. */
isize cstr_vfmt(cstr* self, isize start, const char* fmt, va_list args) {
    va_list args2;
    va_copy(args2, args);
    /* first pass only measures the output length (terminator excluded) */
    int n = vsnprintf(NULL, 0, fmt, args);
    if (n >= 0) {
        char* data = cstr_reserve(self, start + n);
        if (data == NULL) {
            n = -1;
        } else {
            vsnprintf(data + start, (size_t)n + 1, fmt, args2);
            _cstr_set_size(self, start + n);
        }
    }
    va_end(args2);
    return n;
}
/* Create a new string from a printf-style format and arguments. */
cstr cstr_from_fmt(const char* fmt, ...) {
    cstr result = cstr_init();
    va_list ap;
    va_start(ap, fmt);
    cstr_vfmt(&result, 0, fmt, ap);
    va_end(ap);
    return result;
}
/* Append printf-style formatted text to `self`; returns what cstr_vfmt() returns. */
isize cstr_append_fmt(cstr* self, const char* fmt, ...) {
    const isize old_size = cstr_size(self);
    va_list ap;
    va_start(ap, fmt);
    const isize written = cstr_vfmt(self, old_size, fmt, ap);
    va_end(ap);
    return written;
}
/* Overwrite `self` with printf-style formatted text; returns what cstr_vfmt() returns.
 * NB! passing self's own data as a format argument is UB. */
isize cstr_printf(cstr* self, const char* fmt, ...) {
    va_list ap;
    va_start(ap, fmt);
    const isize written = cstr_vfmt(self, 0, fmt, ap);
    va_end(ap);
    return written;
}
#endif // STC_CSTR_IO_C_INCLUDED
// ------------------- STC_CSTR_UTF8 --------------------
#if !defined STC_CSTR_UTF8_C_INCLUDED && \
(defined i_import || defined STC_CSTR_UTF8 || defined STC_UTF8_PRV_C_INCLUDED)
#define STC_CSTR_UTF8_C_INCLUDED
#include <ctype.h>
/* Erase the span that utf8_subview() selects for (u8pos, u8len) — presumably
 * u8len code points starting at code point index u8pos; confirm against utf8_subview().
 * Bytes after the span are moved down and the byte size shrinks by span.size. */
void cstr_u8_erase(cstr* self, const isize u8pos, const isize u8len) {
csview b = cstr_sv(self);
csview span = utf8_subview(b.buf, u8pos, u8len);
/* count = bytes that follow the span: total - span length - span start offset */
c_memmove((void *)&span.buf[0], &span.buf[span.size], b.size - span.size - (span.buf - b.buf));
_cstr_set_size(self, b.size - span.size);
}
/* Return the result of utf8_valid() on the string's character data. */
bool cstr_u8_valid(const cstr* self) {
    const char* text = cstr_str(self);
    return utf8_valid(text);
}
/* Map 'A'..'Z' to the corresponding lower-case letter by adding 32;
 * every other value is returned unchanged. */
static int toLower(int c) {
    if (c < 'A' || c > 'Z')
        return c;
    return c + 32;
}
/* Map 'a'..'z' to the corresponding upper-case letter by subtracting 32;
 * every other value is returned unchanged. */
static int toUpper(int c) {
    if (c < 'a' || c > 'z')
        return c;
    return c - 32;
}
/* Conversion table indexed by the `k` argument of cstr_tocase_sv():
 * row 0 = case folding, row 1 = lower-casing, row 2 = upper-casing.
 * conv_asc handles code points below 0x80, conv_utf handles the rest. */
static struct {
int (*conv_asc)(int);
uint32_t (*conv_utf)(uint32_t);
}
fn_tocase[] = {{toLower, utf8_casefold},
{toLower, utf8_tolower},
{toUpper, utf8_toupper}};
/* Return a new string holding `sv` converted with row `k` of fn_tocase
 * (0 = casefold, 1 = lower, 2 = upper).
 * NOTE(review): `k` is not range-checked and the cstr_reserve() result is
 * used without a NULL check. */
cstr cstr_tocase_sv(csview sv, int k) {
cstr out = {0};
/* reserve 1.5x the input size up front; output is written without further growth */
char *buf = cstr_reserve(&out, sv.size*3/2);
isize sz = 0;
utf8_decode_t d = {.state=0};
const char* end = sv.buf + sv.size;
while (sv.buf < end) {
/* decode one code point into d.codep and advance by the bytes consumed */
sv.buf += utf8_decode_codepoint(&d, sv.buf, end);
if (d.codep < 0x80)
buf[sz++] = (char)fn_tocase[k].conv_asc((int)d.codep);
else {
uint32_t cp = fn_tocase[k].conv_utf(d.codep);
sz += utf8_encode(buf + sz, cp);
}
}
_cstr_set_size(&out, sz);
/* give back the unused part of the reservation */
cstr_shrink_to_fit(&out);
return out;
}
#endif // i_import STC_CSTR_UTF8_C_INCLUDED

420
stc/priv/cstr_prv.h Normal file
View File

@@ -0,0 +1,420 @@
/* MIT License
*
* Copyright (c) 2025 Tyge Løvset
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// IWYU pragma: private, include "stc/cstr.h"
#ifndef STC_CSTR_PRV_H_INCLUDED
#define STC_CSTR_PRV_H_INCLUDED
#include <stdio.h> /* FILE*, vsnprintf */
#include <stdlib.h> /* malloc */
#include <stddef.h> /* size_t */
#include <stdarg.h> /* cstr_vfmt() */
/**************************** PRIVATE API **********************************/
#if defined __GNUC__ && !defined __clang__
// linkage.h already does diagnostic push
// Warns wrongfully on -O3 on cstr_assign(&str, "literal longer than 23 ...");
#pragma GCC diagnostic ignored "-Warray-bounds"
#endif
/* Capacity of the inline (short) representation, derived from sizeof(cstr_buf). */
enum { cstr_s_cap = sizeof(cstr_buf) - 2 };
/* --- short-representation accessors (operate on the `sml` member) --- */
#define cstr_s_size(s) ((isize)(s)->sml.size)
/* stores the size and writes the terminating 0 at data[len] */
#define cstr_s_set_size(s, len) ((s)->sml.data[(s)->sml.size = (uint8_t)(len)] = 0)
#define cstr_s_data(s) (s)->sml.data
/* --- long-representation capacity: kept bitwise-complemented in lon.ncap.
 * On big-endian targets the value is additionally rotated by one byte. --- */
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define byte_rotl_(x, b) ((x) << (b)*8 | (x) >> (sizeof(x) - (b))*8)
#define cstr_l_cap(s) (isize)(~byte_rotl_((s)->lon.ncap, sizeof((s)->lon.ncap) - 1))
#define cstr_l_set_cap(s, cap) ((s)->lon.ncap = ~byte_rotl_((uintptr_t)(cap), 1))
#else
#define cstr_l_cap(s) (isize)(~(s)->lon.ncap)
#define cstr_l_set_cap(s, cap) ((s)->lon.ncap = ~(uintptr_t)(cap))
#endif
/* --- long-representation accessors (operate on the `lon` member) --- */
#define cstr_l_size(s) (isize)((s)->lon.size)
/* stores the size and writes the terminating 0 at data[len] */
#define cstr_l_set_size(s, len) ((s)->lon.data[(s)->lon.size = (uintptr_t)(len)] = 0)
#define cstr_l_data(s) (s)->lon.data
/* frees the heap buffer, passing its allocated size (capacity + 1) */
#define cstr_l_drop(s) c_free((s)->lon.data, cstr_l_cap(s) + 1)
/* A string is in long representation when sml.size >= 128
 * (presumably sml.size overlaps a byte of the complemented lon.ncap — confirm
 * against the cstr type declaration). */
#define cstr_is_long(s) ((s)->sml.size >= 128)
/* Internal helpers implemented in cstr_prv.c. */
extern char* _cstr_init(cstr* self, isize len, isize cap);
extern char* _cstr_internal_move(cstr* self, isize pos1, isize pos2);
/**************************** PUBLIC API **********************************/
/* An empty string is the all-zero value. */
#define cstr_init() (c_literal(cstr){0})
/* Construct from a string literal; the length comes from c_litstrlen(). */
#define cstr_lit(literal) cstr_from_n(literal, c_litstrlen(literal))
/* Functions below are defined out of line in cstr_prv.c. */
extern cstr cstr_from_replace(csview sv, csview search, csview repl, int32_t count);
extern cstr cstr_from_fmt(const char* fmt, ...) c_GNUATTR(format(printf, 1, 2));
extern void cstr_drop(const cstr* self);
extern cstr* cstr_take(cstr* self, const cstr s);
extern char* cstr_reserve(cstr* self, isize cap);
extern void cstr_shrink_to_fit(cstr* self);
extern char* cstr_resize(cstr* self, isize size, char value);
extern isize cstr_find_at(const cstr* self, isize pos, const char* search);
extern isize cstr_find_sv(const cstr* self, csview search);
extern char* cstr_assign_n(cstr* self, const char* str, isize len);
extern char* cstr_append_n(cstr* self, const char* str, isize len);
extern isize cstr_append_fmt(cstr* self, const char* fmt, ...) c_GNUATTR(format(printf, 2, 3));
extern char* cstr_append_uninit(cstr *self, isize len);
extern bool cstr_getdelim(cstr *self, int delim, FILE *fp);
extern void cstr_erase(cstr* self, isize pos, isize len);
extern isize cstr_printf(cstr* self, const char* fmt, ...) c_GNUATTR(format(printf, 2, 3));
extern isize cstr_vfmt(cstr* self, isize start, const char* fmt, va_list args);
extern size_t cstr_hash(const cstr *self);
extern bool cstr_u8_valid(const cstr* self);
extern void cstr_u8_erase(cstr* self, isize u8pos, isize u8len);
/* Return {data, size, capacity} for whichever representation `s` is using. */
STC_INLINE cstr_buf cstr_getbuf(cstr* s) {
    if (cstr_is_long(s))
        return c_literal(cstr_buf){s->lon.data, cstr_l_size(s), cstr_l_cap(s)};
    return c_literal(cstr_buf){s->sml.data, cstr_s_size(s), cstr_s_cap};
}
/* Return a zsview {data, size} over the string's current contents. */
STC_INLINE zsview cstr_zv(const cstr* s) {
    if (cstr_is_long(s))
        return c_literal(zsview){s->lon.data, cstr_l_size(s)};
    return c_literal(zsview){s->sml.data, cstr_s_size(s)};
}
/* Return a csview {data, size} over the string's current contents. */
STC_INLINE csview cstr_sv(const cstr* s) {
    if (cstr_is_long(s))
        return c_literal(csview){s->lon.data, cstr_l_size(s)};
    return c_literal(csview){s->sml.data, cstr_s_size(s)};
}
/* Construct a string holding a copy of the `len` bytes at `str`. */
STC_INLINE cstr cstr_from_n(const char* str, const isize len) {
    cstr out;
    char* dst = _cstr_init(&out, len, len);
    c_memcpy(dst, str, len);
    return out;
}
/* Construct from a NUL-terminated C string; length is taken with c_strlen(). */
STC_INLINE cstr cstr_from(const char* str)
{ return cstr_from_n(str, c_strlen(str)); }
/* Construct from the sv.size bytes at sv.buf. */
STC_INLINE cstr cstr_from_sv(csview sv)
{ return cstr_from_n(sv.buf, sv.size); }
/* Construct from the zv.size bytes at zv.str. */
STC_INLINE cstr cstr_from_zv(zsview zv)
{ return cstr_from_n(zv.str, zv.size); }
/* Construct a string of `size` bytes, each set to `value`. */
STC_INLINE cstr cstr_with_size(const isize size, const char value) {
    cstr out;
    char* dst = _cstr_init(&out, size, size);
    c_memset(dst, value, size);
    return out;
}
/* Construct an empty string with room for at least `cap` bytes. */
STC_INLINE cstr cstr_with_capacity(const isize cap) {
    cstr out;
    (void)_cstr_init(&out, 0, cap);
    return out;
}
/* Return the current value of `*self` and reset `*self` to the empty string. */
STC_INLINE cstr cstr_move(cstr* self) {
    const cstr moved = *self;
    *self = cstr_init();
    return moved;
}
/* Return an independent copy of `s`. */
STC_INLINE cstr cstr_clone(cstr s) {
    const csview src = cstr_sv(&s);
    cstr copy = cstr_from_n(src.buf, src.size);
    return copy;
}
/* Dispatch `call` to the cstr_l_* or cstr_s_* macro depending on the representation of `s`. */
#define SSO_CALL(s, call) (cstr_is_long(s) ? cstr_l_##call : cstr_s_##call)
/* Store the size and write the terminating 0 at data[len]; does not grow the buffer. */
STC_INLINE void _cstr_set_size(cstr* self, isize len)
{ SSO_CALL(self, set_size(self, len)); }
/* Set the size to 0; capacity is untouched. */
STC_INLINE void cstr_clear(cstr* self)
{ _cstr_set_size(self, 0); }
/* Mutable pointer to the character data. */
STC_INLINE char* cstr_data(cstr* self)
{ return SSO_CALL(self, data(self)); }
/* Read-only pointer to the character data. */
STC_INLINE const char* cstr_str(const cstr* self)
{ return SSO_CALL(self, data(self)); }
/* Same result as cstr_str(). */
STC_INLINE const char* cstr_toraw(const cstr* self)
{ return SSO_CALL(self, data(self)); }
/* Size in bytes. */
STC_INLINE isize cstr_size(const cstr* self)
{ return SSO_CALL(self, size(self)); }
STC_INLINE bool cstr_is_empty(const cstr* self)
{ return cstr_size(self) == 0; }
/* Capacity in bytes: cstr_s_cap for short strings, the stored capacity for long ones. */
STC_INLINE isize cstr_capacity(const cstr* self)
{ return cstr_is_long(self) ? cstr_l_cap(self) : cstr_s_cap; }
/* Byte offset of iterator position it.ref from the start of the string. */
STC_INLINE isize cstr_to_index(const cstr* self, cstr_iter it)
{ return it.ref - cstr_str(self); }
/* New string from `len` bytes of `s` starting at byte offset `pos` (no bounds checking here). */
STC_INLINE cstr cstr_from_s(cstr s, isize pos, isize len)
{ return cstr_from_n(cstr_str(&s) + pos, len); }
/* View of up to `len` bytes starting at byte offset `pos`; the length is
 * clamped to the end of the string. Asserts 0 <= pos <= size and len >= 0. */
STC_INLINE csview cstr_subview(const cstr* self, isize pos, isize len) {
    const csview sv = cstr_sv(self);
    c_assert(((size_t)pos <= (size_t)sv.size) & (len >= 0));
    const isize n = (pos + len > sv.size) ? sv.size - pos : len;
    return c_literal(csview){sv.buf + pos, n};
}
/* View of the last `len` bytes (clamped to the string's size). Asserts len >= 0. */
STC_INLINE zsview cstr_tail(const cstr* self, isize len) {
    c_assert(len >= 0);
    const csview sv = cstr_sv(self);
    const isize n = (len > sv.size) ? sv.size : len;
    return c_literal(zsview){sv.buf + (sv.size - n), n};
}
// BEGIN utf8 functions =====
/* New string from `str`: start located with utf8_at(str, u8pos), byte length
 * obtained from utf8_to_index(start, u8len). */
STC_INLINE cstr cstr_u8_from(const char* str, isize u8pos, isize u8len)
{ str = utf8_at(str, u8pos); return cstr_from_n(str, utf8_to_index(str, u8len)); }
/* Result of utf8_count() on the string data (presumably the code point count). */
STC_INLINE isize cstr_u8_size(const cstr* self)
{ return utf8_count(cstr_str(self)); }
/* Result of utf8_to_index() for position u8pos (presumably its byte offset). */
STC_INLINE isize cstr_u8_to_index(const cstr* self, isize u8pos)
{ return utf8_to_index(cstr_str(self), u8pos); }
/* View of the trailing part of the string that starts `u8len` non-continuation
 * bytes (bytes not of the form 10xxxxxx) back from the end, or at the start of
 * the string if there are fewer. */
STC_INLINE zsview cstr_u8_tail(const cstr* self, isize u8len) {
    const csview sv = cstr_sv(self);
    const char* p = sv.buf + sv.size;
    while (u8len != 0 && p != sv.buf) {
        --p;
        if ((*p & 0xC0) != 0x80)
            --u8len;
    }
    return c_literal(zsview){p, sv.size - (p - sv.buf)};
}
/* Result of utf8_subview() on the string data for (u8pos, u8len). */
STC_INLINE csview cstr_u8_subview(const cstr* self, isize u8pos, isize u8len)
{ return utf8_subview(cstr_str(self), u8pos, u8len); }
/* Iterator whose chr view starts at utf8_at(data, u8pos) and spans
 * utf8_chr_size() bytes; asserts that this size is non-zero. */
STC_INLINE cstr_iter cstr_u8_at(const cstr* self, isize u8pos) {
csview sv;
sv.buf = utf8_at(cstr_str(self), u8pos);
sv.size = utf8_chr_size(sv.buf);
c_assert(sv.size);
return c_literal(cstr_iter){.chr = sv};
}
// utf8 iterator
/* Iterator at the start of the string; chr spans utf8_chr_size() bytes. */
STC_INLINE cstr_iter cstr_begin(const cstr* self) {
csview sv = cstr_sv(self);
cstr_iter it = {.chr = {sv.buf, utf8_chr_size(sv.buf)}};
return it;
}
/* End iterator: the all-zero value. */
STC_INLINE cstr_iter cstr_end(const cstr* self) {
(void)self; cstr_iter it = {0}; return it;
}
/* Step past the current character; ref becomes NULL on reaching the terminating '\0'. */
STC_INLINE void cstr_next(cstr_iter* it) {
it->ref += it->chr.size;
it->chr.size = utf8_chr_size(it->ref);
if (*it->ref == '\0') it->ref = NULL;
}
/* Move the iterator by u8pos positions via utf8_offset(); ref becomes NULL
 * when the new position is the terminating '\0'. */
STC_INLINE cstr_iter cstr_advance(cstr_iter it, isize u8pos) {
it.ref = utf8_offset(it.ref, u8pos);
it.chr.size = utf8_chr_size(it.ref);
if (*it.ref == '\0') it.ref = NULL;
return it;
}
// utf8 case conversion: requires `#define i_import` before including cstr.h in one TU.
// cstr_tocase_sv() selects the conversion with k: 0 = casefold, 1 = lower, 2 = upper.
extern cstr cstr_tocase_sv(csview sv, int k);
STC_INLINE cstr cstr_casefold_sv(csview sv)
{ return cstr_tocase_sv(sv, 0); }
STC_INLINE cstr cstr_tolower_sv(csview sv)
{ return cstr_tocase_sv(sv, 1); }
STC_INLINE cstr cstr_toupper_sv(csview sv)
{ return cstr_tocase_sv(sv, 2); }
// Variants taking a NUL-terminated C string.
STC_INLINE cstr cstr_tolower(const char* str)
{ return cstr_tolower_sv(c_sv(str, c_strlen(str))); }
STC_INLINE cstr cstr_toupper(const char* str)
{ return cstr_toupper_sv(c_sv(str, c_strlen(str))); }
// In-place variants: build the converted string, then replace self with it via cstr_take().
STC_INLINE void cstr_lowercase(cstr* self)
{ cstr_take(self, cstr_tolower_sv(cstr_sv(self))); }
STC_INLINE void cstr_uppercase(cstr* self)
{ cstr_take(self, cstr_toupper_sv(cstr_sv(self))); }
/* True when `sub` is no longer than the string and utf8_icompare() reports
 * the first strlen(sub) bytes of the string equal to `sub`. */
STC_INLINE bool cstr_istarts_with(const cstr* self, const char* sub) {
    csview head = cstr_sv(self);
    const isize len = c_strlen(sub);
    if (len > head.size)
        return false;
    head.size = len;
    return !utf8_icompare(head, c_sv(sub, len));
}
/* True when `sub` is no longer than the string and utf8_icmp() reports the
 * last strlen(sub) bytes of the string equal to `sub`. */
STC_INLINE bool cstr_iends_with(const cstr* self, const char* sub) {
    const csview sv = cstr_sv(self);
    const isize len = c_strlen(sub);
    if (len > sv.size)
        return false;
    const char* tail = sv.buf + sv.size - len;
    return !utf8_icmp(tail, sub);
}
/* Compare two strings with utf8_icmp(); returns its result. */
STC_INLINE int cstr_icmp(const cstr* s1, const cstr* s2)
{ return utf8_icmp(cstr_str(s1), cstr_str(s2)); }
/* True when byte sizes match and utf8_icompare() returns 0. */
STC_INLINE bool cstr_ieq(const cstr* s1, const cstr* s2) {
csview x = cstr_sv(s1), y = cstr_sv(s2);
return x.size == y.size && !utf8_icompare(x, y);
}
/* True when utf8_icmp() against the C string `str` returns 0. */
STC_INLINE bool cstr_iequals(const cstr* self, const char* str)
{ return !utf8_icmp(cstr_str(self), str); }
// END utf8 =====
/* strcmp() ordering of the two strings' data. */
STC_INLINE int cstr_cmp(const cstr* s1, const cstr* s2)
{ return strcmp(cstr_str(s1), cstr_str(s2)); }
/* True when sizes match and all bytes compare equal. */
STC_INLINE bool cstr_eq(const cstr* s1, const cstr* s2) {
csview x = cstr_sv(s1), y = cstr_sv(s2);
return x.size == y.size && !c_memcmp(x.buf, y.buf, x.size);
}
/* True when strcmp() against the C string `str` returns 0. */
STC_INLINE bool cstr_equals(const cstr* self, const char* str)
{ return !strcmp(cstr_str(self), str); }
/* True when sizes match and all bytes compare equal to the view `sv`. */
STC_INLINE bool cstr_equals_sv(const cstr* self, csview sv)
{ return sv.size == cstr_size(self) && !c_memcmp(cstr_str(self), sv.buf, sv.size); }
/* Byte offset of the first strstr() match of `search`, or c_NPOS if none. */
STC_INLINE isize cstr_find(const cstr* self, const char* search) {
const char *str = cstr_str(self), *res = strstr((char*)str, search);
return res ? (res - str) : c_NPOS;
}
/* True when strstr() finds `search` in the string. */
STC_INLINE bool cstr_contains(const cstr* self, const char* search)
{ return strstr((char*)cstr_str(self), search) != NULL; }
/* True when cstr_find_sv() finds the view `search`. */
STC_INLINE bool cstr_contains_sv(const cstr* self, csview search)
{ return cstr_find_sv(self, search) != c_NPOS; }
/* True when the string begins with the bytes of `sub`. */
STC_INLINE bool cstr_starts_with_sv(const cstr* self, csview sub) {
    return sub.size <= cstr_size(self)
        && !c_memcmp(cstr_str(self), sub.buf, sub.size);
}
/* True when the string begins with the NUL-terminated C string `sub`. */
STC_INLINE bool cstr_starts_with(const cstr* self, const char* sub) {
    const char* str = cstr_str(self);
    for (; *sub != '\0'; ++str, ++sub) {
        if (*str != *sub)
            return false;
    }
    return true;
}
/* True when the string ends with the bytes of `sub`. */
STC_INLINE bool cstr_ends_with_sv(const cstr* self, csview sub) {
csview sv = cstr_sv(self);
if (sub.size > sv.size) return false;
/* compare the last sub.size bytes of the string */
return !c_memcmp(sv.buf + sv.size - sub.size, sub.buf, sub.size);
}
/* Same test for a NUL-terminated C string. */
STC_INLINE bool cstr_ends_with(const cstr* self, const char* sub)
{ return cstr_ends_with_sv(self, c_sv(sub, c_strlen(sub))); }
/* Assign a NUL-terminated C string; returns what cstr_assign_n() returns. */
STC_INLINE char* cstr_assign(cstr* self, const char* str)
{ return cstr_assign_n(self, str, c_strlen(str)); }
/* Assign the bytes of the view `sv`. */
STC_INLINE char* cstr_assign_sv(cstr* self, csview sv)
{ return cstr_assign_n(self, sv.buf, sv.size); }
/* Assign a copy of the contents of `s` to `self`. */
STC_INLINE char* cstr_copy(cstr* self, cstr s) {
csview sv = cstr_sv(&s);
return cstr_assign_n(self, sv.buf, sv.size);
}
/* Append the single character at `chr`, whose byte length comes from utf8_chr_size(). */
STC_INLINE char* cstr_push(cstr* self, const char* chr)
{ return cstr_append_n(self, chr, utf8_chr_size(chr)); }
/* Remove the last UTF-8 character from the string.
 * Does nothing on an empty string. */
STC_INLINE void cstr_pop(cstr* self) {
    csview sv = cstr_sv(self);
    if (sv.size == 0)
        return;  /* nothing to remove; also avoids reading before the buffer */
    const char* s = sv.buf + sv.size;
    /* Step back over continuation bytes (10xxxxxx) to the lead byte, but
       never past the first byte of the string. */
    while (--s > sv.buf && (*s & 0xC0) == 0x80) { }
    _cstr_set_size(self, (s - sv.buf));
}
/* Append a NUL-terminated C string; returns what cstr_append_n() returns. */
STC_INLINE char* cstr_append(cstr* self, const char* str)
{ return cstr_append_n(self, str, c_strlen(str)); }
/* Append the bytes of the view `sv`. */
STC_INLINE char* cstr_append_sv(cstr* self, csview sv)
{ return cstr_append_n(self, sv.buf, sv.size); }
/* Append the contents of the string `s` (passed by value). */
STC_INLINE char* cstr_append_s(cstr* self, cstr s)
{ return cstr_append_sv(self, cstr_sv(&s)); }
/* Append every cstr element of `vec` to `self`, separated by `sep`.
 * `vec` is reinterpreted as { cstr* data; ptrdiff_t size; }; the unevaluated
 * sizeof expression type-checks that `vec` really has those two members.
 * The expansion deliberately has no trailing semicolon, so that
 * `if (c) cstr_join(...); else ...` parses as a single statement. */
#define cstr_join(self, sep, vec) do { \
    struct _vec_s { cstr* data; ptrdiff_t size; } \
        *_vec = (struct _vec_s*)&(vec); \
    (void)sizeof((vec).data == _vec->data && &(vec).size == &_vec->size); \
    cstr_join_sn(self, sep, _vec->data, _vec->size); \
} while (0)
/* Join a brace-enclosed list of C strings onto `self`, e.g.
 * cstr_join_items(&s, ", ", {"a", "b"}); the element count is computed from
 * the size of the array literal. */
#define cstr_join_items(self, sep, ...) \
cstr_join_n(self, sep, c_make_array(const char*, __VA_ARGS__), c_sizeof((const char*[])__VA_ARGS__)/c_sizeof(char*))
/* Append `n` C strings from `arr` to `self`, separated by `sep`.
 * A separator is also placed before the first item unless `self` starts out empty. */
STC_INLINE void cstr_join_n(cstr* self, const char* sep, const char* arr[], isize n) {
    const char* lead = cstr_is_empty(self) ? "" : sep;
    for (; n--; ++arr) {
        cstr_append(self, lead);
        cstr_append(self, *arr);
        lead = sep;
    }
}
/* Append `n` cstr values from `arr` to `self`, separated by `sep`.
 * A separator is also placed before the first item unless `self` starts out empty. */
STC_INLINE void cstr_join_sn(cstr* self, const char* sep, const cstr arr[], isize n) {
    const char* lead = cstr_is_empty(self) ? "" : sep;
    for (; n--; ++arr) {
        cstr_append(self, lead);
        cstr_append_s(self, *arr);
        lead = sep;
    }
}
/* Replace up to `count` occurrences of `search` with `repl` (count == 0 is
 * treated as INT32_MAX by cstr_from_replace()); self is replaced by the result. */
STC_INLINE void cstr_replace_sv(cstr* self, csview search, csview repl, int32_t count)
{ cstr_take(self, cstr_from_replace(cstr_sv(self), search, repl, count)); }
/* Same, taking NUL-terminated C strings. */
STC_INLINE void cstr_replace_nfirst(cstr* self, const char* search, const char* repl, int32_t count)
{ cstr_replace_sv(self, c_sv(search, c_strlen(search)), c_sv(repl, c_strlen(repl)), count); }
/* Replace with a limit of INT32_MAX occurrences. */
STC_INLINE void cstr_replace(cstr* self, const char* search, const char* repl)
{ cstr_replace_nfirst(self, search, repl, INT32_MAX); }
/* Replace the `len` bytes at byte offset `pos` with the bytes of `repl`.
 * NOTE(review): the pointer returned by _cstr_internal_move() is used without a NULL check. */
STC_INLINE void cstr_replace_at_sv(cstr* self, isize pos, isize len, const csview repl) {
/* shift the tail so that exactly repl.size bytes are free at pos */
char* d = _cstr_internal_move(self, pos + len, pos + repl.size);
c_memcpy(d + pos, repl.buf, repl.size);
}
/* Same, taking a NUL-terminated C string as replacement. */
STC_INLINE void cstr_replace_at(cstr* self, isize pos, isize len, const char* repl)
{ cstr_replace_at_sv(self, pos, len, c_sv(repl, c_strlen(repl))); }
/* Replace the span selected by utf8_subview(u8pos, u8len) with the C string `repl`. */
STC_INLINE void cstr_u8_replace(cstr* self, isize u8pos, isize u8len, const char* repl) {
    const char* base = cstr_str(self);
    const csview span = utf8_subview(base, u8pos, u8len);
    const isize byte_pos = span.buf - base;
    cstr_replace_at(self, byte_pos, span.size, repl);
}
/* Insert the bytes of `sv` at byte offset `pos` (a replace of zero bytes). */
STC_INLINE void cstr_insert_sv(cstr* self, isize pos, csview sv)
{ cstr_replace_at_sv(self, pos, 0, sv); }
/* Insert a NUL-terminated C string at byte offset `pos`. */
STC_INLINE void cstr_insert(cstr* self, isize pos, const char* str)
{ cstr_replace_at_sv(self, pos, 0, c_sv(str, c_strlen(str))); }
/* Insert at the byte offset that utf8_to_index() gives for position u8pos. */
STC_INLINE void cstr_u8_insert(cstr* self, isize u8pos, const char* str)
{ cstr_insert(self, utf8_to_index(cstr_str(self), u8pos), str); }
/* Read one line from `fp`: cstr_getdelim() with '\n' as delimiter. */
STC_INLINE bool cstr_getline(cstr *self, FILE *fp)
{ return cstr_getdelim(self, '\n', fp); }
#endif // STC_CSTR_PRV_H_INCLUDED

77
stc/priv/linkage.h Normal file
View File

@@ -0,0 +1,77 @@
/* MIT License
*
* Copyright (c) 2025 Tyge Løvset
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/* Linkage selection: (re)define STC_API / STC_DEF for the template being included. */
#undef STC_API
#undef STC_DEF
/* Shared (extern) linkage is used when static linkage was not requested and one of
 * i_header / STC_HEADER / i_implement / STC_IMPLEMENT is defined. */
#if !defined i_static && !defined STC_STATIC && (defined i_header || defined STC_HEADER || \
defined i_implement || defined STC_IMPLEMENT)
#define STC_API extern
#define STC_DEF
#else
/* Otherwise everything is static, and i_implement is defined by this header. */
#define i_implement
#if defined __GNUC__ || defined __clang__ || defined __INTEL_LLVM_COMPILER
#define STC_API static __attribute__((unused))
#else
#define STC_API static inline
#endif
#define STC_DEF static
#endif
/* STC_IMPLEMENT or i_import also switch i_implement on. */
#if defined STC_IMPLEMENT || defined i_import
#define i_implement
#endif
/* _i_aux_alloc marks that both i_aux and a custom i_allocator were supplied. */
#if defined i_aux && defined i_allocator
#define _i_aux_alloc
#endif
/* Default allocator prefix is `c`, giving c_malloc / c_calloc / c_realloc / c_free. */
#ifndef i_allocator
#define i_allocator c
#endif
#ifndef i_free
#define i_malloc c_JOIN(i_allocator, _malloc)
#define i_calloc c_JOIN(i_allocator, _calloc)
#define i_realloc c_JOIN(i_allocator, _realloc)
#define i_free c_JOIN(i_allocator, _free)
#endif
/* Diagnostic state is pushed here for C builds (clang or GCC); several warning
 * groups are enabled and a few are ignored. linkage2.h issues the matching pop. */
#if defined __clang__ && !defined __cplusplus
#pragma clang diagnostic push
#pragma clang diagnostic warning "-Wall"
#pragma clang diagnostic warning "-Wextra"
#pragma clang diagnostic warning "-Wpedantic"
#pragma clang diagnostic warning "-Wconversion"
#pragma clang diagnostic warning "-Wwrite-strings"
// ignored
#pragma clang diagnostic ignored "-Wmissing-field-initializers"
#elif defined __GNUC__ && !defined __cplusplus
#pragma GCC diagnostic push
#pragma GCC diagnostic warning "-Wall"
#pragma GCC diagnostic warning "-Wextra"
#pragma GCC diagnostic warning "-Wpedantic"
#pragma GCC diagnostic warning "-Wconversion"
#pragma GCC diagnostic warning "-Wwrite-strings"
// ignored
#pragma GCC diagnostic ignored "-Wclobbered"
#pragma GCC diagnostic ignored "-Wimplicit-fallthrough=3"
#pragma GCC diagnostic ignored "-Wstringop-overflow="
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
#endif

42
stc/priv/linkage2.h Normal file
View File

@@ -0,0 +1,42 @@
/* MIT License
*
* Copyright (c) 2025 Tyge Løvset
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/* Clear the per-inclusion template parameters and allocator macros. */
#undef i_aux
#undef _i_aux_alloc
#undef i_allocator
#undef i_malloc
#undef i_calloc
#undef i_realloc
#undef i_free
/* Clear the linkage-mode selectors. */
#undef i_static
#undef i_header
#undef i_implement
#undef i_import
/* Restore the compiler diagnostic state (a matching push is expected earlier;
 * linkage.h contains one under the same conditions). */
#if defined __clang__ && !defined __cplusplus
#pragma clang diagnostic pop
#elif defined __GNUC__ && !defined __cplusplus
#pragma GCC diagnostic pop
#endif

285
stc/priv/queue_prv.h Normal file
View File

@@ -0,0 +1,285 @@
/* MIT License
*
* Copyright (c) 2025 Tyge Løvset
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// IWYU pragma: private
/* Declare the container types unless the user already did so (i_declared). */
#ifndef i_declared
_c_DEFTYPES(_declare_queue, Self, i_key, _i_aux_def);
#endif
typedef i_keyraw _m_raw;
/* Functions with STC_API linkage; their definitions are not in this part of the file. */
STC_API bool _c_MEMB(_reserve)(Self* self, const isize cap);
STC_API void _c_MEMB(_clear)(Self* self);
STC_API void _c_MEMB(_drop)(const Self* cself);
STC_API _m_value* _c_MEMB(_push)(Self* self, _m_value value); // push_back
STC_API void _c_MEMB(_shrink_to_fit)(Self *self);
STC_API _m_iter _c_MEMB(_advance)(_m_iter it, isize n);
/* Convert between a buffer position and a logical index relative to `start`;
 * both wrap around by masking with capmask. */
#define _cbuf_toidx(self, pos) (((pos) - (self)->start) & (self)->capmask)
#define _cbuf_topos(self, idx) (((self)->start + (idx)) & (self)->capmask)
/* Push `n` elements, each converted from the raw array with i_keyfrom(). */
STC_INLINE void _c_MEMB(_put_n)(Self* self, const _m_raw* raw, isize n)
{ while (n--) _c_MEMB(_push)(self, i_keyfrom((*raw))), ++raw; }
/* Destroy a single value with i_keydrop(); `self` is unused. */
STC_INLINE void _c_MEMB(_value_drop)(const Self* self, _m_value* val)
{ (void)self; i_keydrop(val); }
/* Constructors; compiled only when _i_aux_alloc is not defined. */
#ifndef _i_aux_alloc
/* Empty queue: all fields zero. */
STC_INLINE Self _c_MEMB(_init)(void)
{ Self out = {0}; return out; }
/* Empty queue with storage for c_next_pow2(cap + 1) elements; the last
 * initializer (capmask, presumably) is that element count minus one.
 * NOTE(review): the allocation result is not checked here. */
STC_INLINE Self _c_MEMB(_with_capacity)(isize cap) {
cap = c_next_pow2(cap + 1);
Self out = {_i_new_n(_m_value, cap), 0, 0, cap - 1};
return out;
}
/* Queue of `size` elements whose values are left uninitialised. */
STC_INLINE Self _c_MEMB(_with_size_uninit)(isize size)
{ Self out = _c_MEMB(_with_capacity)(size); out.end = size; return out; }
/* Queue of `size` elements, each created from default_raw with i_keyfrom(). */
STC_INLINE Self _c_MEMB(_with_size)(isize size, _m_raw default_raw) {
Self out = _c_MEMB(_with_capacity)(size);
while (out.end < size) out.cbuf[out.end++] = i_keyfrom(default_raw);
return out;
}
/* Queue initialised from `n` raw values. */
STC_INLINE Self _c_MEMB(_from_n)(const _m_raw* raw, isize n) {
Self out = _c_MEMB(_with_capacity)(n);
_c_MEMB(_put_n)(&out, raw, n); return out;
}
#endif
#if !defined i_no_emplace
/* Push a value constructed from `raw` with i_keyfrom(). */
STC_INLINE _m_value* _c_MEMB(_emplace)(Self* self, _m_raw raw)
{ return _c_MEMB(_push)(self, i_keyfrom(raw)); }
#endif
#if defined _i_has_eq
STC_API bool _c_MEMB(_eq)(const Self* self, const Self* other);
#endif
#if !defined i_no_clone
STC_API Self _c_MEMB(_clone)(Self q);
/* Clone a single value with i_keyclone(); `self` is unused. */
STC_INLINE _m_value _c_MEMB(_value_clone)(const Self* self, _m_value val)
{ (void)self; return i_keyclone(val); }
/* Replace the contents of `self` with a clone of `other`; self-copy is a no-op. */
STC_INLINE void _c_MEMB(_copy)(Self* self, const Self* other) {
if (self == other) return;
_c_MEMB(_drop)(self);
*self = _c_MEMB(_clone)(*other);
}
#endif // !i_no_clone
// Number of stored elements: the distance from start to end, wrapped by capmask.
STC_INLINE isize _c_MEMB(_size)(const Self* self) {
    return _cbuf_toidx(self, self->end);
}
// Capacity is reported as capmask (the index mask itself).
STC_INLINE isize _c_MEMB(_capacity)(const Self* self) {
    return self->capmask;
}
// The queue is empty exactly when start and end refer to the same slot.
STC_INLINE bool _c_MEMB(_is_empty)(const Self* self) {
    return self->end == self->start;
}
// Convert a stored element to its raw form with i_keytoraw.
STC_INLINE _m_raw _c_MEMB(_value_toraw)(const _m_value* pval) {
    return i_keytoraw(pval);
}
// Pointer to the slot at `start` (the element _pop would remove). No emptiness check.
STC_INLINE const _m_value* _c_MEMB(_front)(const Self* self) {
    return &self->cbuf[self->start];
}
// Mutable pointer to the slot at `start`. No emptiness check.
STC_INLINE _m_value* _c_MEMB(_front_mut)(Self* self) {
    return &self->cbuf[self->start];
}
STC_INLINE const _m_value* _c_MEMB(_back)(const Self* self)
{ return self->cbuf + ((self->end - 1) & self->capmask); }
// Mutable variant of _back: same slot, constness cast away.
STC_INLINE _m_value* _c_MEMB(_back_mut)(Self* self) {
    const _m_value* last = _c_MEMB(_back)(self);
    return (_m_value*)last;
}
// Return the current contents by value and reset `self` to an empty queue
// with no buffer (cbuf = NULL, start = end = capmask = 0).
STC_INLINE Self _c_MEMB(_move)(Self *self) {
    Self moved = *self;
    self->cbuf = NULL;
    self->start = 0;
    self->end = 0;
    self->capmask = 0;
    return moved;
}
// Drop the current contents of `self`, then overwrite it with `unowned`
// by plain struct assignment.
STC_INLINE void _c_MEMB(_take)(Self *self, Self unowned) {
    _c_MEMB(_drop)(self);
    *self = unowned;
}
// pop_front: destroy the front element with i_keydrop and advance `start`.
// Asserts that the queue is not empty.
STC_INLINE void _c_MEMB(_pop)(Self* self) {
    c_assert(!_c_MEMB(_is_empty)(self));
    _m_value* front = self->cbuf + self->start;
    i_keydrop(front);
    self->start = (self->start + 1) & self->capmask;
}
// Move the front element out of the queue: it is returned by value and NOT
// passed to i_keydrop. Asserts that the queue is not empty.
STC_INLINE _m_value _c_MEMB(_pull)(Self* self) {
    c_assert(!_c_MEMB(_is_empty)(self));
    const isize front = self->start;
    _m_value out = self->cbuf[front];
    self->start = (front + 1) & self->capmask;
    return out;
}
// Iterator positioned at the front slot; ref is NULL when the queue is empty.
STC_INLINE _m_iter _c_MEMB(_begin)(const Self* self) {
    _m_value* first = _c_MEMB(_is_empty)(self) ? NULL : self->cbuf + self->start;
    return c_literal(_m_iter){.ref=first, .pos=self->start, ._s=self};
}
// Reverse iterator positioned at the slot before `end` (wrapped);
// ref is NULL when the queue is empty.
STC_INLINE _m_iter _c_MEMB(_rbegin)(const Self* self) {
    const isize last = (self->end - 1) & self->capmask;
    _m_value* ref = _c_MEMB(_is_empty)(self) ? NULL : self->cbuf + last;
    return c_literal(_m_iter){.ref=ref, .pos=last, ._s=self};
}
// End iterator: an all-zero iterator, so its ref is NULL.
STC_INLINE _m_iter _c_MEMB(_end)(const Self* self) {
    (void)self;
    return c_literal(_m_iter){0};
}
// Reverse end iterator: an all-zero iterator, so its ref is NULL.
STC_INLINE _m_iter _c_MEMB(_rend)(const Self* self) {
    (void)self;
    return c_literal(_m_iter){0};
}
// Advance the iterator one slot forward, wrapping from slot capmask to slot 0.
// ref becomes NULL once the position reaches the queue's `end`.
STC_INLINE void _c_MEMB(_next)(_m_iter* it) {
    if (it->pos == it->_s->capmask) {
        // Last physical slot: rewind pointer and position to slot 0.
        it->ref -= it->pos;
        it->pos = 0;
    } else {
        ++it->ref;
        ++it->pos;
    }
    if (it->pos == it->_s->end)
        it->ref = NULL;
}
// Step the iterator one slot backward, wrapping from slot 0 to slot capmask.
// If the iterator is already on the queue's `start`, ref becomes NULL instead.
STC_INLINE void _c_MEMB(_rnext)(_m_iter* it) {
    if (it->pos == it->_s->start) {
        it->ref = NULL;
    } else if (it->pos == 0) {
        // Slot 0: jump pointer and position to the last physical slot.
        it->pos = it->_s->capmask;
        it->ref += it->pos;
    } else {
        --it->ref;
        --it->pos;
    }
}
// Logical index of the iterator: its slot position relative to `start`, wrapped by capmask.
STC_INLINE isize _c_MEMB(_index)(const Self* self, _m_iter it) {
    return _cbuf_toidx(self, it.pos);
}
// Move `end` by n slots (wrapped by capmask). No elements are created or destroyed.
STC_INLINE void _c_MEMB(_adjust_end_)(Self* self, isize n) {
    const isize moved = (self->end + n) & self->capmask;
    self->end = moved;
}
/* -------------------------- IMPLEMENTATION ------------------------- */
#if defined i_implement
// Return a copy of `it` moved n slots (position wrapped by capmask).
// ref is set to NULL when c_uless(idx + n, len) is false, where idx is the
// iterator's logical index and len the queue size.
STC_DEF _m_iter _c_MEMB(_advance)(_m_iter it, isize n) {
isize len = _c_MEMB(_size)(it._s);
// idx: position of the iterator relative to start, wrapped by capmask.
isize pos = it.pos, idx = _cbuf_toidx(it._s, pos);
it.pos = (pos + n) & it._s->capmask;
// Shift the element pointer by the same physical distance the slot moved.
it.ref += it.pos - pos;
// NOTE(review): c_uless is presumably an unsigned "less than", which would
// make a negative idx + n fail this test as well - confirm.
if (!c_uless(idx + n, len)) it.ref = NULL;
return it;
}
// Destroy every element with i_keydrop and reset the queue to empty.
// The buffer itself is kept.
STC_DEF void
_c_MEMB(_clear)(Self* self) {
    for (c_each(it, Self, *self)) {
        i_keydrop(it.ref);
    }
    self->start = 0;
    self->end = 0;
}
// Destroy the queue: drop all elements via _clear, then release the buffer.
// Accepts a const pointer and casts the constness away internally.
STC_DEF void
_c_MEMB(_drop)(const Self* cself) {
// NOTE(review): the local is named `self`; the allocation macros possibly
// refer to that name when an aux allocator is configured - confirm before renaming.
Self* self = (Self*)cself;
_c_MEMB(_clear)(self);
// capmask + 1 is passed as the element count of the buffer being released.
_i_free_n(self->cbuf, self->capmask + 1);
}
// Grow the buffer to c_next_pow2(cap + 1) slots if that is larger than the
// current size; never shrinks.
// Returns false if reallocation fails (the queue is left untouched), or if no
// growth is needed and no buffer exists; returns true otherwise.
// After growing, a wrapped live range is repaired so that it is valid under
// the new mask: the shorter of the two pieces is copied into the new space.
STC_DEF bool
_c_MEMB(_reserve)(Self* self, const isize cap) {
isize oldpow2 = self->capmask + (self->capmask & 1); // handle capmask = 0
isize newpow2 = c_next_pow2(cap + 1);
if (newpow2 <= oldpow2)
return self->cbuf != NULL;
_m_value* d = (_m_value *)_i_realloc_n(self->cbuf, oldpow2, newpow2);
if (d == NULL)
return false;
// head: number of slots from start up to the end of the old buffer.
isize head = oldpow2 - self->start;
// Contiguous range: nothing to move.
if (self->start <= self->end) // [..S########E....|................]
;
// Wrapped, head piece is the shorter one: move it to the end of the new buffer.
else if (head < self->end) { // [#######E.....S##|.............s!!]
c_memcpy(d + newpow2 - head, d + self->start, head*c_sizeof *d);
self->start = newpow2 - head;
// Wrapped, tail piece [0, end) is not longer than the head: append it after the old buffer end.
} else { // [##E.....S#######|!!e.............]
c_memcpy(d + oldpow2, d, self->end*c_sizeof *d);
self->end += oldpow2;
}
self->capmask = newpow2 - 1;
self->cbuf = d;
return true;
}
// push_back: store `value` in the slot at `end` and advance `end` (wrapped).
// When advancing would make end meet start, the buffer is grown first via
// _reserve(capmask + 3). Returns the slot written, or NULL if growing failed.
STC_DEF _m_value*
_c_MEMB(_push)(Self* self, _m_value value) {
    isize next = (self->end + 1) & self->capmask;
    if (next == self->start) { // full
        if (!_c_MEMB(_reserve)(self, self->capmask + 3)) // => 2x expand
            return NULL;
        // capmask (and possibly end) changed: recompute the next slot.
        next = (self->end + 1) & self->capmask;
    }
    _m_value *slot = self->cbuf + self->end;
    *slot = value;
    self->end = next;
    return slot;
}
// Shrink the buffer to c_next_pow2(size + 1) slots when that is smaller than
// the current buffer.
// The live elements are first packed so they fit inside the smaller buffer:
//  - contiguous range: moved to the beginning (start = 0, end = size);
//  - wrapped range: the head piece [start, buffer end) is moved down so that it
//    ends at slot newpow2; the tail piece [0, end) stays in place.
// If the reallocation fails the queue keeps its old buffer and capacity, and
// a relocated head piece is moved back so the queue stays valid.
STC_DEF void
_c_MEMB(_shrink_to_fit)(Self *self) {
    isize sz = _c_MEMB(_size)(self);
    isize newpow2 = c_next_pow2(sz + 1);
    if (newpow2 > self->capmask)
        return;
    const isize oldstart = self->start;
    isize n = 0; // head elements relocated in the wrapped case (0 = contiguous case)
    if (self->start <= self->end) {
        c_memmove(self->cbuf, self->cbuf + self->start, sz*c_sizeof *self->cbuf);
        self->start = 0, self->end = sz;
    } else {
        n = self->capmask - self->start + 1;
        c_memmove(self->cbuf + (newpow2 - n), self->cbuf + self->start, n*c_sizeof *self->cbuf);
        self->start = newpow2 - n;
    }
    _m_value* d = (_m_value *)_i_realloc_n(self->cbuf, self->capmask + 1, newpow2);
    if (d == NULL) {
        // Old buffer is still owned by the queue and capmask is unchanged.
        // The contiguous layout (start = 0) is already valid under the old mask;
        // the wrapped layout is not, so undo the head relocation.
        if (n != 0) {
            c_memmove(self->cbuf + oldstart, self->cbuf + self->start, n*c_sizeof *self->cbuf);
            self->start = oldstart;
        }
        return;
    }
    self->cbuf = d;
    self->capmask = newpow2 - 1;
}
#if !defined i_no_clone
// Return a deep copy of `q`: a new buffer of c_next_pow2(size + 1) slots with
// every element cloned by i_keyclone, laid out contiguously from slot 0.
// If the buffer cannot be allocated, an empty queue with a NULL buffer is
// returned (start = end = capmask = 0) instead of one that claims elements
// it does not have.
STC_DEF Self
_c_MEMB(_clone)(Self q) {
    Self out = q, *self = &out; (void)self; // may be used by _i_new_n/i_keyclone via i_aux.
    out.start = 0; out.end = _c_MEMB(_size)(&q);
    out.capmask = c_next_pow2(out.end + 1) - 1;
    out.cbuf = _i_new_n(_m_value, out.capmask + 1);
    if (out.cbuf == NULL) {
        // Keep the result self-consistent: no buffer means no elements.
        out.end = 0;
        out.capmask = 0;
        return out;
    }
    isize i = 0;
    for (c_each(it, Self, q))
        out.cbuf[i++] = i_keyclone((*it.ref));
    return out;
}
#endif // i_no_clone
#if defined _i_has_eq
// Two queues are equal when they have the same size and their elements,
// compared pairwise front to back in raw form with i_eq, are all equal.
STC_DEF bool
_c_MEMB(_eq)(const Self* self, const Self* other) {
    if (_c_MEMB(_size)(self) != _c_MEMB(_size)(other))
        return false;
    _m_iter a = _c_MEMB(_begin)(self);
    _m_iter b = _c_MEMB(_begin)(other);
    while (a.ref) {
        const _m_raw ra = i_keytoraw(a.ref), rb = i_keytoraw(b.ref);
        if (!(i_eq((&ra), (&rb))))
            return false;
        _c_MEMB(_next)(&a);
        _c_MEMB(_next)(&b);
    }
    return true;
}
#endif // _i_has_eq
#endif // IMPLEMENTATION

136
stc/priv/sort_prv.h Normal file
View File

@@ -0,0 +1,136 @@
/* MIT License
*
* Copyright (c) 2025 Tyge Løvset
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// IWYU pragma: private
#ifdef _i_is_list
#define i_at(self, idx) (&((_m_value *)(self)->last)[idx])
#define i_at_mut i_at
#elif !defined i_at
#define i_at(self, idx) _c_MEMB(_at)(self, idx)
#define i_at_mut(self, idx) _c_MEMB(_at_mut)(self, idx)
#endif
STC_API void _c_MEMB(_sort_lowhigh)(Self* self, isize lo, isize hi);
#ifdef _i_is_array
STC_API isize _c_MEMB(_lower_bound_range)(const Self* self, const _m_raw raw, isize start, isize end);
STC_API isize _c_MEMB(_binary_search_range)(const Self* self, const _m_raw raw, isize start, isize end);
// Array variant: sort the first n elements (indices 0..n-1).
static inline void _c_MEMB(_sort)(Self* arr, isize n) {
    const isize last = n - 1;
    _c_MEMB(_sort_lowhigh)(arr, 0, last);
}
// Array variant: lower bound of `raw` within indices [0, n).
static inline isize // c_NPOS = not found
_c_MEMB(_lower_bound)(const Self* arr, const _m_raw raw, isize n) {
    return _c_MEMB(_lower_bound_range)(arr, raw, 0, n);
}
// Array variant: binary search for `raw` within indices [0, n).
static inline isize // c_NPOS = not found
_c_MEMB(_binary_search)(const Self* arr, const _m_raw raw, isize n) {
    return _c_MEMB(_binary_search_range)(arr, raw, 0, n);
}
#elif !defined _i_is_list
STC_API isize _c_MEMB(_lower_bound_range)(const Self* self, const _m_raw raw, isize start, isize end);
STC_API isize _c_MEMB(_binary_search_range)(const Self* self, const _m_raw raw, isize start, isize end);
// Container variant: sort all elements (indices 0..size-1).
static inline void _c_MEMB(_sort)(Self* self) {
    const isize last = _c_MEMB(_size)(self) - 1;
    _c_MEMB(_sort_lowhigh)(self, 0, last);
}
// Container variant: lower bound of `raw` over the whole container.
static inline isize // c_NPOS = not found
_c_MEMB(_lower_bound)(const Self* self, const _m_raw raw) {
    const isize n = _c_MEMB(_size)(self);
    return _c_MEMB(_lower_bound_range)(self, raw, 0, n);
}
// Container variant: binary search for `raw` over the whole container.
static inline isize // c_NPOS = not found
_c_MEMB(_binary_search)(const Self* self, const _m_raw raw) {
    const isize n = _c_MEMB(_size)(self);
    return _c_MEMB(_binary_search_range)(self, raw, 0, n);
}
#endif
/* -------------------------- IMPLEMENTATION ------------------------- */
#if defined i_implement
// Insertion sort of the elements with indices lo..hi (inclusive), ascending
// by i_less on the raw form of each element.
// Each element x at index i is shifted left past every element it is less
// than. The backward scan stops at lo, so elements outside lo..hi are never
// compared or overwritten, even when the range does not start at index 0.
static void _c_MEMB(_insertsort_lowhigh)(Self* self, isize lo, isize hi) {
    for (isize j = lo, i = lo + 1; i <= hi; j = i, ++i) {
        _m_value x = *i_at(self, i);
        _m_raw rx = i_keytoraw((&x));
        while (j >= lo) {
            _m_raw ry = i_keytoraw(i_at(self, j));
            if (!(i_less((&rx), (&ry)))) break;
            // Shift the larger element one slot to the right.
            *i_at_mut(self, j + 1) = *i_at(self, j);
            --j;
        }
        *i_at_mut(self, j + 1) = x;
    }
}
// Sort the elements with indices lo..hi (inclusive) in ascending i_less order.
// Quicksort loop: partition around a pivot value, finish the smaller of the
// two partitions immediately (recursively when it spans more than 64 indices,
// by insertion sort otherwise), then continue the loop on the larger one.
STC_DEF void _c_MEMB(_sort_lowhigh)(Self* self, isize lo, isize hi) {
isize i = lo, j;
while (lo < hi) {
// Pivot value is taken from index lo + 7/16 of the range length.
_m_raw pivot = i_keytoraw(i_at(self, (isize)(lo + (hi - lo)*7LL/16))), rx;
j = hi;
do {
// Scan i upward past elements less than the pivot and j downward past
// elements greater than it. The ++i / --j also act as loop conditions,
// so a scan additionally stops when its index becomes 0.
do { rx = i_keytoraw(i_at(self, i)); } while ((i_less((&rx), (&pivot))) && ++i);
do { rx = i_keytoraw(i_at(self, j)); } while ((i_less((&pivot), (&rx))) && --j);
if (i > j) break;
c_swap(i_at_mut(self, i), i_at_mut(self, j));
++i; --j;
} while (i <= j);
// Partitions are now lo..j and i..hi. Swap the bounds if needed so that
// lo..j names the smaller partition and i..hi the larger one.
if (j - lo > hi - i) {
c_swap(&lo, &i);
c_swap(&hi, &j);
}
if (j - lo > 64) _c_MEMB(_sort_lowhigh)(self, lo, j);
else if (j > lo) _c_MEMB(_insertsort_lowhigh)(self, lo, j);
// Continue with the remaining (larger) partition i..hi.
lo = i;
}
}
#ifndef _i_is_list
// Bisection search in [start, end) for the first index whose element is not
// i_less than `raw`. The result is only meaningful if the range is ordered
// by i_less. Returns that index, or c_NPOS when the search ends at or past `end`.
STC_DEF isize // c_NPOS = not found
_c_MEMB(_lower_bound_range)(const Self* self, const _m_raw raw, isize start, isize end) {
// count: size of the remaining candidate range; step: probe offset within it.
isize count = end - start, step = count/2;
while (count > 0) {
const _m_raw rx = i_keytoraw(i_at(self, start + step));
if (i_less((&rx), (&raw))) {
// Probed element is too small: discard it and everything before it.
// The next probe is placed at 7/8 of the remaining range.
start += step + 1;
count -= step + 1;
step = count*7/8;
} else {
// Probed element is a candidate: keep only what lies before it.
// The next probe is placed at 1/8 of the remaining range.
count = step;
step = count/8;
}
}
return start >= end ? c_NPOS : start;
}
// Find `raw` in [start, end): take the lower bound and accept it only if
// `raw` is not i_less than the element found there.
STC_DEF isize // c_NPOS = not found
_c_MEMB(_binary_search_range)(const Self* self, const _m_raw raw, isize start, isize end) {
    isize pos = _c_MEMB(_lower_bound_range)(self, raw, start, end);
    if (pos == c_NPOS)
        return pos;
    const _m_raw rx = i_keytoraw(i_at(self, pos));
    if (i_less((&raw), (&rx)))
        pos = c_NPOS;
    return pos;
}
#endif // !_i_is_list
#endif // IMPLEMENTATION
#undef i_at
#undef i_at_mut

297
stc/priv/template.h Normal file
View File

@@ -0,0 +1,297 @@
/* MIT License
*
* Copyright (c) 2025 Tyge Løvset
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// IWYU pragma: private
#ifndef _i_template
#define _i_template
#ifndef STC_TEMPLATE_H_INCLUDED
#define STC_TEMPLATE_H_INCLUDED
#define _c_MEMB(name) c_JOIN(Self, name)
#define _c_DEFTYPES(macro, SELF, ...) macro(SELF, __VA_ARGS__)
#define _m_value _c_MEMB(_value)
#define _m_key _c_MEMB(_key)
#define _m_mapped _c_MEMB(_mapped)
#define _m_rmapped _c_MEMB(_rmapped)
#define _m_raw _c_MEMB(_raw)
#define _m_keyraw _c_MEMB(_keyraw)
#define _m_iter _c_MEMB(_iter)
#define _m_result _c_MEMB(_result)
#define _m_node _c_MEMB(_node)
#define c_OPTION(flag) ((i_opt) & (flag))
#define c_declared (1<<0)
#define c_no_atomic (1<<1)
#define c_arc2 (1<<2)
#define c_no_clone (1<<3)
#define c_no_hash (1<<4)
#define c_use_cmp (1<<5)
#define c_use_eq (1<<6)
#define c_cmpclass (1<<7)
#define c_keyclass (1<<8)
#define c_valclass (1<<9)
#define c_keypro (1<<10)
#define c_valpro (1<<11)
#endif
#if defined i_rawclass // [deprecated]
#define i_cmpclass i_rawclass
#endif
#if defined T && !defined i_type
#define i_type T
#endif
#if defined i_type && c_NUMARGS(i_type) > 1
#define Self c_GETARG(1, i_type)
#define i_key c_GETARG(2, i_type)
#if c_NUMARGS(i_type) == 3
#if defined _i_is_map
#define i_val c_GETARG(3, i_type)
#else
#define i_opt c_GETARG(3, i_type)
#endif
#elif c_NUMARGS(i_type) == 4
#define i_val c_GETARG(3, i_type)
#define i_opt c_GETARG(4, i_type)
#endif
#elif !defined Self && defined i_type
#define Self i_type
#elif !defined Self
#define Self c_JOIN(_i_prefix, i_tag)
#endif
#if defined i_aux && c_NUMARGS(i_aux) == 2
// shorthand for defining i_aux AND i_allocator as a one-liner combo.
#define _i_aux_alloc
#define _i_aux_def c_GETARG(1, i_aux) aux;
#undef i_allocator // override:
#define i_allocator c_GETARG(2, i_aux)
#elif defined i_aux
#define _i_aux_def i_aux aux;
#else
#define _i_aux_def
#endif
#if c_OPTION(c_declared)
#define i_declared
#endif
#if c_OPTION(c_no_hash)
#define i_no_hash
#endif
#if c_OPTION(c_use_cmp)
#define i_use_cmp
#endif
#if c_OPTION(c_use_eq)
#define i_use_eq
#endif
#if c_OPTION(c_no_clone) || defined _i_is_arc
#define i_no_clone
#endif
#if c_OPTION(c_keyclass)
#define i_keyclass i_key
#endif
#if c_OPTION(c_valclass)
#define i_valclass i_val
#endif
#if c_OPTION(c_cmpclass)
#define i_cmpclass i_key
#define i_use_cmp
#endif
#if c_OPTION(c_keypro)
#define i_keypro i_key
#endif
#if c_OPTION(c_valpro)
#define i_valpro i_val
#endif
#if defined i_keypro
#define i_keyclass i_keypro
#define i_cmpclass c_JOIN(i_keypro, _raw)
#endif
#if defined i_cmpclass
#define i_keyraw i_cmpclass
#if !(defined i_key || defined i_keyclass)
#define i_key i_cmpclass
#endif
#elif defined i_keyclass && !defined i_keyraw
// Special: When only i_keyclass is defined, also define i_cmpclass to the same.
// Do not define i_keyraw here, otherwise _from() / _toraw() is expected to exist.
#define i_cmpclass i_key
#endif
// Bind to i_key "class members": _clone, _drop, _from and _toraw (when conditions are met).
#if defined i_keyclass
#ifndef i_key
#define i_key i_keyclass
#endif
#if !defined i_keyclone && !defined i_no_clone
#define i_keyclone c_JOIN(i_keyclass, _clone)
#endif
#ifndef i_keydrop
#define i_keydrop c_JOIN(i_keyclass, _drop)
#endif
#if !defined i_keyfrom && defined i_keyraw
#define i_keyfrom c_JOIN(i_keyclass, _from)
#endif
#if !defined i_keytoraw && defined i_keyraw
#define i_keytoraw c_JOIN(i_keyclass, _toraw)
#endif
#endif
// Define when container has support for sorting (cmp) and linear search (eq)
#if defined i_use_cmp || defined i_cmp || defined i_less
#define _i_has_cmp
#endif
#if defined i_use_cmp || defined i_cmp || defined i_use_eq || defined i_eq
#define _i_has_eq
#endif
// Bind to i_cmpclass "class members": _cmp, _eq and _hash (when conditions are met).
#if defined i_cmpclass
#if !(defined i_cmp || defined i_less) && (defined i_use_cmp || defined _i_sorted)
#define i_cmp c_JOIN(i_cmpclass, _cmp)
#endif
#if !defined i_eq && (defined i_use_eq || defined i_hash || defined _i_is_hash)
#define i_eq c_JOIN(i_cmpclass, _eq)
#endif
#if !(defined i_hash || defined i_no_hash)
#define i_hash c_JOIN(i_cmpclass, _hash)
#endif
#endif
#if !defined i_key
#error "No i_key defined"
#elif defined i_keyraw && !(c_OPTION(c_cmpclass) || defined i_keytoraw)
#error "If i_cmpclass / i_keyraw is defined, i_keytoraw must be defined too"
#elif !defined i_no_clone && (defined i_keyclone ^ defined i_keydrop)
#error "Both i_keyclone and i_keydrop must be defined, if any (unless i_no_clone defined)."
#elif defined i_from || defined i_drop
#error "i_from / i_drop not supported. Use i_keyfrom/i_keydrop"
#elif defined i_keyto || defined i_valto
#error i_keyto / i_valto not supported. Use i_keytoraw / i_valtoraw
#elif defined i_keyraw && defined i_use_cmp && !defined _i_has_cmp
#error "For smap / sset / pqueue, i_cmp or i_less must be defined when i_keyraw is defined."
#endif
// Fill in missing i_eq, i_less, i_cmp functions with defaults.
// Every expansion is fully parenthesized (arguments and result) so the macros
// behave like function calls in any expression context, e.g. !i_eq(a, b) or
// i_cmp(a, b) == -1.
#if !defined i_eq && defined i_cmp
#define i_eq(x, y) ((i_cmp(x, y)) == 0)
#elif !defined i_eq
#define i_eq(x, y) (*(x) == *(y)) // works for integral types
#endif
#if !defined i_less && defined i_cmp
#define i_less(x, y) ((i_cmp(x, y)) < 0)
#elif !defined i_less
#define i_less(x, y) (*(x) < *(y)) // works for integral types
#endif
#if !defined i_cmp && defined i_less
#define i_cmp(x, y) ((i_less(y, x)) - (i_less(x, y)))
#endif
#if !(defined i_hash || defined i_no_hash)
#define i_hash c_default_hash
#endif
#define i_no_emplace
#ifndef i_tag
#define i_tag i_key
#endif
#if !defined i_keyfrom
#define i_keyfrom c_default_clone
#else
#undef i_no_emplace
#endif
#ifndef i_keyraw
#define i_keyraw i_key
#endif
#ifndef i_keytoraw
#define i_keytoraw c_default_toraw
#endif
#ifndef i_keyclone
#define i_keyclone c_default_clone
#endif
#ifndef i_keydrop
#define i_keydrop c_default_drop
#endif
#if defined _i_is_map // ---- process hashmap/sortedmap value i_val, ... ----
// i_valpro is shorthand: it sets i_valclass and derives i_valraw as <i_valpro>_raw.
#if defined i_valpro
#define i_valclass i_valpro
#define i_valraw c_JOIN(i_valpro, _raw)
#endif
// Bind to i_valclass "class members": _clone, _drop, and (only when i_valraw
// is defined) _from and _toraw. Explicit user definitions take precedence.
#ifdef i_valclass
#ifndef i_val
#define i_val i_valclass
#endif
#if !defined i_valclone && !defined i_no_clone
#define i_valclone c_JOIN(i_valclass, _clone)
#endif
#ifndef i_valdrop
#define i_valdrop c_JOIN(i_valclass, _drop)
#endif
#if !defined i_valfrom && defined i_valraw
#define i_valfrom c_JOIN(i_valclass, _from)
#endif
#if !defined i_valtoraw && defined i_valraw
#define i_valtoraw c_JOIN(i_valclass, _toraw)
#endif
#endif
// Consistency checks on the value configuration.
#ifndef i_val
#error "i_val* must be defined for maps"
#elif defined i_valraw && !defined i_valtoraw
#error "If i_valraw is defined, i_valtoraw must be defined too"
#elif !defined i_no_clone && (defined i_valclone ^ defined i_valdrop)
#error "Both i_valclone and i_valdrop must be defined, if any"
#endif
// Fill in defaults for everything still undefined. A user-supplied i_valfrom
// re-enables emplace (i_no_emplace is undefined).
#if !defined i_valfrom
#define i_valfrom c_default_clone
#else
#undef i_no_emplace
#endif
#ifndef i_valraw
#define i_valraw i_val
#endif
#ifndef i_valtoraw
#define i_valtoraw c_default_toraw
#endif
#ifndef i_valclone
#define i_valclone c_default_clone
#endif
#ifndef i_valdrop
#define i_valdrop c_default_drop
#endif
#endif // _i_is_map
// For non-map containers the value type and its raw type fall back to the key's.
#ifndef i_val
#define i_val i_key
#endif
#ifndef i_valraw
#define i_valraw i_keyraw
#endif
#endif // _i_template

71
stc/priv/template2.h Normal file
View File

@@ -0,0 +1,71 @@
/* MIT License
*
* Copyright (c) 2025 Tyge Løvset
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// IWYU pragma: private
#undef T // alias for i_type
#undef i_type
#undef i_class
#undef i_tag
#undef i_opt
#undef i_capacity
#undef i_key
#undef i_keypro // Replaces next two
#undef i_key_str // [deprecated]
#undef i_key_arcbox // [deprecated]
#undef i_keyclass
#undef i_cmpclass // define i_keyraw, and bind i_cmp, i_eq, i_hash "class members"
#undef i_rawclass // [deprecated] for i_cmpclass
#undef i_keyclone
#undef i_keydrop
#undef i_keyraw
#undef i_keyfrom
#undef i_keytoraw
#undef i_cmp
#undef i_less
#undef i_eq
#undef i_hash
#undef i_val
#undef i_valpro // Replaces next two
#undef i_val_str // [deprecated]
#undef i_val_arcbox // [deprecated]
#undef i_valclass
#undef i_valclone
#undef i_valdrop
#undef i_valraw
#undef i_valfrom
#undef i_valtoraw
#undef i_use_cmp
#undef i_use_eq
#undef i_no_hash
#undef i_no_clone
#undef i_no_emplace
#undef i_declared
#undef _i_aux_def
#undef _i_has_cmp
#undef _i_has_eq
#undef _i_prefix
#undef _i_template
#undef Self

482
stc/priv/ucd_prv.c Normal file
View File

@@ -0,0 +1,482 @@
/* MIT License
*
* Copyright (c) 2025 Tyge Løvset
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef STC_UCD_PRV_C_INCLUDED
#define STC_UCD_PRV_C_INCLUDED
#include <ctype.h>
// ------------------------------------------------------
// The following requires linking with utf8 symbols.
// To call them, either define i_import before including
// one of cstr, csview, zsview, or link with src/libstc.o.
enum {
U8G_Cc, U8G_Lt, U8G_Nd, U8G_Nl,
U8G_Pc, U8G_Pd, U8G_Pf, U8G_Pi,
U8G_Sc, U8G_Zl, U8G_Zp, U8G_Zs,
U8G_Arabic, U8G_Bengali, U8G_Cyrillic,
U8G_Devanagari, U8G_Georgian, U8G_Greek,
U8G_Han, U8G_Hiragana, U8G_Katakana,
U8G_Latin, U8G_Thai,
U8G_SIZE
};
static bool utf8_isgroup(int group, uint32_t c);
// True if c is alphabetic: isalpha() for c < 128, otherwise membership in any
// of the letter-bearing groups listed below (scripts plus Nl, number letters).
static bool utf8_isalpha(uint32_t c) {
    // Read-only lookup list, so it is declared const.
    static const int16_t groups[] = {U8G_Latin, U8G_Nl, U8G_Cyrillic, U8G_Han, U8G_Devanagari,
                                     U8G_Arabic, U8G_Bengali, U8G_Hiragana, U8G_Katakana,
                                     U8G_Thai, U8G_Greek, U8G_Georgian};
    if (c < 128) return isalpha((int)c) != 0;
    for (int j=0; j < (int)(sizeof groups/sizeof groups[0]); ++j) {
        if (utf8_isgroup(groups[j], c))
            return true;
    }
    return false;
}
// True if c is a cased character: isalpha() for c < 128, otherwise lower case,
// upper case, or a member of the Lt (title case) group.
static bool utf8_iscased(uint32_t c) {
    if (c < 128)
        return isalpha((int)c) != 0;
    if (utf8_islower(c) || utf8_isupper(c))
        return true;
    return utf8_isgroup(U8G_Lt, c);
}
// True if c is alphanumeric: isalnum() for c < 128, otherwise alphabetic or a
// member of the Nd (decimal number) group.
static bool utf8_isalnum(uint32_t c) {
    if (c < 128)
        return isalnum((int)c) != 0;
    if (utf8_isalpha(c))
        return true;
    return utf8_isgroup(U8G_Nd, c);
}
// True if c is a "word" character: alphanumeric or '_' for c < 128, otherwise
// alphabetic, decimal number (Nd) or connector punctuation (Pc).
static bool utf8_isword(uint32_t c) {
    if (c < 128)
        return isalnum((int)c) != 0 || c == '_';
    if (utf8_isalpha(c))
        return true;
    if (utf8_isgroup(U8G_Nd, c))
        return true;
    return utf8_isgroup(U8G_Pc, c);
}
// True if c is a blank: space or tab for c < 128, otherwise a member of the
// Zs (space separator) group.
static bool utf8_isblank(uint32_t c) {
    if (c >= 128)
        return utf8_isgroup(U8G_Zs, c);
    return c == ' ' || c == '\t';
}
// True if c is whitespace: isspace() for c < 128, otherwise the line separator
// (8232), the paragraph separator (8233), or a member of the Zs group.
static bool utf8_isspace(uint32_t c) {
    if (c < 128)
        return isspace((int)c) != 0;
    if (c == 0x2028 || c == 0x2029) // 8232, 8233
        return true;
    return utf8_isgroup(U8G_Zs, c);
}
/* The tables below are extracted from the RE2 library */
typedef struct {
uint16_t lo;
uint16_t hi;
} URange16;
static const URange16 Cc_range16[] = { // Control
{ 0, 31 },
{ 127, 159 },
};
static const URange16 Lt_range16[] = { // Title case
{ 453, 453 },
{ 456, 456 },
{ 459, 459 },
{ 498, 498 },
{ 8072, 8079 },
{ 8088, 8095 },
{ 8104, 8111 },
{ 8124, 8124 },
{ 8140, 8140 },
{ 8188, 8188 },
};
static const URange16 Nd_range16[] = { // Decimal number
{ 48, 57 },
{ 1632, 1641 },
{ 1776, 1785 },
{ 1984, 1993 },
{ 2406, 2415 },
{ 2534, 2543 },
{ 2662, 2671 },
{ 2790, 2799 },
{ 2918, 2927 },
{ 3046, 3055 },
{ 3174, 3183 },
{ 3302, 3311 },
{ 3430, 3439 },
{ 3558, 3567 },
{ 3664, 3673 },
{ 3792, 3801 },
{ 3872, 3881 },
{ 4160, 4169 },
{ 4240, 4249 },
{ 6112, 6121 },
{ 6160, 6169 },
{ 6470, 6479 },
{ 6608, 6617 },
{ 6784, 6793 },
{ 6800, 6809 },
{ 6992, 7001 },
{ 7088, 7097 },
{ 7232, 7241 },
{ 7248, 7257 },
{ 42528, 42537 },
{ 43216, 43225 },
{ 43264, 43273 },
{ 43472, 43481 },
{ 43504, 43513 },
{ 43600, 43609 },
{ 44016, 44025 },
{ 65296, 65305 },
};
static const URange16 Nl_range16[] = { // Number letter
{ 5870, 5872 },
{ 8544, 8578 },
{ 8581, 8584 },
{ 12295, 12295 },
{ 12321, 12329 },
{ 12344, 12346 },
{ 42726, 42735 },
};
static const URange16 Pc_range16[] = { // Connector punctuation
{ 95, 95 },
{ 8255, 8256 },
{ 8276, 8276 },
{ 65075, 65076 },
{ 65101, 65103 },
{ 65343, 65343 },
};
static const URange16 Pd_range16[] = { // Dash punctuation
{ 45, 45 },
{ 1418, 1418 },
{ 1470, 1470 },
{ 5120, 5120 },
{ 6150, 6150 },
{ 8208, 8213 },
{ 11799, 11799 },
{ 11802, 11802 },
{ 11834, 11835 },
{ 11840, 11840 },
{ 11869, 11869 },
{ 12316, 12316 },
{ 12336, 12336 },
{ 12448, 12448 },
{ 65073, 65074 },
{ 65112, 65112 },
{ 65123, 65123 },
{ 65293, 65293 },
};
static const URange16 Pf_range16[] = { // Final punctuation
{ 187, 187 },
{ 8217, 8217 },
{ 8221, 8221 },
{ 8250, 8250 },
{ 11779, 11779 },
{ 11781, 11781 },
{ 11786, 11786 },
{ 11789, 11789 },
{ 11805, 11805 },
{ 11809, 11809 },
};
static const URange16 Pi_range16[] = { // Initial punctuation
{ 171, 171 },
{ 8216, 8216 },
{ 8219, 8220 },
{ 8223, 8223 },
{ 8249, 8249 },
{ 11778, 11778 },
{ 11780, 11780 },
{ 11785, 11785 },
{ 11788, 11788 },
{ 11804, 11804 },
{ 11808, 11808 },
};
static const URange16 Sc_range16[] = { // Currency symbol
{ 36, 36 },
{ 162, 165 },
{ 1423, 1423 },
{ 1547, 1547 },
{ 2046, 2047 },
{ 2546, 2547 },
{ 2555, 2555 },
{ 2801, 2801 },
{ 3065, 3065 },
{ 3647, 3647 },
{ 6107, 6107 },
{ 8352, 8384 },
{ 43064, 43064 },
{ 65020, 65020 },
{ 65129, 65129 },
{ 65284, 65284 },
{ 65504, 65505 },
{ 65509, 65510 },
};
static const URange16 Zl_range16[] = { // Line separator
{ 8232, 8232 },
};
static const URange16 Zp_range16[] = { // Paragraph separator
{ 8233, 8233 },
};
static const URange16 Zs_range16[] = { // Space separator
{ 32, 32 },
{ 160, 160 },
{ 5760, 5760 },
{ 8192, 8202 },
{ 8239, 8239 },
{ 8287, 8287 },
{ 12288, 12288 },
};
static const URange16 Arabic_range16[] = {
{ 1536, 1540 },
{ 1542, 1547 },
{ 1549, 1562 },
{ 1564, 1566 },
{ 1568, 1599 },
{ 1601, 1610 },
{ 1622, 1647 },
{ 1649, 1756 },
{ 1758, 1791 },
{ 1872, 1919 },
{ 2160, 2190 },
{ 2192, 2193 },
{ 2200, 2273 },
{ 2275, 2303 },
{ 64336, 64450 },
{ 64467, 64829 },
{ 64832, 64911 },
{ 64914, 64967 },
{ 64975, 64975 },
{ 65008, 65023 },
{ 65136, 65140 },
{ 65142, 65276 },
};
static const URange16 Bengali_range16[] = {
{ 2432, 2435 },
{ 2437, 2444 },
{ 2447, 2448 },
{ 2451, 2472 },
{ 2474, 2480 },
{ 2482, 2482 },
{ 2486, 2489 },
{ 2492, 2500 },
{ 2503, 2504 },
{ 2507, 2510 },
{ 2519, 2519 },
{ 2524, 2525 },
{ 2527, 2531 },
{ 2534, 2558 },
};
static const URange16 Cyrillic_range16[] = {
{ 1024, 1156 },
{ 1159, 1327 },
{ 7296, 7304 },
{ 7467, 7467 },
{ 7544, 7544 },
{ 11744, 11775 },
{ 42560, 42655 },
{ 65070, 65071 },
};
static const URange16 Devanagari_range16[] = {
{ 2304, 2384 },
{ 2389, 2403 },
{ 2406, 2431 },
{ 43232, 43263 },
};
static const URange16 Georgian_range16[] = {
{ 4256, 4293 },
{ 4295, 4295 },
{ 4301, 4301 },
{ 4304, 4346 },
{ 4348, 4351 },
{ 7312, 7354 },
{ 7357, 7359 },
{ 11520, 11557 },
{ 11559, 11559 },
{ 11565, 11565 },
};
static const URange16 Greek_range16[] = {
{ 880, 883 },
{ 885, 887 },
{ 890, 893 },
{ 895, 895 },
{ 900, 900 },
{ 902, 902 },
{ 904, 906 },
{ 908, 908 },
{ 910, 929 },
{ 931, 993 },
{ 1008, 1023 },
{ 7462, 7466 },
{ 7517, 7521 },
{ 7526, 7530 },
{ 7615, 7615 },
{ 7936, 7957 },
{ 7960, 7965 },
{ 7968, 8005 },
{ 8008, 8013 },
{ 8016, 8023 },
{ 8025, 8025 },
{ 8027, 8027 },
{ 8029, 8029 },
{ 8031, 8061 },
{ 8064, 8116 },
{ 8118, 8132 },
{ 8134, 8147 },
{ 8150, 8155 },
{ 8157, 8175 },
{ 8178, 8180 },
{ 8182, 8190 },
{ 8486, 8486 },
{ 43877, 43877 },
};
static const URange16 Han_range16[] = {
{ 11904, 11929 },
{ 11931, 12019 },
{ 12032, 12245 },
{ 12293, 12293 },
{ 12295, 12295 },
{ 12321, 12329 },
{ 12344, 12347 },
{ 13312, 19903 },
{ 19968, 40959 },
{ 63744, 64109 },
{ 64112, 64217 },
};
static const URange16 Hiragana_range16[] = {
{ 12353, 12438 },
{ 12445, 12447 },
};
static const URange16 Katakana_range16[] = {
{ 12449, 12538 },
{ 12541, 12543 },
{ 12784, 12799 },
{ 13008, 13054 },
{ 13056, 13143 },
{ 65382, 65391 },
{ 65393, 65437 },
};
static const URange16 Latin_range16[] = {
{ 65, 90 },
{ 97, 122 },
{ 170, 170 },
{ 186, 186 },
{ 192, 214 },
{ 216, 246 },
{ 248, 696 },
{ 736, 740 },
{ 7424, 7461 },
{ 7468, 7516 },
{ 7522, 7525 },
{ 7531, 7543 },
{ 7545, 7614 },
{ 7680, 7935 },
{ 8305, 8305 },
{ 8319, 8319 },
{ 8336, 8348 },
{ 8490, 8491 },
{ 8498, 8498 },
{ 8526, 8526 },
{ 8544, 8584 },
{ 11360, 11391 },
{ 42786, 42887 },
{ 42891, 42954 },
{ 42960, 42961 },
{ 42963, 42963 },
{ 42965, 42969 },
{ 42994, 43007 },
{ 43824, 43866 },
{ 43868, 43876 },
{ 43878, 43881 },
{ 64256, 64262 },
{ 65313, 65338 },
{ 65345, 65370 },
};
static const URange16 Thai_range16[] = {
{ 3585, 3642 },
{ 3648, 3675 },
};
#ifdef __cplusplus
#define _e_arg(k, v) v
#else
#define _e_arg(k, v) [k] = v
#endif
#define UNI_ENTRY(Code) { Code##_range16, sizeof(Code##_range16)/sizeof(URange16) }
typedef struct {
const URange16 *r16;
int nr16;
} UGroup;
static const UGroup _utf8_unicode_groups[U8G_SIZE] = {
_e_arg(U8G_Cc, UNI_ENTRY(Cc)),
_e_arg(U8G_Lt, UNI_ENTRY(Lt)),
_e_arg(U8G_Nd, UNI_ENTRY(Nd)),
_e_arg(U8G_Nl, UNI_ENTRY(Nl)),
_e_arg(U8G_Pc, UNI_ENTRY(Pc)),
_e_arg(U8G_Pd, UNI_ENTRY(Pd)),
_e_arg(U8G_Pf, UNI_ENTRY(Pf)),
_e_arg(U8G_Pi, UNI_ENTRY(Pi)),
_e_arg(U8G_Sc, UNI_ENTRY(Sc)),
_e_arg(U8G_Zl, UNI_ENTRY(Zl)),
_e_arg(U8G_Zp, UNI_ENTRY(Zp)),
_e_arg(U8G_Zs, UNI_ENTRY(Zs)),
_e_arg(U8G_Arabic, UNI_ENTRY(Arabic)),
_e_arg(U8G_Bengali, UNI_ENTRY(Bengali)),
_e_arg(U8G_Cyrillic, UNI_ENTRY(Cyrillic)),
_e_arg(U8G_Devanagari, UNI_ENTRY(Devanagari)),
_e_arg(U8G_Georgian, UNI_ENTRY(Georgian)),
_e_arg(U8G_Greek, UNI_ENTRY(Greek)),
_e_arg(U8G_Han, UNI_ENTRY(Han)),
_e_arg(U8G_Hiragana, UNI_ENTRY(Hiragana)),
_e_arg(U8G_Katakana, UNI_ENTRY(Katakana)),
_e_arg(U8G_Latin, UNI_ENTRY(Latin)),
_e_arg(U8G_Thai, UNI_ENTRY(Thai)),
};
static bool utf8_isgroup(int group, uint32_t c) {
for (int j=0; j<_utf8_unicode_groups[group].nr16; ++j) {
if (c < _utf8_unicode_groups[group].r16[j].lo)
return false;
if (c <= _utf8_unicode_groups[group].r16[j].hi)
return true;
}
return false;
}
#endif // STC_UCD_PRV_C_INCLUDED

177
stc/priv/utf8_prv.c Normal file
View File

@@ -0,0 +1,177 @@
/* MIT License
*
* Copyright (c) 2025 Tyge Løvset
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef STC_UTF8_PRV_C_INCLUDED
#define STC_UTF8_PRV_C_INCLUDED
#include "utf8_tab.c"
/* Lookup table for the table-driven UTF-8 decoder (see utf8_decode()).
 * Entries [0..255] map an input byte to a character class ("type").
 * Entries from index 256 on hold the next decoder state, addressed as
 * utf8_dtab[256 + state + type]; states are multiples of 12
 * (0 = utf8_ACCEPT, 12 = utf8_REJECT). */
const uint8_t utf8_dtab[] = {
/* bytes 0x00..0x7F */
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
/* bytes 0x80..0x9F */
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
/* bytes 0xA0..0xBF */
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
/* bytes 0xC0..0xDF */
8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
/* bytes 0xE0..0xFF */
10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
/* state transitions: 12 entries (one per type) for each state, two states per row */
0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
12,36,12,12,12,12,12,12,12,12,12,12,
};
/* Encode code point c as UTF-8 into out (which must have room for up to
 * 4 bytes; no terminator is written).
 * Returns the number of bytes written (1..4), or 0 without writing anything
 * when c is a surrogate (0xD800..0xDFFF) or is 0x110000 or above. */
int utf8_encode(char *out, uint32_t c) {
    enum { CONT = 0x80, LOW6 = 0x3F };

    if (c < 0x80U) {
        *out = (char) c;
        return 1;
    }
    if (c < 0x0800U) {
        out[0] = (char) (0xC0 | ((c >> 6) & 0x1F));
        out[1] = (char) (CONT | (c & LOW6));
        return 2;
    }
    if (c >= 0x110000U)
        return 0;                       /* beyond the Unicode range */
    if (c >= 0x010000U) {
        out[0] = (char) (0xF0 | ((c >> 18) & 0x07));
        out[1] = (char) (CONT | ((c >> 12) & LOW6));
        out[2] = (char) (CONT | ((c >> 6) & LOW6));
        out[3] = (char) (CONT | (c & LOW6));
        return 4;
    }
    if (c >= 0xD800U && c < 0xE000U)
        return 0;                       /* surrogates are not encodable */
    out[0] = (char) (0xE0 | ((c >> 12) & 0x0F));
    out[1] = (char) (CONT | ((c >> 6) & LOW6));
    out[2] = (char) (CONT | (c & LOW6));
    return 3;
}
/* Decode the code point located `offset` code points away from s
 * (the position is resolved by utf8_offset(), the decoding by utf8_peek()). */
uint32_t utf8_peek_at(const char* s, isize offset) {
    const char* pos = utf8_offset(s, offset);
    return utf8_peek(pos);
}
/* Check that the NUL-terminated string s is well-formed UTF-8.
 * Every byte up to and including the terminator is fed to the decoder;
 * the scan stops at the first rejected byte or at the terminator, and the
 * string is valid iff the decoder ends in the accepting state. */
bool utf8_valid(const char* s) {
    utf8_decode_t d = {.state=0};
    for (;; ++s) {
        const uint32_t st = utf8_decode(&d, (uint8_t)*s);
        if (st == utf8_REJECT || *s == '\0')
            break;
    }
    return d.state == utf8_ACCEPT;
}
/* Check that the first nbytes bytes of s are well-formed UTF-8.
 * The scan also stops early at a NUL byte or at the first rejected byte.
 * Returns true iff the decoder is in the accepting state when the scan ends
 * (so a sequence cut off by the byte limit is reported as invalid). */
bool utf8_valid_n(const char* s, isize nbytes) {
    utf8_decode_t d = {.state=0};
    while (nbytes-- != 0) {
        const uint32_t st = utf8_decode(&d, (uint8_t)*s);
        if (st == utf8_REJECT || *s == '\0')
            break;
        ++s;
    }
    return d.state == utf8_ACCEPT;
}
/* Lower-bound search over the index range [0, N).
 *   c   : key to look for
 *   at  : function-like macro; at(i) yields the sort key stored at index i
 *   N   : number of elements
 *   ret : int lvalue that receives the result
 * On exit ret is the first index whose key is not less than c, or N when
 * there is no such index. The probe is deliberately off-centre: the first
 * probe is at the middle, after stepping right the next probe sits at 7/8 of
 * the remaining span, after stepping left at 1/8 of it.
 * Caveats: c and N are expanded several times and are not parenthesized, so
 * pass plain variables/constants; the macro declares locals _n, _i, _mid. */
#define _binsearch(c, at, N, ret) do { \
int _n = N, _i = 0, _mid = _n/2; \
while (_n > 0) { \
if (at(_i + _mid) < c) { \
_i += _mid + 1; \
_n -= _mid + 1; \
_mid = _n*7/8; \
} else { \
_n = _mid; \
_mid = _n/8; \
} \
} \
ret = (_i >= N || at(_i) < c) ? N : _i; \
} while (0)
uint32_t utf8_casefold(uint32_t c) {
#define _at_fold(idx) casemappings[idx].c2
int i;
_binsearch(c, _at_fold, casefold_len, i);
if (i < casefold_len && casemappings[i].c1 <= c && c <= casemappings[i].c2) {
const struct CaseMapping entry = casemappings[i];
int d = entry.m2 - entry.c2;
if (d == 1) return c + ((entry.c2 & 1U) == (c & 1U));
return (uint32_t)((int)c + d);
}
return c;
}
uint32_t utf8_tolower(uint32_t c) {
#define _at_upper(idx) casemappings[upcase_ind[idx]].c2
int i, n = c_countof(upcase_ind);
_binsearch(c, _at_upper, n, i);
if (i < n) {
const struct CaseMapping entry = casemappings[upcase_ind[i]];
if (entry.c1 <= c && c <= entry.c2) {
int d = entry.m2 - entry.c2;
if (d == 1) return c + ((entry.c2 & 1U) == (c & 1U));
return (uint32_t)((int)c + d);
}
}
return c;
}
uint32_t utf8_toupper(uint32_t c) {
#define _at_lower(idx) casemappings[lowcase_ind[idx]].m2
int i, n = c_countof(lowcase_ind);
_binsearch(c, _at_lower, n, i);
if (i < n) {
const struct CaseMapping entry = casemappings[lowcase_ind[i]];
int d = entry.m2 - entry.c2;
if (entry.c1 + (uint32_t)d <= c && c <= entry.m2) {
if (d == 1) return c - ((entry.m2 & 1U) == (c & 1U));
return (uint32_t)((int)c - d);
}
}
return c;
}
/* Decode one code point from [s, end) into d->codep. Requires s < end.
 * Returns the number of bytes the caller should advance by.
 * On a rejected byte, or when `end` is reached in the middle of a sequence,
 * the decoder is reset to the accepting state, d->codep is set to U+FFFD and
 * the advance is n - 1 when more than two bytes were examined, otherwise 1
 * (n being the number of bytes examined). */
int utf8_decode_codepoint(utf8_decode_t* d, const char* s, const char* end) {
    const char* const first = s;
    uint32_t st;
    do {
        st = utf8_decode(d, (uint8_t)*s++);
        if (st == utf8_ACCEPT)
            return (int)(s - first);
    } while (st != utf8_REJECT && s != end);

    /* an incomplete sequence is handled exactly like a rejected one */
    d->state = utf8_ACCEPT;
    d->codep = 0xFFFD;
    const int examined = (int)(s - first);
    return examined > 2 ? examined - 1 : 1;
}
/* Case-insensitive comparison of two UTF-8 views.
 * Both inputs are decoded code point by code point (malformed input decodes
 * as U+FFFD) and compared after utf8_casefold(). A NUL byte in s2 is treated
 * as its end, which lets utf8_icmp() pass NUL-terminated strings with an
 * "unbounded" size.
 * Returns 0 when the views are equal under case folding, a negative value
 * when s1 orders before s2 and a positive value when it orders after.
 * Only the sign of the result is meaningful. */
int utf8_icompare(const csview s1, const csview s2) {
    utf8_decode_t d1 = {.state=0}, d2 = {.state=0};
    const char *e1 = s1.buf + s1.size, *e2 = s2.buf + s2.size;
    isize j1 = 0, j2 = 0;
    while ((j1 < s1.size) & (j2 < s2.size)) {
        /* s2 ended: equal if s1 ended too, otherwise s1 is longer. The byte
         * is read as unsigned so that a non-ASCII byte cannot produce a
         * negative result where plain char is signed. */
        if (s2.buf[j2] == '\0') return (uint8_t)s1.buf[j1];
        j1 += utf8_decode_codepoint(&d1, s1.buf + j1, e1);
        j2 += utf8_decode_codepoint(&d2, s2.buf + j2, e2);
        int32_t c = (int32_t)utf8_casefold(d1.codep) - (int32_t)utf8_casefold(d2.codep);
        if (c != 0) return (int)c;
    }
    /* All compared code points matched. Decide by which side still has input
     * left, not by byte length: strings equal under folding may differ in
     * encoded length (e.g. KELVIN SIGN, 3 bytes, folds to 'k', 1 byte), and a
     * byte-length difference would also be truncated when narrowed to int. */
    return (int)(j1 < s1.size) - (int)(j2 < s2.size);
}
#endif // STC_UTF8_PRV_C_INCLUDED

127
stc/priv/utf8_prv.h Normal file
View File

@@ -0,0 +1,127 @@
/* MIT License
*
* Copyright (c) 2025 Tyge Løvset
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// IWYU pragma: private, include "stc/utf8.h"
#ifndef STC_UTF8_PRV_H_INCLUDED
#define STC_UTF8_PRV_H_INCLUDED
// The following functions assume valid utf8 strings:
/* Number of bytes in the UTF-8 sequence starting at s, derived from the lead
 * byte only. No validation is done: bytes 0x80..0xDF report 2, 0xE0..0xEF
 * report 3 and everything from 0xF0 upwards reports 4. */
STC_INLINE int utf8_chr_size(const char *s) {
    const unsigned lead = (uint8_t)*s;
    return 1 + (lead >= 0x80) + (lead >= 0xE0) + (lead >= 0xF0);
}
/* Number of code points in the NUL-terminated UTF-8 string s.
 * For every byte before the terminator the following byte is inspected;
 * a code point is counted each time that byte is not a continuation byte
 * (10xxxxxx), i.e. each time the end of a sequence is reached. */
STC_INLINE isize utf8_count(const char *s) {
    isize total = 0;
    while (*s != '\0') {
        ++s;
        if ((*s & 0xC0) != 0x80)
            ++total;
    }
    return total;
}
/* Number of code points in the first nbytes bytes of s, stopping early at a
 * NUL byte. Assumes valid UTF-8 that starts on a sequence boundary.
 * Counts lead (non-continuation) bytes inside the range, so no byte at or
 * beyond s[nbytes] is ever read; the view does not need to be followed by a
 * terminator or any other readable byte.
 * A negative nbytes keeps its previous meaning: scan until the NUL byte. */
STC_INLINE isize utf8_count_n(const char *s, isize nbytes) {
    isize size = 0;
    for (; nbytes != 0 && *s != 0; --nbytes, ++s)
        size += (*s & 0xC0) != 0x80;
    return size;
}
/* Pointer to the code point with index u8pos in the NUL-terminated string s.
 * Stops at the terminator when the string has fewer code points; a
 * non-positive u8pos returns s itself. */
STC_INLINE const char* utf8_at(const char *s, isize u8pos) {
    while (u8pos > 0 && *s != '\0') {
        ++s;
        /* one code point is passed each time a non-continuation byte is reached */
        if ((*s & 0xC0) != 0x80)
            --u8pos;
    }
    return s;
}
/* Move s by u8pos code points: forward for positive values, backward for
 * negative ones. In both directions the walk ends early when the current
 * byte is NUL. No lower bound is checked when walking backward. */
STC_INLINE const char* utf8_offset(const char* s, isize u8pos) {
    const int step = (u8pos < 0) ? -1 : 1;
    isize remaining = (u8pos < 0) ? -u8pos : u8pos;
    while (remaining != 0 && *s != '\0') {
        s += step;
        /* landing on a non-continuation byte completes one code point */
        if ((*s & 0xC0) != 0x80)
            --remaining;
    }
    return s;
}
/* Byte offset, relative to s, of the code point with index u8pos
 * (as located by utf8_at()). */
STC_INLINE isize utf8_to_index(const char* s, isize u8pos) {
    const char* target = utf8_at(s, u8pos);
    return target - s;
}
/* View over u8len code points of s, starting at code point index u8pos.
 * Both the start and the length are resolved with utf8_at(), so they are
 * clipped at the NUL terminator of s. */
STC_INLINE csview utf8_subview(const char *s, isize u8pos, isize u8len) {
    const char* first = utf8_at(s, u8pos);
    csview sub;
    sub.buf = first;
    sub.size = utf8_to_index(first, u8len);
    return sub;
}
// ------------------------------------------------------
// The following requires linking with utf8 symbols.
// To call them, either define i_import before including
// one of cstr, csview, zsview, or link with src/libstc.o.
/* decode next utf8 codepoint. https://bjoern.hoehrmann.de/utf-8/decoder/dfa */
/* Decoder context: `state` is the current DFA state (utf8_ACCEPT, utf8_REJECT
 * or an intermediate value), `codep` accumulates the code point bits. */
typedef struct { uint32_t state, codep; } utf8_decode_t;
extern const uint8_t utf8_dtab[]; /* byte classes + state transitions; defined in utf8_prv.c */
/* state after a complete, well-formed code point; also the start state */
#define utf8_ACCEPT 0
/* state after a byte that cannot continue the current sequence */
#define utf8_REJECT 12
/* true if the NUL-terminated string s is well-formed UTF-8 */
extern bool utf8_valid(const char* s);
/* as utf8_valid(), limited to the first nbytes bytes (also stops at NUL) */
extern bool utf8_valid_n(const char* s, isize nbytes);
/* writes the UTF-8 encoding of c to out; returns bytes written (1..4), 0 if c is not encodable */
extern int utf8_encode(char *out, uint32_t c);
/* decodes one code point from [s, end) into d->codep (U+FFFD on error); returns bytes to advance */
extern int utf8_decode_codepoint(utf8_decode_t* d, const char* s, const char* end);
/* compares s1 and s2 code point by code point after utf8_casefold() */
extern int utf8_icompare(const csview s1, const csview s2);
/* decodes the code point u8offset code points away from s */
extern uint32_t utf8_peek_at(const char* s, isize u8offset);
/* single code point case mappings; return c itself when no mapping applies */
extern uint32_t utf8_casefold(uint32_t c);
extern uint32_t utf8_tolower(uint32_t c);
extern uint32_t utf8_toupper(uint32_t c);
/* A code point counts as upper case when utf8_tolower() changes it. */
STC_INLINE bool utf8_isupper(uint32_t c) {
    const uint32_t lowered = utf8_tolower(c);
    return lowered != c;
}
/* A code point counts as lower case when utf8_toupper() changes it. */
STC_INLINE bool utf8_islower(uint32_t c) {
    const uint32_t raised = utf8_toupper(c);
    return raised != c;
}
/* Feed one byte (0..255) to the decoder d and return the new state.
 * In the accepting state the byte starts a new code point: its payload bits
 * are extracted with a mask selected by the byte's class. In any other state
 * the low six bits are appended to the code point collected so far.
 * d->codep holds a complete code point only when utf8_ACCEPT is returned. */
STC_INLINE uint32_t utf8_decode(utf8_decode_t* d, const uint32_t byte) {
    const uint32_t type = utf8_dtab[byte];
    if (d->state != utf8_ACCEPT)
        d->codep = (d->codep << 6) | (byte & 0x3fu);
    else
        d->codep = byte & (0xffU >> type);
    d->state = utf8_dtab[256 + d->state + type];
    return d->state;
}
/* Decode the code point starting at s without a length limit: bytes are
 * consumed until the decoder either accepts or rejects.
 * Returns the code point, or U+FFFD when the sequence is rejected. */
STC_INLINE uint32_t utf8_peek(const char* s) {
    utf8_decode_t d = {.state=0};
    uint32_t st;
    do {
        st = utf8_decode(&d, (uint8_t)*s);
        ++s;
    } while (st > utf8_REJECT);     /* states above REJECT are mid-sequence */
    return (st == utf8_ACCEPT) ? d.codep : 0xFFFD;
}
/* Case-insensitive comparison of two NUL-terminated UTF-8 strings.
 * Both strings are handed to utf8_icompare() as views of size INTPTR_MAX,
 * so the comparison relies on utf8_icompare() stopping at the NUL bytes. */
STC_INLINE int utf8_icmp(const char* s1, const char* s2) {
return utf8_icompare(c_sv(s1, INTPTR_MAX), c_sv(s2, INTPTR_MAX));
}
#endif // STC_UTF8_PRV_H_INCLUDED

250
stc/priv/utf8_tab.c Normal file
View File

@@ -0,0 +1,250 @@
/* One case-mapping range: source code points c1..c2 (inclusive); m2 is the
 * mapped value of c2. The lookup functions in utf8_prv.c derive the offset
 * as m2 - c2; when that offset is exactly 1, only code points with the same
 * parity as c2 are shifted. All fields are 16 bits wide. */
struct CaseMapping { uint16_t c1, c2, m2; };
static struct CaseMapping casemappings[] = {
{0x0041, 0x005A, 0x007A}, // A a (26) LATIN CAPITAL LETTER A
{0x00B5, 0x00B5, 0x03BC}, // µ μ ( 1) MICRO SIGN
{0x00C0, 0x00D6, 0x00F6}, // À à (23) LATIN CAPITAL LETTER A WITH GRAVE
{0x00D8, 0x00DE, 0x00FE}, // Ø ø ( 7) LATIN CAPITAL LETTER O WITH STROKE
{0x0100, 0x012E, 0x012F}, // Ā ā (24) LATIN CAPITAL LETTER A WITH MACRON
{0x0132, 0x0136, 0x0137}, // IJ ij ( 3) LATIN CAPITAL LIGATURE IJ
{0x0139, 0x0147, 0x0148}, // Ĺ ĺ ( 8) LATIN CAPITAL LETTER L WITH ACUTE
{0x014A, 0x0176, 0x0177}, // Ŋ ŋ (23) LATIN CAPITAL LETTER ENG
{0x0178, 0x0178, 0x00FF}, // Ÿ ÿ ( 1) LATIN CAPITAL LETTER Y WITH DIAERESIS
{0x0179, 0x017D, 0x017E}, // Ź ź ( 3) LATIN CAPITAL LETTER Z WITH ACUTE
{0x017F, 0x017F, 0x0073}, // ſ s ( 1) LATIN SMALL LETTER LONG S
{0x0181, 0x0181, 0x0253}, // Ɓ ɓ ( 1) LATIN CAPITAL LETTER B WITH HOOK
{0x0182, 0x0184, 0x0185}, // Ƃ ƃ ( 2) LATIN CAPITAL LETTER B WITH TOPBAR
{0x0186, 0x0186, 0x0254}, // Ɔ ɔ ( 1) LATIN CAPITAL LETTER OPEN O
{0x0187, 0x0187, 0x0188}, // Ƈ ƈ ( 1) LATIN CAPITAL LETTER C WITH HOOK
{0x0189, 0x018A, 0x0257}, // Ɖ ɖ ( 2) LATIN CAPITAL LETTER AFRICAN D
{0x018B, 0x018B, 0x018C}, // Ƌ ƌ ( 1) LATIN CAPITAL LETTER D WITH TOPBAR
{0x018E, 0x018E, 0x01DD}, // Ǝ ǝ ( 1) LATIN CAPITAL LETTER REVERSED E
{0x018F, 0x018F, 0x0259}, // Ə ə ( 1) LATIN CAPITAL LETTER SCHWA
{0x0190, 0x0190, 0x025B}, // Ɛ ɛ ( 1) LATIN CAPITAL LETTER OPEN E
{0x0191, 0x0191, 0x0192}, // Ƒ ƒ ( 1) LATIN CAPITAL LETTER F WITH HOOK
{0x0193, 0x0193, 0x0260}, // Ɠ ɠ ( 1) LATIN CAPITAL LETTER G WITH HOOK
{0x0194, 0x0194, 0x0263}, // Ɣ ɣ ( 1) LATIN CAPITAL LETTER GAMMA
{0x0196, 0x0196, 0x0269}, // Ɩ ɩ ( 1) LATIN CAPITAL LETTER IOTA
{0x0197, 0x0197, 0x0268}, // Ɨ ɨ ( 1) LATIN CAPITAL LETTER I WITH STROKE
{0x0198, 0x0198, 0x0199}, // Ƙ ƙ ( 1) LATIN CAPITAL LETTER K WITH HOOK
{0x019C, 0x019C, 0x026F}, // Ɯ ɯ ( 1) LATIN CAPITAL LETTER TURNED M
{0x019D, 0x019D, 0x0272}, // Ɲ ɲ ( 1) LATIN CAPITAL LETTER N WITH LEFT HOOK
{0x019F, 0x019F, 0x0275}, // Ɵ ɵ ( 1) LATIN CAPITAL LETTER O WITH MIDDLE TILDE
{0x01A0, 0x01A4, 0x01A5}, // Ơ ơ ( 3) LATIN CAPITAL LETTER O WITH HORN
{0x01A6, 0x01A6, 0x0280}, // Ʀ ʀ ( 1) LATIN LETTER YR
{0x01A7, 0x01A7, 0x01A8}, // Ƨ ƨ ( 1) LATIN CAPITAL LETTER TONE TWO
{0x01A9, 0x01A9, 0x0283}, // Ʃ ʃ ( 1) LATIN CAPITAL LETTER ESH
{0x01AC, 0x01AC, 0x01AD}, // Ƭ ƭ ( 1) LATIN CAPITAL LETTER T WITH HOOK
{0x01AE, 0x01AE, 0x0288}, // Ʈ ʈ ( 1) LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
{0x01AF, 0x01AF, 0x01B0}, // Ư ư ( 1) LATIN CAPITAL LETTER U WITH HORN
{0x01B1, 0x01B2, 0x028B}, // Ʊ ʊ ( 2) LATIN CAPITAL LETTER UPSILON
{0x01B3, 0x01B5, 0x01B6}, // Ƴ ƴ ( 2) LATIN CAPITAL LETTER Y WITH HOOK
{0x01B7, 0x01B7, 0x0292}, // Ʒ ʒ ( 1) LATIN CAPITAL LETTER EZH
{0x01B8, 0x01B8, 0x01B9}, // Ƹ ƹ ( 1) LATIN CAPITAL LETTER EZH REVERSED
{0x01BC, 0x01BC, 0x01BD}, // Ƽ ƽ ( 1) LATIN CAPITAL LETTER TONE FIVE
{0x01C4, 0x01C4, 0x01C6}, // DŽ dž ( 1) LATIN CAPITAL LETTER DZ WITH CARON
{0x01C5, 0x01C5, 0x01C6}, // Dž dž ( 1) LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
{0x01C7, 0x01C7, 0x01C9}, // LJ lj ( 1) LATIN CAPITAL LETTER LJ
{0x01C8, 0x01C8, 0x01C9}, // Lj lj ( 1) LATIN CAPITAL LETTER L WITH SMALL LETTER J
{0x01CA, 0x01CA, 0x01CC}, // NJ nj ( 1) LATIN CAPITAL LETTER NJ
{0x01CB, 0x01DB, 0x01DC}, // Nj nj ( 9) LATIN CAPITAL LETTER N WITH SMALL LETTER J
{0x01DE, 0x01EE, 0x01EF}, // Ǟ ǟ ( 9) LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
{0x01F1, 0x01F1, 0x01F3}, // DZ dz ( 1) LATIN CAPITAL LETTER DZ
{0x01F2, 0x01F4, 0x01F5}, // Dz dz ( 2) LATIN CAPITAL LETTER D WITH SMALL LETTER Z
{0x01F6, 0x01F6, 0x0195}, // Ƕ ƕ ( 1) LATIN CAPITAL LETTER HWAIR
{0x01F7, 0x01F7, 0x01BF}, // Ƿ ƿ ( 1) LATIN CAPITAL LETTER WYNN
{0x01F8, 0x021E, 0x021F}, // Ǹ ǹ (20) LATIN CAPITAL LETTER N WITH GRAVE
{0x0220, 0x0220, 0x019E}, // Ƞ ƞ ( 1) LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
{0x0222, 0x0232, 0x0233}, // Ȣ ȣ ( 9) LATIN CAPITAL LETTER OU
{0x023A, 0x023A, 0x2C65}, // Ⱥ ⱥ ( 1) LATIN CAPITAL LETTER A WITH STROKE
{0x023B, 0x023B, 0x023C}, // Ȼ ȼ ( 1) LATIN CAPITAL LETTER C WITH STROKE
{0x023D, 0x023D, 0x019A}, // Ƚ ƚ ( 1) LATIN CAPITAL LETTER L WITH BAR
{0x023E, 0x023E, 0x2C66}, // Ⱦ ⱦ ( 1) LATIN CAPITAL LETTER T WITH DIAGONAL STROKE
{0x0241, 0x0241, 0x0242}, // Ɂ ɂ ( 1) LATIN CAPITAL LETTER GLOTTAL STOP
{0x0243, 0x0243, 0x0180}, // Ƀ ƀ ( 1) LATIN CAPITAL LETTER B WITH STROKE
{0x0244, 0x0244, 0x0289}, // Ʉ ʉ ( 1) LATIN CAPITAL LETTER U BAR
{0x0245, 0x0245, 0x028C}, // Ʌ ʌ ( 1) LATIN CAPITAL LETTER TURNED V
{0x0246, 0x024E, 0x024F}, // Ɇ ɇ ( 5) LATIN CAPITAL LETTER E WITH STROKE
{0x0345, 0x0345, 0x03B9}, // ͅ ι ( 1) COMBINING GREEK YPOGEGRAMMENI
{0x0370, 0x0372, 0x0373}, // Ͱ ͱ ( 2) GREEK CAPITAL LETTER HETA
{0x0376, 0x0376, 0x0377}, // Ͷ ͷ ( 1) GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA
{0x037F, 0x037F, 0x03F3}, // Ϳ ϳ ( 1) GREEK CAPITAL LETTER YOT
{0x0386, 0x0386, 0x03AC}, // Ά ά ( 1) GREEK CAPITAL LETTER ALPHA WITH TONOS
{0x0388, 0x038A, 0x03AF}, // Έ έ ( 3) GREEK CAPITAL LETTER EPSILON WITH TONOS
{0x038C, 0x038C, 0x03CC}, // Ό ό ( 1) GREEK CAPITAL LETTER OMICRON WITH TONOS
{0x038E, 0x038F, 0x03CE}, // Ύ ύ ( 2) GREEK CAPITAL LETTER UPSILON WITH TONOS
{0x0391, 0x03A1, 0x03C1}, // Α α (17) GREEK CAPITAL LETTER ALPHA
{0x03A3, 0x03AB, 0x03CB}, // Σ σ ( 9) GREEK CAPITAL LETTER SIGMA
{0x03C2, 0x03C2, 0x03C3}, // ς σ ( 1) GREEK SMALL LETTER FINAL SIGMA
{0x03CF, 0x03CF, 0x03D7}, // Ϗ ϗ ( 1) GREEK CAPITAL KAI SYMBOL
{0x03D0, 0x03D0, 0x03B2}, // ϐ β ( 1) GREEK BETA SYMBOL
{0x03D1, 0x03D1, 0x03B8}, // ϑ θ ( 1) GREEK THETA SYMBOL
{0x03D5, 0x03D5, 0x03C6}, // ϕ φ ( 1) GREEK PHI SYMBOL
{0x03D6, 0x03D6, 0x03C0}, // ϖ π ( 1) GREEK PI SYMBOL
{0x03D8, 0x03EE, 0x03EF}, // Ϙ ϙ (12) GREEK LETTER ARCHAIC KOPPA
{0x03F0, 0x03F0, 0x03BA}, // ϰ κ ( 1) GREEK KAPPA SYMBOL
{0x03F1, 0x03F1, 0x03C1}, // ϱ ρ ( 1) GREEK RHO SYMBOL
{0x03F4, 0x03F4, 0x03B8}, // ϴ θ ( 1) GREEK CAPITAL THETA SYMBOL
{0x03F5, 0x03F5, 0x03B5}, // ϵ ε ( 1) GREEK LUNATE EPSILON SYMBOL
{0x03F7, 0x03F7, 0x03F8}, // Ϸ ϸ ( 1) GREEK CAPITAL LETTER SHO
{0x03F9, 0x03F9, 0x03F2}, // Ϲ ϲ ( 1) GREEK CAPITAL LUNATE SIGMA SYMBOL
{0x03FA, 0x03FA, 0x03FB}, // Ϻ ϻ ( 1) GREEK CAPITAL LETTER SAN
{0x03FD, 0x03FF, 0x037D}, // Ͻ ͻ ( 3) GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL
{0x0400, 0x040F, 0x045F}, // Ѐ ѐ (16) CYRILLIC CAPITAL LETTER IE WITH GRAVE
{0x0410, 0x042F, 0x044F}, // А а (32) CYRILLIC CAPITAL LETTER A
{0x0460, 0x0480, 0x0481}, // Ѡ ѡ (17) CYRILLIC CAPITAL LETTER OMEGA
{0x048A, 0x04BE, 0x04BF}, // Ҋ ҋ (27) CYRILLIC CAPITAL LETTER SHORT I WITH TAIL
{0x04C0, 0x04C0, 0x04CF}, // Ӏ ӏ ( 1) CYRILLIC LETTER PALOCHKA
{0x04C1, 0x04CD, 0x04CE}, // Ӂ ӂ ( 7) CYRILLIC CAPITAL LETTER ZHE WITH BREVE
{0x04D0, 0x052E, 0x052F}, // Ӑ ӑ (48) CYRILLIC CAPITAL LETTER A WITH BREVE
{0x0531, 0x0556, 0x0586}, // Ա ա (38) ARMENIAN CAPITAL LETTER AYB
{0x10A0, 0x10C5, 0x2D25}, // Ⴀ ⴀ (38) GEORGIAN CAPITAL LETTER AN
{0x10C7, 0x10C7, 0x2D27}, // Ⴧ ⴧ ( 1) GEORGIAN CAPITAL LETTER YN
{0x10CD, 0x10CD, 0x2D2D}, // Ⴭ ⴭ ( 1) GEORGIAN CAPITAL LETTER AEN
{0x13F8, 0x13FD, 0x13F5}, // ᏸ Ᏸ ( 6) CHEROKEE SMALL LETTER YE
{0x1C80, 0x1C80, 0x0432}, // ᲀ в ( 1) CYRILLIC SMALL LETTER ROUNDED VE
{0x1C81, 0x1C81, 0x0434}, // ᲁ д ( 1) CYRILLIC SMALL LETTER LONG-LEGGED DE
{0x1C82, 0x1C82, 0x043E}, // ᲂ о ( 1) CYRILLIC SMALL LETTER NARROW O
{0x1C83, 0x1C84, 0x0442}, // ᲃ с ( 2) CYRILLIC SMALL LETTER WIDE ES
{0x1C85, 0x1C85, 0x0442}, // ᲅ т ( 1) CYRILLIC SMALL LETTER THREE-LEGGED TE
{0x1C86, 0x1C86, 0x044A}, // ᲆ ъ ( 1) CYRILLIC SMALL LETTER TALL HARD SIGN
{0x1C87, 0x1C87, 0x0463}, // ᲇ ѣ ( 1) CYRILLIC SMALL LETTER TALL YAT
{0x1C88, 0x1C88, 0xA64B}, // ᲈ ꙋ ( 1) CYRILLIC SMALL LETTER UNBLENDED UK
{0x1C90, 0x1CBA, 0x10FA}, // Ა ა (43) GEORGIAN MTAVRULI CAPITAL LETTER AN
{0x1CBD, 0x1CBF, 0x10FF}, // Ჽ ჽ ( 3) GEORGIAN MTAVRULI CAPITAL LETTER AEN
{0x1E00, 0x1E94, 0x1E95}, // Ḁ ḁ (75) LATIN CAPITAL LETTER A WITH RING BELOW
{0x1E9B, 0x1E9B, 0x1E61}, // ẛ ṡ ( 1) LATIN SMALL LETTER LONG S WITH DOT ABOVE
{0x1E9E, 0x1E9E, 0x00DF}, // ẞ ß ( 1) LATIN CAPITAL LETTER SHARP S
{0x1EA0, 0x1EFE, 0x1EFF}, // Ạ ạ (48) LATIN CAPITAL LETTER A WITH DOT BELOW
{0x1F08, 0x1F0F, 0x1F07}, // Ἀ ἀ ( 8) GREEK CAPITAL LETTER ALPHA WITH PSILI
{0x1F18, 0x1F1D, 0x1F15}, // Ἐ ἐ ( 6) GREEK CAPITAL LETTER EPSILON WITH PSILI
{0x1F28, 0x1F2F, 0x1F27}, // Ἠ ἠ ( 8) GREEK CAPITAL LETTER ETA WITH PSILI
{0x1F38, 0x1F3F, 0x1F37}, // Ἰ ἰ ( 8) GREEK CAPITAL LETTER IOTA WITH PSILI
{0x1F48, 0x1F4D, 0x1F45}, // Ὀ ὀ ( 6) GREEK CAPITAL LETTER OMICRON WITH PSILI
{0x1F59, 0x1F5F, 0x1F57}, // Ὑ ὑ ( 7) GREEK CAPITAL LETTER UPSILON WITH DASIA
{0x1F68, 0x1F6F, 0x1F67}, // Ὠ ὠ ( 8) GREEK CAPITAL LETTER OMEGA WITH PSILI
{0x1F88, 0x1F8F, 0x1F87}, // ᾈ ᾀ ( 8) GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
{0x1F98, 0x1F9F, 0x1F97}, // ᾘ ᾐ ( 8) GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
{0x1FA8, 0x1FAF, 0x1FA7}, // ᾨ ᾠ ( 8) GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
{0x1FB8, 0x1FB9, 0x1FB1}, // Ᾰ ᾰ ( 2) GREEK CAPITAL LETTER ALPHA WITH VRACHY
{0x1FBA, 0x1FBB, 0x1F71}, // Ὰ ὰ ( 2) GREEK CAPITAL LETTER ALPHA WITH VARIA
{0x1FBC, 0x1FBC, 0x1FB3}, // ᾼ ᾳ ( 1) GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
{0x1FBE, 0x1FBE, 0x03B9}, // ι ( 1) GREEK PROSGEGRAMMENI
{0x1FC8, 0x1FCB, 0x1F75}, // Ὲ ὲ ( 4) GREEK CAPITAL LETTER EPSILON WITH VARIA
{0x1FCC, 0x1FCC, 0x1FC3}, // ῌ ῃ ( 1) GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
{0x1FD8, 0x1FD9, 0x1FD1}, // Ῐ ῐ ( 2) GREEK CAPITAL LETTER IOTA WITH VRACHY
{0x1FDA, 0x1FDB, 0x1F77}, // Ὶ ὶ ( 2) GREEK CAPITAL LETTER IOTA WITH VARIA
{0x1FE8, 0x1FE9, 0x1FE1}, // Ῠ ῠ ( 2) GREEK CAPITAL LETTER UPSILON WITH VRACHY
{0x1FEA, 0x1FEB, 0x1F7B}, // Ὺ ὺ ( 2) GREEK CAPITAL LETTER UPSILON WITH VARIA
{0x1FEC, 0x1FEC, 0x1FE5}, // Ῥ ῥ ( 1) GREEK CAPITAL LETTER RHO WITH DASIA
{0x1FF8, 0x1FF9, 0x1F79}, // Ὸ ὸ ( 2) GREEK CAPITAL LETTER OMICRON WITH VARIA
{0x1FFA, 0x1FFB, 0x1F7D}, // Ὼ ὼ ( 2) GREEK CAPITAL LETTER OMEGA WITH VARIA
{0x1FFC, 0x1FFC, 0x1FF3}, // ῼ ῳ ( 1) GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
{0x2126, 0x2126, 0x03C9}, // Ω ω ( 1) OHM SIGN
{0x212A, 0x212A, 0x006B}, // k ( 1) KELVIN SIGN
{0x212B, 0x212B, 0x00E5}, // Å å ( 1) ANGSTROM SIGN
{0x2132, 0x2132, 0x214E}, // Ⅎ ⅎ ( 1) TURNED CAPITAL F
{0x2160, 0x216F, 0x217F}, // (16) ROMAN NUMERAL ONE
{0x2183, 0x2183, 0x2184}, // Ↄ ↄ ( 1) ROMAN NUMERAL REVERSED ONE HUNDRED
{0x24B6, 0x24CF, 0x24E9}, // Ⓐ ⓐ (26) CIRCLED LATIN CAPITAL LETTER A
{0x2C00, 0x2C2F, 0x2C5F}, // Ⰰ ⰰ (48) GLAGOLITIC CAPITAL LETTER AZU
{0x2C60, 0x2C60, 0x2C61}, // Ⱡ ⱡ ( 1) LATIN CAPITAL LETTER L WITH DOUBLE BAR
{0x2C62, 0x2C62, 0x026B}, // Ɫ ɫ ( 1) LATIN CAPITAL LETTER L WITH MIDDLE TILDE
{0x2C63, 0x2C63, 0x1D7D}, // Ᵽ ᵽ ( 1) LATIN CAPITAL LETTER P WITH STROKE
{0x2C64, 0x2C64, 0x027D}, // Ɽ ɽ ( 1) LATIN CAPITAL LETTER R WITH TAIL
{0x2C67, 0x2C6B, 0x2C6C}, // Ⱨ ⱨ ( 3) LATIN CAPITAL LETTER H WITH DESCENDER
{0x2C6D, 0x2C6D, 0x0251}, // Ɑ ɑ ( 1) LATIN CAPITAL LETTER ALPHA
{0x2C6E, 0x2C6E, 0x0271}, // Ɱ ɱ ( 1) LATIN CAPITAL LETTER M WITH HOOK
{0x2C6F, 0x2C6F, 0x0250}, // Ɐ ɐ ( 1) LATIN CAPITAL LETTER TURNED A
{0x2C70, 0x2C70, 0x0252}, // Ɒ ɒ ( 1) LATIN CAPITAL LETTER TURNED ALPHA
{0x2C72, 0x2C72, 0x2C73}, // Ⱳ ⱳ ( 1) LATIN CAPITAL LETTER W WITH HOOK
{0x2C75, 0x2C75, 0x2C76}, // Ⱶ ⱶ ( 1) LATIN CAPITAL LETTER HALF H
{0x2C7E, 0x2C7F, 0x0240}, // Ȿ ȿ ( 2) LATIN CAPITAL LETTER S WITH SWASH TAIL
{0x2C80, 0x2CE2, 0x2CE3}, // Ⲁ ⲁ (50) COPTIC CAPITAL LETTER ALFA
{0x2CEB, 0x2CED, 0x2CEE}, // Ⳬ ⳬ ( 2) COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI
{0x2CF2, 0x2CF2, 0x2CF3}, // Ⳳ ⳳ ( 1) COPTIC CAPITAL LETTER BOHAIRIC KHEI
{0xA640, 0xA66C, 0xA66D}, // Ꙁ ꙁ (23) CYRILLIC CAPITAL LETTER ZEMLYA
{0xA680, 0xA69A, 0xA69B}, // Ꚁ ꚁ (14) CYRILLIC CAPITAL LETTER DWE
{0xA722, 0xA72E, 0xA72F}, // Ꜣ ꜣ ( 7) LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF
{0xA732, 0xA76E, 0xA76F}, // Ꜳ ꜳ (31) LATIN CAPITAL LETTER AA
{0xA779, 0xA77B, 0xA77C}, // Ꝺ ꝺ ( 2) LATIN CAPITAL LETTER INSULAR D
{0xA77D, 0xA77D, 0x1D79}, // Ᵹ ᵹ ( 1) LATIN CAPITAL LETTER INSULAR G
{0xA77E, 0xA786, 0xA787}, // Ꝿ ꝿ ( 5) LATIN CAPITAL LETTER TURNED INSULAR G
{0xA78B, 0xA78B, 0xA78C}, // Ꞌ ( 1) LATIN CAPITAL LETTER SALTILLO
{0xA78D, 0xA78D, 0x0265}, // Ɥ ɥ ( 1) LATIN CAPITAL LETTER TURNED H
{0xA790, 0xA792, 0xA793}, // Ꞑ ꞑ ( 2) LATIN CAPITAL LETTER N WITH DESCENDER
{0xA796, 0xA7A8, 0xA7A9}, // Ꞗ ꞗ (10) LATIN CAPITAL LETTER B WITH FLOURISH
{0xA7AA, 0xA7AA, 0x0266}, // Ɦ ɦ ( 1) LATIN CAPITAL LETTER H WITH HOOK
{0xA7AB, 0xA7AB, 0x025C}, // ɜ ( 1) LATIN CAPITAL LETTER REVERSED OPEN E
{0xA7AC, 0xA7AC, 0x0261}, // Ɡ ɡ ( 1) LATIN CAPITAL LETTER SCRIPT G
{0xA7AD, 0xA7AD, 0x026C}, // Ɬ ɬ ( 1) LATIN CAPITAL LETTER L WITH BELT
{0xA7AE, 0xA7AE, 0x026A}, // Ɪ ɪ ( 1) LATIN CAPITAL LETTER SMALL CAPITAL I
{0xA7B0, 0xA7B0, 0x029E}, // Ʞ ʞ ( 1) LATIN CAPITAL LETTER TURNED K
{0xA7B1, 0xA7B1, 0x0287}, // Ʇ ʇ ( 1) LATIN CAPITAL LETTER TURNED T
{0xA7B2, 0xA7B2, 0x029D}, // ʝ ( 1) LATIN CAPITAL LETTER J WITH CROSSED-TAIL
{0xA7B3, 0xA7B3, 0xAB53}, // ꭓ ( 1) LATIN CAPITAL LETTER CHI
{0xA7B4, 0xA7C2, 0xA7C3}, // ꞵ ( 8) LATIN CAPITAL LETTER BETA
{0xA7C4, 0xA7C4, 0xA794}, // Ꞔ ꞔ ( 1) LATIN CAPITAL LETTER C WITH PALATAL HOOK
{0xA7C5, 0xA7C5, 0x0282}, // Ʂ ʂ ( 1) LATIN CAPITAL LETTER S WITH HOOK
{0xA7C6, 0xA7C6, 0x1D8E}, // Ᶎ ᶎ ( 1) LATIN CAPITAL LETTER Z WITH PALATAL HOOK
{0xA7C7, 0xA7C9, 0xA7CA}, // Ꟈ ꟈ ( 2) LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY
{0xA7D0, 0xA7D0, 0xA7D1}, // Ꟑ ꟑ ( 1) LATIN CAPITAL LETTER CLOSED INSULAR G
{0xA7D6, 0xA7D8, 0xA7D9}, // Ꟗ ꟗ ( 2) LATIN CAPITAL LETTER MIDDLE SCOTS S
{0xA7F5, 0xA7F5, 0xA7F6}, // Ꟶ ꟶ ( 1) LATIN CAPITAL LETTER REVERSED HALF H
{0xAB70, 0xABBF, 0x13EF}, // ꭰ (80) CHEROKEE SMALL LETTER A
{0xFF21, 0xFF3A, 0xFF5A}, // (26) FULLWIDTH LATIN CAPITAL LETTER A
{0x0130, 0x0130, 0x0069}, // İ i ( 1) LATIN CAPITAL LETTER I WITH DOT ABOVE
{0x01CD, 0x01DB, 0x01DC}, // Ǎ ǎ ( 8) LATIN CAPITAL LETTER A WITH CARON
{0x01F4, 0x01F4, 0x01F5}, // Ǵ ǵ ( 1) LATIN CAPITAL LETTER G WITH ACUTE
{0x13A0, 0x13EF, 0xABBF}, // ꭰ (80) CHEROKEE LETTER A
{0x13F0, 0x13F5, 0x13FD}, // Ᏸ ᏸ ( 6) CHEROKEE LETTER YE
{0x039C, 0x039C, 0x00B5}, // Μ µ ( 1)
{0x0049, 0x0049, 0x0131}, // I ı ( 1)
{0x0053, 0x0053, 0x017F}, // S ſ ( 1)
{0x03A3, 0x03A3, 0x03C2}, // Σ ς ( 1)
{0x0392, 0x0392, 0x03D0}, // Β ϐ ( 1)
{0x0398, 0x0398, 0x03D1}, // Θ ϑ ( 1)
{0x03A6, 0x03A6, 0x03D5}, // Φ ϕ ( 1)
{0x03A0, 0x03A0, 0x03D6}, // Π ϖ ( 1)
{0x039A, 0x039A, 0x03F0}, // Κ ϰ ( 1)
{0x03A1, 0x03A1, 0x03F1}, // Ρ ϱ ( 1)
{0x0395, 0x0395, 0x03F5}, // Ε ϵ ( 1)
{0x0412, 0x0412, 0x1C80}, // В ᲀ ( 1)
{0x0414, 0x0414, 0x1C81}, // Д ᲁ ( 1)
{0x041E, 0x041E, 0x1C82}, // О ᲂ ( 1)
{0x0421, 0x0422, 0x1C84}, // С ᲃ ( 2)
{0x0422, 0x0422, 0x1C85}, // Т ᲅ ( 1)
{0x042A, 0x042A, 0x1C86}, // Ъ ᲆ ( 1)
{0x0462, 0x0462, 0x1C87}, // Ѣ ᲇ ( 1)
{0xA64A, 0xA64A, 0x1C88}, // Ꙋ ᲈ ( 1)
{0x1E60, 0x1E60, 0x1E9B}, // Ṡ ẛ ( 1)
{0x0399, 0x0399, 0x1FBE}, // Ι ( 1)
}; // 218
/* Number of leading casemappings[] entries that utf8_casefold() binary-searches
 * on c2; entries from this index on are reached only via upcase_ind/lowcase_ind. */
enum { casefold_len = 192 };
/* Indices into casemappings[] used by utf8_tolower(), which binary-searches
 * this list on the c2 field of the referenced entries.
 * Read-only lookup data, hence const. */
static const uint8_t upcase_ind[162] = {
    0, 2, 3, 4, 192, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
    21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    41, 43, 45, 193, 47, 48, 194, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
    63, 65, 66, 67, 68, 69, 70, 71, 72, 73, 75, 80, 83, 85, 86, 87, 88, 89, 90, 91,
    92, 93, 94, 95, 96, 97, 98, 99, 195, 196, 109, 110, 111, 113, 114, 115, 116, 117, 118, 119,
    120, 121, 125, 126, 129, 131, 132, 133, 134, 135, 136, 137, 139, 140, 141, 142, 144, 146, 147, 148,
    149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
    169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188,
    189, 191,
};
/* Indices into casemappings[] used by utf8_toupper(), which binary-searches
 * this list on the m2 field of the referenced entries.
 * Read-only lookup data, hence const. */
static const uint8_t lowcase_ind[184] = {
    0, 197, 113, 2, 3, 8, 4, 198, 5, 6, 7, 9, 199, 60, 12, 14, 16, 20, 50, 25,
    57, 53, 29, 31, 33, 35, 37, 39, 40, 51, 41, 43, 45, 193, 17, 47, 48, 194, 52, 54,
    56, 158, 59, 63, 154, 152, 155, 11, 13, 15, 18, 19, 174, 21, 175, 22, 170, 173, 24, 23,
    177, 148, 176, 26, 153, 27, 28, 150, 30, 184, 32, 179, 34, 61, 36, 62, 38, 180, 178, 65,
    66, 88, 68, 69, 72, 200, 73, 70, 71, 201, 202, 203, 204, 75, 80, 205, 206, 86, 67, 207,
    85, 87, 90, 89, 91, 92, 94, 93, 95, 96, 109, 110, 196, 208, 209, 210, 211, 212, 213, 214,
    215, 167, 149, 185, 111, 216, 114, 115, 116, 117, 118, 119, 120, 121, 126, 129, 132, 136, 134, 137,
    122, 123, 124, 125, 127, 217, 130, 131, 133, 135, 138, 142, 144, 146, 147, 55, 58, 151, 156, 157,
    159, 160, 161, 97, 98, 99, 162, 163, 164, 165, 166, 168, 169, 171, 183, 172, 182, 186, 187, 188,
    189, 181, 195, 191,
};