--[[
Titty has two main design goals:
  1. It should look natural from the outside, i.e. from Lua code's point of view, and
  2. It should compile to Lua code that also appears natural from within.
The second is less important than the first.
]]

-- `inspect` is a debugging aid only; nothing in this file calls it. Load it
-- softly so that a missing installation cannot prevent the lexer from loading.
local has_inspect, inspect_module = pcall(require, "inspect")
local inspect = has_inspect and inspect_module.inspect or nil

--- Build a pull-style iterator over the code points of a UTF-8 string.
-- @tparam string str UTF-8 encoded text (`utf8.codes` raises on invalid input)
-- @treturn function each call returns `index, codepoint`, where `index` counts
--   code points starting at 0; returns nil once the input is exhausted and on
--   every call after that
local function codepoints(str)
  local it, state, pos = utf8.codes(str)
  local idx = 0
  return function()
    if not it then
      return nil
    end
    local cp
    pos, cp = it(state, pos)
    if pos then
      local current = idx
      idx = idx + 1
      return current, cp
    end
    -- Remember exhaustion so the underlying iterator is never called again.
    it = nil
    return nil
  end
end

--- True for the ASCII digits `0`-`9`; falsy for anything else, including nil.
local function is_digit(cp)
  return cp and (cp >= 48 and cp <= 57)
end

--- True for `A`-`Z`, `a`-`z` and `_`; falsy for anything else, including nil.
local function is_ident_start(cp)
  return cp and ((cp >= 65 and cp <= 90) or (cp >= 97 and cp <= 122) or cp == 95)
end

--- True for any code point allowed after the first character of an identifier.
local function is_ident_nonstart(cp)
  return cp and (is_ident_start(cp) or is_digit(cp))
end

--- Check that a predicate holds for every value stored in a table.
-- Not used elsewhere in this file.
-- @tparam table tbl table whose values are tested (all keys, via `pairs`)
-- @tparam function f predicate called with each value
-- @param startI accepted for signature compatibility; it has never influenced
--   the iteration and is ignored
-- @treturn boolean false as soon as `f` returns a falsy value, true otherwise
local function forall(tbl, f, startI) -- luacheck: ignore startI
  for _, v in pairs(tbl) do
    if not f(v) then
      return false
    end
  end
  return true
end

--- True for space, tab, line feed and carriage return; falsy otherwise.
-- Carriage return is included so that sources with CRLF line endings lex
-- instead of failing with "unknown character".
local function is_whitespace(cp)
  return cp and (cp == 32 or cp == 10 or cp == 9 or cp == 13)
end

--- Remove every entry from a table in place.
local function clear(tbl)
  for k in pairs(tbl) do
    tbl[k] = nil
  end
end

-- Code point -> token for tokens that are always exactly one character long.
-- `=` and `*` are absent because they may start the two-character tokens
-- `==` and `**`.
local SINGLE_CHAR_TOKENS = {}
for ch in ("(){},.:+-/%#[]"):gmatch(".") do
  SINGLE_CHAR_TOKENS[ch:byte()] = ch
end

-- Reserved words; each is emitted with its own text as the token kind.
local KEYWORDS = {}
for word in ([[
  func end if while do then elseif type interf let return
  else for import constr loop break nil
]]):gmatch("%S+") do
  KEYWORDS[word] = true
end

local QUOTE, DOT, NEWLINE = 39, 46, 10

--- Create a tokenizer on top of a code point source.
-- @tparam function cpgetraw iterator returning `index, codepoint` per call and
--   nil at end of input (see `codepoints`)
-- @treturn function each call returns `kind, value, row, column` for the next
--   token, or nil at end of input. `kind` is the token text for punctuation
--   and keywords, or one of `"id"`, `"num"`, `"string"`. `row` is 0-based and
--   `column` is 1-based; both refer to the first character of the token.
--   Raises an error on an unknown character or an unterminated string literal.
local function lex(cpgetraw)
  local row, column = 0, 0
  -- One code point of lookahead that was read but not consumed.
  local pull

  local function cpget()
    if pull then
      local ret = pull
      pull = nil
      return ret
    end
    local _, cp = cpgetraw()
    column = column + 1
    if cp == NEWLINE then
      column = 0
      row = row + 1
    end
    return cp
  end

  -- Scratch buffer of code points for the token being assembled.
  local buf = {}

  -- Convert the buffered code points to a string and reset the buffer.
  local function take_buf()
    local text = utf8.char(table.unpack(buf))
    clear(buf)
    return text
  end

  -- After reading `first`, decide between a one-character token and its
  -- doubled form (`=` vs `==`, `*` vs `**`).
  local function single_or_double(first, single, double)
    local after = cpget()
    if after == first then
      return double
    end
    pull = after
    return single
  end

  return function()
    local cp
    repeat
      cp = cpget()
    until not is_whitespace(cp)
    if not cp then
      return nil
    end

    local rowStart, columnStart = row, column

    local simple = SINGLE_CHAR_TOKENS[cp]
    if simple then
      return simple, simple, rowStart, columnStart
    end

    if cp == 61 then -- '='
      local tok = single_or_double(61, "=", "==")
      return tok, tok, rowStart, columnStart
    elseif cp == 42 then -- '*'
      local tok = single_or_double(42, "*", "**")
      return tok, tok, rowStart, columnStart
    elseif cp == QUOTE then
      -- Single-quoted string: everything up to the next quote, verbatim
      -- (no escape sequences; newlines are allowed inside).
      while true do
        cp = cpget()
        if cp == nil then
          -- End of input inside the literal. Without this check the loop
          -- would spin forever, since the source keeps returning nil.
          clear(buf)
          error(string.format("%i:%i unterminated string literal", rowStart, columnStart))
        elseif cp == QUOTE then
          break
        end
        buf[#buf + 1] = cp
      end
      return "string", take_buf(), rowStart, columnStart
    elseif is_ident_start(cp) then
      buf[1] = cp
      while true do
        cp = cpget()
        if not is_ident_nonstart(cp) then
          pull = cp
          break
        end
        buf[#buf + 1] = cp
      end
      local word = take_buf()
      if KEYWORDS[word] then
        return word, word, rowStart, columnStart
      end
      return "id", word, rowStart, columnStart
    elseif is_digit(cp) then
      -- Digits with at most one '.'; the dot is taken even when no digit
      -- follows it, so "1." lexes as a single number token.
      buf[1] = cp
      local dotFound = false
      while true do
        cp = cpget()
        if is_digit(cp) then
          buf[#buf + 1] = cp
        elseif cp == DOT and not dotFound then
          dotFound = true
          buf[#buf + 1] = cp
        else
          pull = cp
          break
        end
      end
      return "num", take_buf(), rowStart, columnStart
    end

    error(string.format("%i:%i unknown character %q (code point %i)", row, column, utf8.char(cp), cp))
  end
end

return {lex = lex, codepoints = codepoints}