-- ky/lex.lua
-- (repository snapshot dated 2024-06-11 17:46:13 +03:00)
--[[
Titty has two main design goals:
1. Should look natural from the outside, Lua code's PoV, and
2. Should compile to Lua code that also appears natural from within
The second is less important than the first.
]]
local inspect = require"inspect".inspect
--- Build a pull-style iterator over the UTF-8 code points of `str`.
-- Every call to the returned function yields `index, codepoint`, where
-- `index` counts code points starting at 0 (it is not a byte offset).
-- After the string is exhausted every further call yields nil.
-- @param str string handed to `utf8.codes`
-- @return closure producing `index, codepoint` pairs
local function codepoints(str)
  local step, subject, position = utf8.codes(str)
  local count = 0
  local exhausted = false
  return function()
    if exhausted then
      return nil
    end
    local codepoint
    position, codepoint = step(subject, position)
    if not position then
      -- Remember that the underlying iterator finished so it is never
      -- stepped again.
      exhausted = true
      return
    end
    count = count + 1
    return count - 1, codepoint
  end
end
--- Tell whether a code point is an ASCII decimal digit ('0'..'9', 48..57).
-- A nil or false argument is handed straight back, so the result is falsy.
-- @param cp code point (integer) or nil
-- @return true/false for a number, otherwise `cp` itself
local function is_digit(cp)
  if not cp then
    return cp
  end
  return 48 <= cp and cp <= 57
end
--- Tell whether a code point may begin an identifier:
-- 'A'..'Z' (65..90), 'a'..'z' (97..122) or '_' (95).
-- A nil or false argument is handed straight back, so the result is falsy.
-- @param cp code point (integer) or nil
-- @return true/false for a number, otherwise `cp` itself
local function is_ident_start(cp)
  if not cp then
    return cp
  end
  local is_upper = 65 <= cp and cp <= 90
  local is_lower = 97 <= cp and cp <= 122
  return is_upper or is_lower or cp == 95
end
--- Tell whether a code point may continue an identifier, i.e. it is either
-- a valid identifier start or an ASCII digit.
-- A nil or false argument is handed straight back, so the result is falsy.
-- @param cp code point (integer) or nil
-- @return truthy when `cp` can appear after the first identifier character
local function is_ident_nonstart(cp)
  if not cp then
    return cp
  end
  local starts = is_ident_start(cp)
  if starts then
    return starts
  end
  return is_digit(cp)
end
--- Check that a predicate holds for the values of a table.
-- @param tbl table to inspect
-- @param f predicate called with each value; a falsy result fails the check
-- @param startI optional first array index to test. When given, only
--   tbl[startI] .. tbl[#tbl] are examined, in order. When omitted, every
--   value in the table (array and hash part) is examined via `pairs`.
-- @return true if `f` accepted every examined value, false otherwise
local function forall(tbl, f, startI)
  if startI == nil then
    for _, v in pairs(tbl) do
      if not f(v) then
        return false
      end
    end
    return true
  end
  -- An explicit start index was previously ignored; honour it by walking
  -- the sequence part from that index.
  for i = startI, #tbl do
    if not f(tbl[i]) then
      return false
    end
  end
  return true
end
--- Tell whether a code point is whitespace that separates tokens:
-- space (32), tab (9), line feed (10) or carriage return (13).
-- Carriage return is included so that input with CRLF line endings is
-- skipped like any other whitespace instead of being reported as an
-- unknown character.
-- A nil or false argument is handed straight back, so the result is falsy.
-- @param cp code point (integer) or nil
-- @return true/false for a number, otherwise `cp` itself
local function is_whitespace(cp)
  return cp and (cp == 32 or cp == 9 or cp == 10 or cp == 13)
end
--- Empty a table in place by unsetting every key it currently holds.
-- The table itself is kept, so other references to it see it emptied.
-- @param tbl table to empty
local function clear(tbl)
  for key, _ in pairs(tbl) do
    tbl[key] = nil
  end
end
-- Tokens made of exactly one code point; type and value are the character.
local PUNCTUATION = {}
for ch in ("(){},.:+-/%#[]"):gmatch(".") do
  PUNCTUATION[ch:byte()] = ch
end

-- Operators that form a second token when doubled: "=" / "==", "*" / "**".
local DOUBLED_OPERATORS = { [61] = "=", [42] = "*" }

-- Reserved words; each is returned with the word itself as its token type.
local KEYWORDS = {}
for _, word in ipairs({
  "func", "end", "if", "while", "do", "then",
  "elseif", "type", "interf", "let", "return", "else",
  "for", "import", "constr", "loop", "break", "nil",
}) do
  KEYWORDS[word] = true
end

--- Create a tokenizer over a stream of code points.
-- @param cpgetraw function returning `index, codepoint` on each call and
--   nil once the input is exhausted (the index is ignored here)
-- @return a function that yields one token per call as
--   `type, value, row, column`, or nil at end of input. `row` starts at 0
--   and advances on every line feed; `column` is 1 for the first code
--   point of a line. Token types are the punctuation/operator text itself,
--   a keyword, "id", "num" or "string".
-- Raises an error on a code point that cannot start a token and on a
-- string literal that is still open at end of input.
local function lex(cpgetraw)
  local row, column = 0, 0
  local pull -- single code point pushed back after a lookahead

  -- Next code point, honouring the pushback slot. Position counters only
  -- advance for code points freshly read from `cpgetraw`.
  local function cpget()
    if pull then
      local ret = pull
      pull = nil
      return ret
    end
    local _, cp = cpgetraw()
    column = column + 1
    if cp == 10 then
      column = 0
      row = row + 1
    end
    return cp
  end

  local buf = {}

  -- Turn the buffered code points into a string and reset the buffer.
  local function flush()
    local text = utf8.char(table.unpack(buf))
    clear(buf)
    return text
  end

  return function()
    local cp
    repeat
      cp = cpget()
    until not is_whitespace(cp)
    if not cp then
      return nil
    end
    local rowStart, columnStart = row, column

    local symbol = PUNCTUATION[cp]
    if symbol then
      return symbol, symbol, rowStart, columnStart
    end

    symbol = DOUBLED_OPERATORS[cp]
    if symbol then
      local after = cpget()
      if after == cp then
        symbol = symbol .. symbol
      else
        pull = after
      end
      return symbol, symbol, rowStart, columnStart
    end

    if cp == 39 then
      -- Single-quoted string: everything up to the next quote, verbatim
      -- (no escape sequences are interpreted).
      while true do
        cp = cpget()
        if cp == 39 then
          break
        elseif cp == nil then
          -- Without this check the loop would spin forever at end of
          -- input, since appending nil to the buffer is a no-op.
          clear(buf)
          error(string.format("%i:%i unterminated string", rowStart, columnStart))
        end
        buf[#buf + 1] = cp
      end
      return "string", flush(), rowStart, columnStart
    end

    if is_ident_start(cp) then
      repeat
        buf[#buf + 1] = cp
        cp = cpget()
      until not is_ident_nonstart(cp)
      pull = cp
      local word = flush()
      if KEYWORDS[word] then
        return word, word, rowStart, columnStart
      end
      return "id", word, rowStart, columnStart
    end

    if is_digit(cp) then
      -- Digits with at most one '.'; note that a '.' directly after the
      -- digits is absorbed even when no digit follows it (e.g. "1.").
      local dotFound = false
      repeat
        buf[#buf + 1] = cp
        cp = cpget()
        local accept = is_digit(cp)
        if not accept and cp == 46 and not dotFound then
          dotFound = true
          accept = true
        end
      until not accept
      pull = cp
      return "num", flush(), rowStart, columnStart
    end

    error(string.format("%i:%i unknown character %q (code point %i)", row, column, utf8.char(cp), cp))
  end
end
-- Public interface of the module: the tokenizer factory and the code point
-- iterator whose `index, codepoint` results match what `lex` reads.
local exports = {
  codepoints = codepoints,
  lex = lex,
}
return exports