216 lines
5.9 KiB
Lua
216 lines
5.9 KiB
Lua
--[[
Titty has two main design goals:

1. It should look natural from the outside, i.e. from the point of view of
   Lua code, and
2. It should compile to Lua code that also appears natural from within.

The second goal is less important than the first.
]]
|
|
|
|
local inspect = require"inspect".inspect
|
|
|
|
--- Create a stateful iterator over the code points of a UTF-8 string.
-- Every call to the returned function yields `index, codepoint`, where
-- `index` is a zero-based running count of the code points produced so far
-- (it is not a byte offset). Once the string is exhausted the function
-- yields nothing / nil on every further call.
-- @param str UTF-8 encoded string to walk
-- @return iterator function as described above
local function codepoints(str)
  local step, subject, position = utf8.codes(str)
  local produced = 0
  local finished = not step

  return function()
    if finished then
      return nil
    end

    local codepoint
    position, codepoint = step(subject, position)

    if not position then
      -- Remember that the underlying iterator is done so that it is never
      -- stepped again.
      finished = true
      return
    end

    local index = produced
    produced = produced + 1
    return index, codepoint
  end
end
|
|
|
|
--- Test whether a code point is an ASCII decimal digit (`0`..`9`).
-- @param cp code point, or nil at end of input
-- @return true/false for a number; the falsy input itself (nil) otherwise
local function is_digit(cp)
  if not cp then
    return cp
  end
  return 48 <= cp and cp <= 57
end
|
|
|
|
--- Test whether a code point may begin an identifier:
-- an ASCII letter (`A`..`Z`, `a`..`z`) or an underscore.
-- @param cp code point, or nil at end of input
-- @return true/false for a number; the falsy input itself (nil) otherwise
local function is_ident_start(cp)
  if not cp then
    return cp
  end
  if cp == 95 then -- "_"
    return true
  end
  local is_upper = 65 <= cp and cp <= 90
  local is_lower = 97 <= cp and cp <= 122
  return is_upper or is_lower
end
|
|
|
|
--- Test whether a code point may continue an identifier, i.e. whether it is
-- accepted by `is_ident_start` or by `is_digit`.
-- @param cp code point, or nil at end of input
-- @return result of the first predicate that accepts `cp`, otherwise the
--   result of `is_digit`; the falsy input itself (nil) when `cp` is falsy
local function is_ident_nonstart(cp)
  if not cp then
    return cp
  end
  local starts = is_ident_start(cp)
  if starts then
    return starts
  end
  return is_digit(cp)
end
|
|
|
|
--- Check that a predicate holds for every inspected value of a table.
--
-- Without `startI` every entry of the table (array and hash part alike) is
-- inspected, in unspecified order. With `startI` only the sequence elements
-- `tbl[startI] .. tbl[#tbl]` are inspected, in ascending order; previously
-- this parameter was accepted but silently ignored.
--
-- @param tbl table whose values are tested
-- @param f predicate called with one value; a falsy result fails the check
-- @param startI optional first sequence index to inspect
-- @return false as soon as `f` rejects a value, true otherwise (including
--   when there is nothing to inspect)
local function forall(tbl, f, startI)
  if startI == nil then
    for _, value in pairs(tbl) do
      if not f(value) then
        return false
      end
    end
    return true
  end

  for i = startI, #tbl do
    if not f(tbl[i]) then
      return false
    end
  end
  return true
end
|
|
|
|
--- Test whether a code point is whitespace: space (32), line feed (10),
-- horizontal tab (9) or carriage return (13).
-- Carriage return is included so that input with CRLF line endings is not
-- reported as containing an unknown character.
-- @param cp code point, or nil at end of input
-- @return true/false for a number; nil when `cp` is nil
local function is_whitespace(cp)
  return cp and (cp == 32 or cp == 10 or cp == 9 or cp == 13)
end
|
|
|
|
--- Remove every entry from a table in place.
-- Assigning nil to existing fields during a traversal is permitted, so the
-- table can be emptied while it is being walked.
-- @param tbl table to empty
local function clear(tbl)
  local iter, state, key = pairs(tbl)
  key = iter(state, key)
  while key ~= nil do
    tbl[key] = nil
    key = iter(state, key)
  end
end
|
|
|
|
-- Punctuation tokens made of exactly one code point that need no lookahead,
-- keyed by code point.
local SINGLE_CHAR_TOKENS = {
  [40] = "(", [41] = ")",
  [123] = "{", [125] = "}",
  [91] = "[", [93] = "]",
  [44] = ",", [46] = ".", [58] = ":",
  [43] = "+", [45] = "-", [47] = "/", [37] = "%", [35] = "#",
}

-- Operators that form a different token when immediately repeated
-- (`=` / `==`, `*` / `**`), keyed by code point.
local DOUBLED_TOKENS = { [61] = "=", [42] = "*" }

-- Identifiers that are reported with their own text as the token kind.
local KEYWORDS = {
  ["func"] = true, ["end"] = true, ["if"] = true,
  ["while"] = true, ["do"] = true, ["then"] = true,
  ["elseif"] = true, ["type"] = true, ["interf"] = true,
  ["let"] = true, ["return"] = true, ["else"] = true,
  ["for"] = true, ["import"] = true, ["constr"] = true,
  ["loop"] = true, ["break"] = true, ["nil"] = true,
}

--- Create a tokenizer on top of a code point source.
--
-- `cpgetraw` is called without arguments and must return `index, codepoint`
-- for each code point and nil at end of input (the shape produced by
-- `codepoints`). The index is ignored. `cpgetraw` is never called again once
-- it has reported end of input.
--
-- Each call to the returned function yields the next token as
-- `kind, text, row, column`, or nil at end of input:
--   * punctuation and keywords: `kind` and `text` are both the token text
--   * `"string"`: text between single quotes (no escape sequences)
--   * `"id"`: identifier text
--   * `"num"`: digits with at most one `.`
-- `row` starts at 0 and grows by one per line feed; `column` is reset to 0 by
-- a line feed and incremented for every other code point, so the first
-- character of a line is column 1. Both describe the token's first character.
--
-- Raises an error for a character that starts no token and for a string
-- literal that is still open at end of input.
--
-- @param cpgetraw code point source as described above
-- @return token iterator function
local function lex(cpgetraw)
  local row, column = 0, 0
  local pending           -- code point pushed back by one-character lookahead
  local exhausted = false -- true once cpgetraw has reported end of input

  local function cpget()
    if pending then
      local cp = pending
      pending = nil
      return cp
    end
    if exhausted then
      return nil
    end

    local _, cp = cpgetraw()
    if cp == nil then
      exhausted = true
      return nil
    end

    column = column + 1
    if cp == 10 then
      column = 0
      row = row + 1
    end
    return cp
  end

  -- Scratch buffer reused for every multi-character token. Only the first
  -- `len` slots are meaningful, so nothing has to be erased between tokens
  -- and an error raised mid-token cannot leave stale content behind.
  local buf, len = {}, 0

  local function push(cp)
    len = len + 1
    buf[len] = cp
  end

  local function flush()
    local text = utf8.char(table.unpack(buf, 1, len))
    len = 0
    return text
  end

  return function()
    local cp
    repeat
      cp = cpget()
    until not is_whitespace(cp)

    if not cp then
      return nil
    end

    local rowStart, columnStart = row, column

    local single = SINGLE_CHAR_TOKENS[cp]
    if single then
      return single, single, rowStart, columnStart
    end

    local op = DOUBLED_TOKENS[cp]
    if op then
      local after = cpget()
      if after == cp then
        local doubled = op .. op
        return doubled, doubled, rowStart, columnStart
      end
      pending = after
      return op, op, rowStart, columnStart
    end

    if cp == 39 then -- "'" opens a string literal
      while true do
        cp = cpget()
        if cp == nil then
          -- Without this check the loop would spin forever: nil ~= 39 and
          -- appending nil to the buffer is a no-op.
          len = 0
          error(string.format("%i:%i unterminated string literal", rowStart, columnStart))
        elseif cp == 39 then
          break
        end
        push(cp)
      end
      return "string", flush(), rowStart, columnStart
    end

    if is_ident_start(cp) then
      push(cp)
      while true do
        cp = cpget()
        if not is_ident_nonstart(cp) then
          pending = cp
          break
        end
        push(cp)
      end

      local word = flush()
      if KEYWORDS[word] then
        return word, word, rowStart, columnStart
      end
      return "id", word, rowStart, columnStart
    end

    if is_digit(cp) then
      push(cp)
      local dotFound = false
      while true do
        cp = cpget()
        if is_digit(cp) then
          push(cp)
        elseif cp == 46 and not dotFound then
          -- Only the first "." belongs to the number.
          dotFound = true
          push(cp)
        else
          pending = cp
          break
        end
      end
      return "num", flush(), rowStart, columnStart
    end

    error(string.format("%i:%i unknown character %q (code point %i)", row, column, utf8.char(cp), cp))
  end
end
|
|
|
|
-- Module exports: the tokenizer factory `lex` and the UTF-8 code point
-- iterator factory `codepoints`.
return {lex = lex, codepoints = codepoints}
|