ykit/buildtool/polly/parser.py
2024-08-20 00:58:17 +02:00

100 lines
2.7 KiB
Python

from pathlib import Path
from lark import Lark
from lark.lexer import Token
from lark.tree import Branch
from dataclasses import dataclass
import os
import re
@dataclass
class LineColumn:
    """A location expressed as a line number plus a column number."""
    line: int  # line number taken from an ``ln<digits>`` location token
    column: int  # column number; parse_location falls back to 1 when the token has no ``c<digits>`` part
@dataclass
class CharIndex:
    """A location expressed as a single character index."""
    char: int  # index taken from a ``ch<digits>`` location token (0- vs 1-based not visible here -- TODO confirm)
# A location inside a patch target: either line/column based or a character index.
type Location = LineColumn | CharIndex
@dataclass
class InsertionAction:
    """Patch action describing text to insert at a given location."""
    content: str  # decoded text of the string token that follows the location
    location: Location  # where the content goes (LineColumn or CharIndex)
@dataclass
class RegexReplaceAction:
    """Placeholder for a regex-based replacement action.

    No fields are declared yet, and parse_patch never builds one.
    """
    ...
# Any single action that may appear in a Patch's action list.
type PatchAction = InsertionAction | RegexReplaceAction
@dataclass
class Patch:
    """All actions declared for one target file."""
    target: Path  # path given by the patch's leading file declaration
    actions: list[PatchAction]  # actions in the order they appear in the patch
@dataclass
class PatchFile:
    """A collection of patches together with a modification timestamp."""
    patches: list[Patch]  # patches contained in the file
    modified: float  # presumably the patch file's mtime as returned by os.path.getmtime -- TODO confirm
    # NOTE(review): nothing in the visible code constructs PatchFile; parse_patch_file returns a plain list.
# Escape letters with a special meaning inside STRING tokens; any other
# escaped character simply stands for itself.
_SIMPLE_ESCAPES = {"n": "\n", "t": "\t", "r": "\r"}


def parse_string(token: Token):
    r"""Decode a string token from the patch grammar into its text value.

    Three token types are supported:

    * ``STRING``: the first and last characters (the delimiters) are
      dropped and backslash escapes are resolved. ``\n``, ``\t`` and ``\r``
      become newline, tab and carriage return; a backslash in front of any
      other character yields that character (so ``\\`` is one backslash and
      ``\"`` is a quote). A backslash directly before a real newline is
      left untouched.
    * ``RAW_STRING``: the first and last characters are dropped and the
      rest is returned verbatim.
    * ``LONG_STRING``: heredoc form ``<< TERMINATOR``, a newline, the
      body, a newline and the terminator again; the body is returned.

    Args:
        token: token exposing ``type`` and a ``str`` ``value``.

    Returns:
        The decoded string.

    Raises:
        ValueError: if the token type is not one of the three above, or a
            ``LONG_STRING`` value does not have the heredoc shape.
    """
    assert isinstance(token.value, str)
    if token.type == 'STRING':
        # Resolve every escape in ONE left-to-right pass. Running a separate
        # substitution per escape would misread an escaped backslash that is
        # followed by n/t/r (e.g. ``c:\\temp``) as a newline/tab/CR escape.
        return re.sub(
            r"\\(.)",
            lambda m: _SIMPLE_ESCAPES.get(m.group(1), m.group(1)),
            token.value[1:-1],
        )
    if token.type == 'RAW_STRING':
        return token.value[1:-1]
    if token.type == 'LONG_STRING':
        heredoc = re.match(
            r"<<\s*(?P<terminator>[^\n]+)\n(.*)\n(?P=terminator)",
            token.value,
            re.MULTILINE | re.DOTALL,
        )
        if heredoc is None:
            # Raise explicitly: an assert would vanish under ``python -O``.
            raise ValueError(f"Malformed long string: {token.value!r}")
        return heredoc.group(2)
    raise ValueError(f"Unsupported string token type: {token.type!r}")
def parse_location(location: Token):
    """Turn a location token into a LineColumn or CharIndex.

    Two textual forms are recognised at the start of the token value:

    * ``ln<digits>`` optionally followed by ``c<digits>``: a LineColumn;
      the column is 1 when the ``c`` part is absent.
    * ``ch<digits>``: a CharIndex.

    Args:
        location: token whose ``value`` is the location text.

    Returns:
        A LineColumn or CharIndex built from the numbers in the token.

    Raises:
        RuntimeError: if the value starts with neither prefix, or the
            prefix is not followed by digits.
    """
    assert isinstance(location.value, str)
    text = location.value
    if text.startswith("ln"):
        found = re.match(r"ln([0-9]+)(?:c([0-9]+))?", text)
        # A failed match falls through to the RuntimeError below instead of
        # surfacing as an AttributeError on None.
        if found is not None:
            line, column = found.groups()
            return LineColumn(int(line), int(column or '1'))
    elif text.startswith("ch"):
        found = re.match(r"ch([0-9]+)", text)
        if found is not None:
            return CharIndex(int(found.group(1)))
    raise RuntimeError("Cannot parse location")
def parse_patch(branch: Branch[Token], mtime: float):
    """Build a Patch from one patch subtree of the parse tree.

    The first child of ``branch`` is the file declaration; its first child
    is the string token naming the target file. Every following child is an
    instruction. Only ``insert`` instructions are turned into actions;
    instructions with any other ``data`` value are skipped.

    Args:
        branch: subtree for a single patch.
        mtime: accepted for the caller's convenience; not used here.

    Returns:
        A Patch holding the target path and the collected actions.
    """
    declaration = branch.children[0]
    target_file = parse_string(declaration.children[0]) # pyright: ignore[reportUnknownMemberType, reportArgumentType]

    actions: list[PatchAction] = []
    for instruction in branch.children[1:]:
        if instruction.data == "insert":
            # Location token comes first, then the string token to insert.
            where = parse_location(instruction.children[0])
            what = parse_string(instruction.children[1])
            actions.append(InsertionAction(content=what, location=where))

    return Patch(target=Path(target_file), actions=actions)
def parse_patch_file(file: str):
    """Parse the patch file at ``file`` and return the patches it declares.

    The grammar is loaded from ``grammar.lark`` located relative to this
    module. The file's modification time is read and handed to parse_patch
    for every patch.

    Args:
        file: path of the patch file to read.

    Returns:
        A list with one Patch per top-level child of the parse tree.
    """
    lark = Lark.open('grammar.lark', rel_to=__file__)
    mtime = os.path.getmtime(file)
    # Pin the encoding: without it open() decodes with the platform locale,
    # so the same patch file could parse differently across machines.
    with open(file, 'r', encoding='utf-8') as f:
        result = lark.parse(f.read())
    return [parse_patch(patch, mtime) for patch in result.children]