100 lines
2.7 KiB
Python
100 lines
2.7 KiB
Python
from pathlib import Path
|
|
from lark import Lark
|
|
from lark.lexer import Token
|
|
from lark.tree import Branch
|
|
|
|
from dataclasses import dataclass
|
|
|
|
import os
|
|
|
|
import re
|
|
|
|
@dataclass
class LineColumn:
    """A location expressed as a line number plus a column number.

    Built by ``parse_location`` from tokens of the form ``ln<digits>`` with an
    optional ``c<digits>`` suffix.
    """

    # Line number, taken from the digits after "ln".
    line: int
    # Column number, taken from the digits after "c".
    column: int
|
|
|
|
@dataclass
class CharIndex:
    """A location expressed as a single character index.

    Built by ``parse_location`` from tokens of the form ``ch<digits>``.
    """

    # Character index, taken from the digits after "ch".
    char: int
|
|
|
|
# Either of the two supported ways to express a location: line/column or character index.
type Location = LineColumn | CharIndex
|
|
|
|
@dataclass
class InsertionAction:
    """A patch action that carries text together with the location it refers to.

    ``parse_patch`` creates one of these for every ``insert`` instruction.
    """

    # Decoded text of the instruction's string token.
    content: str
    # Parsed location token (line/column or character index).
    location: Location
|
|
|
|
@dataclass
class RegexReplaceAction:
    """Placeholder for a regex-replace patch action.

    No fields are declared yet, and ``parse_patch`` does not construct it.
    """
|
|
|
|
# Union of the action kinds that a Patch's ``actions`` list may hold.
type PatchAction = InsertionAction | RegexReplaceAction
|
|
|
|
@dataclass
class Patch:
    """The actions declared for a single target file."""

    # Path built from the patch's file declaration string.
    target: Path
    # Actions in the order their instructions appear in the patch.
    actions: list[PatchAction]
|
|
|
|
@dataclass
class PatchFile:
    """A collection of patches plus a modification timestamp."""

    # Patches contained in the file.
    patches: list[Patch]
    # NOTE(review): presumably the patch file's mtime as returned by
    # os.path.getmtime; nothing visible here constructs a PatchFile, so confirm.
    modified: float
|
|
|
|
def parse_string(token: Token) -> str:
    r"""Decode a string token into the text it denotes.

    Three token types are understood:

    * ``STRING``: the first and last characters (the delimiters) are dropped
      and backslash escapes are resolved. ``\n``, ``\t`` and ``\r`` become
      newline, tab and carriage return; a backslash before any other
      character (except a literal newline) yields that character, so ``\\``
      is one backslash and ``\"`` is a quote.
    * ``RAW_STRING``: the first and last characters are dropped and the rest
      is returned verbatim.
    * ``LONG_STRING``: ``<<TERMINATOR``, a newline, the body, a newline and
      the terminator again; the body is returned.

    Args:
        token: Token whose ``type`` selects the decoding and whose ``value``
            holds the source text.

    Returns:
        The decoded string.

    Raises:
        ValueError: If the token type is not one of the three above, or a
            ``LONG_STRING`` value does not have the expected shape.
    """
    assert isinstance(token.value, str)  # narrows the type for the checker

    if token.type == 'STRING':
        # Resolve every escape in ONE left-to-right pass. Running a separate
        # substitution per escape lets an earlier pass consume the second
        # half of an escaped backslash (backslash, backslash, "n" would turn
        # into backslash + newline instead of backslash + "n").
        escapes = {"n": "\n", "t": "\t", "r": "\r"}
        return re.sub(
            r"\\(.)",
            lambda found: escapes.get(found.group(1), found.group(1)),
            token.value[1:-1],
        )

    if token.type == 'RAW_STRING':
        return token.value[1:-1]

    if token.type == 'LONG_STRING':
        found = re.match(
            r"<<\s*(?P<terminator>[^\n]+)\n(.*)\n(?P=terminator)",
            token.value,
            re.MULTILINE | re.DOTALL,
        )
        if found is None:
            raise ValueError(f"Malformed LONG_STRING token: {token.value!r}")
        return str(found.group(2))

    raise ValueError(f"Unsupported string token type: {token.type!r}")
|
|
|
|
def parse_location(location: Token) -> LineColumn | CharIndex:
    """Turn a location token into a ``LineColumn`` or ``CharIndex``.

    Two spellings are recognised at the start of the token text:

    * ``ln<digits>`` optionally followed by ``c<digits>`` gives a
      ``LineColumn``; the column is 1 when the ``c`` part is absent.
    * ``ch<digits>`` gives a ``CharIndex``.

    Args:
        location: Token whose ``value`` holds the location text.

    Returns:
        The parsed location object.

    Raises:
        RuntimeError: If the text matches neither spelling, including text
            that has the ``ln``/``ch`` prefix but no digits after it.
    """
    assert isinstance(location.value, str)  # narrows the type for the checker
    text = location.value

    if text.startswith("ln"):
        found = re.match(r"ln([0-9]+)(?:c([0-9]+))?", text)
        # A prefix without digits must end in the RuntimeError below rather
        # than an AttributeError on a failed match.
        if found is not None:
            line, column = found.groups()
            return LineColumn(int(line), int(column or '1'))
    elif text.startswith("ch"):
        found = re.match(r"ch([0-9]+)", text)
        if found is not None:
            return CharIndex(int(found.group(1)))

    raise RuntimeError("Cannot parse location")
|
|
|
|
def parse_patch(branch: Branch[Token], mtime: float):
    """Convert one parsed patch subtree into a ``Patch``.

    The first child of ``branch`` is always the file declaration; the first
    child of that declaration is the string token naming the target file.
    Every remaining child is an instruction. Only ``insert`` instructions
    produce an action (location from the instruction's first child, content
    from its second); instructions of any other kind are skipped.

    ``mtime`` is accepted but not used by this function.
    """
    declaration = branch.children[0]
    target_file = parse_string(declaration.children[0])  # pyright: ignore[reportUnknownMemberType, reportArgumentType]

    actions: list[PatchAction] = [
        InsertionAction(
            location=parse_location(inst.children[0]),
            content=parse_string(inst.children[1]),
        )
        for inst in branch.children[1:]
        if inst.data == "insert"
    ]

    return Patch(target=Path(target_file), actions=actions)
|
|
|
|
def parse_patch_file(file: str):
    """Parse the patch file at ``file`` and return the patches it declares.

    The grammar is loaded from ``grammar.lark`` next to this module. The
    file's modification time is read and passed to ``parse_patch`` for each
    child of the parse result.

    Args:
        file: Path of the patch file to read.

    Returns:
        A list with one parsed patch per child of the parse tree root.
    """
    lark = Lark.open('grammar.lark', rel_to=__file__)

    mtime = os.path.getmtime(file)

    # Decode explicitly as UTF-8: without an encoding argument, open() uses
    # the locale's preferred encoding, so the same patch file could parse
    # differently (or fail to decode) from one machine to another.
    with open(file, 'r', encoding='utf-8') as f:
        result = lark.parse(f.read())

    return [parse_patch(patch, mtime) for patch in result.children]
|