from pathlib import Path from lark import Lark from lark.lexer import Token from lark.tree import Branch from dataclasses import dataclass import os import re @dataclass class LineColumn: line: int column: int @dataclass class CharIndex: char: int type Location = LineColumn | CharIndex @dataclass class InsertionAction: content: str location: Location @dataclass class RegexReplaceAction: ... type PatchAction = InsertionAction | RegexReplaceAction @dataclass class Patch: target: Path actions: list[PatchAction] @dataclass class PatchFile: patches: list[Patch] modified: float def parse_string(token: Token): assert isinstance(token.value, str) if token.type == 'STRING': string = token.value[1:-1] string = re.sub(r"\\n","\n", string) string = re.sub(r"\\t","\t", string) string = re.sub(r"\\r","\r", string) string = re.sub(r"\\(.)",r"\1", string) return string elif token.type == 'RAW_STRING': string = token.value[1:-1] return string elif token.type == 'LONG_STRING': string = re.match(re.compile(r"<<\s*(?P[^\n]+)\n(.*)\n(?P=terminator)", re.MULTILINE + re.DOTALL),token.value) assert string is not None string = string.group(2) return str(string) else: raise ValueError() def parse_location(location: Token): assert isinstance(location.value, str) if location[:2] == "ln": line, column = re.match(r"ln([0-9]+)(?:c([0-9]+))?", location.value).groups() return LineColumn(int(line), int(column or '1')) if location[:2] == "ch": char = re.match(r"ch([0-9]+)", location.value).groups()[0] return CharIndex(int(char)) raise RuntimeError("Cannot parse location") def parse_patch(branch: Branch[Token], mtime: float): # First instruction is always file declaration target_file = parse_string(branch.children[0].children[0]) # pyright: ignore[reportUnknownMemberType, reportArgumentType] actions: list[PatchAction] = [] for inst in branch.children[1:]: match inst.data: case "insert": actions.append(InsertionAction(location=parse_location(inst.children[0]), content=parse_string(inst.children[1]))) # print(f"Inserting {parse_string(inst.children[1])} at {inst.children[0]} in {target_file}") return Patch(target=Path(target_file), actions=actions) def parse_patch_file(file: str): lark = Lark.open('grammar.lark', rel_to=__file__) mtime = os.path.getmtime(file) with open(file, 'r') as f: result = lark.parse(f.read()) patches = [parse_patch(patch, mtime) for patch in result.children] return patches