from lark import Lark from lark.lexer import Token from lark.tree import Branch import os import re def parse_string(token: Token): if token.type == 'STRING': string = token.value[1:-1] string = re.sub(r"\\n","\n", string) string = re.sub(r"\\t","\t", string) string = re.sub(r"\\r","\r", string) string = re.sub(r"\\(.)",r"\1", string) return string elif token.type == 'RAW_STRING': string = token.value[1:-1] return string elif token.type == 'LONG_STRING': string = re.match(re.compile(r"<<\s*(?P[^\n]+)\n(.*)\n(?P=terminator)", re.MULTILINE + re.DOTALL),token.value) assert string is not None string = string.group(2) return string def parse_location(location: Token): assert isinstance(location.value, str) m = re.match(r"ln([0-9]+)(?:c([0-9]+))?", location.value) if m: ln, col = m.groups() return {"type": "lncol", "line": int(ln), "column": int(col) if col else 0} m = re.match(r"ch([0-9]+)", location.value) if m: ch = m.groups()[0] return {"type": "char", "index": int(ch)} raise RuntimeError("Cannot parse location") def parse_patch(branch: Branch[Token], mtime: float): # First instruction is always file declaration target_file = parse_string(branch.children[0].children[0]) # pyright: ignore[reportUnknownMemberType, reportArgumentType] actions = [] for inst in branch.children[1:]: match inst.data: case "insert": actions.append({"type": "insert", "at": parse_location(inst.children[0]), "content": parse_string(inst.children[1])}) # print(f"Inserting {parse_string(inst.children[1])} at {inst.children[0]} in {target_file}") return {"target": target_file, "actions": actions, "timestamp": mtime} def parse_patch_file(file: str): lark = Lark.open('grammar.lark', rel_to=__file__) mtime = os.path.getmtime(file) with open(file, 'r') as f: result = lark.parse(f.read()) patches = [parse_patch(patch, mtime) for patch in result.children] return patches