scripts/convert_addSourceLines_textblock.py (257 lines of code) (raw):

#!/usr/bin/env python3
"""
Convert CompilationTestHelper.addSourceLines(...) calls in a single Java file
to use text blocks.

Assumptions:
- Each argument is on its own line.
- The first argument is the filename string literal.
- Subsequent arguments are string literals representing source lines, with
  optional // or /* */ comments on their own line or trailing after a string
  literal.
"""

from __future__ import annotations

import argparse
import sys
from typing import Iterable, Iterator, List, Optional, Tuple

# Token that marks the start of a call; its last character is the "(".
_CALL_TOKEN = "addSourceLines("
_TEXT_BLOCK_DELIMITER = '"""'


def _is_identifier_char(ch: str) -> bool:
    """Return True if *ch* may appear inside a Java identifier."""
    return ch.isalnum() or ch in "_$"


def _iter_code_indices(src: str, start: int = 0) -> Iterator[int]:
    """Yield indices of *src* (from *start*) that hold plain Java code.

    Characters belonging to string literals, char literals, text blocks,
    ``//`` comments and ``/* */`` comments (delimiters included) are skipped.
    Scanning assumes *start* itself is located in plain code.
    """
    n = len(src)
    i = start
    state = "normal"
    while i < n:
        c = src[i]
        if state == "normal":
            # Check the three-quote delimiter before the single quote so an
            # existing text block is not mistaken for an empty string literal.
            if src.startswith(_TEXT_BLOCK_DELIMITER, i):
                state = "text_block"
                i += 2
            elif c == '"':
                state = "string"
            elif c == "'":
                state = "char"
            elif src.startswith("//", i):
                state = "line_comment"
                i += 1
            elif src.startswith("/*", i):
                state = "block_comment"
                i += 1
            else:
                yield i
        elif state == "string":
            if c == "\\":
                i += 1  # skip the escaped character
            elif c == '"':
                state = "normal"
        elif state == "char":
            if c == "\\":
                i += 1  # skip the escaped character
            elif c == "'":
                state = "normal"
        elif state == "text_block":
            if c == "\\":
                i += 1  # skip the escaped character (e.g. \")
            elif src.startswith(_TEXT_BLOCK_DELIMITER, i):
                state = "normal"
                i += 2
        elif state == "line_comment":
            if c == "\n":
                state = "normal"
        elif src.startswith("*/", i):  # state == "block_comment"
            state = "normal"
            i += 1
        i += 1


def find_add_source_lines_calls(src: str) -> List[Tuple[int, int]]:
    """Return (open_paren_idx, close_paren_idx) pairs for addSourceLines calls.

    Only occurrences in plain code are considered (not inside literals or
    comments), and the name must not be the tail of a longer identifier.
    Calls whose closing parenthesis cannot be found are omitted. Scanning
    resumes after the closing parenthesis of each reported call, so calls
    nested inside a reported call's arguments are not reported.
    """
    calls: List[Tuple[int, int]] = []
    resume_at = 0
    for i in _iter_code_indices(src):
        if i < resume_at or not src.startswith(_CALL_TOKEN, i):
            continue
        if i > 0 and _is_identifier_char(src[i - 1]):
            # e.g. "xaddSourceLines(" is a different method.
            continue
        open_idx = i + len(_CALL_TOKEN) - 1
        close_idx = find_matching_paren(src, open_idx)
        if close_idx is not None:
            calls.append((open_idx, close_idx))
            resume_at = close_idx + 1
    return calls


def find_matching_paren(src: str, open_idx: int) -> Optional[int]:
    """Find matching ')' for the '(' at open_idx.

    Parentheses inside string/char literals, text blocks and comments are
    ignored. Returns None when open_idx does not point at a '(' or when no
    matching ')' exists.
    """
    if not 0 <= open_idx < len(src) or src[open_idx] != "(":
        return None
    depth = 0
    for i in _iter_code_indices(src, open_idx):
        c = src[i]
        if c == "(":
            depth += 1
        elif c == ")":
            depth -= 1
            if depth == 0:
                return i
    return None


def split_string_literal(body: str) -> Optional[Tuple[str, str]]:
    """Split a Java string literal from the start of body, returning (content, rest).

    ``content`` is the text between the quotes with escape sequences left
    exactly as written; ``rest`` is everything after the closing quote.
    Returns None if body does not start with '"' or the literal is not
    terminated.
    """
    if not body.startswith('"'):
        return None
    n = len(body)
    i = 1
    while i < n:
        c = body[i]
        if c == "\\" and i + 1 < n:
            i += 2  # keep the two-character escape sequence intact
            continue
        if c == '"':
            return body[1:i], body[i + 1 :]
        i += 1
    return None


def _break_triple_quotes(text: str) -> str:
    """Escape every third quote of a run so *text* never contains '\"\"\"'."""
    return text.replace(_TEXT_BLOCK_DELIMITER, '""\\"')


def unescape_for_text_block(raw: str) -> str:
    """Make string literal content suitable for a text block.

    - ``\\"`` is unescaped to ``"``, except that runs of three quotes keep one
      escape so the text block is not terminated early.
    - A trailing space or tab is written as ``\\s`` / ``\\t`` because a text
      block strips trailing whitespace from each line.
    """
    text = _break_triple_quotes(raw.replace('\\"', '"'))
    if text.endswith(" "):
        text = text[:-1] + "\\s"
    elif text.endswith("\t"):
        text = text[:-1] + "\\t"
    return text


def _escape_comment_for_text_block(comment: str) -> str:
    """Escape raw comment text so it keeps its literal value in a text block."""
    return _break_triple_quotes(comment.replace("\\", "\\\\"))


def _split_trailing_comment(rest: str) -> Tuple[str, Optional[str]]:
    """Split *rest* at the first '//' or '/*', returning (before, comment)."""
    starts = [pos for pos in (rest.find("//"), rest.find("/*")) if pos != -1]
    if not starts:
        return rest, None
    cut = min(starts)
    return rest[:cut], rest[cut:]


def transform_args_text(args_text: str) -> Optional[str]:
    """Rewrite the argument text of one addSourceLines call to use a text block.

    *args_text* is the text strictly between the call's parentheses. The first
    string literal line is kept as the filename argument; every following
    string literal line and comment line becomes one line of a text block
    indented like the filename line. Comments (own-line or trailing) are
    moved into the text block content.

    Returns the new argument text, or None when the arguments do not match
    the supported layout (already a text block, anything other than a comma
    or comment after a literal, a non-literal argument, a comment before the
    filename, or no source-line arguments at all).
    """
    lines = args_text.splitlines(keepends=True)
    if not lines:
        return None

    content_lines: List[str] = []
    filename_line_idx: Optional[int] = None
    filename_line: Optional[str] = None
    arg_indent = ""
    last_arg_line_idx = 0

    for idx, line in enumerate(lines):
        if not line.strip():
            continue
        body = line.lstrip()
        indent = line[: len(line) - len(body)]
        if body.startswith(_TEXT_BLOCK_DELIMITER):
            return None  # already converted
        if body.startswith('"'):
            parsed = split_string_literal(body)
            if parsed is None:
                return None
            content, rest = parsed
            before_comment, comment = _split_trailing_comment(rest.rstrip("\r\n"))
            # Only whitespace and the separating comma may follow the literal.
            if before_comment.strip().strip(","):
                return None
            if filename_line_idx is None:
                filename_line_idx = idx
                filename_line = line
                arg_indent = indent
                continue
            text = unescape_for_text_block(content)
            if comment:
                spacing = before_comment.replace(",", "")
                text += spacing + _escape_comment_for_text_block(comment)
            content_lines.append(text)
        elif body.startswith(("//", "/*")):
            if filename_line_idx is None:
                return None
            # Keep the comment's indentation relative to the arguments.
            relative = line[len(arg_indent) :] if line.startswith(arg_indent) else body
            content_lines.append(
                _escape_comment_for_text_block(relative.rstrip("\r\n"))
            )
        else:
            return None
        last_arg_line_idx = idx

    if filename_line_idx is None or filename_line is None or not content_lines:
        return None

    line_ending = "\r\n" if any(ln.endswith("\r\n") for ln in lines) else "\n"
    # When the last argument is directly followed by ")", keep the closing
    # delimiter on that same line instead of pushing ")" to column 0.
    last_had_break = lines[last_arg_line_idx].endswith(("\n", "\r"))
    closing_ending = line_ending if last_had_break else ""

    out: List[str] = lines[:filename_line_idx]
    if filename_line.endswith("\n"):
        out.append(filename_line)
    else:
        out.append(filename_line + line_ending)
    out.append(f"{arg_indent}{_TEXT_BLOCK_DELIMITER}{line_ending}")
    # Empty source lines are emitted without indentation-only whitespace.
    out.extend(
        f"{arg_indent}{text}{line_ending}" if text else line_ending
        for text in content_lines
    )
    out.append(f"{arg_indent}{_TEXT_BLOCK_DELIMITER}{closing_ending}")
    out.extend(lines[last_arg_line_idx + 1 :])
    return "".join(out)


def transform_source(src: str) -> Tuple[str, int]:
    """Convert every supported addSourceLines call in *src*.

    Returns (new_source, number_of_converted_calls). Calls that
    transform_args_text rejects are left untouched; when nothing is
    converted the original string is returned with a count of 0.
    """
    pieces: List[str] = []
    cursor = 0
    changes = 0
    for open_idx, close_idx in find_add_source_lines_calls(src):
        new_args = transform_args_text(src[open_idx + 1 : close_idx])
        if new_args is None:
            continue
        pieces.append(src[cursor : open_idx + 1])
        pieces.append(new_args)
        cursor = close_idx  # the ")" itself is copied with the next piece
        changes += 1
    if changes == 0:
        return src, 0
    pieces.append(src[cursor:])
    return "".join(pieces), changes


def main(argv: Optional[Iterable[str]] = None) -> int:
    """Command-line entry point.

    Reads the Java file given on the command line, converts its
    addSourceLines calls and either rewrites the file in place or, with
    --stdout, writes the result to standard output. Returns 0 on success
    (including "nothing to change") and 1 when the file cannot be read or
    written.
    """
    parser = argparse.ArgumentParser(
        description="Convert addSourceLines calls to text blocks in a single Java file."
    )
    parser.add_argument("path", help="Path to a .java file")
    parser.add_argument(
        "--stdout",
        action="store_true",
        help="Write output to stdout instead of modifying the file",
    )
    args = parser.parse_args(argv)

    path = args.path
    try:
        # newline="" disables newline translation so CRLF files keep their
        # line endings and the CRLF detection in transform_args_text works.
        with open(path, "r", encoding="utf-8", newline="") as handle:
            src = handle.read()
    except (OSError, UnicodeDecodeError) as exc:
        print(f"Failed to read {path}: {exc}", file=sys.stderr)
        return 1

    updated, changes = transform_source(src)
    if changes == 0:
        print("No changes needed.", file=sys.stderr)
        return 0

    if args.stdout:
        sys.stdout.write(updated)
        return 0

    try:
        with open(path, "w", encoding="utf-8", newline="") as handle:
            handle.write(updated)
    except OSError as exc:
        print(f"Failed to write {path}: {exc}", file=sys.stderr)
        return 1

    print(f"Updated {changes} addSourceLines call(s) in {path}", file=sys.stderr)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())