playground/tree-sitter/build.py (191 lines of code) (raw):

#!/usr/bin/env python3
# Build script for the tree-sitter playground.
#
# Steps performed by ``main``:
#   1. Recreate the dist directory and render ``index.html`` from its template.
#   2. Copy the concrete-syntax CSS/JS integration files next to it.
#   3. Build (or reuse) the concrete-syntax WASM package and copy it to assets.
#   4. For every supported ``tree-sitter-*`` dependency reported by
#      ``cargo metadata``: clone the grammar, build it to WASM with the
#      tree-sitter CLI, and copy the result to assets.
from __future__ import annotations

import argparse
import json
import shutil
import subprocess
import tempfile
import urllib.parse
from pathlib import Path
from typing import Dict, List

# Directory containing this script (templates and static files live beside it).
_HERE = Path(__file__).parent

# Languages for which a grammar WASM is built and offered in the UI.
_LANGUAGES = ["swift", "kotlin", "java", "go", "python"]

# Artifacts produced by wasm-pack that the playground needs at runtime.
_CONCRETE_SYNTAX_WASM_FILES = ["concrete_syntax.js", "concrete_syntax_bg.wasm"]


def run_command(cmd: List[str], cwd: Path | None = None) -> subprocess.CompletedProcess[str]:
    """Run *cmd* (an argv list, no shell) and return the completed process.

    Args:
        cmd: Program and arguments.
        cwd: Working directory for the child process; ``None`` inherits ours.

    Returns:
        The ``CompletedProcess`` with captured text ``stdout``/``stderr``.

    Raises:
        subprocess.CalledProcessError: The command exited non-zero. The command
            line and its stderr are printed before the error is re-raised.
        FileNotFoundError: The executable could not be found.
    """
    try:
        return subprocess.run(
            cmd, cwd=cwd, capture_output=True, text=True, check=True
        )
    except subprocess.CalledProcessError as e:
        print(f"Command failed: {' '.join(cmd)}")
        print(f"Error: {e.stderr}")
        raise


def build_concrete_syntax_wasm(repo_root: Path) -> Path:
    """Build the WASM package for the concrete-syntax crate if it is missing.

    The build is skipped when all expected artifacts already exist in
    ``crates/concrete-syntax/pkg-web`` (a plain existence check, not a
    freshness check).

    Args:
        repo_root: Root of the repository checkout.

    Returns:
        The ``pkg-web`` directory containing the generated artifacts.

    Raises:
        FileNotFoundError: An expected artifact is absent after the build.
        subprocess.CalledProcessError: ``cargo`` or ``wasm-pack`` failed.
    """
    concrete_syntax_dir = repo_root / "crates" / "concrete-syntax"
    pkg_web_dir = concrete_syntax_dir / "pkg-web"

    print("Building concrete syntax WASM package...")

    wasm_files = [pkg_web_dir / file_name for file_name in _CONCRETE_SYNTAX_WASM_FILES]

    if all(f.exists() for f in wasm_files):
        print("✓ Concrete syntax WASM files already exist, skipping build...")
    else:
        print("Building WASM package for web target...")
        # Build the WASM target first
        run_command(
            [
                "cargo", "build",
                "--target", "wasm32-unknown-unknown",
                "--no-default-features",
                "--features", "wasm",
            ],
            cwd=concrete_syntax_dir,
        )
        # Generate WASM bindings
        run_command(
            [
                "wasm-pack", "build",
                "--target", "web",
                "--out-dir", "pkg-web",
                "--no-default-features",
                "--features", "wasm",
            ],
            cwd=concrete_syntax_dir,
        )
        print("✅ WASM package built successfully")

    # Verify files exist (covers both the skip path and a silent build failure).
    for wasm_file in wasm_files:
        if not wasm_file.exists():
            raise FileNotFoundError(f"Required WASM file not found: {wasm_file}")

    return pkg_web_dir


def extract_tree_sitter_deps(repo_root: Path) -> list[tuple[str, str, Dict]]:
    """Extract supported tree-sitter grammar packages from ``cargo metadata``.

    Args:
        repo_root: Directory in which ``cargo metadata`` is executed.

    Returns:
        A list of ``(language, package_name, package_metadata)`` tuples for
        every package named ``tree-sitter-<language>`` whose language is in
        ``_LANGUAGES``.
    """
    result = run_command(["cargo", "metadata", "--format-version", "1"], cwd=repo_root)
    metadata = json.loads(result.stdout)

    deps = []
    for package in metadata["packages"]:
        pkg_name = package["name"]
        if not pkg_name.startswith("tree-sitter-"):
            continue
        lang_name = pkg_name.removeprefix("tree-sitter-")
        if lang_name in _LANGUAGES:
            deps.append((lang_name, pkg_name, package))
    return deps


def parse_git_source(dep_info: Dict) -> tuple[str, str]:
    """Parse the repository URL and revision out of a git source string.

    An example git string:
    "git+https://github.com/danieltrt/tree-sitter-go.git?rev=ea5ceb7...#ea5ceb7..."

    The revision is taken from the ``rev``, ``branch`` or ``tag`` query
    parameter (in that order). When none of them is present, the ``#fragment``
    is used as the revision instead.

    Args:
        dep_info: Package metadata containing a ``"source"`` string.

    Returns:
        ``(git_url, revision)``.

    Raises:
        ValueError: No revision could be determined from the source string.
    """
    source: str = dep_info["source"]

    # Split "git+<url>?<query>#<commit>" into its three parts; partition()
    # yields empty strings for parts that are absent.
    location, _, fragment = source.removeprefix("git+").partition("#")
    git_url, _, query_string = location.strip().partition("?")

    params = urllib.parse.parse_qs(query_string)
    rev = (
        params.get("rev", [None])[0]
        or params.get("branch", [None])[0]
        or params.get("tag", [None])[0]
        or fragment.strip()
    )
    if not rev:
        raise ValueError(f"Missing rev/branch/tag information in git source: {source}")
    return git_url, rev


def clone_grammar(name: str, dep_info: Dict, temp_dir: Path) -> Path:
    """Clone a grammar repository into *temp_dir* and check out its version.

    Git sources are parsed for URL and revision. Registry sources are assumed
    to come from the official ``github.com/tree-sitter`` organisation and are
    checked out at tag ``v<version>``.

    Args:
        name: Package name, e.g. ``tree-sitter-go``.
        dep_info: Package metadata with ``"source"`` (and ``"version"`` for
            registry sources).
        temp_dir: Parent directory for the clone.

    Returns:
        Path of the cloned working tree (``temp_dir / name``).

    Raises:
        ValueError: The source is missing or is neither a git nor a registry
            source.
        subprocess.CalledProcessError: ``git clone`` or ``git checkout`` failed.
    """
    source = dep_info["source"]

    # A missing/None source has no clone location; report it like any other
    # unsupported source rather than failing on ``None.startswith``.
    if not source:
        raise ValueError(f"Unsupported source type for {name}: {source}")

    if source.startswith("git+"):
        git_url, version = parse_git_source(dep_info)
    elif source.startswith("registry+"):
        repo_name = name.removeprefix("tree-sitter-")
        git_url = f"https://github.com/tree-sitter/tree-sitter-{repo_name}"
        version = "v" + dep_info["version"]
    else:
        raise ValueError(f"Unsupported source type for {name}: {source}")

    clone_dir = temp_dir / name
    print(f"Cloning {name} from {git_url} and checking out {version}")
    run_command(["git", "clone", git_url, str(clone_dir)])
    run_command(["git", "checkout", version], cwd=clone_dir)
    return clone_dir


def build_wasm(grammar_dir: Path, name: str) -> Path:
    """Build the WASM file for the grammar checked out in *grammar_dir*.

    Args:
        grammar_dir: Grammar working tree.
        name: Package name; the build is expected to produce ``<name>.wasm``.

    Returns:
        Path of the generated ``.wasm`` file.

    Raises:
        RuntimeError: The tree-sitter CLI is missing, its version could not be
            determined, or it is not a 0.24 release.
        FileNotFoundError: The build did not produce the expected file.
        subprocess.CalledProcessError: ``tree-sitter build`` failed.
    """
    print(f"Building WASM for {name}")

    # Note that we have to use tree-sitter CLI 0.24 since the main tree-sitter and grammars
    # we use in Piranha are old and not compatible with the latest tree-sitter CLI.
    # TODO: remove this restriction once we upstream all our changes to tree-sitter grammars
    # and upgrade to latest tree-sitter in Piranha.
    try:
        proc = run_command(["tree-sitter", "--version"])
        # The version is read as the second whitespace-separated token;
        # output without one raises IndexError, handled below.
        version = proc.stdout.strip().split()[1]
        if not version.startswith("0.24"):
            raise RuntimeError(f"tree-sitter CLI version {version} not supported")
    except (subprocess.CalledProcessError, FileNotFoundError, IndexError, RuntimeError) as err:
        raise RuntimeError(
            "tree-sitter CLI version 0.24.x is required. "
            "Install with: cargo install tree-sitter-cli --version 0.24.4"
        ) from err

    print(f"Using tree-sitter CLI version: {proc.stdout.strip()}")
    run_command(["tree-sitter", "build", "--wasm"], cwd=grammar_dir)

    wasm_file = grammar_dir / f"{name}.wasm"
    if not wasm_file.exists():
        raise FileNotFoundError(f"WASM file not found for {name}")
    return wasm_file


def copy_wasm_to_assets(wasm_file: Path, lang_name: str, assets_dir: Path) -> Path:
    """Copy a grammar WASM file into the assets directory.

    Args:
        wasm_file: Built ``.wasm`` file.
        lang_name: Language name used in the destination file name.
        assets_dir: Destination directory; created (with parents) if missing.

    Returns:
        Path of the copied file, ``assets_dir / "tree-sitter-<lang>.wasm"``.
    """
    assets_dir.mkdir(parents=True, exist_ok=True)
    dest_file = assets_dir / f"tree-sitter-{lang_name}.wasm"
    print(f"Copying {wasm_file} to {dest_file}")
    shutil.copy2(wasm_file, dest_file)
    return dest_file


def instantiate_index_html(template_path: Path, output_path: Path) -> None:
    """Render the index template, filling in the language ``<option>`` list.

    The ``{{ LANGUAGE_OPTIONS }}`` placeholder is replaced with one
    ``<option>`` per entry of ``_LANGUAGES``, with python pre-selected.

    Args:
        template_path: Template file to read (UTF-8).
        output_path: File to write the rendered HTML to (UTF-8).
    """
    content = template_path.read_text(encoding="utf-8")

    options = []
    for lang in _LANGUAGES:
        # Set python as the default language
        selected = " selected" if lang == "python" else ""
        options.append(f'<option value="{lang}"{selected}>{lang.title()}</option>')

    content = content.replace("{{ LANGUAGE_OPTIONS }}", "\n".join(options))
    output_path.write_text(content, encoding="utf-8")


def main() -> None:
    """Parse arguments and build the playground, including all WASM files."""
    parser = argparse.ArgumentParser(
        description="Build tree-sitter playground with WASM files"
    )
    parser.add_argument(
        "--dist-dir",
        "-d",
        type=Path,
        help="Directory to copy playground files and build WASM files to",
        default=Path.cwd() / "dist",
    )
    args = parser.parse_args()

    dist_dir: Path = args.dist_dir
    if dist_dir.exists():
        print(f"Dist directory {dist_dir} already exists, clearing it...")
        shutil.rmtree(dist_dir)

    proc = run_command(["git", "rev-parse", "--show-toplevel"])
    repo_root = Path(proc.stdout.strip())
    print(f"Using repo root: {repo_root}")
    print()

    print("Instantiating index.html.template to dist directory...")
    dist_dir.mkdir(parents=True, exist_ok=True)
    instantiate_index_html(_HERE / "index.html.template", dist_dir / "index.html")

    # Copy concrete syntax integration files
    print("Copying concrete syntax integration files...")
    for file_name in ["concrete-syntax.css", "concrete-syntax.js"]:
        src_file = _HERE / file_name
        if not src_file.exists():
            raise FileNotFoundError(f"Required concrete syntax file not found: {src_file}")
        dest_file = dist_dir / file_name
        print(f"Copying {src_file} to {dest_file}")
        shutil.copy2(src_file, dest_file)

    # Build and copy concrete syntax WASM files
    print("\nBuilding concrete syntax WASM files...")
    concrete_syntax_pkg_dir = build_concrete_syntax_wasm(repo_root)

    assets_dir = dist_dir / "assets"
    assets_dir.mkdir(exist_ok=True)
    for file_name in _CONCRETE_SYNTAX_WASM_FILES:
        src_file = concrete_syntax_pkg_dir / file_name
        dest_file = assets_dir / file_name
        print(f"Copying {src_file} to {dest_file}")
        shutil.copy2(src_file, dest_file)

    print("\nBuilding WASM files for all supported tree-sitter dependencies...")
    print("Extracting tree-sitter dependencies to build WASM grammars...")
    deps = extract_tree_sitter_deps(repo_root)
    if not deps:
        raise RuntimeError("No supported tree-sitter dependencies found")

    print(f"Found {len(deps)} supported tree-sitter dependencies:")
    for lang_name, pkg_name, _ in deps:
        print(f"  - {pkg_name} ({lang_name})")

    with tempfile.TemporaryDirectory() as temp_dir_str:
        temp_dir = Path(temp_dir_str)
        for lang_name, pkg_name, dep_info in deps:
            print(f"\n--- Processing {pkg_name} ---")
            grammar_dir = clone_grammar(pkg_name, dep_info, temp_dir)
            wasm_file = build_wasm(grammar_dir, pkg_name)
            copy_wasm_to_assets(wasm_file, lang_name, assets_dir)
            print(f"✓ Successfully built {pkg_name}")

    # Report the grammars that were actually built, which may be a subset of
    # _LANGUAGES when some dependencies are absent from cargo metadata.
    built_languages = [lang_name for lang_name, _, _ in deps]
    print("\n=== Build Complete ===")
    print(f"Successfully built {len(deps)} grammars: {built_languages}")
    print("Concrete syntax WASM integration: ✓")
    print(f"Output directory: {dist_dir}")


if __name__ == "__main__":
    main()