olddocs/gen_edot_col_components/tools.py (162 lines of code) (raw):

"""Generate and verify the markdown snippets embedded in the EDOT docs.

The collector component tables / OCB dependency lists are derived from the
``go.mod`` file of an elastic-agent release tag, the SDK feature table from a
local YAML file.  Rendered snippets are spliced into every file that contains
a ``<!-- start:TAG -->`` ... ``<!-- end:TAG -->`` marker pair.
"""

import re
import sys
import urllib.error
import urllib.request
from collections import defaultdict
from pathlib import Path

import yaml
from jinja2 import Environment, FileSystemLoader

TABLE_TAG = 'edot-collector-components-table'
DEPS_TAG = 'edot-collector-components-ocb'
FEATURES_TAG = 'edot-features'

EDOT_COLLECTOR_DIR = '../_edot-collector'
EDOT_SDKS_DIR = '../_edot-sdks'
TEMPLATE_COLLECTOR_COMPONENTS_TABLE = 'templates/components-table.jinja2'
TEMPLATE_COLLECTOR_OCB_FILE = 'templates/ocb.jinja2'
TEMPLATE_SDK_FEATURES = 'templates/features.jinja2'
SDK_FEATURES_YAML = '../_edot-sdks/features.yml'

# Path segments that mark a go.mod requirement as a collector component.
_COMPONENT_TYPES = ('receiver', 'connector', 'processor', 'exporter', 'extension', 'provider')

# Module paths of components hosted on GitHub: org/repo/<type>/<name...>.
_GITHUB_DEP_RE = re.compile(
    r'github.com\/(?P<org>[^\/]*)\/(?P<repo>[^\/]*)\/(?P<comp_type>[^\/]*)\/(?P<comp_name>.*)'
)
# Module paths of upstream core components: collector/<type>/<name...>.
_OTEL_CORE_DEP_RE = re.compile(
    r'go.opentelemetry.io\/collector\/(?P<comp_type>[^\/]*)\/(?P<comp_name>.*)'
)


def fetch_url_content(url):
    """Download *url* and return its body decoded as UTF-8.

    On ``urllib.error.URLError`` the reason is printed and ``None`` is
    returned instead of raising.
    """
    try:
        with urllib.request.urlopen(url) as response:
            return response.read().decode('utf-8')
    except urllib.error.URLError as e:
        print(f"Failed to retrieve content: {e.reason}")
        return None


def _fetch_lines(url):
    """Return the document at *url* split into lines.

    Raises:
        RuntimeError: if the download failed (``fetch_url_content`` returned
            ``None``), so callers fail with a clear message rather than an
            ``AttributeError`` on ``None``.
    """
    content = fetch_url_content(url)
    if content is None:
        raise RuntimeError(f'Could not download {url}')
    return content.splitlines()


def dep_to_component(dep):
    """Convert one go.mod requirement line into a component description.

    Args:
        dep: a requirement line of the form ``<module path> v<version>``
            (surrounding whitespace is tolerated).

    Returns:
        A dict with the keys ``name``, ``version``, ``html_url``,
        ``repo_link`` and ``dep`` (the stripped input line).
    """
    url = dep[:dep.rfind(' v')].strip()
    # Defaults used when the module path matches none of the known layouts.
    html_url = url
    repo_link = '[OTel Contrib Repo](https://github.com/open-telemetry/opentelemetry-collector-contrib)'

    if url.startswith('github.com/'):
        match = _GITHUB_DEP_RE.search(url)
        if match:
            html_url = (
                f'https://github.com/{match.group("org")}/{match.group("repo")}'
                f'/tree/main/{match.group("comp_type")}/{match.group("comp_name")}'
            )
            if match.group("repo") == 'opentelemetry-collector-components':
                repo_link = '[Elastic Repo](https://github.com/elastic/opentelemetry-collector-components)'
    elif url.startswith('go.opentelemetry.io/collector'):
        match = _OTEL_CORE_DEP_RE.search(url)
        if match:
            html_url = (
                'https://github.com/open-telemetry/opentelemetry-collector'
                f'/tree/main/{match.group("comp_type")}/{match.group("comp_name")}'
            )
            repo_link = '[OTel Core Repo](https://github.com/open-telemetry/opentelemetry-collector)'

    last_space = dep.rfind(' ')
    return {
        # NOTE(review): the slice end is one past the separating space, so the
        # name keeps a trailing space. Left as-is because the templates and the
        # committed generated markdown may depend on it - confirm before
        # stripping.
        'name': dep[(dep.rfind('/') + 1):(last_space + 1)],
        'version': dep[(last_space + 1):],
        'html_url': html_url,
        'repo_link': repo_link,
        'dep': dep.strip(),
    }


def get_otel_col_upstream_version(url):
    """Return the upstream collector version required by the go.mod at *url*.

    The version is the text after the last ``v`` on the first line that
    mentions ``go.opentelemetry.io/collector/otelcol``.  If no such line
    exists the placeholder ``'<OTEL_COL_VERSION>'`` is returned.

    Raises:
        RuntimeError: if the go.mod file could not be downloaded.
    """
    for line in _fetch_lines(url):
        if 'go.opentelemetry.io/collector/otelcol ' in line:
            return line[(line.rfind('v') + 1):]
    return '<OTEL_COL_VERSION>'


def get_collector_version(filePath):
    """Read the collector version from the config file at *filePath*.

    Scans for the first line containing ``collector`` at or after a line that
    starts with ``edot_versions`` and returns the stripped text after that
    line's last ``:``.  Returns ``'main'`` when no such line is found.
    """
    with open(filePath, 'r', encoding='utf-8') as file:
        lines = file.read().splitlines()

    in_versions_section = False
    for line in lines:
        if line.startswith('edot_versions'):
            in_versions_section = True
        if in_versions_section and 'collector' in line:
            return line[(line.rfind(':') + 1):].strip()
    return 'main'


def get_otel_components(url):
    """Collect the collector components required by the go.mod at *url*.

    A line counts as a component when it is not an ``// indirect``
    requirement, is not a ``=>`` replace directive, and contains one of the
    component type path segments (``/receiver/``, ``/exporter/``, ...).

    Returns:
        A dict mapping the capitalised, pluralised type (e.g. ``Receivers``)
        to its components sorted by ``name``.

    Raises:
        RuntimeError: if the go.mod file could not be downloaded.
    """
    otel_deps = [
        line for line in _fetch_lines(url)
        if not line.endswith('// indirect')
        and '=>' not in line
        and any(f'/{comp_type}/' in line for comp_type in _COMPONENT_TYPES)
    ]

    grouped = defaultdict(list)
    for comp in map(dep_to_component, otel_deps):
        # Each component is filed under the first type whose segment occurs in
        # its module path; this assumes a path matches only one group.
        comp_type = next(
            (candidate for candidate in _COMPONENT_TYPES if f'/{candidate}/' in comp['dep']),
            None,
        )
        if comp_type is not None:
            grouped[f'{comp_type.capitalize()}s'].append(comp)

    return {
        key: sorted(group, key=lambda comp: comp['name'])
        for key, group in grouped.items()
    }


def find_files_with_substring(directory, substring):
    """Return the paths of all files under *directory* containing *substring*.

    The search is recursive and case-insensitive.  Files that cannot be read
    as UTF-8 or that raise ``PermissionError`` are reported and skipped.
    """
    pattern = re.compile(re.escape(substring), re.IGNORECASE)
    matching_files = []
    for file_path in Path(directory).rglob('*'):
        if not file_path.is_file():
            continue
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                content = file.read()
        except (UnicodeDecodeError, PermissionError) as e:
            # Skip files that can't be read due to encoding or permission errors.
            print(f"Skipping {file_path}: {e}")
            continue
        if pattern.search(content):
            matching_files.append(str(file_path))
    return matching_files


def render_markdown(data, template):
    """Render the Jinja2 template at path *template* with the mapping *data*.

    Templates are looked up relative to the current working directory.
    """
    env = Environment(loader=FileSystemLoader('.'))
    return env.get_template(template).render(data)


def _tag_markers(tag):
    """Return the ``(start, end)`` HTML comment markers delimiting *tag*."""
    return f'<!-- start:{tag} -->', f'<!-- end:{tag} -->'


def _tagged_block_pattern(start_tag, end_tag):
    """Compile a regex matching a start/end marker pair and the text between.

    The markers are escaped so they are always matched literally; group 1 is
    the (non-greedy, possibly multi-line) text between them.
    """
    return re.compile(re.escape(start_tag) + r'(.*?)' + re.escape(end_tag), re.DOTALL)


def render_components_into_file(dir, data, template, tag):
    """Render *template* and splice the result into every tagged file.

    Every file under *dir* that contains the start marker for *tag* is
    rewritten with the text between each start/end marker pair replaced by
    the freshly rendered markdown.
    """
    output = render_markdown(data, template)
    start_tag, end_tag = _tag_markers(tag)
    block_re = _tagged_block_pattern(start_tag, end_tag)
    new_content = f'{start_tag}\n{output}\n{end_tag}'

    for file_path in find_files_with_substring(dir, start_tag):
        with open(file_path, 'r', encoding='utf-8') as file:
            content = file.read()
        # A callable replacement inserts the text verbatim. Passing it as a
        # replacement string would make `re` interpret backslashes in the
        # rendered markdown as escapes / group references.
        updated_content = block_re.sub(lambda _match: new_content, content)
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(updated_content)


def check_markdown_generation(dir, data, template, tag):
    """Check that the tagged sections under *dir* match a fresh rendering.

    Returns:
        ``False`` (after printing a warning) as soon as one tagged section
        differs from the rendered template, ignoring leading and trailing
        whitespace; ``True`` otherwise, including when no file is tagged.
    """
    expected = render_markdown(data, template).strip()
    start_tag, end_tag = _tag_markers(tag)
    block_re = _tagged_block_pattern(start_tag, end_tag)

    for filePath in find_files_with_substring(dir, start_tag):
        with open(filePath, 'r', encoding='utf-8') as file:
            content = file.read()
        for existing in block_re.findall(content):
            if existing.strip() != expected:
                print(f'Warning: Generated markdown is outdated in file {filePath}! Regenerate markdown by running `make generate`!')
                return False
    return True


def get_features_data(source_file):
    """Load the SDK features YAML file at *source_file*.

    Raises:
        SystemExit: with status 1 (after printing the error) when the file is
            not valid YAML.
    """
    with open(source_file, 'r', encoding='utf-8') as file:
        try:
            return yaml.safe_load(file)
        except yaml.YAMLError as exc:
            print(f"Error reading YAML file: {exc}")
            # sys.exit instead of the builtin exit(), which is only injected
            # by the `site` module and is meant for interactive use.
            sys.exit(1)


def _load_collector_data():
    """Build the template context for the collector tables and OCB file."""
    col_version = get_collector_version('../_config.yml')
    url = f'https://raw.githubusercontent.com/elastic/elastic-agent/refs/tags/v{col_version}/go.mod'
    components = get_otel_components(url)
    otel_col_version = get_otel_col_upstream_version(url)
    return {
        'grouped_components': components,
        'otel_col_version': otel_col_version,
    }


def check_markdown():
    """Return ``True`` when all generated markdown sections are up to date."""
    data = _load_collector_data()
    # All three checks run before being combined so that each outdated
    # section prints its own warning.
    tables = check_markdown_generation(EDOT_COLLECTOR_DIR, data, TEMPLATE_COLLECTOR_COMPONENTS_TABLE, TABLE_TAG)
    ocb = check_markdown_generation(EDOT_COLLECTOR_DIR, data, TEMPLATE_COLLECTOR_OCB_FILE, DEPS_TAG)
    features_data = get_features_data(SDK_FEATURES_YAML)
    features = check_markdown_generation(EDOT_SDKS_DIR, features_data, TEMPLATE_SDK_FEATURES, FEATURES_TAG)
    return tables and ocb and features


def generate_markdown():
    """Regenerate every tagged markdown section in the EDOT docs."""
    data = _load_collector_data()
    render_components_into_file(EDOT_COLLECTOR_DIR, data, TEMPLATE_COLLECTOR_COMPONENTS_TABLE, TABLE_TAG)
    render_components_into_file(EDOT_COLLECTOR_DIR, data, TEMPLATE_COLLECTOR_OCB_FILE, DEPS_TAG)
    features_data = get_features_data(SDK_FEATURES_YAML)
    render_components_into_file(EDOT_SDKS_DIR, features_data, TEMPLATE_SDK_FEATURES, FEATURES_TAG)