def find_file()

in modify-deadlink.py [0:0]


def find_file(file_str, search_dir, line_content):
    """Locate the page a dead link refers to and build a sed command to fix it.

    ``file_str`` is searched for ``in file '<path>[:<line>]'`` (the document
    containing the link) and ``link '<url>'`` (the dead link).  The link's
    base name, with ``.md`` appended when missing, is then looked up in every
    directory below ``search_dir``.

    Args:
        file_str: Log text containing the ``in file '...'`` and ``link '...'``
            fragments.
        search_dir: Directory that is walked recursively for the target file.
        line_content: Log line stored on the result as ``log_error``; the
            ``origin_url`` used in the sed command is extracted from it.

    Returns:
        A list holding one ``FileInfo`` when at least one candidate file was
        found, otherwise an empty list (a diagnostic is printed instead).
        ``relative_url`` and ``sed_str`` are only filled in when exactly one
        candidate exists; with several candidates they stay empty because the
        right target is ambiguous.
    """
    import shlex  # local import: only needed to quote the path in sed_str

    results = []

    # The referencing document, e.g.
    # "versioned_docs/version-3.0/sql-manual/sql-data-types/data-type-overview.md:67"
    base_match = re.search(r"in file '([^']+)'", file_str)
    if not base_match:
        print("No valid base file path found in the input string.")
        print(f"Error log: {line_content}")
        print("-" * 80)
        return results

    parts = base_match.group(1).split(":")
    base_file_path = parts[0]
    line_number = parts[1] if len(parts) > 1 else ""

    # The dead link itself; only its base name is used for the lookup.
    link_match = re.search(r"link '([^']+)'", file_str)
    if not link_match:
        print("No valid file path found in the input string.")
        print(f"Error log: {line_content}")
        print("-" * 80)
        return results

    file_base_name = os.path.basename(link_match.group(1))
    if file_base_name.endswith(".md"):
        target_filename = file_base_name
    else:
        target_filename = f"{file_base_name}.md"

    found_files = [
        os.path.join(root, target_filename)
        for root, _dirs, files in os.walk(search_dir)
        if target_filename in files
    ]
    if not found_files:
        print(f"[ERR] No file named {target_filename} found in {search_dir}.")
        print(f"[ERR] Error log: {line_content}")
        print("-" * 80)
        return results

    url_count = len(found_files)
    # Only one path is recorded; with several candidates it is the last one
    # found by the directory walk.
    url_path = os.path.relpath(found_files[-1], os.getcwd())

    origin_url_match = re.search(r"link '([^']+)'", line_content)
    origin_url = origin_url_match.group(1) if origin_url_match else ""

    relative_url = ""
    sed_str = ""
    if url_count == 1:
        # Express the target relative to the directory of the referencing file.
        relative_url = os.path.relpath(found_files[0], os.path.dirname(base_file_path))
        if not relative_url.startswith("../"):
            relative_url = "./" + relative_url
        if relative_url.endswith(".md"):
            relative_url = relative_url[:-3]

        # Escape both halves so sed treats the URLs as literal text, and quote
        # the path so the command survives spaces or shell metacharacters.
        sed_str = (
            f"sed -i '{line_number}s|({_escape_sed_pattern(origin_url)})"
            f"|({_escape_sed_replacement(relative_url)})|' "
            f"{shlex.quote(base_file_path)}"
        )

    results.append(
        FileInfo(
            target_file=base_file_path,
            url_line=line_number,
            url_path=url_path,
            url_count=url_count,
            relative_url=relative_url,
            log_error=line_content,
            origin_url=origin_url,
            sed_str=sed_str,
        )
    )
    return results


def _escape_sed_pattern(text):
    """Escape *text* for literal use in a sed basic regex delimited by '|'."""
    return re.sub(r"([\\.*\[^$|])", r"\\\1", text)


def _escape_sed_replacement(text):
    """Escape *text* for literal use in a sed replacement delimited by '|'."""
    return re.sub(r"([\\&|])", r"\\\1", text)