import os
import base64
import calendar
import gzip
import json
import logging
import re
import threading
import time
import itertools
from collections import defaultdict
from copy import deepcopy
from datetime import datetime, timedelta
from io import BytesIO

import pusher
from mitmproxy import http, io
from mitmproxy.exceptions import FlowReadException
from nacl.exceptions import CryptoError
from nacl.secret import SecretBox
from requests_toolbelt.multipart import decoder
from bs4 import BeautifulSoup

# Important! The flow file path is built from the ISSUE_ID environment variable.
ISSUE_ID = os.environ.get("ISSUE_ID")
FLOW_FILE_PATH = f"/app/tests/issues/{ISSUE_ID}/flow.mitm"

# Mapbox Token variable
MAPBOX_PUBLIC_TOKEN = os.environ.get("MAPBOX_PUBLIC_TOKEN") or "NOT_PROVIDED"

# Initialize logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
logger.addHandler(logging.StreamHandler())
for handler in logger.handlers:
    handler.setFormatter(logging.Formatter("%(filename)s:%(lineno)s - %(message)s"))
# Stop records from also propagating to the root handler, which would log them twice.
logger.propagate = False

# Constants
API_PREFIXES = ["/api/{0}", "/api?command={0}"]
API_FORMAT_LAMBDA = lambda x: [prefix.format(x) for prefix in API_PREFIXES]
API_FORMAT_BYTES_LAMBDA = lambda x: [prefix.format(x).encode() for prefix in API_PREFIXES]
REQUESTS_TO_MATCH = ["/api", "/chat-attachments/", "/receipts/"]

EXPENSIFY_HOSTS = ["www.expensify.com"]
WEBSOCKET_HOSTS = ["ws-mt1.pusher.com", "pusher_proxy"]

UNNECESSARY_PATHS = list(
    itertools.chain.from_iterable(
        [API_FORMAT_BYTES_LAMBDA(x) for x in ["Log", "Ping", "LogOut"]]
    )
)
DUPLICATE_HANDLE_PATHS = list(
    itertools.chain.from_iterable(
        [API_FORMAT_BYTES_LAMBDA(x) for x in ["OpenReport", "GetPolicy"]]
    )
)
PUSHER_AUTHENTICATION_PATHS = list(
    itertools.chain.from_iterable(
        [API_FORMAT_BYTES_LAMBDA(x) for x in ["AuthenticatePusher"]]
    )
)
MAPBOX_TOKEN_PATHS = list(
    itertools.chain.from_iterable(
        [API_FORMAT_BYTES_LAMBDA(x) for x in ["GetMapboxAccessToken"]]
    )
)
SKIPPABLE_PATHS = list(
    itertools.chain.from_iterable(
        [
            API_FORMAT_LAMBDA(x)
            for x in [
                "ReadNewestAction",
                "AuthenticatePusher",
                "GetMissingOnyxMessages",
                "OpenReport",
            ]
        ]
    )
)

DUPLICATE_HANDLE_KEYS = ["reportID", "policyID"]
CHAT_ATTACHMENTS_PATHS = [b"/chat-attachments/", b"/receipts/"]

DATES_REGEX = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}"
DATES_REGEX_NO_MILLIS = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}"
AUTH_REGEX = r'"auth": ".+"'
GARBAGE_IDS = [None, "", "null", "0", "-1"]

# Dynamic Content Constants
DYNAMIC_CONTENT_KEYS = ["reportComment", "glCode"]
DYNAMIC_CONTENT_REPLACEMENTS = {}

# Context
REPLACEMENT_VARS = {}
REPLACEMENT_DATES = {}
REPLACEMENT_TIMESTAMPS = {}

# Websockets
WS_TASKS = set()

# Pusher Tokens
PUSHER_WEB_HOST = os.environ.get("PUSHER_WEB_HOST", "ws-mt1.pusher.com")
PUSHER_WEB_PORT = os.environ.get("PUSHER_WEB_PORT", "90")
PUSHER_APP_KEY = os.environ.get("PUSHER_APP_KEY")
PUSHER_APP_SECRET = os.environ.get("PUSHER_APP_SECRET")
PUSHER_APP_ID = os.environ.get("PUSHER_APP_ID")
PUSHER_CLUSTER = "mt1"
PUSHER_ENCRYPTION_MASTER_KEY = "M1FtOU1Wd3kycjV3dWdoS05zQkh0MVJKcTJiVFFIQXQ="


class PusherDecryptionUtils:
    @staticmethod
    def decrypt_pusher_notification(shared_key: str, notification: str) -> str:
        """
        Decrypts a Pusher notification using NaCl SecretBox.

        :param shared_key: The shared encryption key (Base64-encoded).
        :param notification: The raw Pusher notification (JSON string).
        :return: The decrypted plaintext message as a string.
        """
        # Parse the JSON notification
        try:
            notification_data = json.loads(notification)
            encrypted_data = json.loads(notification_data["data"])
            nonce = encrypted_data["nonce"]
            ciphertext = encrypted_data["ciphertext"]
        except (KeyError, ValueError) as e:
            raise Exception(f"Failed to parse notification: {e}")

        # Decode the shared key, nonce, and ciphertext from Base64
        key = base64.b64decode(shared_key)
        nonce = base64.b64decode(nonce)
        encrypted_message = base64.b64decode(ciphertext)

        # Create a SecretBox object with the shared key
        box = SecretBox(key)

        # Decrypt the message
        try:
            plaintext = box.decrypt(encrypted_message, nonce)
            return plaintext.decode("utf-8")
        except CryptoError as e:
            raise Exception(f"Decryption failed: {e}")


class ExpensifyReplay:
    def __init__(self, flow_file_path):
        """
        Initialize the ExpensifyReplay addon.

        Args:
            flow_file_path (str): Path to the mitmproxy flow dump file.
        """
        self.flow_file_path = flow_file_path
        self.recorded_flows = []
        self.filtered_flows = []
        self.attachment_flows = []
        self.duplicate_handle_flows = []
        self.other_flows = []
        self.ws_flows = []
        self.pusher_auth_flows = []
        self.smallest_date = None
        self.load_recorded_flows()
        self.current_date = None
        try:
            self.pusher_client = pusher.Pusher(
                app_id=PUSHER_APP_ID,
                key=PUSHER_APP_KEY,
                secret=PUSHER_APP_SECRET,
                cluster=PUSHER_CLUSTER,
                ssl=False,
                host=PUSHER_WEB_HOST,
                port=int(PUSHER_WEB_PORT),
                encryption_master_key_base64=PUSHER_ENCRYPTION_MASTER_KEY,
            )
        except Exception:
            self.pusher_client = None
            logger.warning(
                "Failed to initialize Pusher client, WS messages will not be injected."
            )
        logger.info(f"Loaded {len(self.recorded_flows)} recorded flows")
        logger.info(f"Unique email addresses: {list(self.email_based_flows.keys())}")
        logger.info(f"Flow recorded on: {self.smallest_date}")
        logger.info(
            f"Loaded decryption keys for {len(self.pusher_decryption_keys)} Pusher channels"
        )

    def load_recorded_flows(self):
        """
        Load recorded flows from the mitmproxy dump file.
        """
        try:
            with open(self.flow_file_path, "rb") as f:
                flow_reader = io.FlowReader(f)
                for flow in flow_reader.stream():
                    self.recorded_flows.append(flow.get_state())
            self.process_flows()
        except FlowReadException as e:
            logger.error(f"Error reading mitm file: {e}")

    def get_date_from_recorded_flow(self, flow):
        """
        Get the date from the recorded flow.

        Args:
            flow (dict): The recorded flow.

        Returns:
            datetime: The creation time derived from the flow's timestamp_created field.
        """
        timestamp_created = flow["timestamp_created"]
        # Convert to datetime
        return datetime.utcfromtimestamp(timestamp_created)

    def process_flows(self):
        """
        Process the recorded flows, separating them into filtered and other flows.
        """
        # First, decompress the content of the flows
        for flow in self.recorded_flows:
            if not flow["response"]:
                continue
            flow["response"]["content"] = self.decompress_gzip(
                flow["response"]["content"], flow["response"]["headers"]
            )
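            # The UTF-16 "surrogatepass" round trip below presumably normalizes stray
            # surrogate code points (e.g. from emoji) left over in decoded string payloads.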
            if isinstance(flow["response"]["content"], str):
                flow["response"]["content"] = (
                    flow["response"]["content"]
                    .encode("utf-16", "surrogatepass")
                    .decode("utf-16")
                )
                flow["response"]["content"] = flow["response"]["content"].replace(
                    r"\/", "/"
                )

        for flow in self.recorded_flows:
            if "method" not in flow["request"]:
                continue
            elif flow["websocket"] and any(
                host in (flow["server_conn"]["sni"] or "") for host in WEBSOCKET_HOSTS
            ):
                self.ws_flows.append(flow)
            elif flow["server_conn"]["sni"] not in EXPENSIFY_HOSTS:
                self.other_flows.append(flow)
            elif any(
                flow["request"]["path"].startswith(path) for path in UNNECESSARY_PATHS
            ):
                self.other_flows.append(flow)
            elif any(
                flow["request"]["path"].startswith(path)
                for path in CHAT_ATTACHMENTS_PATHS
            ):
                self.attachment_flows.append(flow)
            else:
                self.filtered_flows.append({"flow": flow, "marked": False})
                if any(
                    flow["request"]["path"].startswith(path)
                    for path in PUSHER_AUTHENTICATION_PATHS
                ):
                    self.pusher_auth_flows.append(flow)

        # Extract date from headers from both flows
        dates = []

        for flow in self.filtered_flows:
            date = self.get_date_from_recorded_flow(flow["flow"])
            if date:
                dates.append(date)

        for flow in self.other_flows:
            date = self.get_date_from_recorded_flow(flow)
            if date:
                dates.append(date)

        if dates:
            self.smallest_date = min(dates)
            self.smallest_date_ts = calendar.timegm(self.smallest_date.timetuple())

        # For attachment flows remove everything after ? in the path.
        # That is the authentication token and we don't need to store it.
        for flow in self.attachment_flows:
            path = flow["request"]["path"].decode().split("?")[0]
            flow["request"]["path"] = path.encode()

        # For WS flows, keep only server-initiated messages
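        # Each serialized message is indexed as used below: msg[1] is the from_client
        # flag, msg[2] the payload, and msg[3] the timestamp; "reserved" tracks whether
        # the flow has already been claimed for replay.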
        for flow in self.ws_flows:
            messages = flow["websocket"]["messages"]
            flow["websocket"]["messages"] = [
                list(msg) for msg in messages if msg[1] is False
            ]
            flow["websocket"]["reserved"] = False

        # Sort the flows by timestamp
        self.filtered_flows.sort(key=lambda x: x["flow"]["request"]["timestamp_start"])
        self.other_flows.sort(key=lambda x: x["request"]["timestamp_start"])

        # Further bucket into email-based flows
        self.email_based_flows = defaultdict(list)
        for flow_entry in self.filtered_flows:
            flow = flow_entry["flow"]
            headers = self.convert_headers_to_dict(flow["request"]["headers"])
            if self.is_multipart_form_data(headers) or self.is_x_www_form_urlencoded(
                headers
            ):
                email = self.get_email_from_request(
                    flow["request"]["content"],
                    headers.get(b"content-type", b"").decode("utf-8"),
                )
                if email:
                    self.email_based_flows[email].append(flow_entry)

        # Further bucket to handle duplicate calls
        self.duplicate_handle_flows = defaultdict(dict)
        for flow_entry in self.filtered_flows:
            flow = flow_entry["flow"]
            headers = self.convert_headers_to_dict(flow["request"]["headers"])
            if any(path in flow["request"]["path"] for path in DUPLICATE_HANDLE_PATHS):
                for key in DUPLICATE_HANDLE_KEYS:
                    unique_ids = self.extract_unique_ids(
                        flow["request"]["content"],
                        headers.get(b"content-type", b"").decode("utf-8"),
                    )
                    value = unique_ids.get(key)
                    update_id = unique_ids.get("clientUpdateID")
                    if value:
                        # The goal is to cache the last flow for each key
                        self.duplicate_handle_flows[(value, update_id)] = flow_entry

        # Create a map of channel to key for Pusher flows
        self.create_channel_key_map()

        # Decrypt Pusher messages
        self.decrypt_websocket_messages()

    def create_channel_key_map(self):
        """
        Read Pusher Authentication flows and create a map of channel to key.
        Store the map to each websocket flow for decryption.
        """
        self.pusher_decryption_keys = {}
        for flow in self.pusher_auth_flows:
            # Read the flow request content
            content = flow["request"]["content"]
            headers = self.convert_headers_to_dict(flow["request"]["headers"])
            # Extract channel and key
            ids = self.extract_unique_ids(
                content,
                headers.get(b"content-type", b"").decode("utf-8"),
                get_pusher_ids=True,
            )
            channel = ids.get("channel_name")
            # Only channels whose name contains -encrypted- are encrypted
            if not channel or "-encrypted-" not in channel:
                continue
            # Load the response as JSON
            try:
                response = json.loads(flow["response"]["content"])
                key = response.get("shared_secret")
                if not key:
                    continue
                self.pusher_decryption_keys[channel] = key
            except Exception as e:
                logger.warning(f"Failed to load Pusher response: {e}")

    def decrypt_websocket_messages(self):
        """
        Decrypt all websocket messages which have channel name containing -encrypted-.
        """
        decryption_count = 0
        for flow in self.ws_flows:
            messages = flow["websocket"]["messages"]
            for msg in messages:
                msg_json = json.loads(msg[2])
                channel = msg_json.get("channel")
                if channel in self.pusher_decryption_keys:
                    key = self.pusher_decryption_keys[channel]
                    try:
                        decrypted = PusherDecryptionUtils.decrypt_pusher_notification(
                            key, msg[2]
                        )
                        msg_json["data"] = json.loads(decrypted)
                        decryption_count += 1
                    except Exception:
                        # Leave the message unmodified if decryption fails
                        pass
                msg[2] = msg_json
        logger.info(f"Decrypted {decryption_count} Pusher messages")

    def decompress_gzip(self, compressed_content, headers=None):
        """
        Decompress Gzip-encoded content.

        Args:
            compressed_content (bytes): Gzip compressed data.
            headers: HTTP headers.

        Returns:
            bytes: Decompressed data.
        """
        if headers:
            headers = self.convert_headers_to_dict(headers)
            content_encoding = headers.get(b"content-encoding", b"").decode("utf-8")
            if "gzip" not in content_encoding:
                return compressed_content

        # The content may already have been decompressed; strings are returned as-is.
        if isinstance(compressed_content, str):
            return compressed_content

        try:
            with gzip.GzipFile(fileobj=BytesIO(compressed_content)) as f:
                return f.read().decode("utf-8")
        except Exception:
            try:
                # The payload may be binary (non-UTF-8) gzip data; return the raw bytes
                with gzip.GzipFile(fileobj=BytesIO(compressed_content)) as f:
                    return f.read()
            except Exception as e:
                logger.warning(f"Failed to decompress content: {e}")

        # Return as it is if decompression fails
        return compressed_content

    def inject_ws(self, flow: http.HTTPFlow, ws_flow):
        """
        Inject a WebSocket message.

        Args:
            flow (http.HTTPFlow): The HTTP flow.
            ws_flow (dict): The WebSocket flow.
        """
        if not self.pusher_client:
            logger.warning("Pusher client not initialized, cannot inject WS messages.")
            return

        ws_messages = ws_flow["websocket"]["messages"]
        current_date = calendar.timegm(datetime.utcnow().timetuple())
        # Update timestamps for the WS messages
        smallest_ts = min(msg[3] for msg in ws_messages) - 1
        for msg in ws_messages:
            msg[3] = current_date + (msg[3] - smallest_ts)
        while True:
            _now = datetime.utcnow()
            _ts = calendar.timegm(_now.timetuple())
            if not ws_messages:
                logger.info(f"Finished injecting WS messages to {ws_flow['id']}")
                break
            if ws_messages[0][3] <= _ts:
                # Get JSON message
                msg = ws_messages.pop(0)
                msg_json = msg[2]
                channel_name = msg_json.get("channel", "")
                event = msg_json.get("event", "")
                if not channel_name or event.startswith("pusher"):
                    continue
                # Perform ID replacement
                for key, value in REPLACEMENT_VARS.items():
                    channel_name = channel_name.replace(key, value)
                # Perform data replacement on the data
                data = msg_json.get("data")
                if not data:
                    continue
                data = self.replace_dates(json.dumps(data))
                for key, value in REPLACEMENT_VARS.items():
                    data = data.replace(key, value)
                self.pusher_client.trigger(channel_name, event, json.loads(data))
                logger.info(f"Injected WS message to {channel_name}")
            time.sleep(0.5)

    def request(self, flow: http.HTTPFlow) -> None:
        """
        Handle incoming HTTP requests.

        Args:
            flow (http.HTTPFlow): The HTTP flow.
        """
        # First check if incoming request is for WS
        ws_match = any(host in flow.request.url for host in WEBSOCKET_HOSTS)
        if ws_match:
            logger.info(f"Intercepted WS request to {flow.request.url}")
            for ws_flow in self.ws_flows:
                if ws_flow["websocket"]["reserved"]:
                    continue
                if ws_flow["server_conn"]["address"] == flow.server_conn.address or (
                    "pusher" in ws_flow["server_conn"]["sni"]
                    and "pusher" in flow.server_conn.sni
                ):
                    ws_flow["websocket"]["reserved"] = True
                    t = threading.Thread(target=self.inject_ws, args=(flow, ws_flow))
                    t.start()
                    WS_TASKS.add(t)
                    logger.info(f"Reserved WS flow: {ws_flow['id']}")
                    return

        host_matched = any(host in flow.request.pretty_host for host in EXPENSIFY_HOSTS)
        if (
            host_matched
            and any(path in flow.request.path for path in REQUESTS_TO_MATCH)
            and flow.request.method != "OPTIONS"
        ):
            logger.info(f"Intercepted request to {flow.request.url}")
            if self.current_date is None:
                self.current_date = datetime.utcnow()
            recorded_response = self.find_matching_response(flow)
            try:
                content = recorded_response["response"]["content"]
            except Exception:
                content = json.dumps({})
            try:
                response_code = recorded_response["response"]["status_code"]
            except Exception:
                response_code = 200
            try:
                headers = recorded_response["response"]["headers"]
            except Exception:
                headers = {}
            flow.response = http.Response.make(response_code, content, dict(headers))

    def find_matching_response(self, request_flow: http.HTTPFlow):
        """
        Find a matching response from the recorded flows for the given request.

        Args:
            request_flow (http.HTTPFlow): The incoming HTTP request flow.

        Returns:
            dict: The matching recorded flow, or None if not found.
        """
        request_method = request_flow.request.method.encode("utf-8")
        request_url = request_flow.request.url

        # First, check in other_flows
        for flow in self.other_flows:
            recorded_method = flow["request"]["method"]
            recorded_url = self.construct_url(flow)
            if recorded_method == request_method and recorded_url == request_url:
                return flow

        # Now check in attachment_flows
        for flow in self.attachment_flows:
            recorded_method = flow["request"]["method"]
            recorded_url = self.construct_url(flow)
            # Strip request URL to match
            request_url_match = request_url.split("?")[0]
            # In recorded URL perform ID replacement
            for key, value in REPLACEMENT_VARS.items():
                recorded_url = recorded_url.replace(key, value)
            if recorded_method == request_method and recorded_url == request_url_match:
                return flow

        # Then, check in filtered_flows
        request_content = request_flow.request.content
        content_type = request_flow.request.headers.get("Content-Type", "")
        flow_entry = self.find_matching_flow(
            request_method, request_url, request_content, content_type
        )
        if flow_entry is None:
            return None

        matching_flow = flow_entry["flow"]

        if not matching_flow["response"]:
            return matching_flow

        matching_flow_headers = self.convert_headers_to_dict(
            matching_flow["request"]["headers"]
        )
        if self.is_multipart_form_data(
            matching_flow_headers
        ) or self.is_x_www_form_urlencoded(matching_flow_headers):
            # We do not want to tamper original matching flow, thus create a copy
            matching_flow = deepcopy(matching_flow)
            self.replace_unique_ids(request_flow, matching_flow)

        # If the content was marked IGNORED, unmark the entry so it can match again later and serve an empty body
        if matching_flow["response"]["content"] == "IGNORED":
            flow_entry["marked"] = False
            matching_flow["response"]["content"] = json.dumps({})

        return matching_flow

    def find_matching_flow(
        self, request_method, request_url, request_content, content_type
    ):
        """
        Find a matching flow in the filtered_flows.
        Args:
            request_method (bytes): The request method.
            request_url (str): The request URL.
            request_content (bytes): The request content.
            content_type (str): The content type header value.
        Returns:
            dict: The matching flow, or None.
        """
        # Choose which email flows to use
        email = self.get_email_from_request(request_content, content_type)
        if email and email in self.email_based_flows:
            flows = self.email_based_flows[email]
        else:
            flows = self.filtered_flows

        for flow_entry in flows:
            flow = flow_entry["flow"]
            recorded_method = flow["request"]["method"]
            recorded_url = self.construct_url(flow)
            matched = recorded_method == request_method and recorded_url == request_url
            matched_skip = any(path in recorded_url for path in SKIPPABLE_PATHS)
            if flow_entry["marked"]:
                continue
            elif matched:
                flow_entry["marked"] = True
                return flow_entry
            elif matched_skip:
                continue  # Match later
            else:
                break

        # If we are here then no match was found, but sometimes duplicated calls can be expected,
        # especially for endpoints such as OpenReport. Specifically handle those endpoints.
        if any(path.decode() in request_url for path in DUPLICATE_HANDLE_PATHS):
            unique_ids = self.extract_unique_ids(request_content, content_type)
            # The unique_ids are replaced IDs, so fetch the original ones
            for k, v in unique_ids.items():
                # Get v's old value from Replacement Vars
                for _k, _v in REPLACEMENT_VARS.items():
                    if _v == v:
                        unique_ids[k] = _k
            for key in DUPLICATE_HANDLE_KEYS:
                value = unique_ids.get(key)
                update_id = unique_ids.get("clientUpdateID")
                matching_flow = self.duplicate_handle_flows.get((value, update_id))
                if matching_flow:
                    # Serve the cached flow for this (ID, clientUpdateID) pair
                    return matching_flow

        if any(path in request_url for path in SKIPPABLE_PATHS):
            return None

        # Last resort: if we are here, the frontend is either making a call that was not
        # recorded or the order of calls differs from the recording. In that case return
        # the first unmarked matching flow. Ideally this should not happen and the test
        # should be re-recorded with a slow_mo parameter so the order of calls is stable.
        to_mark = []
        marked_flow = None
        # Iterate over unmarked flows, find the matching one,
        # and mark everything before it as consumed
        for flow_entry in flows:
            flow = flow_entry["flow"]
            recorded_method = flow["request"]["method"]
            recorded_url = self.construct_url(flow)
            matched = recorded_method == request_method and recorded_url == request_url
            if flow_entry["marked"]:
                continue
            else:
                if matched:
                    flow_entry["marked"] = True
                    marked_flow = flow_entry
                    break
                else:
                    to_mark.append(flow_entry)

        if marked_flow:
            for flow_entry in to_mark:
                flow_entry["marked"] = True

        return marked_flow

    def construct_url(self, flow):
        """
        Construct the full URL from the flow data.

        Args:
            flow (dict): The flow data.

        Returns:
            str: The full URL.
        """
        host = EXPENSIFY_HOSTS[0].encode("utf-8")
        return (
            flow["request"]["scheme"] + b"://" + host + flow["request"]["path"]
        ).decode("utf-8")

    def convert_headers_to_dict(self, headers) -> dict:
        """
        Convert headers to a dictionary.

        Args:
            headers: Headers to convert (a dict or an iterable of key/value pairs).

        Returns:
            dict: Dictionary of headers.
        """
        if isinstance(headers, dict):
            return headers
        return dict(headers)

    def is_multipart_form_data(self, headers):
        """
        Check if the content type is multipart/form-data.

        Args:
            headers (dict): HTTP headers.

        Returns:
            bool: True if content type is multipart/form-data, False otherwise.
        """
        content_type = headers.get(b"content-type", b"").decode("utf-8")
        return "multipart/form-data" in content_type

    def is_x_www_form_urlencoded(self, headers):
        """
        Check if the content type is application/x-www-form-urlencoded.

        Args:
            headers (dict): HTTP headers.

        Returns:
            bool: True if content type is application/x-www-form-urlencoded, False otherwise.
        """
        content_type = headers.get(b"content-type", b"").decode("utf-8")
        return "application/x-www-form-urlencoded" in content_type

    def replace_unique_ids(self, request_flow, recorded_flow):
        """
        Replace unique IDs in the response content based on the request content.

        Args:
            request_flow (http.HTTPFlow): The incoming request flow.
            recorded_flow (dict): The recorded flow to modify.
        """
        # Extract IDs from recorded request
        recorded_headers = self.convert_headers_to_dict(
            recorded_flow["request"]["headers"]
        )
        recorded_content = recorded_flow["request"]["content"]
        recorded_content_type = recorded_headers.get(b"content-type", b"").decode(
            "utf-8"
        )
        recorded_ids = self.extract_unique_ids(recorded_content, recorded_content_type)

        # Extract IDs from incoming request
        request_content = request_flow.request.content
        request_headers = request_flow.request.headers
        request_content_type = request_headers.get("Content-Type", "")
        request_ids = self.extract_unique_ids(request_content, request_content_type)

        if recorded_flow["response"] is None:
            flow_url = request_flow.request.url
            logger.warning(f"Missing response for flow recorded on: {flow_url}")
            return

        response_content = recorded_flow["response"]["content"]

        if not isinstance(response_content, str):
            return

        # Replace IDs in response content
        current_repl_vars = set(REPLACEMENT_VARS.values())
        for name, value in recorded_ids.items():
            request_value = request_ids.get(name)
            if request_value:
                if request_value not in current_repl_vars:
                    # Store the replaced value for future reference
                    REPLACEMENT_VARS[value] = request_value

        # Force replacement of some keys.
        # In certain scenarios, the app requests updates from the BE and those requests do not
        # carry any unique IDs, so we force replacement to keep policy and report IDs consistent.
        for key, value in REPLACEMENT_VARS.items():
            response_content = response_content.replace(key, value)

        # I hate the ReadNewestAction endpoint; it messes EVERYTHING up.
        # For the incoming request, grab the reportID and make sure a replacement for it
        # already appears in the response content; otherwise serve an ignored response.
        if "ReadNewestAction?" in request_flow.request.path:
            report_id = request_ids.get("reportID")
            if report_id and report_id not in response_content:
                recorded_flow["response"]["content"] = "IGNORED"
                return

        # Replace dates in the response content
        response_content = self.replace_dates(response_content)

        # Special case to handle Pusher flows
        if (
            "AuthenticatePusher" in request_flow.request.path
            and self.pusher_client is not None
        ):
            response_content = self.handle_pusher_flows(request_flow, response_content)
        
        if "GetMapboxAccessToken" in request_flow.request.path:
            response_content = self.replace_mapbox_token(response_content)

        # Replace dynamic content
        response_content = self.replace_dynamic_content(
            request_content,
            request_content_type,
            recorded_content,
            recorded_content_type,
            response_content,
        )

        # Update the response content
        recorded_flow["response"]["content"] = response_content

    def handle_pusher_flows(self, request_flow, response_content):
        # From request flow get pusher IDs
        request_content = request_flow.request.content
        request_headers = request_flow.request.headers
        request_content_type = request_headers.get("Content-Type", "")
        request_ids = self.extract_unique_ids(
            request_content, request_content_type, get_pusher_ids=True
        )

        socket_id = request_ids.get("socket_id")
        channel = request_ids.get("channel_name")

        # Generate auth for the subscription key
        auth = self.pusher_client.authenticate(channel=channel, socket_id=socket_id)

        # Update the recorded flow with the auth
        response_content = json.loads(response_content)
        response_content.update(auth)
        if "shared_secret" in response_content and not isinstance(
            response_content["shared_secret"], str
        ):
            response_content["shared_secret"] = response_content[
                "shared_secret"
            ].decode("utf-8")

        return json.dumps(response_content)

    def replace_dates(self, content):
        """
        Replace dates in the content.

        Args:
            content (str): The content to modify.

        Returns:
            str: The modified content.
        """
        # Run regex on the content to find dates
        matches = re.findall(DATES_REGEX, content)
        matches_2 = re.findall(DATES_REGEX_NO_MILLIS, content)

        if not matches and not matches_2:
            return content

        def convert_timestamp_to_string(timestamp):
            timestamp = str(timestamp)[:14]
            if timestamp.endswith(".0"):
                timestamp = timestamp[:-2]
            else:
                timestamp = timestamp.replace(".", "")
            return timestamp

        def perform_dates_replacement(matches, format, content):
            smallest_date = self.smallest_date
            matches = list(set(matches))

            for match in matches:
                match_date = datetime.strptime(match, format)
                match_timestamp = (
                    calendar.timegm(match_date.timetuple())
                    + match_date.microsecond / 1_000_000
                )
                match_timestamp = convert_timestamp_to_string(match_timestamp)

                # Check if the date is already cached
                if match in REPLACEMENT_DATES:
                    new_date = REPLACEMENT_DATES[match]
                    new_timestamp = REPLACEMENT_TIMESTAMPS[match_timestamp]
                else:
                    # Calculate the difference and the new date
                    diff = match_date - smallest_date
                    if diff < timedelta(0):
                        continue
                    new_date = (self.current_date + diff).strftime(format)[: len(match)]
                    new_timestamp_value = (
                        calendar.timegm((self.current_date + diff).timetuple())
                        + (self.current_date + diff).microsecond / 1_000_000
                    )
                    new_timestamp = convert_timestamp_to_string(new_timestamp_value)

                    # Cache the replacements
                    REPLACEMENT_DATES[match] = new_date
                    REPLACEMENT_TIMESTAMPS[
                        convert_timestamp_to_string(match_timestamp)
                    ] = new_timestamp

                # Replace in the content
                content = content.replace(match, new_date)
                content = content.replace(match_timestamp, new_timestamp)

            return content

        content = perform_dates_replacement(matches, "%Y-%m-%d %H:%M:%S.%f", content)
        content = perform_dates_replacement(matches_2, "%Y-%m-%d %H:%M:%S", content)

        return content

    def convert_data_to_dict(self, content, content_type):
        """
        Convert data to a dictionary.

        Args:
            content (bytes): The data to convert.
            content_type (str): The content type header value.

        Returns:
            dict: The converted data.
        """
        data = {}
        try:
            if content_type.startswith("application/json"):
                data = json.loads(content)
            elif content_type.startswith("multipart/form-data"):
                decoded_content = decoder.MultipartDecoder(
                    content=content, content_type=content_type
                )
                name_pattern = re.compile(r'name="(.+?)"')
                for part in decoded_content.parts:
                    try:
                        text = part.text
                    except UnicodeDecodeError:
                        # Attachment data is binary, and we don't need to process it
                        continue
                    headers = part.headers.get(b"Content-Disposition", b"").decode()
                    match = name_pattern.search(headers)
                    if not match:
                        continue
                    name = match.group(1)
                    data[name] = text
            elif content_type.startswith("application/x-www-form-urlencoded"):
                content_str = content.decode("utf-8")
                for pair in content_str.split("&"):
                    name, value = pair.split("=", 1)
                    if not name:
                        continue
                    data[name] = value
        except Exception as e:
            logger.warning(f"Failed to convert data: {e}")
        return data

    def extract_unique_ids(self, content, content_type, get_pusher_ids=False):
        """
        Extract unique IDs from multipart/form-data or x-www-form-urlencoded content.

        Args:
            content (bytes): The content to parse.
            content_type (str): The content type header value.
            get_pusher_ids (bool): Whether to extract Pusher IDs.

        Returns:
            dict: A dictionary of unique IDs and their values.
        """
        unique_ids = {}
        data = self.convert_data_to_dict(content, content_type)

        # Extract IDs from the data
        for key, value in data.items():
            if not key or value in GARBAGE_IDS:
                continue
            if "ID" in key:
                unique_ids[key] = value
            if get_pusher_ids:
                if "channel" in key:
                    unique_ids[key] = value
                if "socket_id" in key:
                    unique_ids[key] = value

        return unique_ids

    def replace_dynamic_content(
        self,
        request_data,
        request_data_type,
        response_data,
        response_data_type,
        response_content,
    ):
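        """
        Substitute user-entered dynamic content (e.g. report comments) in the response.

        Note: despite its name, response_data is the recorded request's form payload;
        its values are mapped to the live request's values and the resulting
        replacements are applied to response_content.
        """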
        # Get the dynamic content from the request
        request = self.convert_data_to_dict(request_data, request_data_type)
        response = self.convert_data_to_dict(response_data, response_data_type)

        for key in DYNAMIC_CONTENT_KEYS:
            request_value = request.get(key)
            response_value = response.get(key)
            if not request_value or not response_value:
                continue
            DYNAMIC_CONTENT_REPLACEMENTS[response_value] = request_value

            # Sometimes the content might have HTML tags, strip them and store
            mod_request_value = BeautifulSoup(request_value, "lxml").text
            if not mod_request_value:
                continue
            mod_response_value = BeautifulSoup(response_value, "lxml").text
            DYNAMIC_CONTENT_REPLACEMENTS[mod_response_value] = mod_request_value

        # Replace the dynamic content in the response
        for key, value in DYNAMIC_CONTENT_REPLACEMENTS.items():
            response_content = response_content.replace(key, value)

        return response_content
    
    def replace_mapbox_token(self, response_content):
        """
        Replace the Mapbox token in the cached response with MAPBOX_PUBLIC_TOKEN.
        """
        try:
            dict_content = json.loads(response_content)
            dict_content["onyxData"][0]["value"] = {
                "token": MAPBOX_PUBLIC_TOKEN,
                "expiration": "2100-12-23T16:38:52.716Z",
            }
            updated_content = json.dumps(dict_content)
        except Exception:
            updated_content = response_content

        return updated_content

    def get_email_from_request(self, request_content, content_type):
        """
        Extract email address from the request content.

        Args:
            request_content (bytes): The request content.
            content_type (str): The content type header value.

        Returns:
            str: The email address.
        """
        email = None
        try:
            if content_type.startswith("multipart/form-data"):
                decoded_content = decoder.MultipartDecoder(
                    content=request_content, content_type=content_type
                )
                for part in decoded_content.parts:
                    headers = part.headers.get(b"Content-Disposition", b"").decode()
                    if "email" in headers:
                        email = part.text
            elif content_type.startswith("application/x-www-form-urlencoded"):
                content_str = request_content.decode("utf-8")
                for pair in content_str.split("&"):
                    name, value = pair.split("=", 1)
                    if "email" in name:
                        email = value
        except Exception as e:
            logger.warning(f"Failed to extract email: {e}")
        return email


# Add the addon to mitmproxy
addons = [ExpensifyReplay(FLOW_FILE_PATH)]
