# generate_log()
#
# Source: ccai-insights-sample-data/synthetic-convo-insights.py [0:0]
# 164 lines of code
# McCabe index (conditional complexity): 40

# Customer replies (lower-cased, trailing "." / "!" removed) that count as
# declining further help.
_DECLINE_PHRASES = (
    "no",
    "no thanks",
    "that's all",
    "that's it",
    "i'm good",
    "nothing else",
    "okay",
)


def _random_timestamp_usec():
    """Return a random moment inside the configured date range, in microseconds since the epoch."""
    # Specify the desired start and end of the range here.
    start_date = datetime.datetime(year=2024, month=1, day=1)
    end_date = datetime.datetime(year=2024, month=9, day=3, hour=23, minute=59, second=59)

    random_datetime = start_date + random.random() * (end_date - start_date)
    return int(random_datetime.timestamp() * 1000000)


def _raise_if_safety_blocked(response):
    """Raise if any candidate of *response* reports a safety-filter stop."""
    # NOTE(review): this only matches when finish_reason compares equal to this
    # exact string; confirm against the SDK's finish-reason type.
    for candidate in response.candidates:
        if candidate.finish_reason == "STOP_REASON_SAFETY":
            raise Exception("Safety filter triggered. Retrying...")


def _strip_speaker_label(line, label):
    """Return *line* without its leading speaker *label* and the colon that follows it."""
    return line[len(label):].lstrip(":").strip()


def _parse_transcript(transcript):
    """Split a raw transcript into entries and collect the flags used by the quality checks.

    Only lines that start with "customer" or "agent" (case-insensitive) are
    kept; everything else is ignored.

    Returns:
        A tuple ``(entries, customer_said_no, short_customer_response,
        agent_asked_anything_else, last_agent_line)``.
    """
    entries = []
    customer_said_no = False
    short_customer_response = False
    agent_asked_anything_else = False
    last_agent_line = ""

    for raw_line in transcript.splitlines():
        line = raw_line.strip()
        lowered = line.lower()

        if lowered.startswith("customer"):
            text = _strip_speaker_label(line, "customer")
            entries.append({"role": "CUSTOMER", "text": text, "user_id": 1})
            # Compare the spoken text, not the whole line: the line always
            # carries the "Customer:" label and could never match.
            if text.lower().rstrip(".!") in _DECLINE_PHRASES:
                customer_said_no = True
            if len(text) <= 3:
                short_customer_response = True
        elif lowered.startswith("agent"):
            text = _strip_speaker_label(line, "agent")
            last_agent_line = text
            asks_anything_else = "anything else" in text.lower()
            if asks_anything_else:
                agent_asked_anything_else = True

            # Skip a repeated "anything else" question once the customer has declined.
            if not (customer_said_no and asks_anything_else):
                entries.append({"role": "AGENT", "text": text, "user_id": 2})

    return (
        entries,
        customer_said_no,
        short_customer_response,
        agent_asked_anything_else,
        last_agent_line,
    )


def _find_agent_name(entries):
    """Return the name the first matching agent line introduces itself with, or None."""
    # NOTE(review): the pattern also matches phrases such as "I'm sorry to
    # hear that", in which case the captured "name" is not a name.
    for entry in entries:
        if entry["role"] != "AGENT":
            continue
        match = re.search(
            r"(?:my name is|this is|i'm)\s+([\w\s]+)",
            entry["text"],
            re.IGNORECASE,
        )
        if match:
            return match.group(1).strip()
    return None


def generate_log(services, problems, greetings, closing_remarks, closing_responses, agent_names_buffer, max_retries=3, max_regeneration_retries=3):
    """Generate one synthetic support conversation and return it as a JSON string.

    A service and a problem are picked at random, the model is asked to phrase
    the problem as a customer would, and then to write a full transcript. The
    transcript is parsed into entries, checked for quality, time-stamped and
    combined with random metadata.

    Relies on ``model``, ``generation_config``, ``generate_lists`` and ``fuzz``
    being available at module level.

    Args:
        services: Candidate service names; one is chosen for the conversation.
        problems: Candidate problem descriptions; one is chosen.
        greetings: Candidate agent greetings for the prompt.
        closing_remarks: Candidate agent closing remarks for the prompt.
        closing_responses: Not used here other than being forwarded when the
            log is regenerated.
        agent_names_buffer: List of agent names. If it holds fewer than two
            names it is extended in place with names from ``generate_lists()``.
        max_retries: Attempts allowed when a model call raises.
        max_regeneration_retries: Attempts allowed to refill ``agent_names_buffer``.

    Returns:
        The call log (``entries`` plus ``metadata``) serialised with
        ``json.dumps(..., indent=4)``, or ``None`` when the retries are exhausted.
    """
    service = random.choice(services)
    problem_description = random.choice(problems)

    # Conversation start time and the fixed gap between consecutive turns.
    timestamp = _random_timestamp_usec()
    response_delay = random.randint(5000000, 10000000)  # 5 to 10 seconds for all roles

    if len(agent_names_buffer) < 2:
        for _ in range(max_regeneration_retries):
            print("Regenerating agent names and greetings for more variety...")
            # The regenerated lists replace the arguments for the rest of this
            # call; `service` and `problem_description` were already chosen above.
            services, problems, greetings, closing_remarks, _closing, new_agent_names = generate_lists()
            agent_names_buffer.extend(new_agent_names)
            if len(agent_names_buffer) >= 2:
                break
        else:
            print("Max regeneration retries reached. Skipping this call log.")
            return None

    def regenerate():
        # Start over with a fresh service/problem/timestamp.
        # NOTE(review): this recursion has no depth limit of its own.
        return generate_log(
            services,
            problems,
            greetings,
            closing_remarks,
            closing_responses,
            agent_names_buffer,
            max_retries=max_retries,
            max_regeneration_retries=max_regeneration_retries,
        )

    customer_behavior = random.choice(["polite and patient", "frustrated and impatient", "angry and demanding", "confused and unsure"])

    # Generate a natural problem statement
    problem_statement_prompt = f"""
    Rewrite this issue into a natural statement a customer would say to describe their problem with their {service}: "{problem_description}" at the BEGINNING of their call. Make sure the customer provides context to their issue.
    """

    for retry_count in range(max_retries):
        try:
            problem_statement_response = model.generate_content(problem_statement_prompt)
            _raise_if_safety_blocked(problem_statement_response)
            customer_statement = problem_statement_response.text.strip()

            # --- Generate metadata (the agent name is extracted later) ---
            call_id = random.randint(1000, 999999)
            channel = random.choice(["phone", "chat"])  # Randomly chooses phone or chat
            agent_experience = random.choice(["junior", "senior", "manager", "supervisor", "trainee"])
            agent_location = random.choice(["US", "India", "EMEA"])
            customer_id = random.randint(100, 9999)
            customer_sentiment = "positive" if customer_behavior == "polite and patient" else "negative"  # Example inference
            customer_region = random.choice(["East Coast", "West Coast", "Central"])

            # Pick each sentence once so the format section and the additional
            # instructions of the prompt name the same greeting and closing.
            greeting = random.choice(greetings)
            closing_remark = random.choice(closing_remarks)

            prompt_template = f"""
            Create a customer support transcript where an Ulta Beauty agent helps a customer with their {service}.
            The conversation starts with the agent's greeting.
            Adhere strictly to this format:
            Agent: {greeting}
            Customer: {customer_statement}
            Agent: [Agent's response acknowledging the problem and starting troubleshooting]
            Customer: [Customer's response to the troubleshooting steps]
            Agent: [Further troubleshooting or resolution steps]
            ... (continue the back-and-forth as needed)
            Agent: [Resolution of the issue or escalation]
            Agent: {closing_remark}
            Customer: [Customer's natural response acknowledging resolution and ending the call]

            Additional instructions:

            *   Use "{greeting}" for the agent's greeting.
            *   Use "{closing_remark}" for the agent's closing remark.
            *   The conversation MUST include troubleshooting steps and a resolution.
            *   Focus on a single core issue the customer is experiencing
            *   The customer is "{customer_behavior}"
            """

            print(prompt_template)

            response = model.generate_content(prompt_template, generation_config=generation_config)
            # Check every candidate (not just the first one) before reading the text.
            _raise_if_safety_blocked(response)
            transcript = response.text

            (
                entries,
                customer_said_no,
                short_customer_response,
                agent_asked_anything_else,
                last_agent_line,
            ) = _parse_transcript(transcript)

            # The opening agent line must look like a greeting (fuzzy matching).
            if entries and entries[0]["role"] == "AGENT":
                agent_greeting = entries[0]["text"].lower()
                greeting_keywords = ["help", "assist", "welcome", "hello", "hi", "good morning", "good afternoon", "good evening"]
                has_valid_greeting = any(
                    fuzz.partial_ratio(keyword, agent_greeting) > 80
                    for keyword in greeting_keywords
                )
                if not has_valid_greeting:
                    print("Agent's greeting is missing or incomplete (fuzzy matching). Retrying...")
                    return regenerate()

            # Agent lines among the first three entries must not name a service yet.
            for entry in entries[:3]:
                if entry["role"] == "AGENT":
                    if any(product.lower() in entry["text"].lower() for product in services):
                        print("Agent assumed the product too early. Retrying...")
                        return regenerate()

            # Remaining retry conditions; the empty case is tested first so
            # entries[-1] is never read on an empty list.
            retry_reason = None
            if not entries:
                retry_reason = "Blank output"
            elif agent_asked_anything_else and last_agent_line == entries[-1]["text"]:
                retry_reason = "Customer didn't answer 'anything else?'"
            elif customer_said_no and "anything else" in last_agent_line.lower():
                retry_reason = "Agent asked again after customer said no"
            elif short_customer_response:
                retry_reason = "Customer response too short"

            if retry_reason is not None:
                print(f"{retry_reason}. Retrying...")
                return regenerate()

            # Add timestamps: one fixed delay per turn after the start time.
            for i, entry in enumerate(entries):
                entry["start_timestamp_usec"] = timestamp + response_delay * i

            # Extract the agent name only after the entries are populated.
            agent_name = _find_agent_name(entries)
            if agent_name is None:
                print("No agent greeting found in the transcript. Retrying...")
                return regenerate()

            # Generate a unique Agent ID (you can customize this logic)
            agent_id = random.randint(1000, 9999)

            # Add agent metadata to each agent entry
            for entry in entries:
                if entry["role"] == "AGENT":
                    entry["agent_name"] = agent_name
                    entry["agent_id"] = agent_id

            metadata = {
                "call_id": call_id,
                "channel": channel,
                "agent_experience": agent_experience,
                "agent_location": agent_location,
                "customer_id": customer_id,
                "customer_sentiment": customer_sentiment,
                "customer_region": customer_region,
                "agent_id": agent_id,
            }

            call_log = {
                "entries": entries,
                "metadata": metadata,
            }
            return json.dumps(call_log, indent=4)

        except Exception as e:
            print(f"Error generating log (attempt {retry_count + 1}/{max_retries}): {e}")
            if retry_count < max_retries - 1:
                # Exponential backoff before the next attempt.
                time.sleep(2 ** retry_count)
            else:
                print("Max retries reached. Skipping this call log.")
                return None

    # Only reached when max_retries is not positive.
    return None