# Simulated Visit Generator

The function below generates a simulated visit to a web site. A visit has a collection of events. There are 3 types of events: Page Views, Add Item to Cart, and Purchase. 

All visits have page views. Some visits also have Add to Cart events, and some of the visits with Add to Cart events end with a purchase.

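The OpenAPI schema for a Visit is shown below. Note that the generator in the next cell nests each entry of `events` under an `event` key (`{"event": {...}}`), a wrapper that this schema does not show.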

```
openapi: 3.0.0
info:
  title: Visit Schema API
  version: 1.0.0
  description: Schema for representing a visit to a website, including page views, adding items to a cart, and purchases.
paths: {}
components:
  schemas:
    Visit:
      type: object
      properties:
        session_id:
          type: string
          example: "SID-1234"
          description: "A unique identifier for the user's session."
        user_id:
          type: string
          example: "UID-5678"
          description: "A unique identifier for the user visiting the website."
        device_type:
          type: string
          enum: [desktop, mobile, tablet]
          example: "desktop"
          description: "The type of device used by the user."
        geolocation:
          type: string
          example: "37.7749,-122.4194"
          description: "The geolocation of the user in latitude,longitude format."
        user_agent:
          type: string
          example: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
          description: "The user agent string of the browser/device used by the user."
        events:
          type: array
          items:
            $ref: '#/components/schemas/Event'
          description: "List of events during the user's visit."

    Event:
      type: object
      properties:
        event_type:
          type: string
          enum: [page_view, add_item_to_cart, purchase]
          example: "page_view"
          description: "The type of event that occurred."
        timestamp:
          type: string
          format: date-time
          example: "2023-08-10T12:34:56Z"
          description: "The exact time when the event occurred."
        details:
          type: object
          oneOf:
            - $ref: '#/components/schemas/PageViewDetails'
            - $ref: '#/components/schemas/AddItemToCartDetails'
            - $ref: '#/components/schemas/PurchaseDetails'
          description: "Specific details of the event based on its type."

    PageViewDetails:
      type: object
      properties:
        page_url:
          type: string
          example: "https://example.com/products"
          description: "The URL of the webpage that was viewed."
        referrer_url:
          type: string
          nullable: true
          example: "https://google.com"
          description: "The URL of the referrer page that led to this page view, or null if none."

    AddItemToCartDetails:
      type: object
      properties:
        product_id:
          type: string
          example: "HDW-001"
          description: "The unique identifier of the product added to the cart."
        product_name:
          type: string
          example: "Laptop X200"
          description: "The name of the product added to the cart."
        category:
          type: string
          enum: [hardware, software, peripherals]
          example: "hardware"
          description: "The category of the product added to the cart."
        price:
          type: number
          format: float
          example: 999.99
          description: "The price of the product added to the cart."
        quantity:
          type: integer
          example: 2
          description: "The quantity of the product added to the cart."

    PurchaseDetails:
      type: object
      properties:
        order_id:
          type: string
          example: "ORD-4321"
          description: "A unique identifier for the order."
        amount:
          type: number
          format: float
          example: 1999.98
          description: "The total amount of the purchase."
        currency:
          type: string
          example: "USD"
          description: "The currency used for the purchase."
        items:
          type: array
          items:
            $ref: '#/components/schemas/PurchaseItem'
          description: "A list of items purchased in this order."

    PurchaseItem:
      type: object
      properties:
        product_id:
          type: string
          example: "HDW-001"
          description: "The unique identifier of the product purchased."
        product_name:
          type: string
          example: "Laptop X200"
          description: "The name of the product purchased."
        category:
          type: string
          enum: [hardware, software, peripherals]
          example: "hardware"
          description: "The category of the product purchased."
        price:
          type: number
          format: float
          example: 999.99
          description: "The price of the product purchased."
        quantity:
          type: integer
          example: 2
          description: "The quantity of the product purchased."

```

In [None]:
import random
from datetime import datetime
import json

def generate_visit(custom_timestamp=None):
    """Generate one simulated website visit.

    A visit always starts with a page view of the home page followed by a
    page view of the products page, then 0-4 further page views.  With
    probability 0.5 the visitor adds 1-3 products to the cart, and a visit
    with cart items ends in a purchase of exactly those items with
    probability 0.5.

    Args:
        custom_timestamp: Timestamp string stamped on every event of the
            visit.  When omitted (or falsy), each event is stamped with the
            current local time in ISO 8601 format instead.

    Returns:
        A JSON-serializable dict with the keys ``session_id``, ``user_id``,
        ``device_type``, ``geolocation``, ``user_agent`` and ``events``.
        Every entry of ``events`` has the form
        ``{"event": {"event_type": ..., "timestamp": ..., "details": {...}}}``.
        The ``amount`` of a purchase is rounded to 2 decimal places.
    """
    # Product catalog keyed by category; "popularity" is the relative weight
    # used when picking a product inside its category.
    products = {
        "hardware": [
            {"product_id": "HDW-001", "name": "Laptop X200", "price": 999.99, "popularity": 0.3},
            {"product_id": "HDW-002", "name": "Desktop Z500", "price": 1299.99, "popularity": 0.2},
            {"product_id": "HDW-003", "name": "Gaming PC Y900", "price": 1899.99, "popularity": 0.1},
            {"product_id": "HDW-004", "name": "Ultrabook A400", "price": 1199.99, "popularity": 0.15},
            {"product_id": "HDW-005", "name": "Workstation Pro 9000", "price": 2599.99, "popularity": 0.05},
            {"product_id": "HDW-006", "name": "Mini PC Cube", "price": 699.99, "popularity": 0.2}
        ],
        "software": [
            {"product_id": "SFT-001", "name": "Office Suite Pro", "price": 199.99, "popularity": 0.25},
            {"product_id": "SFT-002", "name": "Antivirus Shield", "price": 49.99, "popularity": 0.3},
            {"product_id": "SFT-003", "name": "Photo Editor Pro", "price": 79.99, "popularity": 0.15},
            {"product_id": "SFT-004", "name": "Project Manager Plus", "price": 299.99, "popularity": 0.1},
            {"product_id": "SFT-005", "name": "Video Editor Pro", "price": 149.99, "popularity": 0.1},
            {"product_id": "SFT-006", "name": "Music Studio 2024", "price": 89.99, "popularity": 0.1}
        ],
        "peripherals": [
            {"product_id": "PER-001", "name": "Wireless Mouse", "price": 29.99, "popularity": 0.4},
            {"product_id": "PER-002", "name": "Mechanical Keyboard", "price": 89.99, "popularity": 0.3},
            {"product_id": "PER-003", "name": "27\" 4K Monitor", "price": 399.99, "popularity": 0.1},
            {"product_id": "PER-004", "name": "USB-C Docking Station", "price": 129.99, "popularity": 0.05},
            {"product_id": "PER-005", "name": "Noise Cancelling Headphones", "price": 199.99, "popularity": 0.1},
            {"product_id": "PER-006", "name": "Webcam HD 1080p", "price": 49.99, "popularity": 0.05}
        ]
    }

    def generate_timestamp():
        """Return the caller-supplied timestamp, or the current time in ISO 8601."""
        return custom_timestamp if custom_timestamp else datetime.now().isoformat()

    def select_random_product():
        """Pick a category uniformly, then a product weighted by its popularity."""
        category = random.choice(list(products.keys()))
        category_products = products[category]
        weights = [p["popularity"] for p in category_products]
        product = random.choices(category_products, weights=weights)[0]
        return product, category

    def make_event(event_type, details):
        """Wrap event details in the envelope shared by every entry of ``events``."""
        return {
            "event": {
                "event_type": event_type,
                "timestamp": generate_timestamp(),
                "details": details,
            }
        }

    # Base session details.  NOTE: the order of the random draws in this
    # function is significant; changing it changes the output for a given seed.
    session = {
        "session_id": f"SID-{random.randint(1000, 9999)}",
        "user_id": f"UID-{random.randint(1000, 9999)}",
        "device_type": random.choices(
            ["mobile", "desktop", "tablet"], weights=[0.6, 0.3, 0.1]
        )[0],
        "geolocation": f"{random.uniform(-90, 90):.6f},{random.uniform(-180, 180):.6f}",
        "user_agent": random.choice([
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
            "Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1",
            "Mozilla/5.0 (Linux; Android 10; SM-G973F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Mobile Safari/537.36"
        ]),
        "events": []
    }
    events = session["events"]

    # The first page view is always the home page, reached without a referrer.
    events.append(make_event("page_view", {
        "page_url": "https://example.com/home",
        "referrer_url": None,
    }))

    # The second page view is always the products page, reached from home.
    events.append(make_event("page_view", {
        "page_url": "https://example.com/products",
        "referrer_url": "https://example.com/home",
    }))

    # Between 0 and 4 additional page views: 90% cart, 5% each for about and
    # contact.  The referrer is drawn independently of the page.
    for _ in range(random.randint(0, 4)):
        page_url = random.choices(
            [
                "https://example.com/cart",
                "https://example.com/about",
                "https://example.com/contact"
            ],
            weights=[0.9, 0.05, 0.05],
        )[0]
        referrer_url = random.choice([
            "https://google.com",
            "https://example.com/home",
            "https://example.com/products"
        ])
        events.append(make_event("page_view", {
            "page_url": page_url,
            "referrer_url": referrer_url,
        }))

    # 50% of visits add 1-3 line items to the cart.
    cart_items = []
    if random.random() < 0.5:
        for _ in range(random.randint(1, 3)):
            product, category = select_random_product()
            item = {
                "product_id": product["product_id"],
                "product_name": product["name"],
                "category": category,
                "price": product["price"],
                "quantity": random.randint(1, 5),
            }
            events.append(make_event("add_item_to_cart", item))
            cart_items.append(item)

    # Only visits with cart items can purchase; half of them do.
    if cart_items and random.random() < 0.5:
        # Round to cents so binary floating-point noise (e.g.
        # 149.97000000000003) does not leak into the serialized amount.
        total_amount = round(
            sum(item["price"] * item["quantity"] for item in cart_items), 2
        )
        events.append(make_event("purchase", {
            "order_id": f"ORD-{random.randint(1000, 9999)}",
            "amount": total_amount,
            "currency": "USD",
            # Copy each line item so the purchase does not alias the dicts
            # stored in the add_item_to_cart events.
            "items": [dict(item) for item in cart_items],
        }))

    return session

# Example usage
# Build a single sample visit pinned to a fixed timestamp.
visit = generate_visit(custom_timestamp="2024-08-12T14:30:00")

# Serialize it with 4-space indentation and show the result.
visit_json = json.dumps(visit, indent=4)
print(visit_json)

# Generate files with Sample Visits

The function below generates files of simulated visits. One file is written for each day, starting at the specified start date and continuing for the specified number of days. The number of visits per day is chosen at random within the specified range.

In [None]:
import random
from datetime import datetime, timedelta
import json

def generate_visits_for_days(start_date, num_days, visits_per_day_range, seed=None, time_increment_minutes=10):
    """Write one JSON Lines file of simulated visits per day.

    For each of ``num_days`` consecutive days starting at ``start_date``, a
    file named ``visits-YYYY-MM-DD.jsonl`` is created (overwriting any
    existing file) in the current working directory.  Each line is one visit
    produced by ``generate_visit`` and serialized as JSON.  The first visit of
    a day is stamped 09:00 and each later visit is stamped one step after the
    previous one.

    Args:
        start_date: First day to generate; a ``date`` or ``datetime`` (only
            the date part is used).
        num_days: Number of consecutive days (and files) to generate.
        visits_per_day_range: ``(min, max)`` inclusive bounds for the random
            number of visits generated per day.
        seed: Optional seed for the ``random`` module, for reproducible output.
        time_increment_minutes: Requested spacing between consecutive visits.
            It is treated as an upper bound: when that spacing would push a
            day's visits past midnight, a smaller whole-second spacing is used
            for that day so every timestamp stays on the file's own date.
    """
    if seed is not None:
        random.seed(seed)

    requested_step = timedelta(minutes=time_increment_minutes)
    current_date = start_date

    for _ in range(num_days):
        midnight = datetime.combine(current_date, datetime.min.time())
        # Start time for the day's visits (9:00 AM).
        current_time = midnight + timedelta(hours=9)
        # Latest timestamp that still belongs to this day.
        last_allowed = midnight + timedelta(days=1) - timedelta(seconds=1)

        # Randomly determine the number of visits for this day within the specified range
        num_visits_per_day = random.randint(visits_per_day_range[0], visits_per_day_range[1])

        # Shrink the step when the requested one would spill into the next
        # day (at 10 minutes only 90 visits fit between 09:00 and midnight).
        step = requested_step
        if num_visits_per_day > 1:
            available_seconds = (last_allowed - current_time).total_seconds()
            max_step = timedelta(seconds=int(available_seconds // (num_visits_per_day - 1)))
            step = min(requested_step, max_step)

        # Generate a file name based on the current date
        file_name = f'visits-{current_date.strftime("%Y-%m-%d")}.jsonl'

        # Generate visits for the current day, one JSON document per line.
        with open(file_name, 'w', encoding='utf-8') as f:
            for _ in range(num_visits_per_day):
                visit = generate_visit(current_time.isoformat())
                f.write(json.dumps(visit) + '\n')

                # Advance the clock for the next visit
                current_time += step

        # Print a message indicating the file creation
        print(f'Generated file: {file_name} with {num_visits_per_day} visits')

        # Move to the next day
        current_date += timedelta(days=1)

# Example usage:

# First day for which a visits file is generated.
START_DATE = datetime(2024, 7, 1)
# How many consecutive days (one file each) to generate.
NUM_DAYS = 60
# Inclusive (min, max) bounds for the number of visits per day.
VISITS_PER_DAY_RANGE = (100, 199)
# Seed for reproducible output; set to None for a different run each time.
SEED = 42
# Requested spacing, in minutes, between consecutive visits.
TIME_INCREMENT_MINUTES = 10

generate_visits_for_days(
    START_DATE,
    NUM_DAYS,
    VISITS_PER_DAY_RANGE,
    seed=SEED,
    time_increment_minutes=TIME_INCREMENT_MINUTES,
)

print("Done")

# List the Generated Files

In [None]:
# Shell escape: list the current directory, where the visits-*.jsonl files are written.
! ls