def create_and_populate_customers()

in genai-for-marketing/infra/aux_data/data_gen.py [0:0]


def create_and_populate_customers(num_customers: int = 50000) -> List[Dict]:
    """Generate a list of synthetic customer records.

    Every field is drawn from the module-level ``rng`` generator; the
    candidate locations and channels come from
    ``aux_data.customers_aux_data``. Dates are expressed as a number of
    days before a fixed reference day (2023-04-01) and rendered as
    ``YYYY-MM-DD`` strings.

    Args:
        num_customers: How many records to generate.

    Returns:
        One dict per customer, with ``customer_id`` running from 0 to
        ``num_customers - 1``.
    """
    from aux_data.customers_aux_data import channel, locations

    count = num_customers

    # NOTE: the order of the draws below is deliberate -- with a seeded
    # generator, reordering them would change the produced data.
    picked_locations = rng.choice(locations, size=count)
    picked_channels = rng.choice(channel, size=count)
    purchase_counts = rng.integers(1, 100, size=count)
    purchase_values = rng.integers(10, 1000, size=count)
    email_counts = rng.integers(1, 100, size=count)
    loyalty_scores = rng.integers(1, 100, size=count)
    media_follower_flags = rng.choice([False, True], size=count)

    reference_day = datetime(2023, 4, 1)
    sign_up_days_ago = rng.integers(500, 1000, size=count)
    purchase_days_ago = rng.integers(20, 100, size=count)
    # The activity offset is the purchase offset reduced by a second draw,
    # so the last activity falls after the last purchase.
    activity_days_ago = purchase_days_ago - rng.integers(10, 20, size=count)

    cart_totals = rng.uniform(0.0, 800.0, size=count)

    def _date_string(days_ago) -> str:
        """Render the day ``days_ago`` days before the reference day."""
        moment = reference_day - timedelta(days=int(days_ago))
        return moment.strftime('%Y-%m-%d')

    # Values are converted to built-in Python types (int/bool/float/str).
    return [
        {
            'customer_id': int(idx),
            'email': f'user{idx}@sample_user{idx}.sample',
            'city': picked_locations[idx]['city'],
            'state': picked_locations[idx]['state'],
            'channel': picked_channels[idx]['channel'],
            'total_purchases': int(purchase_counts[idx]),
            'total_value': int(purchase_values[idx]),
            'total_emails': int(email_counts[idx]),
            'loyalty_score': int(loyalty_scores[idx]),
            'is_media_follower': bool(media_follower_flags[idx]),
            'last_sign_up_date': _date_string(sign_up_days_ago[idx]),
            'last_purchase_date': _date_string(purchase_days_ago[idx]),
            'last_activity_date': _date_string(activity_days_ago[idx]),
            'cart_total': round(float(cart_totals[idx]), 2),
        }
        for idx in range(count)
    ]