in datasets/thelook_ecommerce/pipelines/_images/run_thelook_kub/fake.py [0:0]
def __post_init__(self):
    """Populate a synthetic session with no user and record its events.

    Session-wide fields (timestamp, session id, IP address, location,
    browser, traffic source) are generated once. A single product and a
    single event sequence are then picked; for each event type in that
    sequence the instance is updated in place and a dict snapshot of it
    is appended to ``events``.
    """
    # Fixed starting values: no user is attached and no event emitted yet.
    self.user_id = None
    self.sequence_number = 0

    location = get_address()
    self.created_at = created_at(datetime.datetime(2019, 1, 1))
    self.session_id = str(uuid.uuid4())
    self.ip_address = fake.ipv4()
    self.city = location["city"]
    self.state = location["state"]
    self.postal_code = location["postal_code"]

    # Weighted picks; each weight lines up with the option at the same index.
    self.browser = self.random_item(
        population=["IE", "Chrome", "Safari", "Firefox", "Other"],
        distribution=[0.05, 0.5, 0.2, 0.2, 0.05],
    )
    self.traffic_source = self.random_item(
        population=["Email", "Adwords", "Organic", "YouTube", "Facebook"],
        distribution=[0.45, 0.3, 0.05, 0.1, 0.1],
    )

    # Choose a gender key with equal weights, then one product from that
    # key's entry; the same product is used for every event below.
    gender = self.random_item(population=["M", "F"], distribution=[0.5, 0.5])
    candidates = PRODUCT_GENDER_DICT[gender]
    chosen_product = self.random_item(candidates)

    # Candidate event-type sequences for the session.
    session_flows = [
        ["product", "cart", "cancel"],  # cancelled browsing
        ["department", "product", "cart"],  # abandoned cart
        ["product"],  # viewed a product only
        ["department", "product"],  # viewed a department, then a product
    ]
    flow = self.random_item(population=session_flows)

    for step, event_type in enumerate(flow, start=1):
        # IDs continue from the current size of ``events`` (1-based).
        self.id = len(events) + 1
        self.event_type = event_type
        self.uri = generate_uri(event_type, chosen_product)
        self.sequence_number = step
        # Move the timestamp forward by a random whole number of minutes,
        # strictly less than half of MINUTES_IN_HOUR.
        gap = datetime.timedelta(
            minutes=random.randrange(int(MINUTES_IN_HOUR * 0.5))
        )
        self.created_at = self.created_at + gap
        events.append(dataclasses.asdict(self))