in utils/fake_data_generation/generate_data_california.py [0:0]
def generate_data(config, num_rows, **options):
    """Generate fake California unemployment-claim records.

    Builds ``num_rows`` base rows with ``generate_data_row`` and then adds or
    overwrites location, demographic, payroll, employment-date, statement and
    driver-licence fields on each row with randomly chosen values.

    Args:
        config: Mapping whose optional ``"fields"`` entry (default ``[]``) is
            passed to ``generate_data_row``.
        num_rows: Number of rows to generate.
        **options: Forwarded unchanged to ``generate_data_row``.

    Returns:
        The list of row dicts. The list is also printed to stdout before
        being returned.
    """
    fields = config.get("fields", [])
    data_list = [generate_data_row(fields, **options) for _ in range(num_rows)]

    # Lookup tables: key -> [city, zip code, county].
    cities = {
        1: ['Acampo', 95220, 'San Joaquin'],
        2: ['Bard', 92222, 'Riverside'],
        3: ['Calexico', 92231, 'Imperial'],
        4: ['California City', 93505, 'Kern'],
        5: ['Dana Point', 92629, 'Orange'],
        6: ['Esparto', 95627, 'Yolo'],
        7: ['Finley', 95435, 'Lake'],
        8: ['Galt', 95632, 'Sacramento'],
        9: ['Heber', 92249, 'Imperial'],
        10: ['Inverness', 94937, 'Marin'],
    }
    # key -> [ethnicity, race, language].
    races = {
        1: ['hispanic or latino', 'white', 'spanish'],
        2: ['hispanic or latino', 'white', 'mexican'],
        3: ['none', 'white', 'english'],
        4: ['none', 'african-american', 'english'],
        5: ['none', 'asian', 'english'],
        6: ['none', 'asian', 'hindi'],
        7: ['none', 'asian', 'mandarin'],
    }

    # Loop-invariant choice pools, hoisted out of the per-row loop.
    rate_choices = [30, 32, 34, 35]
    hour_choices = [160, 180]            # hours worked in 4 weeks
    eff_day_offsets = [5, 7, 10]
    months_since_end = [1, 2, 3]
    months_employed = [15, 12, 10]
    genders = ['F', 'M']
    license_classes = ['A', 'B', 'M', 'C', 'D']
    eye_colours = ['BRW', 'BLK', 'BLUE', 'GRN']
    hair_colours = ['BRW', 'BLK']
    hex_digits = '0123456789ABCDEF'

    # Licence issue dates are drawn from [2015-01-01, 2021-01-01).
    license_window_start = date(2015, 1, 1)
    license_window_days = (date(2021, 1, 1) - license_window_start).days

    # Calendar values that are identical for every row of this call.
    today = date.today()
    first_of_this_month = today.replace(day=1)
    last_of_prev_month = first_of_this_month - timedelta(days=1)
    # First day of the month preceding today.
    statement_date = first_of_this_month - timedelta(days=last_of_prev_month.day)
    due_date = statement_date + timedelta(days=20)

    # NOTE: the order of random draws and of key insertion below mirrors the
    # original implementation so seeded runs and column order are unchanged.
    for row in data_list:
        row['claimant_id'] = randint(1, 100)
        city_key = randint(1, 10)
        race_key = randint(1, 7)
        row['city'], row['zip_code'], row['county'] = cities[city_key]
        row['ethnicity'], row['race'], row['language'] = races[race_key]
        row['state'] = "California"
        row['document_type'] = 'Unemployment Form'
        row['alien_registration_number'] = randint(1000000, 10000000)
        row['pan_number'] = (
            ''.join(random.choice(string.ascii_letters) for _ in range(3))
            + str(randint(1000000000, 10000000000))
        )
        # Middle initial; falls back to '' instead of raising when the base
        # row has no (or an empty) middle name.
        row['MI'] = (row.get('middle_name') or '')[:1]

        # --- pay figures ---
        row['rate'] = random.choice(rate_choices)
        row['hour'] = random.choice(hour_choices)
        row['hour_week'] = row['hour'] / 4
        row['rate_week'] = row['rate'] / 4
        # NOTE(review): this is (rate/4) * (hour/4) == current_total / 16,
        # not current_total / 4 -- kept as-is; confirm the intended meaning.
        row['cost_week'] = row['rate_week'] * row['hour_week']
        row['current_total'] = row['rate'] * row['hour']
        row['current_deduct'] = round(random.uniform(200, 300), 2)
        # Total after deductions.
        row['net_total'] = row['current_total'] - row['current_deduct']
        row['ytd_gross'] = round(random.uniform(19200, 25200), 2)
        row['ytd_deduct'] = round(random.uniform(1000, 1200), 2)
        row['ytd_net'] = row['ytd_gross'] - row['ytd_deduct']
        row['paid'] = 'monthly'

        # --- employment dates (all datetime.date objects) ---
        row['today_date'] = today
        row['eff_date'] = today + relativedelta(days=random.choice(eff_day_offsets))
        # Employment end date: 1-3 months before today.
        work_end = today - relativedelta(months=random.choice(months_since_end))
        row['work_end_date'] = work_end
        # Employment start date: 10, 12 or 15 months before the end date.
        work_start = work_end - relativedelta(months=random.choice(months_employed))
        row['work_start_date'] = work_start
        # Date subtraction yields whole days only, so no seconds term is needed.
        row['difference_in_years'] = round((work_end - work_start).days / 365.2425, 2)
        row['num_months'] = (
            (work_end.year - work_start.year) * 12
            + (work_end.month - work_start.month)
        )
        # Last and first day of the month preceding the employment end date.
        first_of_end_month = work_end.replace(day=1)
        prev_month_last = first_of_end_month - timedelta(days=1)
        row['last_day_of_prev_month'] = prev_month_last
        row['start_day_of_prev_month'] = (
            first_of_end_month - timedelta(days=prev_month_last.day)
        )
        row['payment_date'] = prev_month_last + timedelta(days=1)
        row['statement_date'] = statement_date
        row['due_date'] = due_date

        # --- driver licence ---
        license_letter = random.choice(string.ascii_letters)
        license_digits = str(randint(1000000, 100000000))
        row['driver_license'] = license_letter.upper() + '-' + license_digits
        row['gender'] = random.choice(genders)
        issued = license_window_start + timedelta(
            days=random.randrange(license_window_days)
        )
        row['license_iss_date'] = issued
        # relativedelta clamps Feb 29 to Feb 28 in a non-leap target year;
        # date.replace(year + 5) raised ValueError for those issue dates.
        row['license_end_date'] = issued + relativedelta(years=5)
        row['class'] = random.choice(license_classes)
        row['rest'] = 'NONE'
        row['wt'] = str(randint(99, 220)) + 'lb'
        height = round(random.uniform(4.5, 6.5), 2)
        row['ht'] = str(height) + 'ft'
        row['dd'] = ''.join(random.choice(hex_digits) for _ in range(16))
        row['end'] = 'NONE'
        row['eyes'] = random.choice(eye_colours)
        row['hair'] = random.choice(hair_colours)

        # --- account / payroll identifiers ---
        account_base = randint(1000000000, 10000000000)
        row['account_no'] = str(account_base) + '-' + str(randint(1, 10))
        row['employee_id'] = randint(10000, 100000)
        row['check_no'] = randint(10000, 100000)

    # Kept from the original: the generated rows are echoed to stdout.
    print(data_list)
    return data_list