in utils/fake_data_generation/generate_data_arizona.py [0:0]
def generate_data(config, num_rows, **options):
    """Generate ``num_rows`` fake Arizona unemployment-claim records.

    Each row is first produced by ``generate_data_row`` from the configured
    fields and is then enriched in place with randomly chosen job-search
    activity, address, demographic, payroll, date, driver-license and
    account values.

    Args:
        config: Mapping-like object; its ``"fields"`` entry (default ``[]``)
            is passed to ``generate_data_row``.
        num_rows: Number of rows to generate.
        **options: Forwarded unchanged to ``generate_data_row``.

    Returns:
        The list of enriched rows. The list is also printed to stdout.

    Note:
        Every row must support item assignment and already contain a
        non-empty ``'middle_name'`` entry; its first element becomes ``'MI'``.
    """
    fields = config.get("fields", [])
    data_list = [generate_data_row(fields, **options) for _ in range(num_rows)]

    # ---- lookup tables (loop-invariant, so built once) --------------------
    # index -> [city, zip code, county]
    # NOTE(review): entry 1 spells the county 'Maricopa County' while entry 8
    # uses plain 'Maricopa'; left untouched here — confirm which is wanted.
    cities = {
        1: ['Phoenix', 85001, 'Maricopa County'],
        2: ['Apache Junction', 85119, 'Pinal'],
        3: ['Winkelman', 85192, 'Gila'],
        4: ['Ajo', 85321, 'Pima'],
        5: ['Black Canyon City', 85324, 'Yavapai'],
        6: ['Parker', 85344, 'La Paz'],
        7: ['Somerton', 85350, 'Yuma'],
        8: ['Sun City', 85351, 'Maricopa'],
        9: ['Miami', 85501, 'Gila'],
        10: ['Duncan', 85534, 'Greenlee'],
    }
    # index -> [ethnicity, race, language]
    races = {
        1: ['hispanic or latino', 'white', 'spanish'],
        2: ['hispanic or latino', 'white', 'mexican'],
        3: ['none', 'white', 'english'],
        4: ['none', 'african-american', 'english'],
        5: ['none', 'asian', 'english'],
        6: ['none', 'asian', 'hindi'],
        7: ['none', 'asian', 'mandarin'],
    }
    actions = ['applied on line', 'mailed application', 'sent resume',
               'interview was taken', 'followed up with HR']
    weekdays = ['2022-02-07', '2022-02-08', '2022-02-09', '2022-02-10',
                '2022-02-11']
    saturdays = ['2022-02-12', '2022-02-19', '2022-02-26']
    contacts = ['in_person', 'mail', 'internet']
    slots = ('1', '2', '3', '4')

    # 'rate' is multiplied by 'hour' below, i.e. it is used as an hourly rate
    # (an earlier comment called it "rate per month").
    rates = [30, 32, 34, 35]
    # hours worked in 4 weeks
    hours = [160, 180]

    eff_day_offsets = [5, 7, 10]
    months_since_work_end = [1, 2, 3]
    months_employed = [15, 12, 10]

    genders = ['F', 'M']
    license_classes = ['A', 'B', 'M', 'C', 'D']
    eye_colours = ['BRW', 'BLK', 'BLUE', 'GRN']
    hair_colours = ['BRW', 'BLK']
    hex_digits = '0123456789ABCDEF'

    # License issue dates are drawn from [2015-01-01, 2021-01-01).
    license_window_start = datetime.date(2015, 1, 1)
    license_window_days = (datetime.date(2021, 1, 1) - license_window_start).days

    # Read the clock once so every row (and every field within a row) agrees
    # even if the run crosses midnight.
    today = date.today()
    # First day of the month preceding the current one, and 20 days after it.
    statement_date = (today.replace(day=1) - timedelta(days=1)).replace(day=1)
    due_date = statement_date + timedelta(days=20)

    # ---- enrich every generated row ---------------------------------------
    # The order of random draws and of key assignment is kept stable so that
    # seeded runs and column order are unaffected.
    for row in data_list:
        # Job-search log: each slot is drawn independently (repeats allowed).
        for n in slots:
            row['action' + n] = random.choice(actions)
        for n in slots:
            row['date' + n] = random.choice(weekdays)
        row['sat_date'] = random.choice(saturdays)
        for n in slots:
            row['contact' + n] = random.choice(contacts)

        row['claimant_id'] = randint(1, 100)

        # Pick one consistent (city, zip, county) and one consistent
        # (ethnicity, race, language) combination.
        city_key = randint(1, 10)
        race_key = randint(1, 7)
        row['city'] = cities[city_key][0]
        row['zip_code'] = cities[city_key][1]
        row['county'] = cities[city_key][2]
        row['ethnicity'] = races[race_key][0]
        row['race'] = races[race_key][1]
        row['language'] = races[race_key][2]
        row['state'] = "Arizona"
        row['document_type'] = 'Unemployment Form'
        row['alien_registration_number'] = randint(1000000, 10000000000)
        row['MI'] = row['middle_name'][0]

        # Payroll figures.
        row['rate'] = random.choice(rates)
        row['hour'] = random.choice(hours)
        row['current_total'] = row['rate'] * row['hour']
        row['current_deduct'] = round(random.uniform(200, 300), 2)
        # Amount after deductions; rounded so float subtraction cannot leave
        # artifacts such as 4565.4400000000005 in a currency field.
        row['net_total'] = round(row['current_total'] - row['current_deduct'], 2)
        row['ytd_gross'] = round(random.uniform(19200, 25200), 2)
        row['ytd_deduct'] = round(random.uniform(1000, 1200), 2)
        row['ytd_net'] = round(row['ytd_gross'] - row['ytd_deduct'], 2)

        # Claim / employment dates (all plain ``date`` objects).
        row['today_date'] = today
        row['eff_date'] = today + timedelta(days=random.choice(eff_day_offsets))
        # employment end date
        work_end = today - relativedelta(months=random.choice(months_since_work_end))
        row['work_end_date'] = work_end
        # employment start date
        row['work_start_date'] = work_end - relativedelta(
            months=random.choice(months_employed))
        # Month boundaries relative to the employment end date.
        first_of_end_month = work_end.replace(day=1)
        last_of_prev_month = first_of_end_month - timedelta(days=1)
        row['last_day_of_prev_month'] = last_of_prev_month
        row['start_day_of_prev_month'] = last_of_prev_month.replace(day=1)
        # The day after the previous month's last day.
        row['payment_date'] = first_of_end_month
        row['statement_date'] = statement_date
        row['due_date'] = due_date

        # Driver license.
        letter = random.choice(string.ascii_letters)
        digits = str(randint(1000000, 100000000))
        row['driver_license'] = letter.upper() + '-' + digits
        row['gender'] = random.choice(genders)
        issued = license_window_start + datetime.timedelta(
            days=random.randrange(license_window_days))
        row['license_iss_date'] = issued
        # relativedelta clips Feb 29 to Feb 28 in a non-leap target year,
        # whereas date.replace(year + 5) would raise ValueError for it.
        row['license_end_date'] = issued + relativedelta(years=5)
        row['class'] = random.choice(license_classes)
        row['rest'] = 'NONE'
        row['wt'] = str(randint(99, 220)) + 'lb'
        height = round(random.uniform(4.5, 6.5), 2)
        row['ht'] = str(height) + 'ft'
        row['dd'] = ''.join(random.choice(hex_digits) for _ in range(16))
        row['end'] = 'NONE'
        row['eyes'] = random.choice(eye_colours)
        row['hair'] = random.choice(hair_colours)

        # Account / employer identifiers.
        account_number = randint(1000000000, 10000000000)
        row['account_no'] = str(account_number) + '-' + str(randint(1, 10))
        row['employee_id'] = randint(10000, 100000)
        row['check_no'] = randint(10000, 100000)

    # NOTE(review): dumps every generated row to stdout; kept because callers
    # may rely on it, but it looks like leftover debug output.
    print(data_list)
    return data_list