in cloud9/stream.py [0:0]
def locate(u_id):
    """
    Parses location info from user (account) object (NOT Tweet object) and
    classifies it as a US city, a US state, or something else.

    Parameters:
        u_id (str): User ID in string form.
    Returns:
        (unnamed list): List of strings of length 2, ``[kind, text]``:
            ['city', <entity text>]    first GPE entity found, its text is in the US city names
            ['state', <entity text>]   first GPE entity found, its text is in the US state names
            ['other', <raw location>]  first GPE entity matches neither, or no GPE entity exists
    """
    # Authenticate
    twitter = Twython(
        stream_config.APP_KEY,
        stream_config.APP_SECRET,
        stream_config.OAUTH_TOKEN,
        stream_config.OAUTH_TOKEN_SECRET,
    )
    ids = str(u_id)  # Can be a comma-separated string list if we want to retrieve by batch; pls visit docs
    # Query twitter
    output = twitter.lookup_user(user_id=ids)
    # Get raw location info from the first returned user object
    raw_geo = str(output[0]['location'])

    # Build the name collections used to decipher the location.
    # NOTE(review): gen_dict_extract is defined elsewhere; presumably it yields every
    # value stored under the given key ('name') as a string -- confirm. Sets are used
    # so the membership tests below do not scan a full list of city names.
    gc = geonamescache.GeonamesCache()
    states = gc.get_us_states()
    cities = gc.get_cities()
    us_cities = [city for city in cities.values() if city['countrycode'] == 'US']
    us_cities_names = set(gen_dict_extract(us_cities, 'name'))
    states_names = set(gen_dict_extract(states, 'name'))

    nlp = en_core_web_sm.load()
    doc = nlp(raw_geo)

    # Loop through the entities recognized in the raw location info; the first
    # entity labelled GPE decides the result.
    for ent in doc.ents:
        # Compare by value: `is 'GPE'` tested object identity, which is not
        # guaranteed to hold for an equal string built at runtime.
        if ent.label_ != 'GPE':
            continue
        if ent.text in us_cities_names:
            return ['city', ent.text]
        if ent.text in states_names:
            return ['state', ent.text]
        print("raw_geo: " + str(raw_geo))
        return ['other', raw_geo]

    # No GPE entity at all: still honour the documented 2-element return value
    # instead of falling off the end of the function and returning None.
    return ['other', raw_geo]