def locate()

in cloud9/stream.py [0:0]


def locate(u_id):
    """
    Classify the profile location of a Twitter user (account) object.

    The location is read from the user object, NOT from a Tweet object.

    Parameters:
    u_id (str): User ID in string form.

    Returns:
    (unnamed list): List of strings of length 2, ``[kind, text]``:
        ``['city', name]``  - first GPE entity matches a US city name,
        ``['state', name]`` - first GPE entity matches a US state name,
        ``['other', raw]``  - anything else; ``raw`` is the unparsed
                              location string from the user object.
    """
    # Authenticate
    twitter = Twython(
        stream_config.APP_KEY,
        stream_config.APP_SECRET,
        stream_config.OAUTH_TOKEN,
        stream_config.OAUTH_TOKEN_SECRET,
    )
    # Can be a comma-separated string list if we want to retrieve by batch;
    # pls visit docs
    ids = str(u_id)

    # Query twitter
    output = twitter.lookup_user(user_id=ids)

    # Get raw location info from user object. str() means a missing (None)
    # location becomes the literal text 'None'.
    raw_geo = str(output[0]['location'])

    # Build the reference lists of US city and US state names
    gc = geonamescache.GeonamesCache()
    states = gc.get_us_states()
    cities = gc.get_cities()
    us_cities = [city for city in cities.values() if city['countrycode'] == 'US']
    us_cities_names = list(gen_dict_extract(us_cities, 'name'))
    states_names = list(gen_dict_extract(states, 'name'))

    # Run named-entity recognition over the raw location text
    nlp = en_core_web_sm.load()
    doc = nlp(raw_geo)

    # The first entity labelled 'GPE' decides the result; entities with any
    # other label are skipped.
    for ent in doc.ents:
        # Compare by value: `is` tests object identity and is not a reliable
        # way to compare a runtime string against a literal.
        if ent.label_ == 'GPE':
            if ent.text in us_cities_names:
                return ['city', ent.text]
            elif ent.text in states_names:
                return ['state', ent.text]
            else:
                print("raw_geo: " + str(raw_geo))
                return ['other', raw_geo]

    # No GPE entity was found in the location text
    return ['other', raw_geo]