def generate_segments()

in distill/segmentation/segment.py [0:0]


def generate_segments(target_dict, field_name, field_values, start_time_limit, end_time_limit, label=""):
    """
    Builds time-window segments around every log whose ``field_name`` entry matches one of ``field_values``.

    Logs are visited in the dictionary's iteration order.  A log triggers a new window when it carries
    ``field_name`` and at least one item of ``field_values`` is contained in that field's value
    (``in`` test, so substring match for strings and membership for lists).  A triggering log whose
    clientTime is not strictly later than the end of the previously accepted window is skipped, and a
    window that would begin before the previous window's end is clipped to start at that end.

    :param target_dict: A dictionary of User Ale logs assumed to be ordered by clientTime (Date/Time Objects or integers).
    :param field_name: A string naming the log field to test against ``field_values``.
    :param field_values: A list of values; a log matches when any of them is found in its ``field_name`` entry.
    :param start_time_limit: Seconds before a matching log to include in the window (scaled by 1000 for integer clientTime).
    :param end_time_limit: Seconds after a matching log to keep the window open (scaled by 1000 for integer clientTime).
    :param label: An optional string prefix; segment names are ``label`` followed by a running index starting at 0.

    :return: The result of ``create_segment`` for the collected names and windows, with each segment
             tagged as ``Segment_Type.GENERATE`` and annotated with ``field_name`` and ``field_values``.
    :raises TypeError: If a matching log's clientTime is neither an integer nor a ``datetime.datetime``.
    """

    windows = []    # (start, end) pair per accepted match
    names = []      # segment name per accepted match, parallel to ``windows``
    last_end = None  # end of the most recently accepted window

    for log in target_dict.values():
        # Only logs that actually carry the field can match
        if field_name not in log:
            continue

        # Matches value in field_values list with dict values (str or list)
        candidate = log[field_name]
        if not any(value in candidate for value in field_values):
            continue

        # Derive the raw window around the matching log's timestamp
        event_time = log['clientTime']
        if isinstance(event_time, int):
            window_start, window_end = (
                event_time - (start_time_limit * 1000),
                event_time + (end_time_limit * 1000),
            )
        elif isinstance(event_time, datetime.datetime):
            window_start, window_end = (
                event_time - datetime.timedelta(seconds=start_time_limit),
                event_time + datetime.timedelta(seconds=end_time_limit),
            )
        else:
            raise TypeError('clientTime field is not represented as an integer or datetime object')

        if last_end is not None:
            # Matches falling inside (or exactly at the end of) the previous window are dropped
            if event_time <= last_end:
                continue
            # Keep consecutive windows from overlapping
            if window_start < last_end:
                window_start = last_end

        # The running index is simply the number of windows accepted so far
        position = len(windows)
        windows.append((window_start, window_end))
        names.append(label + str(position))
        last_end = window_end

    # Create segment dictionary with create_segment, then record how each segment was produced
    result = create_segment(target_dict, names, windows)
    for seg in result:
        seg.segment_type = Segment_Type.GENERATE
        seg.generate_field_name = field_name
        seg.generate_matched_values = field_values

    return result