def get_model_evaluations_infos()

in apps/cloudwatch-dashboard/lambdas/get-predictions/handler.py [0:0]


def get_model_evaluations_infos(model_name: str, width, height, tag: str) -> str:
    """
    Render the evaluation results of a model against one sensor tag as SVG.

    The figure stacks four panels sharing the same x-axis range:

    1. the raw time series of ``tag`` over the evaluation period,
    2. the ranges flagged in the model's ``predicted_ranges`` metrics,
    3. a rolling sum (window of ``60*24`` rows) of the converted predictions,
    4. one bar per predicted range showing its duration in hours.

    Args:
        model_name: Name passed to ``client.describe_model``.
        width: Target figure width; divided by the module-level ``dpi`` (and
            scaled by 1.25) to get the matplotlib figure width.
        height: Target figure height; divided by the module-level ``dpi``.
        tag: Column name to read from every CSV file found under the
            dataset's S3 ingestion prefix.

    Returns:
        The SVG document as a string, with the ``DejaVu Sans`` font name
        replaced by ``Amazon Ember``.

    Raises:
        ValueError: If none of the CSV files under the ingestion prefix
            yielded data for ``tag``.

    Note:
        ``client``, ``dpi`` and the plotting helpers are module-level names
        defined outside this function; ``client`` is presumably a Lookout
        for Equipment boto3 client -- confirm against the module header.
    """
    model_response = client.describe_model(ModelName=model_name)
    predictions = json.loads(model_response['ModelMetrics'])['predicted_ranges']

    # Timezone information is dropped so these bounds can be used to slice
    # the index parsed from the CSV files below.
    start_date = pd.to_datetime(model_response['EvaluationDataStartTime']).tz_localize(None)
    end_date = pd.to_datetime(model_response['EvaluationDataEndTime']).tz_localize(None)

    ranges_df = pd.DataFrame(predictions)
    predictions_df = convert_ranges(ranges_df, start_date, end_date)

    # Duration of each predicted range, expressed in hours:
    events_df = ranges_df.copy()
    elapsed = pd.to_datetime(events_df['end']) - pd.to_datetime(events_df['start'])
    events_df['duration'] = elapsed.dt.total_seconds() / 3600

    # Locate the CSV files that were ingested into the model's dataset:
    dataset_response = client.describe_dataset(DatasetName=model_response['DatasetName'])
    s3_config = dataset_response['IngestionInputConfiguration']['S3InputConfiguration']
    bucket = s3_config['Bucket']
    prefix = s3_config['Prefix']

    s3 = boto3.client('s3')
    tag_frames = []
    for file_key in get_matching_s3_keys(bucket=bucket, prefix=prefix, suffix=('.csv', '.CSV')):
        print(f'Looking for {tag} data in file {file_key}...')
        try:
            csvfile = s3.get_object(Bucket=bucket, Key=file_key)
            file_df = pd.read_csv(csvfile['Body'], usecols=['Timestamp', tag])
            file_df['Timestamp'] = pd.to_datetime(file_df['Timestamp'])

            # Sort before slicing: label-based slicing on an unsorted
            # DatetimeIndex can raise KeyError.
            file_df = file_df.set_index('Timestamp').sort_index()
            tag_frames.append(file_df.loc[start_date:end_date])

        except ValueError as error:
            # pandas raises ValueError when `usecols` names a column that is
            # missing from the file: best effort, move on to the next file.
            print(f'Tag {tag} not found in file {file_key} ({error})')

        except Exception as error:
            # Still best effort, but do not disguise other failures
            # (e.g. S3 access errors) as a missing tag.
            print(f'Could not read {tag} data from file {file_key}: {error!r}')

    if not tag_frames:
        # pd.concat() on an empty list fails with an unhelpful
        # "No objects to concatenate"; report the actual problem instead.
        raise ValueError(
            f'No data found for tag {tag} in s3://{bucket}/{prefix}'
        )

    timeseries_df = pd.concat(tag_frames, axis='index')

    # Prepare the figure:
    colors = set_aws_stylesheet()
    fig = plt.figure(figsize=(width*1.25/dpi, height/dpi), dpi=dpi)
    try:
        gs = gridspec.GridSpec(nrows=4, ncols=1, height_ratios=[8, 1.5, 5, 5], hspace=0.5)

        # First section: a line plot of the selected time series:
        ax1 = fig.add_subplot(gs[0])
        ax1.plot(timeseries_df)
        ax1.set_title(f'Tag: {tag}')
        shared_xlim = ax1.get_xlim()

        # Second section: the events detected by Lookout for Equipment:
        ax2 = fig.add_subplot(gs[1])
        plot_ranges(predictions_df, 'Detected events', colors[5], ax2)
        ax2.set_xlim(shared_xlim)

        # Third section: the number of detected events per day
        # (the 60*24 window presumably assumes one row per minute):
        ax3 = fig.add_subplot(gs[2])
        ax3.plot(predictions_df.rolling(60*24).sum())
        ax3.set_xlim(shared_xlim)
        ax3.axes.get_xaxis().set_ticks([])
        ax3.set_xlabel('Number of daily event detected', fontsize=12)

        # Fourth section: the duration of each detected event:
        ax4 = fig.add_subplot(gs[3])
        ax4.bar(pd.to_datetime(events_df['start']), events_df['duration'], color=colors[9], width=2.0, alpha=0.5)
        ax4.set_xlim(shared_xlim)
        ax4.axes.get_xaxis().set_ticks([])
        ax4.set_xlabel('Average duration of detected events', fontsize=12)

        # Save this image to an SVG string:
        svg_io = StringIO()
        fig.savefig(svg_io, format="svg", bbox_inches='tight')

    finally:
        # pyplot keeps a reference to every figure it creates; without this
        # each invocation in a long-lived process would leak one figure.
        plt.close(fig)

    return svg_io.getvalue().replace('DejaVu Sans', 'Amazon Ember')