data_extraction_transformation/scripts/one_time_use_scripts/make_diagrams_per_compound.py [26:97]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Metadata describing the series. Every field takes the FIRST distinct value
    # of its column; presumably dataf holds the rows of a single signature so each
    # column has exactly one distinct value -- TODO confirm against the caller.
    # An empty dataf makes `.unique()[0]` raise IndexError.
    sig_id = dataf['signature_id'].unique()[0]
    machine_platform = dataf['machine_platform'].unique()[0]
    # Coarser platform label produced by categorize_platform (defined outside this excerpt).
    machine_platform_general = categorize_platform(machine_platform)
    framework_id = dataf['framework_id'].unique()[0]
    repository_name = dataf['repository_name'].unique()[0]
    test = dataf['test'].unique()[0]
    application = dataf['application'].unique()[0]
    suite = dataf['suite'].unique()[0]
    # NOTE(review): not referenced anywhere in the visible part of this function.
    option_collection_hash = dataf['option_collection_hash'].unique()[0]
    # Number of rows whose alert status equals each label (shown in the text box below).
    TP = len(dataf[dataf['alert_summary_status_general'] == "TP"])
    FN = len(dataf[dataf['alert_summary_status_general'] == "FN"])
    FP = len(dataf[dataf['alert_summary_status_general'] == "FP"])
    SP = len(dataf[dataf['alert_summary_status_general'] == "SP"])

    # Work on a copy so the in-place index/sort operations below leave dataf untouched.
    sample_df = dataf.copy()
    # Parse the push timestamps and use them as the (time) index of the frame.
    sample_df['push_timestamp'] = pd.to_datetime(sample_df['push_timestamp'])
    sample_df.set_index('push_timestamp', inplace=True)
    
    # Order the rows chronologically by push timestamp
    sample_df.sort_index(inplace=True)

    # Color used for each alert status. Statuses without an entry here
    # (for example 'FN') fall back to 'black' in the scatter call below.
    color_mapping = {
        'TP': 'green',
        'FP': 'red',
        'SP': 'grey',
        'TN': 'blue'
    }

    # Start a new 20x10 inch figure; all following plt.* calls draw on it.
    plt.figure(figsize=(20, 10))
    
    # One point per row (timestamp vs. measured value), colored by the row's status
    plt.scatter(sample_df.index, sample_df['value'], 
                color=[color_mapping.get(status, 'black') for status in sample_df['alert_summary_status_general']], 
                alpha=0.6, label="Measurements")
    
    # Draw a dashed vertical line at every TP, FP and SP row, in that status's color.
    # FN and TN rows get no line.
    for status in ['TP', 'FP', 'SP']:
        change_points = sample_df[sample_df['alert_summary_status_general'] == status].index
        for cp in change_points:
            plt.axvline(x=cp, color=color_mapping[status], linestyle='--', alpha=0.6)

    # Title, axis labels and horizontal grid lines only (axis='y').
    plt.title('Time Series Plot')
    plt.xlabel('Date')
    plt.ylabel(f'Test measurement values associated with signature ID {sig_id}')
    plt.grid(axis='y')

    # Axis limits: x spans exactly from the first to the last timestamp;
    # y starts at 0 and ends 20% above the largest measured value.
    plt.xlim(sample_df.index.min(), sample_df.index.max())
    plt.ylim(bottom=0, top=sample_df['value'].max() * 1.2)  # Slightly above max for visibility

    # X-axis ticks: one per Monday ('W-MON') between the first and last timestamp,
    # labels rotated 45 degrees and rendered as YYYY-MM-DD.
    plt.xticks(pd.date_range(start=sample_df.index.min(), end=sample_df.index.max(), freq='W-MON'), 
               rotation=45)
    plt.gca().xaxis.set_major_formatter(plt.matplotlib.dates.DateFormatter('%Y-%m-%d'))

    # Multi-line summary of the series metadata and the per-status row counts
    text_content = f"Framework ID: {framework_id}\n" \
                   f"Repository: {repository_name}\n" \
                   f"Platform (general): {machine_platform_general}\n" \
                   f"Platform: {machine_platform}\n" \
                   f"Test: {test}\n" \
                   f"Suite: {suite}\n" \
                   f"Application: {application}\n" \
                   f"TP count: {TP}\n" \
                   f"FP count: {FP}\n" \
                   f"FN count: {FN}\n" \
                   f"SP count: {SP}"

    # Place the summary in a rounded, semi-transparent white box. The position
    # (0.98, 0.98) is in axes coordinates (transAxes) and the text is anchored by
    # its top-right corner, so the box sits in the upper-right of the plot area.
    plt.text(0.98, 0.98, text_content, ha='right', va='top', transform=plt.gca().transAxes, fontsize=12,
             family='monospace', bbox=dict(facecolor='white', alpha=0.7, edgecolor='black', boxstyle='round,pad=0.5'))
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



data_extraction_transformation/scripts/one_time_use_scripts/make_diagrams_per_dtw_analysis.py [17:88]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    # Series metadata. Each value is the FIRST distinct entry of its column;
    # presumably all rows of dataf belong to one signature, so there is only one
    # distinct entry per column -- TODO confirm against the caller.
    # `.unique()[0]` raises IndexError when dataf is empty.
    sig_id = dataf['signature_id'].unique()[0]
    machine_platform = dataf['machine_platform'].unique()[0]
    # General platform category returned by categorize_platform (defined outside this excerpt).
    machine_platform_general = categorize_platform(machine_platform)
    framework_id = dataf['framework_id'].unique()[0]
    repository_name = dataf['repository_name'].unique()[0]
    test = dataf['test'].unique()[0]
    application = dataf['application'].unique()[0]
    suite = dataf['suite'].unique()[0]
    # NOTE(review): this value is not used in the visible part of this function.
    option_collection_hash = dataf['option_collection_hash'].unique()[0]
    # Row counts per alert status label; they are reported in the text box below.
    TP = len(dataf[dataf['alert_summary_status_general'] == "TP"])
    FN = len(dataf[dataf['alert_summary_status_general'] == "FN"])
    FP = len(dataf[dataf['alert_summary_status_general'] == "FP"])
    SP = len(dataf[dataf['alert_summary_status_general'] == "SP"])

    # Copy first: the in-place set_index/sort_index calls must not modify dataf.
    sample_df = dataf.copy()
    # Convert push_timestamp to datetimes and make it the index of the frame.
    sample_df['push_timestamp'] = pd.to_datetime(sample_df['push_timestamp'])
    sample_df.set_index('push_timestamp', inplace=True)
    
    # Sort the rows by push timestamp (ascending)
    sample_df.sort_index(inplace=True)

    # Status -> color lookup. A status missing from this dict (for example 'FN')
    # is drawn in 'black' by the scatter call below.
    color_mapping = {
        'TP': 'green',
        'FP': 'red',
        'SP': 'grey',
        'TN': 'blue'
    }

    # Create a new 20x10 inch figure; the plt.* calls below draw on it.
    plt.figure(figsize=(20, 10))
    
    # Scatter every measurement against its timestamp, colored by the row's status
    plt.scatter(sample_df.index, sample_df['value'], 
                color=[color_mapping.get(status, 'black') for status in sample_df['alert_summary_status_general']], 
                alpha=0.6, label="Measurements")
    
    # Mark each TP, FP and SP row with a dashed vertical line in the status color.
    # No line is drawn for FN or TN rows.
    for status in ['TP', 'FP', 'SP']:
        change_points = sample_df[sample_df['alert_summary_status_general'] == status].index
        for cp in change_points:
            plt.axvline(x=cp, color=color_mapping[status], linestyle='--', alpha=0.6)

    # Title, axis labels, and grid lines along the y axis only.
    plt.title('Time Series Plot')
    plt.xlabel('Date')
    plt.ylabel(f'Test measurement values associated with signature ID {sig_id}')
    plt.grid(axis='y')

    # Limits: the x axis covers exactly the first-to-last timestamp range;
    # the y axis runs from 0 to 1.2 times the maximum measured value.
    plt.xlim(sample_df.index.min(), sample_df.index.max())
    plt.ylim(bottom=0, top=sample_df['value'].max() * 1.2)  # Slightly above max for visibility

    # Weekly ticks anchored on Mondays ('W-MON') between the first and last
    # timestamp, rotated 45 degrees and formatted as YYYY-MM-DD.
    plt.xticks(pd.date_range(start=sample_df.index.min(), end=sample_df.index.max(), freq='W-MON'), 
               rotation=45)
    plt.gca().xaxis.set_major_formatter(plt.matplotlib.dates.DateFormatter('%Y-%m-%d'))

    # Text summarizing the metadata read above plus the per-status row counts
    text_content = f"Framework ID: {framework_id}\n" \
                   f"Repository: {repository_name}\n" \
                   f"Platform (general): {machine_platform_general}\n" \
                   f"Platform: {machine_platform}\n" \
                   f"Test: {test}\n" \
                   f"Suite: {suite}\n" \
                   f"Application: {application}\n" \
                   f"TP count: {TP}\n" \
                   f"FP count: {FP}\n" \
                   f"FN count: {FN}\n" \
                   f"SP count: {SP}"

    # Show the summary in a rounded white box with 70% opacity. Coordinates
    # (0.98, 0.98) are axes-relative (transAxes) and the text is right/top
    # aligned, which puts the box in the upper-right corner of the axes.
    plt.text(0.98, 0.98, text_content, ha='right', va='top', transform=plt.gca().transAxes, fontsize=12,
             family='monospace', bbox=dict(facecolor='white', alpha=0.7, edgecolor='black', boxstyle='round,pad=0.5'))
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



