in data_extraction_transformation/scripts/one_time_use_scripts/make_diagrams_per_dtw_analysis.py [0:0]
def process_sample(dataf, output_dir) -> None:
    """Plot one signature's measurements over time and save the plot as a PNG.

    Every row of ``dataf`` is drawn as a scatter point at its
    ``push_timestamp`` / ``value``, coloured by
    ``alert_summary_status_general``. Rows whose status is TP, FP or SP are
    additionally marked with a dashed vertical line. A text box in the
    top-right corner shows the sample's metadata and the TP/FP/FN/SP row
    counts.

    The image is written to ``<output_dir>/signature_<signature_id>_plot.png``
    and ``output_dir`` is created when it does not exist. The figure is always
    closed before returning so repeated calls do not accumulate open figures.

    Args:
        dataf: DataFrame with the rows to plot. Columns read here:
            ``signature_id``, ``machine_platform``, ``framework_id``,
            ``repository_name``, ``test``, ``application``, ``suite``,
            ``alert_summary_status_general``, ``push_timestamp`` and
            ``value``. The frame itself is not modified (a copy is indexed
            and sorted).
        output_dir: Directory in which the PNG is saved.

    Raises:
        KeyError: If one of the columns listed above is missing.
        IndexError: If ``dataf`` has no rows, because there is no first
            unique value to read the metadata from.
    """
    status_column = 'alert_summary_status_general'

    # Only the first unique value of each descriptive column is used.
    # NOTE(review): this presumably means the frame holds a single signature;
    # confirm with the caller.
    sig_id = dataf['signature_id'].unique()[0]
    machine_platform = dataf['machine_platform'].unique()[0]
    machine_platform_general = categorize_platform(machine_platform)
    framework_id = dataf['framework_id'].unique()[0]
    repository_name = dataf['repository_name'].unique()[0]
    test = dataf['test'].unique()[0]
    application = dataf['application'].unique()[0]
    suite = dataf['suite'].unique()[0]

    # Number of rows per alert status, shown in the metadata box.
    statuses = dataf[status_column]
    status_counts = {
        status: int((statuses == status).sum())
        for status in ('TP', 'FP', 'FN', 'SP')
    }

    # Work on a time-indexed, chronologically sorted copy.
    sample_df = dataf.copy()
    sample_df['push_timestamp'] = pd.to_datetime(sample_df['push_timestamp'])
    sample_df = sample_df.set_index('push_timestamp').sort_index()

    # Statuses without an entry here (e.g. FN) are drawn in black.
    color_mapping = {
        'TP': 'green',
        'FP': 'red',
        'SP': 'grey',
        'TN': 'blue',
    }

    fig, ax = plt.subplots(figsize=(20, 10))
    try:
        # Scatter rather than a line so points align with the vertical markers.
        point_colors = [
            color_mapping.get(status, 'black')
            for status in sample_df[status_column]
        ]
        ax.scatter(sample_df.index, sample_df['value'],
                   color=point_colors, alpha=0.6, label="Measurements")

        # Dashed vertical line at every change point (TP, FP, SP).
        for status in ('TP', 'FP', 'SP'):
            change_points = sample_df[sample_df[status_column] == status].index
            for cp in change_points:
                ax.axvline(x=cp, color=color_mapping[status],
                           linestyle='--', alpha=0.6)

        ax.set_title('Time Series Plot')
        ax.set_xlabel('Date')
        ax.set_ylabel(f'Test measurement values associated with signature ID {sig_id}')
        ax.grid(axis='y')

        first_ts = sample_df.index.min()
        last_ts = sample_df.index.max()
        ax.set_xlim(first_ts, last_ts)
        # Leave 20% headroom above the largest value for visibility.
        ax.set_ylim(bottom=0, top=sample_df['value'].max() * 1.2)

        # One tick per Monday within the plotted range, formatted as a date.
        ax.set_xticks(pd.date_range(start=first_ts, end=last_ts, freq='W-MON'))
        ax.tick_params(axis='x', labelrotation=45)
        ax.xaxis.set_major_formatter(plt.matplotlib.dates.DateFormatter('%Y-%m-%d'))

        # Metadata box anchored to the top-right corner of the axes.
        text_content = "\n".join([
            f"Framework ID: {framework_id}",
            f"Repository: {repository_name}",
            f"Platform (general): {machine_platform_general}",
            f"Platform: {machine_platform}",
            f"Test: {test}",
            f"Suite: {suite}",
            f"Application: {application}",
            f"TP count: {status_counts['TP']}",
            f"FP count: {status_counts['FP']}",
            f"FN count: {status_counts['FN']}",
            f"SP count: {status_counts['SP']}",
        ])
        ax.text(0.98, 0.98, text_content, ha='right', va='top',
                transform=ax.transAxes, fontsize=12, family='monospace',
                bbox=dict(facecolor='white', alpha=0.7, edgecolor='black',
                          boxstyle='round,pad=0.5'))

        os.makedirs(output_dir, exist_ok=True)
        fig.savefig(os.path.join(output_dir, f"signature_{sig_id}_plot.png"),
                    bbox_inches='tight')
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this, calling the function in a loop leaks one figure per call.
        plt.close(fig)