Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
200 changes: 116 additions & 84 deletions plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,99 +9,131 @@
python3 patatrack-scripts/plot.py scan/reduced_hlt_{ecal,hcal,pixel}_w7900.csv --title Labels --labels ECAL HCAL Pixel -o OUTPUT
"""

import re
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
import sys
import os
import argparse

# Create the parser
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawTextHelpFormatter)

# Optional arguments
parser.add_argument('-t', '--title', type=str, default="",
help="Title of the legend")
parser.add_argument('-o', '--output', type=str, default="throughput_vs_threads",
help="Base filename for output files (PNG/PDF)")
parser.add_argument('-x', '--x-axis', default='CPU threads per job',
help='Horizontal axis label.')
parser.add_argument('--labels', nargs='+', default=None,
help="Labels to show in the legend instead of the CSV file names")

# Positional arguments (CSV files)
parser.add_argument('files', nargs='+', help="CSV files to process")

# Parse arguments
args = parser.parse_args()

# Access the values
title = args.title
filename = args.output
files = args.files
labels = args.labels
if labels is not None:
assert len(files) == len(labels), "The number of labels must match the number of input CSV files. Each label corresponds to one file, following the order they are provided."

# Dictionary to store per-file datasets
datasets = {}

for file in files:
# Read CSV and clean column names
df = pd.read_csv(file)
df.columns = df.columns.str.strip()
def make_unique_label(new_label, label_list):
    """
    Return a label derived from `new_label` that does not clash with `label_list`.

    When `new_label` is not in `label_list` it is returned unchanged.
    Otherwise a numeric suffix is appended: the smallest integer, starting
    from 2, that is not already used as the suffix of an existing
    `<new_label><digits>` entry in `label_list`.
    """
    if new_label in label_list:
        # Matches the bare label as well as the label followed by digits;
        # only the digit-suffixed entries contribute a used index.
        suffixed = re.compile(rf"^{re.escape(new_label)}(\d+)?$")
        hits = (suffixed.match(existing) for existing in label_list)
        taken = {int(hit.group(1)) for hit in hits if hit and hit.group(1)}

        # Numbering starts at 2: the unsuffixed label is implicitly number 1.
        candidate = 2
        while candidate in taken:
            candidate += 1

        return f"{new_label}{candidate}"

    return new_label

# Keep only relevant columns (ignore "jobs")
df = df[["CPU threads per job", "average throughput (ev/s)"]]
def plot(title, filename, files, labels, x_axis=None):
    """
    Plot the average throughput versus CPU threads per job for a set of CSV files.

    Each CSV file must provide the columns "CPU threads per job" and
    "average throughput (ev/s)" (surrounding whitespace in the header is
    stripped). For every file the throughput is grouped by thread count and
    drawn as a dashed line through the means, with error bars showing the
    standard deviation. The figure is written to `<filename>.png` and
    `<filename>.pdf`.

    Parameters:
        title:    legend title; an empty value produces a legend without title.
        filename: base name (without extension) of the output files.
        files:    paths of the CSV files to plot.
        labels:   legend labels, one per entry of `files` and in the same
                  order, or None to derive the labels from the file names.
        x_axis:   horizontal axis label. When None, the `x_axis` attribute of
                  a module-level `args` namespace is used if one exists,
                  otherwise "CPU threads per job".

    Raises:
        KeyError:   if a CSV file lacks one of the required columns.
        IndexError: if `labels` is given but shorter than `files`.
    """
    if x_axis is None:
        # The axis label used to be read straight from the module-level `args`
        # object, which only exists when this file is run as a script. Keep
        # honouring it when present (so the -x option still works), and fall
        # back to the option's default so the function can also be imported.
        x_axis = getattr(globals().get("args"), "x_axis", "CPU threads per job")

    # Dictionary to store per-file datasets, keyed by legend label
    datasets = {}

    # Enumerate instead of files.index(file): index() returns the first match,
    # so a path given twice would otherwise reuse the first label.
    for position, file in enumerate(files):
        # Read CSV and clean column names
        df = pd.read_csv(file)
        df.columns = df.columns.str.strip()

        # Keep only relevant columns (ignore "jobs")
        df = df[["CPU threads per job", "average throughput (ev/s)"]]

        # Group by CPU threads per job and compute mean & std
        grouped = (
            df.groupby("CPU threads per job")["average throughput (ev/s)"]
            .agg(['mean', 'std'])
            .reset_index()
            .sort_values("CPU threads per job")
        )

        # Create a nicer label: remove extension and replace underscores
        label = os.path.basename(file) if labels is None else labels[position]
        if label.endswith(".csv"):
            label = label[:-4]
        # Identical labels would overwrite each other in the dictionary
        label = make_unique_label(label.replace("_", " "), datasets.keys())
        datasets[label] = grouped

    # Plotting
    fig, ax = plt.subplots(figsize=(10, 6))
    for label, df in datasets.items():
        # Draw the connecting line first and reuse its colour for the markers.
        # The "std" column is left as computed; no fill value is substituted
        # for groups that have a single entry.
        color = ax.plot(df["CPU threads per job"], df["mean"], '--', linewidth=1.5)[0].get_color()
        ax.errorbar(df["CPU threads per job"], df["mean"], yerr=df["std"],
                    label=label, marker='o', markersize=8, capsize=5, capthick=2,
                    ls='none', color=color)

    ax.set_xlabel(x_axis)
    ax.set_ylabel("Average throughput (ev/s)")
    ax.set_title("Average throughput vs number of CPU threads per job")
    if title:
        ax.legend(title=title, title_fontsize='13', fontsize='11')
    else:
        ax.legend()

    transparency = dict(alpha=0.7)
    ax.grid(True, axis='x', **transparency)
    ax.xaxis.set_major_locator(MultipleLocator(4))
    ax.grid(True, which='major', axis='both', **transparency)
    ax.set_ylim(bottom=0)

    # Make the axes (plot area) white
    ax.set_facecolor('white')

    # Make only the figure background (outside axes) transparent
    fig.patch.set_facecolor('none')  # fully transparent
    fig.patch.set_alpha(0)

    fig.tight_layout()

    # Save as PNG and PDF with transparent canvas background
    fig.savefig(f"{filename}.png", dpi=600)
    fig.savefig(f"{filename}.pdf")

if __name__ == '__main__':
# Create the parser
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawTextHelpFormatter)

# Optional arguments
parser.add_argument('-t', '--title', type=str, default="",
help="Title of the legend")
parser.add_argument('-o', '--output', type=str, default="throughput_vs_threads",
help="Base filename for output files (PNG/PDF)")
parser.add_argument('-x', '--x-axis', default='CPU threads per job',
help='Horizontal axis label.')
parser.add_argument('--labels', nargs='+', default=None,
help="Labels to show in the legend instead of the CSV file names")

# Group by CPU threads per job and compute mean & std
grouped = (
df.groupby("CPU threads per job")["average throughput (ev/s)"]
.agg(['mean', 'std'])
.reset_index()
.sort_values("CPU threads per job")
)
# Positional arguments (CSV files)
parser.add_argument('files', nargs='+', help="CSV files to process")

# Create a nicer label: remove extension and replace underscores
label = os.path.basename(file) if labels is None else labels[files.index(file)]
if label.endswith(".csv"):
label = label[:-4]
label = label.replace("_", " ")

datasets[label] = grouped

# Plotting
fig, ax = plt.subplots(figsize=(10, 6))
for label, df in datasets.items():
#df["std"] = df["std"].fillna(0) # In case some groups have a single entry
color = ax.plot(df["CPU threads per job"], df["mean"], '--', linewidth=1.5)[0].get_color()
ax.errorbar(df["CPU threads per job"], df["mean"], yerr=df["std"],
label=label, marker='o', markersize=8, capsize=5, capthick=2,
ls='none', color=color)

ax.set_xlabel(args.x_axis)
ax.set_ylabel("Average throughput (ev/s)")
ax.set_title("Average throughput vs number of CPU threads per job")
if title:
ax.legend(title=title, title_fontsize='13', fontsize='11')
else:
ax.legend()

transparency = dict(alpha=0.7)
ax.grid(True, axis='x', **transparency)
ax.xaxis.set_major_locator(MultipleLocator(4))
ax.grid(True, which='major', axis='both', **transparency)
ax.set_ylim(bottom=0)

# Make the axes (plot area) white
ax.set_facecolor('white')

# Make only the figure background (outside axes) transparent
fig.patch.set_facecolor('none') # fully transparent
fig.patch.set_alpha(0)
# Parse arguments
args = parser.parse_args()

fig.tight_layout()
# Access the values
files = args.files
labels = args.labels
if labels is not None:
    # Report bad command-line input through argparse (usage message and exit
    # status 2) rather than assert, which is stripped when Python runs with -O.
    if len(files) != len(labels):
        parser.error("The number of labels must match the number of input CSV files. Each label corresponds to one file, following the order they are provided.")
    if len(labels) != len(set(labels)):
        parser.error("Labels should be unique.")

# Save as PNG and PDF with transparent canvas background
fig.savefig(f"{filename}.png", dpi=600)
fig.savefig(f"{filename}.pdf")
plot(args.title, args.output, files, labels)