Skip to content
Snippets Groups Projects
Commit 87e92925 authored by Andri Joos
Browse files

add covariance matrix calculation

parent 82b588ed
No related branches found
No related tags found
No related merge requests found
......@@ -19,5 +19,12 @@
"--no-parallelization",
],
},
{
"name": "Python Debugger: [test] covariance matrices",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/app/preprocessing/covariance_matrices.py",
"console": "integratedTerminal"
},
]
}
import pandas as pd
import numpy as np
import os
from collections import defaultdict
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
# Collect the absolute covariance matrix of every 'Maneuver' class in each
# parquet file, average those matrices per class across all files, print the
# averages and save one heatmap image per class.

# Directory holding the parquet files to analyse.
parquet_dir = 'dataset/preprocessing/transformed_data/full_dataset_transformation/removed_unimportant_predictors'
# Directory the heatmap images are written to.
output_dir = 'out'

# Maps each class to the list of its covariance matrices, one per contributing file.
covariances = defaultdict(list)

# os.listdir returns entries in arbitrary order; sort for a reproducible run.
parquet_files = sorted(f for f in os.listdir(parquet_dir) if f.endswith('.parquet'))  # TODO: use utils

# Process each parquet file individually so only one file is in memory at a time.
for file in parquet_files:
    file_path = os.path.join(parquet_dir, file)
    print(f'Collecting covariance from {file}')
    df = pd.read_parquet(file_path)

    # Compute the covariance for each class (Maneuver) in the current file.
    for maneuver_class in df['Maneuver'].unique():
        class_data: pd.DataFrame = df[df['Maneuver'] == maneuver_class].drop(columns=['Maneuver'])

        # A sample covariance needs at least two observations; with fewer,
        # pandas returns an all-NaN matrix that would turn the whole average
        # of this class into NaN.
        if len(class_data) < 2:
            continue

        # Absolute values: only the strength of the relation is of interest,
        # not its sign.
        cov_matrix = class_data.cov().abs()
        covariances[maneuver_class].append(cov_matrix)

# Average the covariance matrices of each class across all files.
avg_covariances = {}
for maneuver_class, cov_matrices in covariances.items():
    # NOTE(review): assumes every file exposes the same feature columns in the
    # same order, so matrices can be averaged position-wise - confirm.
    # np.stack raises if the shapes differ.
    reference = cov_matrices[0]
    stacked = np.stack([matrix.to_numpy() for matrix in cov_matrices])
    # Re-attach the feature labels, which a plain ndarray mean would discard.
    avg_covariances[maneuver_class] = pd.DataFrame(
        stacked.mean(axis=0),
        index=reference.index,
        columns=reference.columns,
    )

# Display results.
for maneuver_class, cov_matrix in avg_covariances.items():
    print(f"Average Covariance Matrix for class {maneuver_class}:\n{cov_matrix}\n")

# savefig does not create missing directories.
os.makedirs(output_dir, exist_ok=True)

for maneuver_class, cov_matrix in avg_covariances.items():
    fig = plt.figure(figsize=(10, 8))
    sns.heatmap(cov_matrix, annot=False, fmt=".2f", cmap="viridis", cbar=True)
    plt.title(f"Average Covariance Matrix for Class '{maneuver_class}'")
    plt.xlabel("Features")
    plt.ylabel("Features")
    plt.savefig(os.path.join(output_dir, f'{maneuver_class}.png'))
    # pyplot keeps every figure alive until it is closed explicitly.
    plt.close(fig)
......@@ -7,6 +7,9 @@ dependencies = [
"pyarrow >= 18.0.0, < 19.0.0",
"joblib >= 1.4.2, < 2.0.0",
"psutil >= 6.1.0, < 7.0.0",
"scikit-learn >= 1.5.2, < 2.0.0",
"matplotlib >= 3.9.2, < 4.0.0",
"seaborn >= 0.13.2, < 1.0.0",
]
maintainers = [
{ name = "Andri Joos" },
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment