"""Forecast electricity consumption from a date-shifted, relabelled dataset.

Reconstructed from the patch series (commits c58e0f95, dca76fc0, 1785b858)
that created the file ``synthetic weather-forecasting``.  Despite that file
name, the script works on electricity-consumption data, not weather data.

Pipeline (run via ``main``):

1. Read ``events.csv``, shift every timestamp column so the earliest
   ``Start time UTC`` becomes 2022-01-01 00:00:00, rename the consumption
   column from "... in Finland" to "... in India", keep rows starting before
   2025-01-01, and write ``energy_consumption_india_2022_2024.csv``.
2. Reload that file, add calendar and lag features, train three regressors,
   print their metrics, plot the best one and write ``model_performance.csv``.

NOTE(review): the output is the source data with shifted dates and a renamed
column, i.e. synthetic data; it is not a measurement of Indian consumption.
"""

import numpy as np
import pandas as pd

try:
    # Not used anywhere in this script.  Kept because the original file
    # imported it, but guarded so a missing package no longer aborts the run.
    from data_structures.custom_queue import Queue  # noqa: F401
except ImportError:
    Queue = None

RAW_CSV = "events.csv"
PROCESSED_CSV = "energy_consumption_india_2022_2024.csv"
RESULTS_CSV = "model_performance.csv"

START_COL = "Start time UTC"
TIME_COLS = (
    "Start time UTC",
    "End time UTC",
    "Start time UTC+03:00",
    "End time UTC+03:00",
)
SOURCE_TARGET_COL = "Electricity consumption in Finland"
TARGET_COL = "Electricity consumption in India"

PERIOD_START = pd.Timestamp("2022-01-01 00:00:00")
PERIOD_END = pd.Timestamp("2025-01-01")

# Lags are counted in ROWS, not in calendar units.
# NOTE(review): lag_7 / lag_30 only mean "7 / 30 days" if the data is daily
# -- confirm the sampling interval of events.csv.
LAGS = (1, 7, 30)


def shift_to_target_period(raw: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *raw* moved into the 2022-2024 window and relabelled.

    All four timestamp columns are parsed and shifted by the same offset, so
    that the earliest ``Start time UTC`` lands on 2022-01-01 00:00:00.  The
    consumption column is renamed and rows whose shifted start falls outside
    [2022-01-01, 2025-01-01) are dropped.  *raw* itself is not modified.

    Raises:
        KeyError: if any of the four timestamp columns is missing.
    """
    shifted = raw.copy()
    for col in TIME_COLS:
        shifted[col] = pd.to_datetime(shifted[col])

    # One common offset keeps the durations between the columns intact.
    offset = PERIOD_START - shifted[START_COL].min()
    for col in TIME_COLS:
        shifted[col] = shifted[col] + offset

    shifted = shifted.rename(columns={SOURCE_TARGET_COL: TARGET_COL})
    in_window = (shifted[START_COL] >= PERIOD_START) & (
        shifted[START_COL] < PERIOD_END
    )
    return shifted[in_window]


def add_features(frame: pd.DataFrame, lags: tuple = LAGS) -> pd.DataFrame:
    """Return a copy of *frame* with calendar and lag features added.

    Adds ``year``, ``month``, ``day``, ``hour`` and ``day_of_week`` derived
    from ``Start time UTC``, plus one ``lag_<n>`` column per entry of *lags*
    holding the target value *n* rows earlier.  Rows containing any NaN
    (which includes the first ``max(lags)`` rows) are dropped.
    """
    enriched = frame.copy()
    enriched[START_COL] = pd.to_datetime(enriched[START_COL])

    # shift() is positional, so the rows must be in time order first;
    # otherwise a "lag" could point at an arbitrary or even future reading.
    enriched = enriched.sort_values(START_COL, kind="stable").reset_index(drop=True)

    stamps = enriched[START_COL].dt
    enriched["year"] = stamps.year
    enriched["month"] = stamps.month
    enriched["day"] = stamps.day
    enriched["hour"] = stamps.hour
    enriched["day_of_week"] = stamps.dayofweek

    for lag in lags:
        enriched[f"lag_{lag}"] = enriched[TARGET_COL].shift(lag)

    return enriched.dropna()


def split_features_target(frame: pd.DataFrame) -> tuple:
    """Split *frame* into ``(X, y)``.

    ``y`` is the consumption column; ``X`` is every remaining column except
    the raw timestamp columns (those absent from *frame* are ignored).
    """
    features = frame.drop(columns=[TARGET_COL, *TIME_COLS], errors="ignore")
    return features, frame[TARGET_COL]


def chronological_split(
    features: pd.DataFrame, target: pd.Series, test_size: float = 0.2
) -> tuple:
    """Split into train/test without shuffling: the test set is the tail.

    A shuffled split would let the models train on rows recorded *after* the
    ones they are tested on, while the lag columns already carry neighbouring
    target values -- that leaks information and inflates every metric.

    Returns:
        ``(X_train, X_test, y_train, y_test)``, as ``train_test_split`` does.
    """
    from sklearn.model_selection import train_test_split

    return train_test_split(features, target, test_size=test_size, shuffle=False)


def build_models() -> dict:
    """Return the regressors to compare, keyed by display name."""
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.linear_model import LinearRegression
    from sklearn.tree import DecisionTreeRegressor

    return {
        "Linear Regression": LinearRegression(),
        "Decision Tree": DecisionTreeRegressor(random_state=42),
        "Random Forest": RandomForestRegressor(n_estimators=100, random_state=42),
    }


def evaluate_models(models: dict, x_train, x_test, y_train, y_test) -> tuple:
    """Fit every model, print its metrics and collect the outcomes.

    Returns:
        ``(results, predictions)`` where ``results[name]`` is a dict with the
        keys ``MAE``, ``MSE`` and ``R2 Score`` and ``predictions[name]`` is
        the model's output for *x_test*.
    """
    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

    results = {}
    predictions = {}
    for model_name, model in models.items():
        model.fit(x_train, y_train)
        y_pred = model.predict(x_test)

        mae = mean_absolute_error(y_test, y_pred)
        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        results[model_name] = {"MAE": mae, "MSE": mse, "R2 Score": r2}
        predictions[model_name] = y_pred

        print(f"{model_name} Evaluation:")
        print(f"MAE: {mae:.2f}")
        print(f"MSE: {mse:.2f}")
        print(f"R2 Score: {r2:.2f}")
        print("-" * 30)
    return results, predictions


def plot_actual_vs_predicted(
    y_test: pd.Series, y_pred: np.ndarray, model_name: str
) -> None:
    """Show actual and predicted consumption for the test period."""
    import matplotlib.pyplot as plt

    plt.figure(figsize=(12, 6))
    plt.plot(y_test.values, label="Actual", color="blue", alpha=0.7)
    plt.plot(y_pred, label=f"{model_name} Predicted", color="orange", alpha=0.7)
    plt.title("Energy Consumption Forecasting: Actual vs Predicted")
    plt.xlabel("Time Index")
    plt.ylabel("Electricity Consumption in India")
    plt.legend()
    plt.grid(True)
    plt.show()


def main() -> None:
    """Run both stages: build the shifted dataset, then train and report."""
    # Stage 1: transform the raw export and persist it.
    transformed = shift_to_target_period(pd.read_csv(RAW_CSV))
    print(transformed.head())
    transformed.to_csv(PROCESSED_CSV, index=False)

    # Stage 2: model the persisted dataset.
    data = add_features(pd.read_csv(PROCESSED_CSV))
    if data.empty:
        raise ValueError(
            f"{PROCESSED_CSV} has no complete rows after adding lag features; "
            f"more than {max(LAGS)} rows are required"
        )

    features, target = split_features_target(data)
    x_train, x_test, y_train, y_test = chronological_split(features, target)

    results, predictions = evaluate_models(
        build_models(), x_train, x_test, y_train, y_test
    )

    # Plot whichever model actually scored best instead of assuming one.
    best_name = min(results, key=lambda name: results[name]["MAE"])
    plot_actual_vs_predicted(y_test, predictions[best_name], best_name)

    pd.DataFrame(results).T.to_csv(RESULTS_CSV, index=True)


if __name__ == "__main__":
    main()