Made more changes

2024-04-16 22:07:53 +02:00
parent 937b6abc0b
commit 0edcc91e65
12 changed files with 214 additions and 36 deletions

View File

@@ -171,7 +171,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
"version": "3.undefined.undefined"
}
},
"nbformat": 4,

View File

@@ -44,8 +44,8 @@ class PolicyEvaluator:
date,
idx_samples,
test_loader,
-charge_thresholds=np.arange(-1500, 1500, 50),
-discharge_thresholds=np.arange(-1500, 1500, 50),
+charge_thresholds=np.arange(-1000, 1000, 5),
+discharge_thresholds=np.arange(-1000, 1000, 5),
penalty: int = 0,
state_of_charge: float = 0.0,
):
@@ -96,6 +96,7 @@ class PolicyEvaluator:
max_iterations=10,
tolerance=10,
learning_rate_decay=0.9, # Factor to reduce the learning rate after each iteration
+iteration=0,
):
self.cache = {}
penalty = initial_penalty
@@ -139,7 +140,7 @@ class PolicyEvaluator:
# Re-calculate profit and charge cycles for the final penalty to return accurate results
profit, charge_cycles = self.evaluate_test_set_for_penalty(
-idx_samples, test_loader, penalty
+idx_samples, test_loader, penalty, log_metrics=True, iteration=iteration
)
return penalty, profit, charge_cycles
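
For orientation: the penalty search these hunks parameterise is a damped fixed-point iteration on the charge-cycle miss. The sketch below is assembled from the knobs visible in this diff (learning_rate_decay, tolerance, max_iterations, the final re-evaluation, and the `gradient = simulated - target` step in a later hunk); it is an illustration, not the repository's exact code.

# Illustrative sketch only; `evaluate` stands in for
# evaluate_test_set_for_penalty and is assumed to return (profit, cycles).
def tune_penalty(evaluate, initial_penalty, target_charge_cycles,
                 initial_learning_rate=1.0, learning_rate_decay=0.9,
                 max_iterations=10, tolerance=10):
    penalty, lr = initial_penalty, initial_learning_rate
    for _ in range(max_iterations):
        profit, cycles = evaluate(penalty)
        gradient = cycles - target_charge_cycles  # signed miss vs. target
        if abs(gradient) <= tolerance:
            break
        penalty += lr * gradient  # overshooting the target raises the penalty
        lr *= learning_rate_decay  # damp the step after each iteration
    profit, cycles = evaluate(penalty)  # re-evaluate at the final penalty
    return penalty, profit, cycles
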
@@ -232,25 +233,45 @@ class PolicyEvaluator:
],
)
-def evaluate_test_set_for_penalty(self, idx_samples, test_loader, penalty):
+def evaluate_test_set_for_penalty(
+self, idx_samples, test_loader, penalty, log_metrics=False, iteration: int = 0
+):
total_profit = 0
total_charge_cycles = 0
state_of_charge = 0.0
+loggings = []
for date in tqdm(self.dates):
try:
-profit, charge_cycles, _, _, new_state_of_charge = (
-self.evaluate_for_date(
-date,
-idx_samples,
-test_loader,
-penalty=penalty,
-state_of_charge=state_of_charge,
-)
+(
+profit,
+charge_cycles,
+charge_thresholds,
+discharge_thresholds,
+new_state_of_charge,
+) = self.evaluate_for_date(
+date,
+idx_samples,
+test_loader,
+penalty=penalty,
+state_of_charge=state_of_charge,
+)
state_of_charge = new_state_of_charge
total_profit += profit
total_charge_cycles += charge_cycles
+new_info = {
+"Date": date,
+"Profit": profit,
+"Charge Cycles": charge_cycles,
+"State of Charge": state_of_charge,
+"Charge Threshold": charge_thresholds,
+"Discharge Threshold": discharge_thresholds,
+}
+loggings.append(new_info)
except KeyboardInterrupt:
print("Interrupted")
raise KeyboardInterrupt
@@ -259,6 +280,59 @@ class PolicyEvaluator:
print(e)
pass
+if log_metrics:
+log_df = pd.DataFrame(loggings)
+fig = px.line(
+log_df,
+x="Date",
+y="Profit",
+title="Profit over time",
+labels={"Profit": "Profit (€)", "Date": "Date"},
+)
+self.task.get_logger().report_plotly(
+"Profit", "Profit", iteration=iteration, figure=fig
+)
+fig = px.line(
+log_df,
+x="Date",
+y="Charge Cycles",
+title="Charge Cycles over time",
+labels={"Charge Cycles": "Charge Cycles", "Date": "Date"},
+)
+self.task.get_logger().report_plotly(
+"Charge Cycles", "Charge Cycles", iteration=iteration, figure=fig
+)
+fig = px.line(
+log_df,
+x="Date",
+y="State of Charge",
+title="State of Charge over time",
+labels={"State of Charge": "State of Charge", "Date": "Date"},
+)
+self.task.get_logger().report_plotly(
+"State of Charge", "State of Charge", iteration=iteration, figure=fig
+)
+fig = px.line(
+log_df,
+x="Date",
+y=["Charge Threshold", "Discharge Threshold"],
+title="Charge and Discharge Thresholds per Day",
+)
+self.task.get_logger().report_plotly(
+"Thresholds per Day",
+"Thresholds per Day",
+iteration=iteration,
+figure=fig,
+)
return total_profit, total_charge_cycles
def plot_profits_table(self):
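
Aside: the four px.line/report_plotly blocks added above, and their near-duplicates in the YesterdayBaselinePolicyEvaluator hunks below, follow a single pattern, so they could be collapsed into a small helper. A hypothetical sketch (the helper name is invented, not part of this commit):

import plotly.express as px

def report_line_plot(task, log_df, y, title, iteration=0):
    # Plot one column (or a list of columns) of the per-day log against Date
    # and ship the figure to ClearML under a single title/series.
    fig = px.line(log_df, x="Date", y=y, title=title)
    task.get_logger().report_plotly(title, title, iteration=iteration, figure=fig)

Each block then reduces to one call, e.g. report_line_plot(self.task, log_df, "Profit", "Profit over time", iteration).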

View File

@@ -1,5 +1,5 @@
from clearml import Task
-from policies.simple_baseline import BaselinePolicy
+from src.policies.simple_baseline import BaselinePolicy
from src.policies.baselines.YesterdayBaselinePolicyExecutor import (
YesterdayBaselinePolicyEvaluator,
)
@@ -14,17 +14,21 @@ class PerfectBaseline(YesterdayBaselinePolicyEvaluator):
def evaluate_for_date(
self,
date,
-charge_thresholds=np.arange(-100, 250, 25),
-discharge_thresholds=np.arange(-100, 250, 25),
+charge_thresholds=np.arange(-300, 300, 5),
+discharge_thresholds=np.arange(-300, 300, 5),
penalty: int = 0,
current_state_of_charge=0.0,
):
real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date())
+real_imbalance_prices_tensor = torch.tensor(
+np.array([real_imbalance_prices]), device="cpu"
+)
best_charge_thresholds, best_discharge_thresholds = (
self.baseline_policy.get_optimal_thresholds(
-real_imbalance_prices,
+real_imbalance_prices_tensor,
charge_thresholds,
discharge_thresholds,
penalty,
@@ -45,4 +49,6 @@ class PerfectBaseline(YesterdayBaselinePolicyEvaluator):
best_profit[0][0].item(),
best_charge_cycles[0][0].item(),
new_state_of_charge.squeeze(0).item(),
+best_charge_thresholds.mean(axis=0),
+best_discharge_thresholds.mean(axis=0),
)

View File

@@ -6,6 +6,7 @@ import numpy as np
import pandas as pd
from tqdm import tqdm
import torch
+import plotly.express as px
class YesterdayBaselinePolicyEvaluator(PolicyEvaluator):
@@ -15,8 +16,8 @@ class YesterdayBaselinePolicyEvaluator(PolicyEvaluator):
def evaluate_for_date(
self,
date,
-charge_thresholds=np.arange(-100, 250, 25),
-discharge_thresholds=np.arange(-100, 250, 25),
+charge_thresholds=np.arange(-500, 500, 5),
+discharge_thresholds=np.arange(-500, 500, 5),
penalty: int = 0,
current_state_of_charge=0.0,
):
@@ -52,9 +53,13 @@ class YesterdayBaselinePolicyEvaluator(PolicyEvaluator):
yesterday_profit[0][0].item(),
yesterday_charge_cycles[0][0].item(),
new_state_of_charge.squeeze(0).item(),
+yesterday_charge_thresholds.mean(axis=0),
+yesterday_discharge_thresholds.mean(axis=0),
)
-def evaluate_test_set_for_penalty(self, data_processor, penalty: int = 0):
+def evaluate_test_set_for_penalty(
+self, data_processor, penalty: int = 0, log_metrics=False
+):
if data_processor:
filtered_dates = []
@@ -71,20 +76,89 @@ class YesterdayBaselinePolicyEvaluator(PolicyEvaluator):
charge_cycles = 0
state_of_charge = 0.0
+loggings = []
for date in tqdm(self.dates):
try:
-new_profit, new_charge_cycles, new_state_of_charge = (
-self.evaluate_for_date(
-date, penalty=penalty, current_state_of_charge=state_of_charge
-)
+(
+new_profit,
+new_charge_cycles,
+new_state_of_charge,
+charge_threshold,
+discharge_threshold,
+) = self.evaluate_for_date(
+date, penalty=penalty, current_state_of_charge=state_of_charge
+)
profit += new_profit
charge_cycles += new_charge_cycles
state_of_charge = new_state_of_charge
+new_info = {
+"Date": date,
+"Profit": profit,
+"Charge Cycles": charge_cycles,
+"State of Charge": state_of_charge,
+"Charge Threshold": charge_threshold.item(),
+"Discharge Threshold": discharge_threshold.item(),
+}
+loggings.append(new_info)
except Exception as e:
print(e)
pass
+if log_metrics:
+log_df = pd.DataFrame(loggings)
+fig = px.line(
+log_df,
+x="Date",
+y="Profit",
+title="Profit over time",
+labels={"Profit": "Profit (€)", "Date": "Date"},
+)
+self.task.get_logger().report_plotly(
+"Profit", "Profit", iteration=0, figure=fig
+)
+fig = px.line(
+log_df,
+x="Date",
+y="Charge Cycles",
+title="Charge Cycles over time",
+labels={"Charge Cycles": "Charge Cycles", "Date": "Date"},
+)
+self.task.get_logger().report_plotly(
+"Charge Cycles", "Charge Cycles", iteration=0, figure=fig
+)
+fig = px.line(
+log_df,
+x="Date",
+y="State of Charge",
+title="State of Charge over time",
+labels={"State of Charge": "State of Charge", "Date": "Date"},
+)
+self.task.get_logger().report_plotly(
+"State of Charge", "State of Charge", iteration=0, figure=fig
+)
+fig = px.line(
+log_df,
+x="Date",
+y=["Charge Threshold", "Discharge Threshold"],
+title="Charge and Discharge Thresholds per Day",
+)
+self.task.get_logger().report_plotly(
+"Thresholds per Day", "Thresholds per Day", iteration=0, figure=fig
+)
return profit, charge_cycles
def optimize_penalty_for_target_charge_cycles(
@@ -108,6 +182,21 @@ class YesterdayBaselinePolicyEvaluator(PolicyEvaluator):
f"Penalty: {penalty}, Charge Cycles: {simulated_charge_cycles}, Profit: {simulated_profit}"
)
+self.task.get_logger().report_scalar(
+"Penalty", "Penalty", penalty, iteration=iteration
+)
+self.task.get_logger().report_scalar(
+"Charge Cycles",
+"Charge Cycles",
+simulated_charge_cycles,
+iteration=iteration,
+)
+self.task.get_logger().report_scalar(
+"Profit", "Profit", simulated_profit, iteration=iteration
+)
# Calculate the gradient (difference) between the simulated and target charge cycles
gradient = simulated_charge_cycles - target_charge_cycles
@@ -125,7 +214,7 @@ class YesterdayBaselinePolicyEvaluator(PolicyEvaluator):
# Re-calculate profit and charge cycles for the final penalty to return accurate results
profit, charge_cycles = self.evaluate_test_set_for_penalty(
-data_processor, penalty
+data_processor, penalty, log_metrics=True
)
return penalty, profit, charge_cycles
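
Concretely, with the values used later in this diff (initial_learning_rate=1, decay 0.9, target_charge_cycles=283, tolerance 1) and the additive update sketched earlier: a run that lands at 300 cycles gives a gradient of 300 - 283 = 17, so the penalty moves by 1 × 17 = 17; if the next run lands at 290, the step shrinks to 0.9 × 7 = 6.3; the search stops once |gradient| ≤ 1 or max_iterations is exhausted.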

View File

@@ -55,8 +55,15 @@ charge_discharge_threshold, total_profit, total_charge_cycles = (
policy_evaluator.determine_best_thresholds_test_set(data_processor)
)
task.get_logger().report_single_value(name="Optimal Profit", value=profit)
task.get_logger().report_single_value(name="Optimal Charge Cycles", value=charge_cycles)
print("Thresholds determined on test set")
print(f"Best Charge Discharge Threshold: {charge_discharge_threshold}")
print(f"Total Profit: {total_profit}")
print(f"Total Charge Cycles: {total_charge_cycles}")
task.get_logger().report_single_value(name="Optimal Profit", value=total_profit)
task.get_logger().report_single_value(
name="Optimal Charge Cycles", value=total_charge_cycles
)
task.get_logger().report_single_value(
name="Optimal Charge Threshold", value=charge_discharge_threshold[0]
)
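
Note on the hunk above: the removed calls reported `profit` and `charge_cycles`, names the visible script never binds (the unpacking at the top of the hunk yields `total_profit` and `total_charge_cycles`), so they would presumably have raised a NameError; the replacement logs the bound totals and additionally the optimal charge threshold.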

View File

@@ -7,7 +7,7 @@ task.execute_remotely(queue_name="default", exit_process=True)
from src.policies.simple_baseline import BaselinePolicy, Battery
from src.data import DataProcessor, DataConfig
-from policies.baselines.PerfectBaseline import PerfectBaseline
+from src.policies.baselines.PerfectBaseline import PerfectBaseline
### Data Processor ###
data_config = DataConfig()

View File

@@ -208,7 +208,7 @@ class DiffusionTrainer:
running_loss /= len(train_loader.dataset)
-if epoch % 150 == 0 and epoch != 0:
+if epoch % 75 == 0 and epoch != 0:
crps, _ = self.test(test_loader, epoch, task)
if best_crps is None or crps < best_crps:
@@ -217,7 +217,7 @@ class DiffusionTrainer:
else:
early_stopping += 1
-if early_stopping > 15:
+if early_stopping > 5:
break
if task:
@@ -249,7 +249,7 @@ class DiffusionTrainer:
test_loader=test_loader,
initial_penalty=900,
target_charge_cycles=283,
-learning_rate=1,
+initial_learning_rate=1,
max_iterations=50,
tolerance=1,
)
@@ -438,9 +438,10 @@ class DiffusionTrainer:
test_loader=test_loader,
initial_penalty=self.prev_optimal_penalty,
target_charge_cycles=283,
-learning_rate=1,
+initial_learning_rate=1,
max_iterations=50,
tolerance=1,
+iteration=epoch,
)
)
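
The first two DiffusionTrainer hunks tighten the evaluation schedule (a CRPS test every 75 epochs instead of 150) and the patience (break after 5 stagnant tests instead of 15), so training now gives up after roughly 450 non-improving epochs rather than roughly 2400. A minimal sketch of that counter pattern, assuming self.test returns (crps, _) as in the diff and that an improvement resets the counter:

best_crps, early_stopping = None, 0
for epoch in range(num_epochs):
    # ... one training epoch updates running_loss ...
    if epoch % 75 == 0 and epoch != 0:  # evaluate every 75 epochs
        crps, _ = self.test(test_loader, epoch, task)
        if best_crps is None or crps < best_crps:
            best_crps, early_stopping = crps, 0  # improvement resets patience
        else:
            early_stopping += 1
        if early_stopping > 5:  # stop after 5+ evaluations without improvement
            break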

View File

@@ -192,9 +192,10 @@ class AutoRegressiveQuantileTrainer(AutoRegressiveTrainer):
test_loader=dataloader,
initial_penalty=900,
target_charge_cycles=283,
-learning_rate=2,
+initial_learning_rate=5,
max_iterations=100,
tolerance=1,
+iteration=epoch,
)
)
@@ -823,7 +824,7 @@ class NonAutoRegressiveQuantileRegression(Trainer):
test_loader=dataloader,
initial_penalty=500,
target_charge_cycles=283,
-learning_rate=2,
+initial_learning_rate=2,
max_iterations=100,
tolerance=1,
)

View File

@@ -113,7 +113,7 @@ trainer = AutoRegressiveQuantileTrainer(
data_processor,
quantiles,
"cuda",
-policy_evaluator=policy_evaluator,
+policy_evaluator=None,
debug=False,
)