Added baseline with perfect predictions
This commit is contained in:
@@ -92,12 +92,16 @@ class PolicyEvaluator:
|
|||||||
test_loader,
|
test_loader,
|
||||||
initial_penalty,
|
initial_penalty,
|
||||||
target_charge_cycles,
|
target_charge_cycles,
|
||||||
learning_rate=2,
|
initial_learning_rate=2,
|
||||||
max_iterations=10,
|
max_iterations=10,
|
||||||
tolerance=10,
|
tolerance=10,
|
||||||
|
learning_rate_decay=0.9, # Factor to reduce the learning rate after each iteration
|
||||||
):
|
):
|
||||||
self.cache = {}
|
self.cache = {}
|
||||||
penalty = initial_penalty
|
penalty = initial_penalty
|
||||||
|
learning_rate = initial_learning_rate
|
||||||
|
previous_gradient = None # Track the previous gradient to adjust learning rate based on progress
|
||||||
|
|
||||||
for iteration in range(max_iterations):
|
for iteration in range(max_iterations):
|
||||||
# Calculate profit and charge cycles for the current penalty
|
# Calculate profit and charge cycles for the current penalty
|
||||||
simulated_profit, simulated_charge_cycles = (
|
simulated_profit, simulated_charge_cycles = (
|
||||||
@@ -105,19 +109,29 @@ class PolicyEvaluator:
|
|||||||
)
|
)
|
||||||
|
|
||||||
print(
|
print(
|
||||||
f"Penalty: {penalty}, Charge Cycles: {simulated_charge_cycles}, Profit: {simulated_profit}"
|
f"Iteration {iteration}: Penalty: {penalty}, Charge Cycles: {simulated_charge_cycles}, Profit: {simulated_profit}, Learning Rate: {learning_rate}"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Calculate the gradient (difference) between the simulated and target charge cycles
|
# Calculate the gradient (difference) between the simulated and target charge cycles
|
||||||
gradient = simulated_charge_cycles - target_charge_cycles
|
gradient = simulated_charge_cycles - target_charge_cycles
|
||||||
|
|
||||||
|
# Optionally, adjust learning rate based on the change of gradient direction to avoid oscillation
|
||||||
|
if previous_gradient is not None and gradient * previous_gradient < 0:
|
||||||
|
learning_rate *= learning_rate_decay
|
||||||
|
|
||||||
# Update the penalty parameter in the direction of the gradient
|
# Update the penalty parameter in the direction of the gradient
|
||||||
penalty += learning_rate * gradient
|
penalty += (
|
||||||
|
learning_rate * gradient
|
||||||
|
) # Note: Using -= to move penalty in the opposite direction of gradient if necessary
|
||||||
|
|
||||||
|
# Update the previous gradient
|
||||||
|
previous_gradient = gradient
|
||||||
|
|
||||||
# Check if the charge cycles are close enough to the target
|
# Check if the charge cycles are close enough to the target
|
||||||
if abs(gradient) < tolerance:
|
if abs(gradient) < tolerance:
|
||||||
print(f"Optimal penalty found after {iteration+1} iterations")
|
print(f"Optimal penalty found after {iteration+1} iterations")
|
||||||
break
|
break
|
||||||
|
|
||||||
else:
|
else:
|
||||||
print(
|
print(
|
||||||
f"Reached max iterations ({max_iterations}) without converging to the target charge cycles"
|
f"Reached max iterations ({max_iterations}) without converging to the target charge cycles"
|
||||||
|
|||||||
48
src/policies/baselines/PerfectBaseline.py
Normal file
48
src/policies/baselines/PerfectBaseline.py
Normal file
# src/policies/baselines/PerfectBaseline.py — reconstructed from diff residue.
from clearml import Task
from policies.simple_baseline import BaselinePolicy
from src.policies.baselines.YesterdayBaselinePolicyExecutor import (
    YesterdayBaselinePolicyEvaluator,
)
import torch
import numpy as np


class PerfectBaseline(YesterdayBaselinePolicyEvaluator):
    """Baseline evaluator with perfect foresight.

    Unlike the "yesterday" baseline, this evaluator optimizes the
    charge/discharge thresholds against the *real* imbalance prices of the
    evaluated day itself, so its profit is an upper bound for what any
    forecast-driven policy could achieve on that day.
    """

    def __init__(self, baseline_policy: BaselinePolicy, task: Task = None):
        """Store the wrapped baseline policy and optional ClearML task."""
        super().__init__(baseline_policy, task)

    def evaluate_for_date(
        self,
        date,
        charge_thresholds=None,
        discharge_thresholds=None,
        penalty: int = 0,
        current_state_of_charge=0.0,
    ):
        """Evaluate one day using the day's real imbalance prices.

        Args:
            date: datetime-like; only ``date.date()`` is used for the price lookup.
            charge_thresholds: candidate charge thresholds; defaults to
                ``np.arange(-100, 250, 25)``.
            discharge_thresholds: candidate discharge thresholds; defaults to
                ``np.arange(-100, 250, 25)``.
            penalty: penalty term passed to the threshold optimization.
            current_state_of_charge: battery state of charge at day start.

        Returns:
            Tuple of floats ``(profit, charge_cycles, new_state_of_charge)``.
        """
        # Avoid mutable (ndarray) default arguments: materialize the default
        # threshold grids per call instead of sharing one array across calls.
        if charge_thresholds is None:
            charge_thresholds = np.arange(-100, 250, 25)
        if discharge_thresholds is None:
            discharge_thresholds = np.arange(-100, 250, 25)

        # NOTE(review): "imbanlance" is a typo, but the method is defined on
        # the parent class under that name — keep the call consistent with it.
        real_imbalance_prices = self.get_imbanlance_prices_for_date(date.date())

        # Optimize thresholds against the actual prices (perfect foresight).
        best_charge_thresholds, best_discharge_thresholds = (
            self.baseline_policy.get_optimal_thresholds(
                real_imbalance_prices,
                charge_thresholds,
                discharge_thresholds,
                penalty,
                battery_state_of_charge=current_state_of_charge,
            )
        )

        # Simulate the day with the optimal thresholds; simulate() works on
        # batched tensors, so wrap inputs in singleton batch dimensions.
        best_profit, best_charge_cycles, new_state_of_charge = (
            self.baseline_policy.simulate(
                torch.tensor([[real_imbalance_prices]]),
                torch.tensor([best_charge_thresholds.mean(axis=0)]),
                torch.tensor([best_discharge_thresholds.mean(axis=0)]),
                battery_state_of_charge=torch.tensor([current_state_of_charge]),
            )
        )

        # Unwrap the singleton batch dimensions back to plain floats.
        return (
            best_profit[0][0].item(),
            best_charge_cycles[0][0].item(),
            new_state_of_charge.squeeze(0).item(),
        )
||||||
54
src/policies/baselines/perfect_baseline.py
Normal file
54
src/policies/baselines/perfect_baseline.py
Normal file
# src/policies/baselines/perfect_baseline.py — reconstructed from diff residue.
"""ClearML driver: tune the penalty of the perfect-foresight baseline so its
simulated charge cycles hit a target, then report the results."""

from src.utils.clearml import ClearMLHelper

#### ClearML ####
clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast")
task = clearml_helper.get_task(task_name="Perfect Baseline")
# Enqueue for remote execution; exits the local process once enqueued.
task.execute_remotely(queue_name="default", exit_process=True)

# Imported after task.execute_remotely() so the remote worker resolves them.
from src.policies.simple_baseline import BaselinePolicy, Battery
from src.data import DataProcessor, DataConfig
from policies.baselines.PerfectBaseline import PerfectBaseline

### Data Processor ###
data_config = DataConfig()
data_config.NRV_HISTORY = True
data_config.LOAD_HISTORY = True
data_config.LOAD_FORECAST = True

data_config.WIND_FORECAST = True
data_config.WIND_HISTORY = True

data_config.QUARTER = False
data_config.DAY_OF_WEEK = False

data_config.NOMINAL_NET_POSITION = True

data_processor = DataProcessor(data_config, path="", lstm=False)
data_processor.set_batch_size(64)
data_processor.set_full_day_skip(True)

### Policy Evaluator ###
battery = Battery(2, 1)
baseline_policy = BaselinePolicy(battery, data_path="")
policy_evaluator = PerfectBaseline(baseline_policy, task)

# NOTE(review): this same commit renames the `learning_rate` parameter of
# PolicyEvaluator.optimize_penalty_for_target_charge_cycles to
# `initial_learning_rate`. If PerfectBaseline inherits that signature, the
# keyword below will raise TypeError at runtime — verify against the
# evaluator actually resolved here before renaming.
penalty, profit, charge_cycles = (
    policy_evaluator.optimize_penalty_for_target_charge_cycles(
        data_processor=data_processor,
        initial_penalty=0,
        target_charge_cycles=283,
        learning_rate=2,
        max_iterations=100,
        tolerance=1,
    )
)

# policy_evaluator.plot_profits_table()
print()
print("Test Set Results")
print(f"Penalty: {penalty}, Profit: {profit}, Charge Cycles: {charge_cycles}")

# Publish the tuned values as ClearML scalars.
task.get_logger().report_single_value(name="Optimal Penalty", value=penalty)
task.get_logger().report_single_value(name="Optimal Profit", value=profit)
task.get_logger().report_single_value(name="Optimal Charge Cycles", value=charge_cycles)

task.close()
Reference in New Issue
Block a user