Added baseline with perfect predictions

2024-03-28 14:56:28 +01:00
parent 65ec8fcd54
commit ef094c659c
3 changed files with 119 additions and 3 deletions
--- a/src/policies/PolicyEvaluator.py
+++ b/src/policies/PolicyEvaluator.py
@@ -92,12 +92,16 @@ class PolicyEvaluator:
        test_loader,
        initial_penalty,
        target_charge_cycles,
-        learning_rate=2,
+        initial_learning_rate=2,
        max_iterations=10,
        tolerance=10,
+        learning_rate_decay=0.9,  # Factor to reduce the learning rate after each iteration
    ):
        self.cache = {}
        penalty = initial_penalty
+        learning_rate = initial_learning_rate
+        previous_gradient = None  # Track the previous gradient to adjust learning rate based on progress
+
        for iteration in range(max_iterations):
            # Calculate profit and charge cycles for the current penalty
            simulated_profit, simulated_charge_cycles = (
@@ -105,19 +109,29 @@ class PolicyEvaluator:
            )

            print(
-                f"Penalty: {penalty}, Charge Cycles: {simulated_charge_cycles}, Profit: {simulated_profit}"
+                f"Iteration {iteration}: Penalty: {penalty}, Charge Cycles: {simulated_charge_cycles}, Profit: {simulated_profit}, Learning Rate: {learning_rate}"
            )

            # Calculate the gradient (difference) between the simulated and target charge cycles
            gradient = simulated_charge_cycles - target_charge_cycles

+            # Optionally, adjust learning rate based on the change of gradient direction to avoid oscillation
+            if previous_gradient is not None and gradient * previous_gradient < 0:
+                learning_rate *= learning_rate_decay
+
            # Update the penalty parameter in the direction of the gradient
-            penalty += learning_rate * gradient
+            penalty += (
+                learning_rate * gradient
+            )  # Note: += raises the penalty when simulated charge cycles exceed the target and lowers it when they fall short
+
+            # Update the previous gradient
+            previous_gradient = gradient

            # Check if the charge cycles are close enough to the target
            if abs(gradient) < tolerance:
                print(f"Optimal penalty found after {iteration+1} iterations")
                break
+
        else:
            print(
                f"Reached max iterations ({max_iterations}) without converging to the target charge cycles"