Fixed crps + more inputs

This commit is contained in:
Victor Mylle
2023-12-05 00:08:17 +00:00
parent 120b6aa5bd
commit d3bf04d68c
13 changed files with 128426 additions and 70 deletions

View File

@@ -29,6 +29,7 @@ class NrvDataset(Dataset):
range(len(dataframe) - self.sequence_length - self.predict_sequence_length)
)
self.valid_indices = sorted(list(total_indices - set(self.samples_to_skip)))
print(len(self.valid_indices))
self.history_features = []
if self.data_config.LOAD_HISTORY:
@@ -36,7 +37,9 @@ class NrvDataset(Dataset):
if self.data_config.PV_HISTORY:
self.history_features.append("pv_gen_forecast")
if self.data_config.WIND_HISTORY:
self.history_features.append("wind_gen_forecast")
self.history_features.append("wind_history")
if self.data_config.NOMINAL_NET_POSITION:
self.history_features.append("nominal_net_position")
self.forecast_features = []
if self.data_config.LOAD_FORECAST:
@@ -44,7 +47,9 @@ class NrvDataset(Dataset):
if self.data_config.PV_FORECAST:
self.forecast_features.append("pv_gen_forecast")
if self.data_config.WIND_FORECAST:
self.forecast_features.append("wind_gen_forecast")
self.forecast_features.append("wind_forecast")
if self.data_config.NOMINAL_NET_POSITION:
self.forecast_features.append("nominal_net_position")
# add time feature to dataframe
time_feature = np.array([0] * len(dataframe))
@@ -73,7 +78,7 @@ class NrvDataset(Dataset):
skip_indices = [
list(
range(
idx - self.sequence_length - self.predict_sequence_length, idx + 1
idx - self.sequence_length - 96, idx + 1
)
)
for idx in nan_indices

View File

@@ -11,6 +11,7 @@ history_data_path = "data/history-quarter-hour-data.csv"
forecast_data_path = "data/load_forecast.csv"
pv_forecast_data_path = "data/pv_gen_forecast.csv"
wind_forecast_data_path = "data/wind_gen_forecast.csv"
nominal_net_position_data_path = "data/nominal_net_position.csv"
class DataConfig:
@@ -29,6 +30,9 @@ class DataConfig:
self.WIND_FORECAST: bool = False
self.WIND_HISTORY: bool = False
### NET POSITION ###
self.NOMINAL_NET_POSITION: bool = False
### TIME ###
self.YEAR: bool = False
self.DAY_OF_WEEK: bool = False
@@ -51,19 +55,24 @@ class DataProcessor:
self.history_features = self.get_nrv_history()
self.future_features = self.get_load_forecast()
self.pv_forecast = self.get_pv_forecast()
# self.pv_forecast = self.get_pv_forecast()
self.wind_forecast = self.get_wind_forecast()
self.all_features = self.history_features.merge(
self.future_features, on="datetime", how="left"
)
self.all_features = self.all_features.merge(
self.pv_forecast, on="datetime", how="left"
)
# self.all_features = self.all_features.merge(
# self.pv_forecast, on="datetime", how="left"
# )
self.all_features = self.all_features.merge(
self.wind_forecast, on="datetime", how="left"
)
self.all_features = self.all_features.merge(
self.get_nominal_net_position(), on="datetime", how="left"
)
self.all_features["quarter"] = (
self.all_features["datetime"].dt.hour * 4
+ self.all_features["datetime"].dt.minute / 15
@@ -77,6 +86,8 @@ class DataProcessor:
self.nrv_scaler = MinMaxScaler(feature_range=(-1, 1))
self.load_forecast_scaler = MinMaxScaler(feature_range=(-1, 1))
self.wind_forecast_scaler = MinMaxScaler(feature_range=(-1, 1))
self.nominal_net_position_scaler = MinMaxScaler(feature_range=(-1, 1))
self.full_day_skip = False
@@ -154,9 +165,9 @@ class DataProcessor:
df = pd.read_csv(self.path + wind_forecast_data_path, delimiter=";")
df = df.rename(
columns={"dayaheadforecast": "wind_forecast", "datetime": "datetime"}
columns={"measured": "wind_history", "dayaheadforecast": "wind_forecast", "datetime": "datetime"}
)
df = df[["datetime", "wind_forecast"]]
df = df[["datetime", "wind_forecast", "wind_history"]]
# remove nan rows
df = df[~df["wind_forecast"].isnull()]
@@ -166,6 +177,29 @@ class DataProcessor:
df.sort_values(by="datetime", inplace=True)
return df
def get_nominal_net_position(self):
    """Load the nominal net position CSV as a quarter-hourly DataFrame.

    Reads the semicolon-delimited file at
    ``self.path + nominal_net_position_data_path``, drops the
    "Resolution code" column, and renames "Datetime" to ``datetime`` and
    "Implicit net position" to ``nominal_net_position``. Any other
    columns present in the file are kept as they are.

    Returns:
        A DataFrame with a timezone-aware (UTC) ``datetime`` column on a
        15-minute grid; slots missing from the file carry the previous
        row's values forward.
    """
    csv_path = self.path + nominal_net_position_data_path
    raw = pd.read_csv(csv_path, delimiter=";")

    # Drop the resolution column.
    trimmed = raw.drop(columns=["Resolution code"])

    column_names = {
        "Datetime": "datetime",
        "Implicit net position": "nominal_net_position",
    }
    net_position = trimmed.rename(columns=column_names)

    # Parse timestamps as UTC-aware pandas datetimes.
    net_position["datetime"] = pd.to_datetime(net_position["datetime"], utc=True)

    # Put every row on a quarter-hourly grid; slots that are missing in the
    # file are filled by copying the previous value.
    indexed = net_position.set_index("datetime")
    quarter_hourly = indexed.resample("15min").ffill()
    return quarter_hourly.reset_index()
def set_batch_size(self, batch_size: int):
    """Store ``batch_size`` on the instance as ``self.batch_size``."""
    self.batch_size = batch_size
@@ -199,6 +233,15 @@ class DataProcessor:
train_df["total_load"] = self.load_forecast_scaler.transform(
train_df["total_load"].values.reshape(-1, 1)
).reshape(-1)
train_df["wind_forecast"] = self.wind_forecast_scaler.fit_transform(
train_df["wind_forecast"].values.reshape(-1, 1)
).reshape(-1)
train_df["wind_history"] = self.wind_forecast_scaler.transform(
train_df["wind_history"].values.reshape(-1, 1)
).reshape(-1)
train_df["nominal_net_position"] = self.nominal_net_position_scaler.fit_transform(
train_df["nominal_net_position"].values.reshape(-1, 1)
).reshape(-1)
train_dataset = NrvDataset(
train_df,
@@ -230,6 +273,16 @@ class DataProcessor:
test_df["total_load"] = self.load_forecast_scaler.transform(
test_df["total_load"].values.reshape(-1, 1)
).reshape(-1)
test_df["wind_forecast"] = self.wind_forecast_scaler.transform(
test_df["wind_forecast"].values.reshape(-1, 1)
).reshape(-1)
test_df["wind_history"] = self.wind_forecast_scaler.transform(
test_df["wind_history"].values.reshape(-1, 1)
).reshape(-1)
test_df["nominal_net_position"] = self.nominal_net_position_scaler.transform(
test_df["nominal_net_position"].values.reshape(-1, 1)
).reshape(-1)
test_dataset = NrvDataset(
test_df,