Trying to get S4 running (no success)

2024-01-18 22:53:29 +00:00
28 changed files with 2235 additions and 4423 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -1 +0,0 @@
-*.csv filter=lfs diff=lfs merge=lfs -text
--- a/4
+++ b/4
@@ -1,5 +1,7 @@
-FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime

+FROM pytorch/pytorch:1.13.1-cuda11.6-cudnn8-devel
+#FROM getkeops/keops-full:2.1-geomloss0.2.5-cuda11.8-pytorch2.0.0-python3.10
+# FROM pytorch/pytorch:2.1.0-cuda11.8-cudnn8-devel
 RUN apt-get update
 RUN apt-get install -y git

--- a/Reports/February/MP2324_verslag2_Mylle_Victor.pdf
+++ b/Reports/February/MP2324_verslag2_Mylle_Victor.pdf
--- a/Reports/February/ea-en.pdf
+++ b/Reports/February/ea-en.pdf
--- a/Reports/February/ea-nl.pdf
+++ b/Reports/February/ea-nl.pdf
--- a/Reports/February/eb-en.pdf
+++ b/Reports/February/eb-en.pdf
--- a/Reports/February/ugent-doc.cls
+++ b/Reports/February/ugent-doc.cls
@@ -1,185 +0,0 @@
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-%
-%    Ghent University document class 
-%    Created by DF Benoit, December 15, 2022
-%
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-\NeedsTeXFormat{LaTeX2e}
-\ProvidesClass{ugent-doc}
-
-% Required packages
-\RequirePackage{kvoptions}
-\RequirePackage{geometry}
-\RequirePackage{calc} 
-\RequirePackage{graphicx}
-\RequirePackage{xcolor}
-
-% ugent-doc specific options (kvoptions)
-\SetupKeyvalOptions{family=ugd,prefix=ugd@} %UGentArticle
-
-% Declare the class specific options
-\DeclareStringOption[eb]{faculty}[eb]
-\DeclareStringOption[en]{language}[en]
-\DeclareStringOption[article]{doctype}[article]
-\DeclareBoolOption[true]{sftitles} % Default: true
-\ProcessKeyvalOptions*
-
-% Pass options not specified above to the parent class
-% \@unusedoptionlist is a macro in kvoptions
-\LoadClass[\@unusedoptionlist]{\ugd@doctype}
-
-% All sections, subsections and subsubsections in sans serif
-\ifugd@sftitles
-    \RequirePackage[sf]{titlesec}
-\fi
-
-% Define UGent colors
-%= = = = = = = = = = = 
-% Base colors
-% UGent blue 
-\definecolor{ugentblue}{RGB}{30,100,200}
-% UGent yellow 
-\definecolor{ugentyellow}{RGB}{255,210,0}
-% UGent white 
-\definecolor{ugentwhite}{RGB}{255,255,255}
-% UGent black 
-\definecolor{ugentblack}{RGB}{0,0,0}
-
-% Faculty specific colors
-% Faculty of Literature & Philosophy
-\definecolor{ugent-lw}{RGB}{241,164,43}
-% Faculty of Law
-\definecolor{ugent-re}{RGB}{220,78,40}
-% Faculty of Science
-\definecolor{ugent-we}{RGB}{45,140,168}
-% Faculty of Medicine and Health Sciences
-\definecolor{ugent-ge}{RGB}{232,94,113}
-% Faculty of Engineering and Architecture
-\definecolor{ugent-ea}{RGB}{139,190,232}
-% Faculty of Economics and Business Administration
-\definecolor{ugent-eb}{RGB}{174,176,80}
-% Faculty of Veterinary Medicine
-\definecolor{ugent-di}{RGB}{130,84,145}
-% Faculty of Psychology and Educational Sciences
-\definecolor{ugent-pp}{RGB}{251,126,58}
-% Faculty of Bioscience Engineering
-\definecolor{ugent-bw}{RGB}{39,171,173}
-% Faculty of Pharmaceutical Sciences
-\definecolor{ugent-fw}{RGB}{190,81,144}
-% Faculty of Political and Social Sciences
-\definecolor{ugent-ps}{RGB}{113,168,96}
-
-% Define new commands
-\def\thetitle#1{\def\@thetitle{#1}}
-\def\thesubtitle#1{\def\@thesubtitle{#1}}
-\def\infoboxa#1{\def\@infoboxa{#1}}
-\def\infoboxb#1{\def\@infoboxb{#1}}
-\def\infoboxc#1{\def\@infoboxc{#1}}
-\def\infoboxd#1{\def\@infoboxd{#1}}
-
-% Initialize new commands as 'empty'
-\def\@thetitle{}
-\def\@thesubtitle{}
-\def\@infoboxa{}
-\def\@infoboxb{}
-\def\@infoboxc{}
-\def\@infoboxd{}
-
-% Define lengths based on UGent document grid
-% See: https://styleguide.ugent.be/basic-principles/grid-and-layout.html
-\newlength{\longedge}
-\setlength{\longedge}{\maxof{\paperheight}{\paperwidth}}
-\newlength{\gridunit}
-\setlength{\gridunit}{\longedge/28} %Divide long edge by 7 and next by 4
-\newlength{\subpaperheight}
-\setlength{\subpaperheight}{\paperheight-7\gridunit} %Type area: 3 units for faculty logo, 4 units for UGent logo
-\newlength{\subpaperwidth}
-\setlength{\subpaperwidth}{\paperwidth-\gridunit} %Left margin of 1 gridunit
-
-% Define strut based on \gridunit
-\newcommand{\mystrut}[1][-.5]{\rule[#1\gridunit]{0pt}{0pt}}
-
-% Set default page layout
-% Can be overwritten in preamble of document
-\renewcommand{\baselinestretch}{1.15} % line spacing
-\geometry{bottom=2.5cm,top=2.5cm,left=3cm,right=2cm} % margins
-
-% Redefine the titlepage in accordance with UGent styleguide
-\renewcommand\maketitle{\begin{titlepage}%
-    \thispagestyle{empty} % by default, the pagestyle of title page is plain
-    \newgeometry{top=0cm, bottom=0cm, left=0cm, right=0cm} % set special margins
-    \setlength{\parindent}{0cm} % necessary to put minipages/boxes at extreme left of page  
-    \setlength{\parsep}{0cm} % necessary to stack minipages/boxes without space
-    \setlength{\fboxsep}{0cm} % no border around minipages/boxes
-    \setlength{\parskip}{0cm}
-    \setlength{\lineskip}{0cm}
-
-    \ifugd@sftitles
-        \sffamily % Titlepage in sans serif font
-    \fi
-
-    \includegraphics[height=3\gridunit]{\ugd@faculty-\ugd@language.pdf}%
-
-    \makebox[\gridunit]{}% Left margin of 1 gridunit
-    \colorbox{ugent-\ugd@faculty!30}{%
-    %\colorbox{ugentwhite}{%
-    \begin{minipage}[c][\subpaperheight][t]{\subpaperwidth}%
-    	\vskip 5\gridunit % top margin within minipage
-    	\hskip \gridunit % left margin of 1 within the colorbox 
-        %\fbox{%
-        \begin{minipage}{\subpaperwidth-2\gridunit} % tile minipage, right margin of 1
-            \raggedright\bfseries\huge
-            \textcolor{ugentblue}{\mystrut\@thetitle}\newline
-            \Large\textcolor{ugentblue}{\@thesubtitle}
-            \mystrut[1]
-        \end{minipage}%}
-
-        \vskip\fill % Push down to bottom of minipage
-
-        \ifx\@infoboxa\empty\else % ony put box if not empty
-            \hskip\gridunit % left margin of infobox
-            %\fbox{%
-            \begin{minipage}[b]{\subpaperwidth-3\gridunit} % right margin of 1
-                \@infoboxa
-            \end{minipage}%}
-
-            \baselineskip0pt\mystrut
-        \fi
-
-        \ifx\@infoboxb\empty\else % ony put box if not empty
-            \hskip\gridunit % left margin of infobox
-            %\fbox{%
-            \begin{minipage}[b]{\subpaperwidth-3\gridunit} % right margin of 1
-                \@infoboxb
-            \end{minipage}%}
-
-            \baselineskip0pt\mystrut
-        \fi
-
-        \ifx\@infoboxc\empty\else % ony put box if not empty
-            \hskip\gridunit % left margin of infobox
-            %\fbox{%
-            \begin{minipage}[b]{\subpaperwidth-3\gridunit} % right margin of 1
-                \@infoboxc
-            \end{minipage}%}
-
-            \baselineskip0pt\mystrut
-        \fi
-
-        \ifx\@infoboxd\empty\else % ony put box if not empty
-            \hskip\gridunit % left margin of infobox
-            %\fbox{%
-            \begin{minipage}[b]{\subpaperwidth-3\gridunit} % right margin of 1
-                \@infoboxd
-            \end{minipage}%}
-        \fi
-
-        \baselineskip0pt\mystrut[-1]
-    \end{minipage}
-    }%
-
-    \includegraphics[height=4\gridunit]{ugent-\ugd@language.pdf}%
-    \end{titlepage}
-    \restoregeometry
-}
--- a/Reports/February/ugent-en.pdf
+++ b/Reports/February/ugent-en.pdf
--- a/Reports/February/verslag.aux
+++ b/Reports/February/verslag.aux
@@ -1,34 +0,0 @@
-\relax 
-\providecommand\hyper@newdestlabel[2]{}
-\@nameuse{bbl@beforestart}
-\abx@aux@refcontext{nyt/global//global/global}
-\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
-\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
-\global\let\oldcontentsline\contentsline
-\gdef\contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
-\global\let\oldnewlabel\newlabel
-\gdef\newlabel#1#2{\newlabelxx{#1}#2}
-\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
-\AtEndDocument{\ifx\hyper@anchor\@undefined
-\let\contentsline\oldcontentsline
-\let\newlabel\oldnewlabel
-\fi}
-\fi}
-\global\let\hyper@last\relax 
-\gdef\HyperFirstAtBeginDocument#1{#1}
-\providecommand\HyField@AuxAddToFields[1]{}
-\providecommand\HyField@AuxAddToCoFields[2]{}
-\@writefile{toc}{\boolfalse {citerequest}\boolfalse {citetracker}\boolfalse {pagetracker}\boolfalse {backtracker}\relax }
-\@writefile{lof}{\boolfalse {citerequest}\boolfalse {citetracker}\boolfalse {pagetracker}\boolfalse {backtracker}\relax }
-\@writefile{lot}{\boolfalse {citerequest}\boolfalse {citetracker}\boolfalse {pagetracker}\boolfalse {backtracker}\relax }
-\babel@aux{english}{}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {1}Intermediate Results}{1}{section.1}\protected@file@percent }
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {1.1}Net Regulation Volume Modeling}{1}{subsection.1.1}\protected@file@percent }
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {1.1.1}Input Features}{1}{subsubsection.1.1.1}\protected@file@percent }
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {1.1.2}Models}{1}{subsubsection.1.1.2}\protected@file@percent }
-\@writefile{lot}{\defcounter {refsection}{0}\relax }\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Performance of Autoregressive Models}}{1}{table.1}\protected@file@percent }
-\newlabel{tab:general_models}{{1}{1}{Performance of Autoregressive Models}{table.1}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsubsection}{\numberline {1.1.3}Charging Policy}{2}{subsubsection.1.1.3}\protected@file@percent }
-\@writefile{lot}{\defcounter {refsection}{0}\relax }\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Comparison of Energy Storage Policies Using Predicted NRV. Battery of 2MWh with 1MW charge/discharge power. Evaluated on data from 01-01-2023 until 08-10-2023.}}{2}{table.2}\protected@file@percent }
-\newlabel{table:energy_storage_policies}{{2}{2}{Comparison of Energy Storage Policies Using Predicted NRV. Battery of 2MWh with 1MW charge/discharge power. Evaluated on data from 01-01-2023 until 08-10-2023}{table.2}{}}
-\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {2}Schedule next months}{3}{section.2}\protected@file@percent }
--- a/Reports/February/verslag.bcf
+++ b/Reports/February/verslag.bcf
--- a/Reports/February/verslag.log
+++ b/Reports/February/verslag.log
--- a/Reports/February/verslag.out
+++ b/Reports/February/verslag.out
@@ -1,6 +0,0 @@
-\BOOKMARK [1][-]{section.1}{Intermediate Results}{}% 1
-\BOOKMARK [2][-]{subsection.1.1}{Net Regulation Volume Modeling}{section.1}% 2
-\BOOKMARK [3][-]{subsubsection.1.1.1}{Input Features}{subsection.1.1}% 3
-\BOOKMARK [3][-]{subsubsection.1.1.2}{Models}{subsection.1.1}% 4
-\BOOKMARK [3][-]{subsubsection.1.1.3}{Charging Policy}{subsection.1.1}% 5
-\BOOKMARK [1][-]{section.2}{Schedule next months}{}% 6
--- a/Reports/February/verslag.pdf
+++ b/Reports/February/verslag.pdf
--- a/Reports/February/verslag.run.xml
+++ b/Reports/February/verslag.run.xml
@@ -1,89 +0,0 @@
-<?xml version="1.0" standalone="yes"?>
-<!-- logreq request file -->
-<!-- logreq version 1.0 / dtd version 1.0 -->
-<!-- Do not edit this file! -->
-<!DOCTYPE requests [
-  <!ELEMENT requests (internal | external)*>
-  <!ELEMENT internal (generic, (provides | requires)*)>
-  <!ELEMENT external (generic, cmdline?, input?, output?, (provides | requires)*)>
-  <!ELEMENT cmdline (binary, (option | infile | outfile)*)>
-  <!ELEMENT input (file)+>
-  <!ELEMENT output (file)+>
-  <!ELEMENT provides (file)+>
-  <!ELEMENT requires (file)+>
-  <!ELEMENT generic (#PCDATA)>
-  <!ELEMENT binary (#PCDATA)>
-  <!ELEMENT option (#PCDATA)>
-  <!ELEMENT infile (#PCDATA)>
-  <!ELEMENT outfile (#PCDATA)>
-  <!ELEMENT file (#PCDATA)>
-  <!ATTLIST requests
-    version CDATA #REQUIRED
-  >
-  <!ATTLIST internal
-    package CDATA #REQUIRED
-    priority (9) #REQUIRED
-    active (0 | 1) #REQUIRED
-  >
-  <!ATTLIST external
-    package CDATA #REQUIRED
-    priority (1 | 2 | 3 | 4 | 5 | 6 | 7 | 8) #REQUIRED
-    active (0 | 1) #REQUIRED
-  >
-  <!ATTLIST provides
-    type (static | dynamic | editable) #REQUIRED
-  >
-  <!ATTLIST requires
-    type (static | dynamic | editable) #REQUIRED
-  >
-  <!ATTLIST file
-    type CDATA #IMPLIED
-  >
-]>
-<requests version="1.0">
-  <internal package="biblatex" priority="9" active="0">
-    <generic>latex</generic>
-    <provides type="dynamic">
-      <file>verslag.bcf</file>
-    </provides>
-    <requires type="dynamic">
-      <file>verslag.bbl</file>
-    </requires>
-    <requires type="static">
-      <file>blx-dm.def</file>
-      <file>apa.dbx</file>
-      <file>blx-compat.def</file>
-      <file>biblatex.def</file>
-      <file>standard.bbx</file>
-      <file>apa.bbx</file>
-      <file>apa.cbx</file>
-      <file>biblatex.cfg</file>
-      <file>english.lbx</file>
-      <file>american.lbx</file>
-      <file>american-apa.lbx</file>
-      <file>english-apa.lbx</file>
-    </requires>
-  </internal>
-  <external package="biblatex" priority="5" active="0">
-    <generic>biber</generic>
-    <cmdline>
-      <binary>biber</binary>
-      <infile>verslag</infile>
-    </cmdline>
-    <input>
-      <file>verslag.bcf</file>
-    </input>
-    <output>
-      <file>verslag.bbl</file>
-    </output>
-    <provides type="dynamic">
-      <file>verslag.bbl</file>
-    </provides>
-    <requires type="dynamic">
-      <file>verslag.bcf</file>
-    </requires>
-    <requires type="editable">
-      <file>./references.bib</file>
-    </requires>
-  </external>
-</requests>
--- a/Reports/February/verslag.synctex.gz
+++ b/Reports/February/verslag.synctex.gz
--- a/Reports/February/verslag.tex
+++ b/Reports/February/verslag.tex
@@ -1,213 +0,0 @@
-\documentclass[12pt,a4paper,faculty=ea,language=en,doctype=article]{ugent-doc}
-
-% Optional: margins and spacing
-%-------------------------------
-% Uncomment and adjust to change the default values set by the template
-% Note: the defaults are suggested values by Ghent University
-%\geometry{bottom=2.5cm,top=2.5cm,left=3cm,right=2cm} 
-%\renewcommand{\baselinestretch}{1.15} % line spacing
-
-% Font
-%------
-\usepackage[T1]{fontenc}
-\usepackage[utf8]{inputenc} % allows non-ascii input characters
-% Comment or remove the two lines below to use the default Computer Modern font
-\usepackage{libertine}
-\usepackage{libertinust1math}
-\usepackage{enumitem}
-% NOTE: because the UGent font Panno is proprietary, it is not possible to use it
-% in Overleaf. But UGent does not suggest to use Panno for documents (or maybe only for
-% the titlepage). For the body, the UGent suggestion is to use a good serif font (for
-% LaTeX this could be libertine or Computer Modern).
-
-% Proper word splitting
-%-----------------------
-\usepackage[english]{babel} 
-
-% Mathematics
-%-------------
-\usepackage{amsmath}
-
-% Figures
-%---------
-\usepackage{graphicx} % optional: the package is already loaded by the template
-\graphicspath{{./figures/}}
-
-% Bibliography settings
-%-----------------------
-\usepackage[backend=biber, style=apa, sorting=nyt, hyperref=true]{biblatex}
-\addbibresource{./references.bib}
-\usepackage{csquotes} % Suggested when using babel+biblatex
-
-% Hyperreferences
-%-----------------
-\usepackage[colorlinks=true, allcolors=ugentblue]{hyperref}
-
-% Whitespace between paragraphs and no indentation
-%--------------------------------------------------
-\usepackage[parfill]{parskip} 
-
-% Input for title page
-%----------------------
-
-% The title
-\thesubtitle{February Intermediate Report}
-
-%% Note: a stricter UGent style could be achieved with, e.g.:
-\usepackage{ulem} % for colored underline
-\renewcommand{\ULthickness}{2pt} % adjust thickness of underline
-\thetitle{Forecasting and generative modeling of the Belgian electricity market}
-% Note: do not forget to reset the \ULthickness to 1pt after invoking \maketitle
-% (otherwise all underlines in the rest of your document will be too thick):
-%\renewcommand{\ULthickness}{1pt}
-
-% The first (top) infobox at bottom of titlepage
-\infoboxa{\bfseries\large Master Thesis}
-
-% The second infobox at bottom of titlepage
-\infoboxb{Name: 
-\begin{tabular}[t]{l}
-    Victor Mylle
-\end{tabular}
-}
-
-% The third infobox at bottom of titlepage
-\infoboxc{
-    Promotors:
-    \begin{tabular}[t]{l}
-        prof. dr. ir. Chris Develder \\
-        prof. Bert Claessens
-    \end{tabular}
-    \\\\
-    Supervisor: 
-    \begin{tabular}[t]{l}
-        Jonas Van Gompel
-    \end{tabular}
-}
-
-% The last (bottom) infobox at bottom of titlepage
-\infoboxd{Academic year: 2023--2024} % note dash, not hyphen
-
-
-\begin{document}
-
-% =====================================================================
-% Cover
-% =====================================================================
-
-% ------------ TITLE PAGE ---------
-\maketitle
-\renewcommand{\ULthickness}{1pt}
-
-% =====================================================================
-% Front matter
-% =====================================================================
-
-% ------------ TABLE OF CONTENTS ---------
-% {\hypersetup{hidelinks}\tableofcontents} % hide link color in toc
-% \newpage
-% \begin{titlepage}
-
-%     \centering % Centers everything on the page
-    
-%     % Logo or Image (Optional)
-%     % \includegraphics[width=0.5\textwidth]{path_to_logo.jpg} 
-
-%     \vspace*{2cm} % Add vertical space
-
-%     {\large Title: Forecasting and generative modeling of the Belgian electricity market\par}
-    
-%     \vspace{2cm}
-%     {\Large Victor Mylle\par}
-    
-%     \vspace{1cm}
-%     {\large Period of Internship: 3 July 2023 - 31 August 2023\par}
-    
-%     \vspace{1cm}
-%     {\large Mentor: dr. ir. Femke De Backere\par}
-%     {\large TechWolf supervisor: ir. Jens-Joris Decorte}
-    
-% \end{titlepage}
-
-\newpage
-
-\section{Intermediate Results}
-
-\subsection{Net Regulation Volume Modeling}
-Using a generative model, we try to predict the NRV for the next day. The model is trained on historical data and uses multiple input features to model the NRV. The data for the input features can all be downloaded from \href{https://www.elia.be/en/grid-data/open-data}{Elia Open Data}.
-
-\subsubsection{Input Features}
-The generative model uses multiple input features to predict the NRV.
-
-\begin{itemize}[noitemsep]
-    \item NRV History (NRV of yesterday)
-    \item Load Forecast (Forecasted load of tomorrow)
-    \item Load History (Load of yesterday)
-    \item Wind Forecast (Forecasted wind of tomorrow)
-    \item Wind History (Wind of yesterday)
-    \item Implicit net position  (Nominal net position of tomorrow)
-    \item Time features (Day of the week + quarter of the day)
-    \item Photovoltaic Forecast\textsuperscript{*}
-    \item Photovoltaic History\textsuperscript{*}
-\end{itemize}
-\textsuperscript{*} These features are not used currently, the data was not available. These features can easily be added without changing any code. 
-
-\subsubsection{Models}
-In the intermediate report of November, baselines were discussed. Now, other more advanced models are used. Samples must be generated using the model, this means the model can't just output one value but a distribution is needed. Quantile Regression can be used for this task. The model then outputs the values of multiple quantiles. For example, the model outputs the value for which 10\% of the data is lower, the value for which 50\% of the data is lower, etc. This way, the model outputs a distribution which can be used to sample from. The NRV predicitons are done in a quarter-hourly resolution. To predict the NRV for the next day, 96 values need to be sampled. This can be done in an autoregressive manner. The model outputs the quantiles for the first quarter-hour, a sample is drawn from this distribution and this sample is used as input for the next quarter-hour. This process is repeated 96 times.
-
-\begin{table}[h]
-    \centering
-    \begin{tabular}{lcc}
-    \hline
-    \textbf{Model} & \textbf{test\_L1Loss} & \textbf{test\_CRPSLoss} \\
-    \hline
-    Linear Model & 101.639 & 68.485 \\
-    Non Linear Model & 102.031 & 68.968 \\
-    LSTM/GRU Model & 104.261 & 66.052 \\
-    \hline
-    \end{tabular}
-    \caption{Performance of Autoregressive Models}
-    \label{tab:general_models}
-\end{table}
-
-At the moment, I am experimenting with a diffusion model to generatively model the NRV but more research and expermimenting needs to be done.
-
-\subsubsection{Charging Policy}
-Using the predicted NRV, a policy can be implemented to charge and discharge a battery. The goal of the policy is to maximize the profit made by selling the stored electricity. A simple policy is implemented to charge and discharge the battery based on 2 thresholds determined by the predicted NRV. The policy is evaluated on historical data and the profit is calculated. To determine the charge and discharge threshold, 1000 full NRV predictions are done for the next day and for each of these predicitions, the thresholds are determined. Next, the mean of these thresholds is used as the final threshold.
-
-\begin{table}[h]
-    \centering
-    \begin{tabular}{lccc}
-    \hline
-    \textbf{Policy} & \textbf{Total Profit (€)} & \textbf{Charge Cycles} \\
-    \hline
-    Baseline (charge: €150, discharge: €175) & 251,202.59 & 725 \\
-    Baseline (yesterday imbalance price) & 342,980.09 & 903 \\
-    GRU Predicted NRV (mean thresholds) & 339,846.91 & 842 \\
-    Diffusion Predicted NRV (mean thresholds) & 338,168.03 & 886 \\
-    \hline
-    \end{tabular}
-    \caption{Comparison of Energy Storage Policies Using Predicted NRV. Battery of 2MWh with 1MW charge/discharge power. Evaluated on data from 01-01-2023 until 08-10-2023.}
-    \label{table:energy_storage_policies}
-\end{table}
-
-The recommended charge cycles for a battery is <400 cycles per year. The policy also needs to take this into account. A penalty parameter can be introduced and determined so that the policy is penalized for every charge cycle above 400. The policy can then be optimized using this penalty parameter. I am currenlty experimenting with this.
-
-
-\newpage
-\section{Schedule next months}
-\begin{itemize}
-    \item Baselines with penalties for charge cycles above 400
-    \item Better visualizations of the policy profit results.
-    \item Case studies of days with extreme thresholds
-    
-    \item Finetuning of models and hyperparametres based on model errors and profits of the policy
-    \item Ablation study of input features
-    
-    \item Experiment further with diffusion models
-    
-    \item During the experimenting, I will write my thesis and update the results and conclusions chapters.
-\end{itemize}
-
-\end{document}
-
--- a/Result-Reports/Policies.md
+++ b/Result-Reports/Policies.md
@@ -144,61 +144,5 @@ Test data: 01-01-2023 until 08-10–2023
 - [x] Profit penalty parameter als over charge cycles voor een dag -> parameter bepalen op training data (convex probleem) (< 400 charge cycles per jaar) (over een dag kijken hoeveel charge cycles -> profit - penalty * charge cycles erover, (misschien belonen als eronder charge cycles))

 - [ ] Meer verschil bekijken tussen GRU en diffusion
- [ ] (In Progress) Andere lagen voor diffusion model (GRU, kijken naar TSDiff)
- [x] Policies met andere modellen (Linear, Non Linear)
-
- [ ] Visualize the policies over the whole testr set -> thresholds plotten voor elke dag (elke policy) -> mss distribution om overzichtelijk te houden (mean and std)
- [ ] Probleem met diffusion model (activation function? waarom direct grote waardes?)
-
- [ ] Autoregressive confidence problem -> Quantiles zelf uit elkaar halen (helpt dit?)
-
- [ ] time steps reducing for diffusion model (UNet activation functions?)
-
- [ ] (State space model? S4)
-
-
-
-TODO:
- [ ] diffusion model oefening generative models vragen
- [ ] Non autoregressive models policy testen (Non Linear eerst) -> als dit al slect, niet verder kijken, wel vermelden
- [ ] Policy in test set -> over charge cycles (stop trading electricity)
-
- [ ] penalty bepalen op training data
-
- [ ] cycles en profit herschalen naar per jaar
-
-
-baseline -> NRV van gisteren gebruiken om thresholds te bepalen voor vandaag
-andere policies -> NRV van vandaag voorspellen met model en thresholds bepalen voor vandaag
-
-
-Eerste baseline -> thresholds bepalen op training data maar ook stoppen als 400 cycles (herschalen) per jaar bereikt zijn -> thresholds zouden anders moeten zijn (Ook met penalty parameter)
-> deze toepassen op test set (ook stoppen als 400/jaar bereikt zijn)
-
-
-Visualizatie van thresholds over test set voor baselines en complexere modellen -> zonder penalties tonen
-
-
-
-
-1 a 2 Case studies (extreme gevallen, thresholds 150, -5, normale mss)
-
- Generatie van NRV (echte NRV)
- Thresholds die eruit komen
- Profit en charge cycles
-
-
-
-Policy volledig fixen en later training script met policy direct erachter (tijdens schrijven door laten runnen)
-
-1) Policy
-2) Finetuning van modellen (+ vergelijken met elkaar opbv profit en error)
-3) Ablation Study (input features weghalen en kijken wat er gebeurt)
-( 4) Diffusion tussendoor )  
-
-
-Inleiding +
-Literatuurstudie +
-Tabellen die we gaan bespreken -> updaten met nieuwe data dan
-
-Nog eens 3e meeting opbrengen voor 2e deel maart.
+- [ ] Andere lagen voor diffusion model (GRU, kijken naar TSDiff)
+- [x] Policies met andere modellen (Linear, Non Linear)
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,4 +11,6 @@ clearml
 properscoring
 nbconvert
 torchinfo
-tabulate
+tabulate
+einops
+opt_einsum
--- a/src/models/diffusion_model.py
+++ b/src/models/diffusion_model.py
@@ -45,53 +45,3 @@ class SimpleDiffusionModel(DiffusionModel):
            self.layers.append(nn.ReLU())

        self.layers.append(nn.Linear(hidden_sizes[-1] + time_dim + other_inputs_dim, input_size))
-
-class GRUDiffusionModel(DiffusionModel):
-    def __init__(self, input_size: int, hidden_sizes: list, other_inputs_dim: int, gru_hidden_size: int, time_dim: int = 64):
-        super(GRUDiffusionModel, self).__init__(time_dim)
-        
-        self.other_inputs_dim = other_inputs_dim
-        self.gru_hidden_size = gru_hidden_size
-
-        # GRU layer
-        self.gru = nn.GRU(input_size=input_size + time_dim + other_inputs_dim,
-                          hidden_size=gru_hidden_size,
-                          num_layers=3,
-                          batch_first=True)
-
-        # Fully connected layers after GRU
-        self.fc_layers = nn.ModuleList()
-        prev_size = gru_hidden_size
-        for hidden_size in hidden_sizes:
-            self.fc_layers.append(nn.Linear(prev_size, hidden_size))
-            self.fc_layers.append(nn.ReLU())
-            prev_size = hidden_size
-
-        # Final output layer
-        self.fc_layers.append(nn.Linear(prev_size, input_size))
-
-    def forward(self, x, t, inputs):
-        batch_size, seq_len = x.shape
-        x = x.unsqueeze(-1).repeat(1, 1, seq_len)
-
-        # Positional encoding for each time step
-        t = t.unsqueeze(-1).type(torch.float)
-        t = self.pos_encoding(t, self.time_dim) # Shape: [batch_size, seq_len, time_dim]
-
-        # repeat time encoding for each time step t is shape [batch_size, time_dim], i want [batch_size, seq_len, time_dim]
-        t = t.unsqueeze(1).repeat(1, seq_len, 1)
-
-        # Concatenate x, t, and inputs along the feature dimension
-        x = torch.cat((x, t, inputs), dim=-1) # Shape: [batch_size, seq_len, input_size + time_dim + other_inputs_dim]
-
-        # Pass through GRU
-        output, hidden = self.gru(x) # Hidden Shape: [batch_size, seq_len, 1]
-
-        # Get last hidden state
-        x = hidden[-1]
-
-        # Process each time step's output with fully connected layers
-        for layer in self.fc_layers:
-            x = layer(x)
-
-        return x
--- a/src/models/tsdiff_s4/backbones.py
+++ b/src/models/tsdiff_s4/backbones.py
@@ -0,0 +1,172 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+import math
+
+import torch
+from torch import nn
+
+from src.models.tsdiff_s4.s4 import S4
+
+
+class SinusoidalPositionEmbeddings(nn.Module):
+    def __init__(self, dim):
+        super().__init__()
+        self.dim = dim
+
+    def forward(self, time):
+        device = time.device
+        half_dim = self.dim // 2
+        embeddings = math.log(10000) / (half_dim - 1)
+        embeddings = torch.exp(
+            torch.arange(half_dim, device=device) * -embeddings
+        )
+        embeddings = time[:, None] * embeddings[None, :]
+        embeddings = torch.cat((embeddings.sin(), embeddings.cos()), dim=-1)
+        return embeddings
+
+
+class S4Layer(nn.Module):
+    def __init__(
+        self,
+        d_model,
+        dropout=0.0,
+    ):
+        super().__init__()
+        self.layer = S4(
+            d_model=d_model,
+            d_state=128,
+            bidirectional=True,
+            dropout=dropout,
+            transposed=True,
+            postact=None,
+        )
+        self.norm = nn.LayerNorm(d_model)
+        self.dropout = (
+            nn.Dropout1d(dropout) if dropout > 0.0 else nn.Identity()
+        )
+
+    def forward(self, x):
+        """
+        Input x is shape (B, d_input, L)
+        """
+        z = x
+        # Prenorm
+        z = self.norm(z.transpose(-1, -2)).transpose(-1, -2)
+        # Apply layer: we ignore the state input and output for training
+        z, _ = self.layer(z)
+        # Dropout on the output of the layer
+        z = self.dropout(z)
+        # Residual connection
+        x = z + x
+        return x, None
+
+    def default_state(self, *args, **kwargs):
+        return self.layer.default_state(*args, **kwargs)
+
+    def step(self, x, state, **kwargs):
+        z = x
+        # Prenorm
+        z = self.norm(z.transpose(-1, -2)).transpose(-1, -2)
+        # Apply layer
+        z, state = self.layer.step(z, state, **kwargs)
+        # Residual connection
+        x = z + x
+        return x, state
+
+
+class S4Block(nn.Module):
+    def __init__(self, d_model, dropout=0.0, expand=2, num_features=0):
+        super().__init__()
+        self.s4block = S4Layer(d_model, dropout=dropout)
+
+        self.time_linear = nn.Linear(d_model, d_model)
+        self.tanh = nn.Tanh()
+        self.sigm = nn.Sigmoid()
+        self.out_linear1 = nn.Conv1d(
+            in_channels=d_model, out_channels=d_model, kernel_size=1
+        )
+        self.out_linear2 = nn.Conv1d(
+            in_channels=d_model, out_channels=d_model, kernel_size=1
+        )
+        self.feature_encoder = nn.Conv1d(num_features, d_model, kernel_size=1)
+
+    def forward(self, x, t, features=None):
+        t = self.time_linear(t)[:, None, :].repeat(1, x.shape[2], 1)
+        t = t.transpose(-1, -2)
+        out, _ = self.s4block(x + t)
+        if features is not None:
+            out = out + self.feature_encoder(features)
+        out = self.tanh(out) * self.sigm(out)
+        out1 = self.out_linear1(out)
+        out2 = self.out_linear2(out)
+        return out1 + x, out2
+
+
+def Conv1dKaiming(in_channels, out_channels, kernel_size):
+    layer = nn.Conv1d(in_channels, out_channels, kernel_size)
+    nn.init.kaiming_normal_(layer.weight)
+    return layer
+
+
+class BackboneModel(nn.Module):
+    def __init__(
+        self,
+        input_dim,
+        hidden_dim,
+        output_dim,
+        step_emb,
+        num_residual_blocks,
+        num_features,
+        residual_block="s4",
+        dropout=0.0,
+        init_skip=True,
+    ):
+        super().__init__()
+        if residual_block == "s4":
+            residual_block = S4Block
+        else:
+            raise ValueError(f"Unknown residual block {residual_block}")
+        self.input_init = nn.Sequential(
+            nn.Linear(input_dim, hidden_dim),
+            nn.ReLU(),
+        )
+        self.time_init = nn.Sequential(
+            nn.Linear(step_emb, hidden_dim),
+            nn.SiLU(),
+            nn.Linear(hidden_dim, hidden_dim),
+            nn.SiLU(),
+        )
+        self.out_linear = nn.Sequential(
+            nn.Linear(hidden_dim, hidden_dim),
+            nn.ReLU(),
+            nn.Linear(hidden_dim, output_dim),
+        )
+        residual_blocks = []
+        for i in range(num_residual_blocks):
+            residual_blocks.append(
+                residual_block(
+                    hidden_dim, num_features=num_features, dropout=dropout
+                )
+            )
+        self.residual_blocks = nn.ModuleList(residual_blocks)
+        self.step_embedding = SinusoidalPositionEmbeddings(step_emb)
+        self.init_skip = init_skip
+
+    def forward(self, input, t, features=None):
+        x = self.input_init(input)  # B, L ,C
+        step_emb = self.step_embedding(t)
+        t = self.time_init(step_emb)
+        x = x.transpose(-1, -2)
+        if features is not None:
+            features = features.transpose(-1, -2)
+        skips = []
+        for layer in self.residual_blocks:
+            x, skip = layer(x, t, features)
+            skips.append(skip)
+
+        skip = torch.stack(skips).sum(0)
+        skip = skip.transpose(-1, -2)
+        out = self.out_linear(skip)
+        if self.init_skip:
+            out = out + input
+        return out
--- a/src/models/tsdiff_s4/s4.py
+++ b/src/models/tsdiff_s4/s4.py
--- a/src/notebooks/diffusion-training.ipynb
+++ b/src/notebooks/diffusion-training.ipynb
@@ -4,18 +4,39 @@
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/opt/conda/lib/python3.10/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    }
+   ],
   "source": [
    "import sys\n",
    "sys.path.append('../..')\n",
-    "import torch"
+    "import torch\n",
+    "\n",
+    "%load_ext autoreload\n",
+    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The autoreload extension is already loaded. To reload it, use:\n",
+      "  %reload_ext autoreload\n"
+     ]
+    }
+   ],
   "source": [
    "from src.data import DataProcessor, DataConfig\n",
    "from src.trainers.quantile_trainer import AutoRegressiveQuantileTrainer, NonAutoRegressiveQuantileRegression\n",
@@ -31,7 +52,7 @@
    "from datetime import datetime\n",
    "import torch.nn as nn\n",
    "from src.models.time_embedding_layer import TimeEmbedding\n",
-    "from src.models.diffusion_model import SimpleDiffusionModel, GRUDiffusionModel\n",
+    "from src.models.diffusion_model import SimpleDiffusionModel\n",
    "from src.trainers.diffusion_trainer import DiffusionTrainer\n",
    "from torchinfo import summary\n",
    "\n",
@@ -44,7 +65,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -63,98 +84,29 @@
    "data_config.NOMINAL_NET_POSITION = True\n",
    "\n",
    "data_processor = DataProcessor(data_config, path=\"../../\", lstm=True)\n",
-    "data_processor.set_batch_size(1024)\n",
+    "data_processor.set_batch_size(128)\n",
    "data_processor.set_full_day_skip(True)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "torch.Size([1024, 96, 96])\n"
+      "ClearML Task: created new task id=b71216825809432682ea3c7841c07612\n",
+      "ClearML results page: http://192.168.1.182:8080/projects/2e46d4af6f1e4c399cf9f5aa30bc8795/experiments/b71216825809432682ea3c7841c07612/output/log\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/loss.py:536: UserWarning: Using a target size (torch.Size([1024, 96])) that is different to the input size (torch.Size([2, 1024, 96])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n",
-      "  return F.mse_loss(input, target, reduction=self.reduction)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "torch.Size([556, 96, 96])\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/loss.py:536: UserWarning: Using a target size (torch.Size([556, 96])) that is different to the input size (torch.Size([2, 556, 96])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.\n",
-      "  return F.mse_loss(input, target, reduction=self.reduction)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "torch.Size([1024, 96, 96])\n",
-      "torch.Size([556, 96, 96])\n",
-      "torch.Size([1024, 96, 96])\n",
-      "torch.Size([556, 96, 96])\n",
-      "torch.Size([1024, 96, 96])\n",
-      "torch.Size([556, 96, 96])\n",
-      "torch.Size([1024, 96, 96])\n",
-      "torch.Size([556, 96, 96])\n",
-      "torch.Size([1024, 96, 96])\n",
-      "torch.Size([556, 96, 96])\n",
-      "torch.Size([1024, 96, 96])\n",
-      "torch.Size([556, 96, 96])\n",
-      "torch.Size([1024, 96, 96])\n",
-      "torch.Size([556, 96, 96])\n",
-      "torch.Size([1024, 96, 96])\n",
-      "torch.Size([556, 96, 96])\n",
-      "torch.Size([1024, 96, 96])\n",
-      "torch.Size([556, 96, 96])\n",
-      "torch.Size([1024, 96, 96])\n",
-      "torch.Size([556, 96, 96])\n",
-      "torch.Size([1024, 96, 96])\n",
-      "torch.Size([556, 96, 96])\n",
-      "torch.Size([1024, 96, 96])\n",
-      "torch.Size([556, 96, 96])\n",
-      "torch.Size([1024, 96, 96])\n",
-      "torch.Size([556, 96, 96])\n",
-      "torch.Size([1024, 96, 96])\n",
-      "torch.Size([556, 96, 96])\n",
-      "torch.Size([1024, 96, 96])\n",
-      "torch.Size([556, 96, 96])\n",
-      "torch.Size([1024, 96, 96])\n",
-      "torch.Size([556, 96, 96])\n",
-      "torch.Size([1024, 96, 96])\n",
-      "torch.Size([556, 96, 96])\n",
-      "torch.Size([1024, 96, 96])\n",
-      "torch.Size([556, 96, 96])\n",
-      "torch.Size([1024, 96, 96])\n",
-      "torch.Size([556, 96, 96])\n",
-      "torch.Size([1024, 96, 96])\n",
-      "torch.Size([556, 96, 96])\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "KeyboardInterrupt\n",
-      "\n"
+      "500 model found when searching for `file:///workspaces/Thesis/src/notebooks/checkpoint.pt`\n",
+      "Selected model `Autoregressive Non Linear Quantile Regression + Quarter + DoW + Net` (id=bc0cb0d7fc614e2e8b0edf5b85348646)\n"
     ]
    }
   ],
@@ -164,15 +116,14 @@
    "epochs=150\n",
    "\n",
    "#### Model ####\n",
-    "# model = SimpleDiffusionModel(96, [512, 512, 512], other_inputs_dim=inputDim[1], time_dim=64)\n",
-    "model = GRUDiffusionModel(96, [256, 256], other_inputs_dim=inputDim[2], time_dim=64, gru_hidden_size=128)\n",
+    "model = SimpleDiffusionModel(96, [512, 512, 512], other_inputs_dim=inputDim[1], time_dim=64)\n",
    "\n",
    "#### ClearML ####\n",
-    "# task = clearml_helper.get_task(task_name=\"Diffusion Model\")\n",
+    "task = clearml_helper.get_task(task_name=\"Diffusion Model\")\n",
    "\n",
    "#### Trainer ####\n",
    "trainer = DiffusionTrainer(model, data_processor, \"cuda\")\n",
-    "trainer.train(epochs, learningRate, None)"
+    "trainer.train(epochs, learningRate, task)"
   ]
  },
  {
@@ -292,6 +243,165 @@
    "sample_diffusion(new_model, 1, inputs)"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Trying out BackboneModel using S4 state space model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[KeOps] Compiling cuda jit compiler engine ... \n",
+      "[KeOps] Warning : There were warnings or errors compiling formula :\n",
+      "/usr/bin/ld: warning: /opt/conda/lib/libstdc++.so: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010001\n",
+      "/usr/bin/ld: warning: /opt/conda/lib/libstdc++.so: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010002\n",
+      "/usr/bin/ld: warning: /opt/conda/lib/libgcc_s.so.1: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010001\n",
+      "/usr/bin/ld: warning: /opt/conda/lib/libgcc_s.so.1: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010002\n",
+      "/usr/bin/ld: warning: /opt/conda/lib/libgcc_s.so.1: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010001\n",
+      "/usr/bin/ld: warning: /opt/conda/lib/libgcc_s.so.1: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010002\n",
+      "\n",
+      "OK\n",
+      "[pyKeOps] Compiling nvrtc binder for python ... \n",
+      "[KeOps] Warning : There were warnings or errors compiling formula :\n",
+      "/usr/bin/ld: warning: /opt/conda/lib/libstdc++.so: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010001\n",
+      "/usr/bin/ld: warning: /opt/conda/lib/libstdc++.so: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010002\n",
+      "/usr/bin/ld: warning: /opt/conda/lib/libgcc_s.so.1: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010001\n",
+      "/usr/bin/ld: warning: /opt/conda/lib/libgcc_s.so.1: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010002\n",
+      "/usr/bin/ld: warning: /opt/conda/lib/libgcc_s.so.1: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010001\n",
+      "/usr/bin/ld: warning: /opt/conda/lib/libgcc_s.so.1: unsupported GNU_PROPERTY_TYPE (5) type: 0xc0010002\n",
+      "\n",
+      "OK\n"
+     ]
+    }
+   ],
+   "source": [
+    "import sys\n",
+    "sys.path.append('../..')\n",
+    "import torch\n",
+    "\n",
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "from src.models.tsdiff_s4.backbones import BackboneModel\n",
+    "from src.trainers.diffusion_trainer import DiffusionTrainer\n",
+    "\n",
+    "backbone = BackboneModel(\n",
+    "    input_dim=1,\n",
+    "    hidden_dim=512,\n",
+    "    output_dim=1,\n",
+    "    step_emb=128,\n",
+    "    num_residual_blocks=3,\n",
+    "    num_features=2\n",
+    ")\n",
+    "backbone = backbone.to(\"cuda\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[KeOps] Generating code for formula Sum_Reduction(ComplexMult(Real2Complex(1/ComplexSquareAbs(ComplexMult(Var(1,2,0)-Var(2,2,1),Var(1,2,0)-Conj(Var(2,2,1))))),ComplexMult(Var(1,2,0)*ComplexReal(Var(0,2,1))-Real2Complex(Sum(Var(0,2,1)*Var(2,2,1))),Conj(ComplexMult(Var(1,2,0)-Var(2,2,1),Var(1,2,0)-Conj(Var(2,2,1)))))),0) ... "
+     ]
+    },
+    {
+     "ename": "",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31mThe Kernel crashed while executing code in the the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
+     ]
+    }
+   ],
+   "source": [
+    "# now let's find out what the input shapes of the features and input must be\n",
+    "\n",
+    "# input: (B, L, C)\n",
+    "# features: (B, L, F)\n",
+    "# time: (B,)\n",
+    "\n",
+    "# output: (B, L, C)? \n",
+    "\n",
+    "input = torch.randn(2, 96, 1).to(\"cuda\")\n",
+    "features = torch.randn(2, 96, 2).to(\"cuda\")\n",
+    "times = torch.randn(2).to(\"cuda\")\n",
+    "\n",
+    "backbone(input, times, features).shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "KeyError",
+     "evalue": "'nvrtc'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[3], line 13\u001b[0m\n\u001b[1;32m     10\u001b[0m times \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mtensor([\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m2\u001b[39m)\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcuda\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m     11\u001b[0m features \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mrandn(\u001b[38;5;241m2\u001b[39m, \u001b[38;5;241m96\u001b[39m, \u001b[38;5;241m2\u001b[39m)\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcuda\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 13\u001b[0m \u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfeatures\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mshape\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1496\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1497\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1499\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1500\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1502\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m   1503\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n",
+      "File \u001b[0;32m/workspaces/Thesis/src/notebooks/../../src/models/tsdiff_s4/backbones.py:164\u001b[0m, in \u001b[0;36mBackboneModel.forward\u001b[0;34m(self, input, t, features)\u001b[0m\n\u001b[1;32m    162\u001b[0m skips \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m    163\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m layer \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresidual_blocks:\n\u001b[0;32m--> 164\u001b[0m     x, skip \u001b[38;5;241m=\u001b[39m \u001b[43mlayer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfeatures\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    165\u001b[0m     skips\u001b[38;5;241m.\u001b[39mappend(skip)\n\u001b[1;32m    167\u001b[0m skip \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mstack(skips)\u001b[38;5;241m.\u001b[39msum(\u001b[38;5;241m0\u001b[39m)\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1496\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1497\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1499\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1500\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1502\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m   1503\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n",
+      "File \u001b[0;32m/workspaces/Thesis/src/notebooks/../../src/models/tsdiff_s4/backbones.py:96\u001b[0m, in \u001b[0;36mS4Block.forward\u001b[0;34m(self, x, t, features)\u001b[0m\n\u001b[1;32m     94\u001b[0m t \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtime_linear(t)[:, \u001b[38;5;28;01mNone\u001b[39;00m, :]\u001b[38;5;241m.\u001b[39mrepeat(\u001b[38;5;241m1\u001b[39m, x\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m2\u001b[39m], \u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m     95\u001b[0m t \u001b[38;5;241m=\u001b[39m t\u001b[38;5;241m.\u001b[39mtranspose(\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m2\u001b[39m)\n\u001b[0;32m---> 96\u001b[0m out, _ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43ms4block\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     97\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m features \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m     98\u001b[0m     out \u001b[38;5;241m=\u001b[39m out \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfeature_encoder(features)\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1496\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1497\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1499\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1500\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1502\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m   1503\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n",
+      "File \u001b[0;32m/workspaces/Thesis/src/notebooks/../../src/models/tsdiff_s4/backbones.py:56\u001b[0m, in \u001b[0;36mS4Layer.forward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m     54\u001b[0m z \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnorm(z\u001b[38;5;241m.\u001b[39mtranspose(\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m2\u001b[39m))\u001b[38;5;241m.\u001b[39mtranspose(\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m2\u001b[39m)\n\u001b[1;32m     55\u001b[0m \u001b[38;5;66;03m# Apply layer: we ignore the state input and output for training\u001b[39;00m\n\u001b[0;32m---> 56\u001b[0m z, _ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlayer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mz\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     57\u001b[0m \u001b[38;5;66;03m# Dropout on the output of the layer\u001b[39;00m\n\u001b[1;32m     58\u001b[0m z \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdropout(z)\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1496\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1497\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1499\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1500\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1502\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m   1503\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n",
+      "File \u001b[0;32m/workspaces/Thesis/src/notebooks/../../src/models/tsdiff_s4/s4.py:1761\u001b[0m, in \u001b[0;36mS4.forward\u001b[0;34m(self, u, state, rate, lengths, **kwargs)\u001b[0m\n\u001b[1;32m   1759\u001b[0m \u001b[38;5;66;03m# Compute SS Kernel\u001b[39;00m\n\u001b[1;32m   1760\u001b[0m L_kernel \u001b[38;5;241m=\u001b[39m L \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mL \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mmin\u001b[39m(L, \u001b[38;5;28mround\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mL \u001b[38;5;241m/\u001b[39m rate))\n\u001b[0;32m-> 1761\u001b[0m k, k_state \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkernel\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1762\u001b[0m \u001b[43m    \u001b[49m\u001b[43mL\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mL_kernel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstate\u001b[49m\n\u001b[1;32m   1763\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m  \u001b[38;5;66;03m# (C H L) (B C H L)\u001b[39;00m\n\u001b[1;32m   1765\u001b[0m \u001b[38;5;66;03m# Convolution\u001b[39;00m\n\u001b[1;32m   1766\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbidirectional:\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1496\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1497\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1499\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1500\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1502\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m   1503\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n",
+      "File \u001b[0;32m/workspaces/Thesis/src/notebooks/../../src/models/tsdiff_s4/s4.py:1549\u001b[0m, in \u001b[0;36mSSKernel.forward\u001b[0;34m(self, state, L, rate)\u001b[0m\n\u001b[1;32m   1548\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, state\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, L\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, rate\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1.0\u001b[39m):\n\u001b[0;32m-> 1549\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkernel\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mL\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mL\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrate\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrate\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1501\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1496\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1497\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1499\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1500\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1501\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1502\u001b[0m \u001b[38;5;66;03m# Do not call functions when jit is used\u001b[39;00m\n\u001b[1;32m   1503\u001b[0m full_backward_hooks, non_full_backward_hooks \u001b[38;5;241m=\u001b[39m [], []\n",
+      "File \u001b[0;32m/workspaces/Thesis/src/notebooks/../../src/models/tsdiff_s4/s4.py:925\u001b[0m, in \u001b[0;36mSSKernelNPLR.forward\u001b[0;34m(self, state, rate, L)\u001b[0m\n\u001b[1;32m    923\u001b[0m     r \u001b[38;5;241m=\u001b[39m cauchy_mult(v, z, w, symmetric\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m    924\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m has_pykeops:\n\u001b[0;32m--> 925\u001b[0m     r \u001b[38;5;241m=\u001b[39m \u001b[43mcauchy_conj\u001b[49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mz\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mw\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    926\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    927\u001b[0m     r \u001b[38;5;241m=\u001b[39m cauchy_naive(v, z, w)\n",
+      "File \u001b[0;32m/workspaces/Thesis/src/notebooks/../../src/models/tsdiff_s4/s4.py:89\u001b[0m, in \u001b[0;36mcauchy_conj\u001b[0;34m(v, z, w)\u001b[0m\n\u001b[1;32m     86\u001b[0m z \u001b[38;5;241m=\u001b[39m _c2r(z)\n\u001b[1;32m     87\u001b[0m w \u001b[38;5;241m=\u001b[39m _c2r(w)\n\u001b[0;32m---> 89\u001b[0m r \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m2\u001b[39m \u001b[38;5;241m*\u001b[39m \u001b[43mcauchy_mult\u001b[49m\u001b[43m(\u001b[49m\u001b[43mv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mz\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mw\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbackend\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mGPU\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m     90\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _r2c(r)\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pykeops/torch/generic/generic_red.py:688\u001b[0m, in \u001b[0;36mGenred.__call__\u001b[0;34m(self, backend, device_id, ranges, out, *args)\u001b[0m\n\u001b[1;32m    686\u001b[0m params\u001b[38;5;241m.\u001b[39mny \u001b[38;5;241m=\u001b[39m ny\n\u001b[1;32m    687\u001b[0m params\u001b[38;5;241m.\u001b[39mout \u001b[38;5;241m=\u001b[39m out\n\u001b[0;32m--> 688\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[43mGenredAutograd_fun\u001b[49m\u001b[43m(\u001b[49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    690\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m postprocess(out, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtorch\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreduction_op, nout, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mopt_arg, dtype)\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pykeops/torch/generic/generic_red.py:384\u001b[0m, in \u001b[0;36mGenredAutograd_fun\u001b[0;34m(*inputs)\u001b[0m\n\u001b[1;32m    383\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mGenredAutograd_fun\u001b[39m(\u001b[38;5;241m*\u001b[39minputs):\n\u001b[0;32m--> 384\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mGenredAutograd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m[\u001b[38;5;241m0\u001b[39m]\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/autograd/function.py:506\u001b[0m, in \u001b[0;36mFunction.apply\u001b[0;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[1;32m    503\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m torch\u001b[38;5;241m.\u001b[39m_C\u001b[38;5;241m.\u001b[39m_are_functorch_transforms_active():\n\u001b[1;32m    504\u001b[0m     \u001b[38;5;66;03m# See NOTE: [functorch vjp and autograd interaction]\u001b[39;00m\n\u001b[1;32m    505\u001b[0m     args \u001b[38;5;241m=\u001b[39m _functorch\u001b[38;5;241m.\u001b[39mutils\u001b[38;5;241m.\u001b[39munwrap_dead_wrappers(args)\n\u001b[0;32m--> 506\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m    508\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39msetup_context \u001b[38;5;241m==\u001b[39m _SingleLevelFunction\u001b[38;5;241m.\u001b[39msetup_context:\n\u001b[1;32m    509\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m    510\u001b[0m         \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mIn order to use an autograd.Function with functorch transforms \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m    511\u001b[0m         \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m(vmap, grad, jvp, jacrev, ...), it must override the setup_context \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m    512\u001b[0m         \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mstaticmethod. 
For more details, please see \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m    513\u001b[0m         \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhttps://pytorch.org/docs/master/notes/extending.func.html\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pykeops/torch/generic/generic_red.py:295\u001b[0m, in \u001b[0;36mGenredAutograd.forward\u001b[0;34m(*inputs)\u001b[0m\n\u001b[1;32m    293\u001b[0m \u001b[38;5;129m@staticmethod\u001b[39m\n\u001b[1;32m    294\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;241m*\u001b[39minputs):\n\u001b[0;32m--> 295\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mGenredAutograd_base\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/pykeops/torch/generic/generic_red.py:91\u001b[0m, in \u001b[0;36mGenredAutograd_base._forward\u001b[0;34m(params, *args)\u001b[0m\n\u001b[1;32m     85\u001b[0m device_id, device_args \u001b[38;5;241m=\u001b[39m set_device(\n\u001b[1;32m     86\u001b[0m     tagCPUGPU, tagHostDevice, params\u001b[38;5;241m.\u001b[39mdevice_id_request, \u001b[38;5;241m*\u001b[39margs\n\u001b[1;32m     87\u001b[0m )\n\u001b[1;32m     89\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpykeops\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcommon\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mkeops_io\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m keops_binder\n\u001b[0;32m---> 91\u001b[0m myconv \u001b[38;5;241m=\u001b[39m \u001b[43mkeops_binder\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mnvrtc\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mtagCPUGPU\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcpp\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m(\n\u001b[1;32m     92\u001b[0m     tagCPUGPU,\n\u001b[1;32m     93\u001b[0m     tag1D2D,\n\u001b[1;32m     94\u001b[0m     tagHostDevice,\n\u001b[1;32m     95\u001b[0m     use_ranges,\n\u001b[1;32m     96\u001b[0m     device_id,\n\u001b[1;32m     97\u001b[0m     params\u001b[38;5;241m.\u001b[39mformula,\n\u001b[1;32m     98\u001b[0m     params\u001b[38;5;241m.\u001b[39maliases,\n\u001b[1;32m     99\u001b[0m     \u001b[38;5;28mlen\u001b[39m(args),\n\u001b[1;32m    100\u001b[0m     params\u001b[38;5;241m.\u001b[39mdtype,\n\u001b[1;32m    101\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtorch\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m    102\u001b[0m     
params\u001b[38;5;241m.\u001b[39moptional_flags,\n\u001b[1;32m    103\u001b[0m )\u001b[38;5;241m.\u001b[39mimport_module()\n\u001b[1;32m    105\u001b[0m \u001b[38;5;66;03m# N.B.: KeOps C++ expects contiguous data arrays\u001b[39;00m\n\u001b[1;32m    106\u001b[0m test_contig \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mall\u001b[39m(arg\u001b[38;5;241m.\u001b[39mis_contiguous() \u001b[38;5;28;01mfor\u001b[39;00m arg \u001b[38;5;129;01min\u001b[39;00m args)\n",
+      "\u001b[0;31mKeyError\u001b[0m: 'nvrtc'"
+     ]
+    }
+   ],
+   "source": [
+    "# inputDim = data_processor.get_input_size()\n",
+    "learningRate = 0.0001\n",
+    "epochs=150\n",
+    "\n",
+    "#### Model ####\n",
+    "model = BackboneModel(1, 512, output_dim=1, step_emb=64, num_residual_blocks=4, num_features=2)\n",
+    "model.to(\"cuda\")\n",
+    "\n",
+    "inputs = torch.randn(2, 96, 1).to(\"cuda\")\n",
+    "times = torch.tensor([0]*2).to(\"cuda\")\n",
+    "features = torch.randn(2, 96, 2).to(\"cuda\")\n",
+    "\n",
+    "model(inputs, times, features).shape\n",
+    "\n",
+    "#### Trainer ####\n",
+    "# trainer = DiffusionTrainer(model, data_processor, \"cuda\")\n",
+    "# trainer.train(epochs, learningRate, None)"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,
--- a/src/policies/plot_combiner.ipynb
+++ b/src/policies/plot_combiner.ipynb
@@ -1197,7 +1197,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.10.8"
+   "version": "3.10.11"
  }
 },
 "nbformat": 4,
--- a/src/policies/policy_executer.py
+++ b/src/policies/policy_executer.py
@@ -124,9 +124,6 @@ def get_next_day_profits_for_date(model, data_processor, test_loader, date, ipc,
    predicted_nrv_profits_cycles = {i: [0, 0] for i in penalties}
    baseline_profits_cycles = {i: [0, 0] for i in penalties}

-    _charge_thresholds = {}
-    _discharge_thresholds = {}
-
    initial, nrvs, target = predict_NRV(model, date, data_processor, test_loader)

    initial = np.repeat(initial, nrvs.shape[0])
@@ -142,10 +139,6 @@ def get_next_day_profits_for_date(model, data_processor, test_loader, date, ipc,

    for penalty in penalties:
        found_charge_thresholds, found_discharge_thresholds = baseline_policy.get_optimal_thresholds(reconstructed_imbalance_prices, charge_thresholds, discharge_thresholds, penalty)
-        
-        _charge_thresholds[penalty] = found_charge_thresholds
-        _discharge_thresholds[penalty] = found_discharge_thresholds
-
        next_day_charge_threshold = found_charge_thresholds.mean(axis=0)
        next_day_discharge_threshold = found_discharge_thresholds.mean(axis=0)
        yesterday_charge_thresholds, yesterday_discharge_thresholds = baseline_policy.get_optimal_thresholds(yesterday_imbalance_prices, charge_thresholds, discharge_thresholds, penalty)
@@ -160,29 +153,23 @@ def get_next_day_profits_for_date(model, data_processor, test_loader, date, ipc,
        baseline_profits_cycles[penalty][0] += yesterday_profit.item()
        baseline_profits_cycles[penalty][1] += yesterday_charge_cycles.item()

-    return predicted_nrv_profits_cycles, baseline_profits_cycles, _charge_thresholds, _discharge_thresholds
+    return predicted_nrv_profits_cycles, baseline_profits_cycles

 def next_day_test_set(model, data_processor, test_loader, ipc, predict_NRV: callable):
    penalties = [0, 10, 50, 150, 300, 500, 600, 800, 1000, 1500, 2000, 2500]
    predicted_nrv_profits_cycles = {i: [0, 0] for i in penalties}
    baseline_profits_cycles = {i: [0, 0] for i in penalties}

-    charge_thresholds = {}
-    discharge_thresholds = {}
-
    # get all dates in test set
    dates = baseline_policy.test_data["DateTime"].dt.date.unique()

    # dates back to datetime
    dates = pd.to_datetime(dates)

-    for date in tqdm(dates[:10]):
+    for date in tqdm(dates):
        try:
-            new_predicted_nrv_profits_cycles, new_baseline_profits_cycles, new_charge_thresholds, new_discharge_thresholds = get_next_day_profits_for_date(model, data_processor, test_loader, date, ipc, predict_NRV, penalties)
+            new_predicted_nrv_profits_cycles, new_baseline_profits_cycles = get_next_day_profits_for_date(model, data_processor, test_loader, date, ipc, predict_NRV, penalties)
            
-            charge_thresholds[date] = new_charge_thresholds
-            discharge_thresholds[date] = new_discharge_thresholds
-
            for penalty in penalties:
                predicted_nrv_profits_cycles[penalty][0] += new_predicted_nrv_profits_cycles[penalty][0]
                predicted_nrv_profits_cycles[penalty][1] += new_predicted_nrv_profits_cycles[penalty][1]
@@ -192,15 +179,15 @@ def next_day_test_set(model, data_processor, test_loader, ipc, predict_NRV: call

        except Exception as e:
            # print(f"Error for date {date}")
-            raise e
+            continue

-    return predicted_nrv_profits_cycles, baseline_profits_cycles, charge_thresholds, discharge_thresholds
+    return predicted_nrv_profits_cycles, baseline_profits_cycles

 def main():
    clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast")
    task = clearml_helper.get_task(task_name="Policy Test")

-    # task.execute_remotely(queue_name="default", exit_process=True)
+    task.execute_remotely(queue_name="default", exit_process=True)

    configuration, model, data_processor, test_loader = load_model(args.task_id)

@@ -218,69 +205,7 @@ def main():

    ipc = ImbalancePriceCalculator(data_path="")

-    predicted_nrv_profits_cycles, baseline_profits_cycles, charge_thresholds, discharge_thresholds = next_day_test_set(model, data_processor, test_loader, ipc, predict_NRV)
-    # the charge_thresholds is a dictionary with date as key. The values of the dictionary is another dictionary with keys as penalties and values as the charge thresholds
-    # create density plot that shows a density plot of the charge thresholds for each penalty (use seaborn displot) (One plot with a different color for each penalty)
-
-    import seaborn as sns
-    import matplotlib.pyplot as plt
-
-    charge_thresholds_for_penalty = {}
-    for d in charge_thresholds.values():
-        for penalty, thresholds in d.items():
-            if penalty not in charge_thresholds_for_penalty:
-                charge_thresholds_for_penalty[penalty] = []
-            charge_thresholds_for_penalty[penalty].extend(thresholds)
-
-    discharge_thresholds_for_penalty = {}
-    for d in discharge_thresholds.values():
-        for penalty, thresholds in d.items():
-            if penalty not in discharge_thresholds_for_penalty:
-                discharge_thresholds_for_penalty[penalty] = []
-            discharge_thresholds_for_penalty[penalty].extend(thresholds)
-
-    ### Plot charge thresholds distribution ###
-    data_to_plot = []
-    for penalty, values in charge_thresholds_for_penalty.items():
-        for value in values:
-            data_to_plot.append({'Penalty': penalty, 'Value': value.item()})
-    df = pd.DataFrame(data_to_plot)
-    print(df.head())
-    palette = sns.color_palette("bright", len(charge_thresholds.keys()))
-    fig = sns.displot(data=df, x="Value", hue="Penalty", kind="kde", palette=palette)
-    plt.title('Density of Charge Thresholds by Penalty')
-    plt.xlabel('Charge Threshold')
-    plt.ylabel('Density')
-    plt.legend(title='Penalty')
-    task.get_logger().report_matplotlib_figure(
-        "Policy Results", 
-        "Charge Thresholds", 
-        iteration=0, 
-        figure=fig
-    )
-    plt.close()
-
-    ### Plot discharge thresholds distribution ###
-    data_to_plot = []
-    for penalty, values in discharge_thresholds_for_penalty.items():
-        for value in values:
-            data_to_plot.append({'Penalty': penalty, 'Value': value.item()})
-    df = pd.DataFrame(data_to_plot)
-    palette = sns.color_palette("bright", len(discharge_thresholds.keys()))
-    fig = sns.displot(data=df, x="Value", hue="Penalty", kind="kde", palette=palette)
-    plt.title('Density of Charge Thresholds by Penalty')
-    plt.xlabel('Charge Threshold')
-    plt.ylabel('Density')
-    plt.legend(title='Penalty')
-    task.get_logger().report_matplotlib_figure(
-        "Policy Results", 
-        "Discharge Thresholds", 
-        iteration=0, 
-        figure=fig
-    )
-    plt.close()
-
-
+    predicted_nrv_profits_cycles, baseline_profits_cycles = next_day_test_set(model, data_processor, test_loader, ipc, predict_NRV)

    # create dataframe with columns "name", "penalty", "profit", "cycles"
    df = pd.DataFrame(columns=["name", "penalty", "profit", "cycles"])
--- a/src/trainers/diffusion_trainer.py
+++ b/src/trainers/diffusion_trainer.py
@@ -19,13 +19,7 @@ def sample_diffusion(model: DiffusionModel, n: int, inputs: torch.tensor, noise_
    alpha = 1. - beta
    alpha_hat = torch.cumprod(alpha, dim=0)

-    # inputs: (num_features) -> (batch_size, num_features)
-    # inputs: (time_steps, num_features) -> (batch_size, time_steps, num_features)
-    if len(inputs.shape) == 2:
-        inputs = inputs.repeat(n, 1)
-    elif len(inputs.shape) == 3:
-        inputs = inputs.repeat(n, 1, 1)
-
+    inputs = inputs.repeat(n, 1).to(device)
    model.eval()
    with torch.no_grad():
        x = torch.randn(inputs.shape[0], ts_length).to(device)
@@ -51,7 +45,7 @@ class DiffusionTrainer:
        self.model = model
        self.device = device

-        self.noise_steps = 20
+        self.noise_steps = 1000
        self.beta_start = 1e-4
        self.beta_end = 0.02
        self.ts_length = 96
@@ -107,12 +101,8 @@ class DiffusionTrainer:

        input_data = torch.randn(1024, 96).to(self.device)
        time_steps = torch.randn(1024).long().to(self.device)
+        other_input_data = torch.randn(1024, self.model.other_inputs_dim).to(self.device)

-        if self.data_processor.lstm:
-            inputDim = self.data_processor.get_input_size()
-            other_input_data = torch.randn(1024, inputDim[1], self.model.other_inputs_dim).to(self.device)
-        else:
-            other_input_data = torch.randn(1024, self.model.other_inputs_dim).to(self.device)
        task.set_configuration_object("model", str(summary(self.model, input_data=[input_data, time_steps, other_input_data])))

        self.data_processor = task.connect(self.data_processor, name="data_processor")
@@ -130,8 +120,8 @@ class DiffusionTrainer:
            predict_sequence_length=self.ts_length
        )

-        train_sample_indices = self.random_samples(train=True, num_samples=5)
-        test_sample_indices = self.random_samples(train=False, num_samples=5)
+        train_sample_indices = self.random_samples(train=True, num_samples=10)
+        test_sample_indices = self.random_samples(train=False, num_samples=10)

        for epoch in range(epochs):
            running_loss = 0.0
@@ -142,6 +132,8 @@ class DiffusionTrainer:

                t = self.sample_timesteps(time_series.shape[0]).to(self.device)
                x_t, noise = self.noise_time_series(time_series, t)
+                x_t = x_t.unsqueeze(-1)
+                print(x_t.shape, t.shape, base_pattern.shape)
                predicted_noise = self.model(x_t, t, base_pattern)
                loss = criterion(predicted_noise, noise)

@@ -153,7 +145,7 @@ class DiffusionTrainer:
            
            running_loss /= len(train_loader.dataset)

-            if epoch % 40 == 0 and epoch != 0:
+            if epoch % 20 == 0 and epoch != 0:
                self.test(test_loader, epoch, task)

            if task:
@@ -164,7 +156,7 @@ class DiffusionTrainer:
                    value=loss.item(),
                )

-                if epoch % 150 == 0 and epoch != 0:
+                if epoch % 100 == 0 and epoch != 0:
                    self.debug_plots(task, True, train_loader, train_sample_indices, epoch)
                    self.debug_plots(task, False, test_loader, test_sample_indices, epoch)

@@ -177,7 +169,6 @@ class DiffusionTrainer:
            features, target, _ = data_loader.dataset[idx]

            features = features.to(self.device)
-            features = features.unsqueeze(0)

            self.model.eval()
            with torch.no_grad():
@@ -233,7 +224,7 @@ class DiffusionTrainer:
            
            number_of_samples = 100
            sample = self.sample(self.model, number_of_samples, inputs)
-
+            
            # reduce samples from (batch_size*number_of_samples, time_steps) to (batch_size, number_of_samples, time_steps)
            samples_batched = sample.reshape(inputs.shape[0], number_of_samples, 96)

--- a/src/training_scripts/diffusion_training.py
+++ b/src/training_scripts/diffusion_training.py
@@ -10,7 +10,7 @@ from torch.nn import MSELoss, L1Loss
 from datetime import datetime
 import torch.nn as nn
 from src.models.time_embedding_layer import TimeEmbedding
-from src.models.diffusion_model import GRUDiffusionModel, SimpleDiffusionModel
+from src.models.diffusion_model import SimpleDiffusionModel
 from src.trainers.diffusion_trainer import DiffusionTrainer


@@ -38,11 +38,10 @@ data_config.NOMINAL_NET_POSITION = True
 data_config = task.connect(data_config, name="data_features")

 data_processor = DataProcessor(data_config, path="", lstm=False)
-data_processor.set_batch_size(128)
+data_processor.set_batch_size(8192)
 data_processor.set_full_day_skip(True)

 inputDim = data_processor.get_input_size()
-print("Input dim: ", inputDim)

 model_parameters = {
    "epochs": 5000,
@@ -54,8 +53,7 @@ model_parameters = {
 model_parameters = task.connect(model_parameters, name="model_parameters")

 #### Model ####
-# model = SimpleDiffusionModel(96, model_parameters["hidden_sizes"], other_inputs_dim=inputDim[1], time_dim=model_parameters["time_dim"])
-model = GRUDiffusionModel(96, model_parameters["hidden_sizes"], other_inputs_dim=inputDim[2], time_dim=model_parameters["time_dim"], gru_hidden_size=256)
+model = SimpleDiffusionModel(96, model_parameters["hidden_sizes"], other_inputs_dim=inputDim[1], time_dim=model_parameters["time_dim"])

 print("Starting training ...")

--- a/src/utils/clearml.py
+++ b/src/utils/clearml.py
@@ -10,6 +10,6 @@ class ClearMLHelper:
        Task.ignore_requirements("torchvision")
        Task.ignore_requirements("tensorboard")
        task = Task.init(project_name=self.project_name, task_name=task_name, continue_last_task=False)
-        task.set_base_docker(f"docker.io/clearml/pytorch-cuda-gcc:2.0.0-cuda11.7-cudnn8-runtime")
+        task.set_base_docker(f"docker.io/clearml/pytorch-cuda-gcc:2.0.0-cuda11.7-cudnn8-runtime --env GIT_SSL_NO_VERIFY=true --env CLEARML_AGENT_GIT_USER=VictorMylle --env CLEARML_AGENT_GIT_PASS=Voetballer1" )
        task.set_packages("requirements.txt")
        return task
--- a/test.py
+++ b/test.py
@@ -0,0 +1,2 @@
+import pykeops
+pykeops.test_numpy_bindings()