Updated thesis

2024-05-10 15:52:57 +02:00
parent 0bcaa2f63f
commit 1b2b3518e2
9 changed files with 72 additions and 59 deletions
--- a/Reports/Thesis/sections/results/models/comparison.tex
+++ b/Reports/Thesis/sections/results/models/comparison.tex
@@ -25,7 +25,7 @@ After training the different models and experimenting with various hyperparamete
    & & & & & \\
    & \acs{AQR} & Non-Linear & 32447.41 & 137.24 & 79.22 & 524,013 \\
    & \acs{NAQR} & Non-Linear & 42588.16 & 157.20 & 73.75 & 673,760 \\
-    & Diffusion & Non-Linear & 46448.90 & 164.50 & 81.06 & 14,229,344 \\
+    & Diffusion & Non-Linear & 47178.91 & 166.89 & 80.30 & 3,116,896 \\
    & & & & & \\
    & \acs{AQR} & GRU & 35238.98 & 141.02 & 80.92 & 11,843,565 \\
    & \acs{NAQR} & GRU & 40613.54 & 151.17 & 75.33 & 6,165,216 \\
--- a/Reports/Thesis/sections/results/models/diffusion.tex
+++ b/Reports/Thesis/sections/results/models/diffusion.tex
@@ -35,7 +35,7 @@ Other hyperparameters that need to be chosen are the number of denoising steps,
        \draw[-latex] (img2.south) |- (Middle) -| (img3.north);
    \end{tikzpicture}
    \caption{Intermediate steps of the diffusion model for example 864 from the test set. The confidence intervals shown in the plots are made using 100 samples.}
-    \label{fig:diffusion_intermediates}
+    \label{fig:diffusion_intermediates}
 \end{figure}

 In Figure \ref{fig:diffusion_intermediates}, multiple intermediate steps of the denoising process are shown as an example from the test set. The model starts with noisy full-day NRV samples, which can be seen in the first steps. These noisy samples are then denoised in multiple steps until realistic samples are generated. This can be seen in the last image in the figure. It can be observed that the confidence intervals become narrower over time as the noise is removed from the samples. 
@@ -48,10 +48,21 @@ In Figure \ref{fig:diffusion_intermediates}, multiple intermediate steps of the
    Features & Diffusion Steps & Layers & Hidden Size & MSE & MAE & CRPS \\
    \midrule
    NRV & & & & & & & \\
+    & 300 & 2 & 256 & 57129.71 & 185.56 & 81.00 \\
+    & 300 & 2 & 512 & 48364.77 & 169.39 & 79.13 \\
+    & 300 & 2 & 1024 & 43540.50 & 159.17 & 78.27 \\
+    & 300 & 3 & 256 & 52741.73 & 177.09 & 79.55 \\
+    & 300 & 3 & 512 & 45048.05 & 161.89 & 78.46 \\
+    & 300 & 3 & 1024 & 42089.13 & 155.97 & 78.25 \\
+    & 300 & 4 & 256 & 56939.68 & 185.07 & 81.16 \\
+    & 300 & 4 & 512 & 46225.72 & 164.74 & 79.19 \\
    & 300 & 4 & 1024 & 42984.02 & 157.54 & 77.92 \\
    \midrule
    NRV + Load + Wind + PV + NP & & & & & & & \\
-    & 300 & 3 & 256 & & & \\
+    & 300 & 2 & 256 & 63337.36 & 196.21 & 84.29 \\
+    & 300 & 2 & 512 & 52745.92 & 177.16 & 81.57 \\
+    & 300 & 2 & 1024 & 47178.91 & 166.89 & 80.30 \\
+    & 300 & 3 & 256 & 66148.13 & 200.34 & 85.31 \\
    & 300 & 3 & 512 & 53159.99 & 178.46 & 81.95 \\
    & 300 & 3 & 1024 & 47815.13 & 167.22 & 81.16 \\
    & 300 & 3 & 2048 & 46448.90 & 164.50 & 81.06 \\
@@ -61,8 +72,10 @@ In Figure \ref{fig:diffusion_intermediates}, multiple intermediate steps of the
    \bottomrule
    \end{tabular}
    \end{adjustbox}
-    \caption{Non-linear quantile regression model results. All the models used a dropout of 0.2 .}
+    \caption{Simple diffusion model results.}
    \label{tab:diffusion_results}
 \end{table}

+In Table \ref{tab:diffusion_results}, the results of the experiments for the diffusion model can be seen. The diffusion model that was used is a simple implementation of the Denoising Diffusion Probabilistic Model (DDPM). The model itself consists of multiple linear layers with ReLU activation functions. The number of diffusion steps was set to 300 for the experiments. This number was determined by doing a few experiments with more and fewer steps. The model performance did not improve when more steps were used. This parameter could be further optimized together with the other parameters to find the best-performing model. This would take a lot of time and is not the goal of this thesis. 

+The first observation that can be made is that the error metrics are higher when more input features are used. This is counterintuitive because the model has more information to generate the samples. The reason for this behavior is not immediately clear. One reason could be that the model conditioning is not optimal. Currently, the input features are passed to every layer of the model together with the time series that needs to be denoised. The model could be improved by using a more advanced conditioning mechanism such as classifier guidance or classifier-free guidance. 
--- a/Reports/Thesis/verslag.aux
+++ b/Reports/Thesis/verslag.aux
@@ -85,43 +85,43 @@
 \@writefile{toc}{\contentsline {subsection}{\numberline {6.3}Diffusion}{36}{subsection.6.3}\protected@file@percent }
 \@writefile{lof}{\contentsline {figure}{\numberline {14}{\ignorespaces Intermediate steps of the diffusion model for example 864 from the test set. The confidence intervals shown in the plots are made using 100 samples.\relax }}{38}{figure.caption.23}\protected@file@percent }
 \newlabel{fig:diffusion_intermediates}{{14}{38}{Intermediate steps of the diffusion model for example 864 from the test set. The confidence intervals shown in the plots are made using 100 samples.\relax }{figure.caption.23}{}}
-\@writefile{lot}{\contentsline {table}{\numberline {9}{\ignorespaces Non-linear quantile regression model results. All the models used a dropout of 0.2 .\relax }}{38}{table.caption.24}\protected@file@percent }
-\newlabel{tab:diffusion_results}{{9}{38}{Non-linear quantile regression model results. All the models used a dropout of 0.2 .\relax }{table.caption.24}{}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {6.4}Comparison}{38}{subsection.6.4}\protected@file@percent }
-\ACRO{recordpage}{MSE}{39}{1}{38}
-\ACRO{recordpage}{MAE}{39}{1}{38}
-\ACRO{recordpage}{CRPS}{39}{1}{38}
-\@writefile{lot}{\contentsline {table}{\numberline {10}{\ignorespaces Comparison of the different models using the \ac {MSE}, \ac {MAE} and \ac {CRPS} metrics. The best-performing models for a certain type are selected based on the \ac {CRPS}.\relax }}{39}{table.caption.25}\protected@file@percent }
-\newlabel{tab:model_comparison}{{10}{39}{Comparison of the different models using the \ac {MSE}, \ac {MAE} and \ac {CRPS} metrics. The best-performing models for a certain type are selected based on the \ac {CRPS}.\relax }{table.caption.25}{}}
-\ACRO{recordpage}{NAQR}{40}{1}{39}
+\@writefile{lot}{\contentsline {table}{\numberline {9}{\ignorespaces Simple diffusion model results.\relax }}{38}{table.caption.24}\protected@file@percent }
+\newlabel{tab:diffusion_results}{{9}{38}{Simple diffusion model results.\relax }{table.caption.24}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {6.4}Comparison}{39}{subsection.6.4}\protected@file@percent }
 \ACRO{recordpage}{MSE}{40}{1}{39}
 \ACRO{recordpage}{MAE}{40}{1}{39}
 \ACRO{recordpage}{CRPS}{40}{1}{39}
-\ACRO{recordpage}{MSE}{40}{1}{39}
-\ACRO{recordpage}{MAE}{40}{1}{39}
-\ACRO{recordpage}{MSE}{40}{1}{39}
-\ACRO{recordpage}{MAE}{40}{1}{39}
-\@writefile{lof}{\contentsline {figure}{\numberline {15}{\ignorespaces Comparison of the autoregressive linear and GRU model\relax }}{40}{figure.caption.26}\protected@file@percent }
-\newlabel{fig:ar_linear_gru_comparison}{{15}{40}{Comparison of the autoregressive linear and GRU model\relax }{figure.caption.26}{}}
-\@writefile{toc}{\contentsline {section}{\numberline {7}Policies for battery optimization}{41}{section.7}\protected@file@percent }
-\@writefile{toc}{\contentsline {subsection}{\numberline {7.1}Baselines}{41}{subsection.7.1}\protected@file@percent }
-\ACRO{recordpage}{NRV}{42}{1}{41}
-\ACRO{recordpage}{NRV}{42}{1}{41}
+\@writefile{lot}{\contentsline {table}{\numberline {10}{\ignorespaces Comparison of the different models using the \ac {MSE}, \ac {MAE} and \ac {CRPS} metrics. The best-performing models for a certain type are selected based on the \ac {CRPS}.\relax }}{40}{table.caption.25}\protected@file@percent }
+\newlabel{tab:model_comparison}{{10}{40}{Comparison of the different models using the \ac {MSE}, \ac {MAE} and \ac {CRPS} metrics. The best-performing models for a certain type are selected based on the \ac {CRPS}.\relax }{table.caption.25}{}}
+\ACRO{recordpage}{NAQR}{41}{1}{40}
+\ACRO{recordpage}{MSE}{41}{1}{40}
+\ACRO{recordpage}{MAE}{41}{1}{40}
+\ACRO{recordpage}{CRPS}{41}{1}{40}
+\ACRO{recordpage}{MSE}{41}{1}{40}
+\ACRO{recordpage}{MAE}{41}{1}{40}
+\ACRO{recordpage}{MSE}{41}{1}{40}
+\ACRO{recordpage}{MAE}{41}{1}{40}
+\@writefile{lof}{\contentsline {figure}{\numberline {15}{\ignorespaces Comparison of the autoregressive linear and GRU model\relax }}{41}{figure.caption.26}\protected@file@percent }
+\newlabel{fig:ar_linear_gru_comparison}{{15}{41}{Comparison of the autoregressive linear and GRU model\relax }{figure.caption.26}{}}
+\@writefile{toc}{\contentsline {section}{\numberline {7}Policies for battery optimization}{42}{section.7}\protected@file@percent }
+\@writefile{toc}{\contentsline {subsection}{\numberline {7.1}Baselines}{42}{subsection.7.1}\protected@file@percent }
 \ACRO{recordpage}{NRV}{43}{1}{42}
 \ACRO{recordpage}{NRV}{43}{1}{42}
-\ACRO{recordpage}{NRV}{43}{1}{42}
-\ACRO{recordpage}{NRV}{43}{1}{42}
-\ACRO{recordpage}{NRV}{43}{1}{42}
-\ACRO{recordpage}{NRV}{43}{1}{42}
-\ACRO{recordpage}{NRV}{43}{1}{42}
-\@writefile{lot}{\contentsline {table}{\numberline {11}{\ignorespaces Results of the baseline policies on the test set. \relax }}{42}{table.caption.27}\protected@file@percent }
-\newlabel{tab:fixed_thresholds}{{11}{42}{Results of the baseline policies on the test set. \relax }{table.caption.27}{}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {7.2}Policy using generated NRV samples}{42}{subsection.7.2}\protected@file@percent }
+\ACRO{recordpage}{NRV}{44}{1}{43}
+\ACRO{recordpage}{NRV}{44}{1}{43}
+\ACRO{recordpage}{NRV}{44}{1}{43}
+\ACRO{recordpage}{NRV}{44}{1}{43}
+\ACRO{recordpage}{NRV}{44}{1}{43}
+\ACRO{recordpage}{NRV}{44}{1}{43}
+\ACRO{recordpage}{NRV}{44}{1}{43}
+\@writefile{lot}{\contentsline {table}{\numberline {11}{\ignorespaces Results of the baseline policies on the test set. \relax }}{43}{table.caption.27}\protected@file@percent }
+\newlabel{tab:fixed_thresholds}{{11}{43}{Results of the baseline policies on the test set. \relax }{table.caption.27}{}}
+\@writefile{toc}{\contentsline {subsection}{\numberline {7.2}Policy using generated NRV samples}{43}{subsection.7.2}\protected@file@percent }
 \ACRO{total-barriers}{1}
-\abx@aux@page{6}{44}
-\abx@aux@page{7}{44}
-\abx@aux@page{8}{44}
-\abx@aux@page{9}{44}
+\abx@aux@page{6}{45}
+\abx@aux@page{7}{45}
+\abx@aux@page{8}{45}
+\abx@aux@page{9}{45}
 \ACRO{usage}{QR=={0}}
 \ACRO{usage}{AQR=={0}}
 \ACRO{usage}{NAQR=={1}}
@@ -131,15 +131,15 @@
 \ACRO{usage}{NRV=={9}}
 \ACRO{usage}{PV=={0}}
 \ACRO{usage}{NP=={0}}
-\ACRO{pages}{NAQR=={40@1@39}}
-\ACRO{pages}{CRPS=={39@1@38|40@1@39}}
-\ACRO{pages}{MSE=={39@1@38|40@1@39}}
-\ACRO{pages}{MAE=={39@1@38|40@1@39}}
-\ACRO{pages}{NRV=={42@1@41|43@1@42}}
+\ACRO{pages}{NAQR=={41@1@40}}
+\ACRO{pages}{CRPS=={40@1@39|41@1@40}}
+\ACRO{pages}{MSE=={40@1@39|41@1@40}}
+\ACRO{pages}{MAE=={40@1@39|41@1@40}}
+\ACRO{pages}{NRV=={43@1@42|44@1@43}}
 \abx@aux@read@bbl@mdfivesum{5DC935CC8C8FAB8A3CAF97A486ED2386}
 \abx@aux@read@bblrerun
 \abx@aux@defaultrefcontext{0}{dumas_deep_2022}{nyt/global//global/global}
 \abx@aux@defaultrefcontext{0}{lu_scenarios_2022}{nyt/global//global/global}
 \abx@aux@defaultrefcontext{0}{poggi_electricity_2023}{nyt/global//global/global}
 \abx@aux@defaultrefcontext{0}{weron_electricity_2014}{nyt/global//global/global}
-\gdef \@abspage@last{45}
+\gdef \@abspage@last{46}
--- a/Reports/Thesis/verslag.log
+++ b/Reports/Thesis/verslag.log
@@ -1,4 +1,4 @@
-This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) (preloaded format=pdflatex 2023.9.17)  10 MAY 2024 00:24
+This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) (preloaded format=pdflatex 2023.9.17)  10 MAY 2024 15:51
 entering extended mode
 restricted \write18 enabled.
 file:line:error style messages enabled.
@@ -1728,7 +1728,7 @@ Package pdftex.def Info: images/diffusion/results/intermediates/Testing Intermed

 LaTeX Warning: `h' float specifier changed to `ht'.

-[37]) (./sections/results/models/comparison.tex [38 <./images/diffusion/results/intermediates/Testing Intermediates 864_Sample intermediate 1_00000000.jpeg> <./images/diffusion/results/intermediates/Testing Intermediates 864_Sample intermediate 2_00000000.jpeg> <./images/diffusion/results/intermediates/Testing Intermediates 864_Sample intermediate 3_00000000.jpeg> <./images/diffusion/results/intermediates/Testing Intermediates 864_Sample intermediate 4_00000000.jpeg>] [39]
+[37] [38 <./images/diffusion/results/intermediates/Testing Intermediates 864_Sample intermediate 1_00000000.jpeg> <./images/diffusion/results/intermediates/Testing Intermediates 864_Sample intermediate 2_00000000.jpeg> <./images/diffusion/results/intermediates/Testing Intermediates 864_Sample intermediate 3_00000000.jpeg> <./images/diffusion/results/intermediates/Testing Intermediates 864_Sample intermediate 4_00000000.jpeg>]) (./sections/results/models/comparison.tex [39] [40]
 File: images/quantile_regression/aqr_linear_model_samples/AQR_NRV_Load_Wind_PV_NP_QE-Sample_864.png Graphic file (type png)
 <use images/quantile_regression/aqr_linear_model_samples/AQR_NRV_Load_Wind_PV_NP_QE-Sample_864.png>
 Package pdftex.def Info: images/quantile_regression/aqr_linear_model_samples/AQR_NRV_Load_Wind_PV_NP_QE-Sample_864.png  used on input line 46.
@@ -1761,14 +1761,14 @@ File: images/quantile_regression/aqr_gru_model_examples/AQR_GRU_NRV_Load_Wind_PV
 <use images/quantile_regression/aqr_gru_model_examples/AQR_GRU_NRV_Load_Wind_PV_NP_QE-Sample_7008.png>
 Package pdftex.def Info: images/quantile_regression/aqr_gru_model_examples/AQR_GRU_NRV_Load_Wind_PV_NP_QE-Sample_7008.png  used on input line 72.
 (pdftex.def)             Requested size: 223.07211pt x 112.49284pt.
-) [40] (./sections/results/policies/baselines.tex [41]
+) [41] (./sections/results/policies/baselines.tex [42]
 Underfull \hbox (badness 10000) in paragraph at lines 6--7

 []

 LaTeX Font Info:    Font shape `TS1/LinuxLibertineT-TLF/b/n' will be
 (Font)              scaled to size 12.0pt on input line 12.
-) (./sections/results/policies/nrv_samples_policy.tex)) [42{/usr/local/texlive/2023/texmf-dist/fonts/enc/dvips/libertine/lbtn_7f4ce4.enc}] [43] [44] (./verslag.aux (./sections/introduction.aux) (./sections/background.aux) (./sections/policies.aux) (./sections/literature_study.aux))
+) (./sections/results/policies/nrv_samples_policy.tex)) [43{/usr/local/texlive/2023/texmf-dist/fonts/enc/dvips/libertine/lbtn_7f4ce4.enc}] [44] [45] (./verslag.aux (./sections/introduction.aux) (./sections/background.aux) (./sections/policies.aux) (./sections/literature_study.aux))

 LaTeX Warning: There were undefined references.

@@ -1784,18 +1784,18 @@ Package logreq Info: Writing requests to 'verslag.run.xml'.

 ) 
 Here is how much of TeX's memory you used:
- 41798 strings out of 476025
- 868725 string characters out of 5790017
+ 41799 strings out of 476025
+ 868732 string characters out of 5790017
 1884388 words of memory out of 5000000
 61396 multiletter control sequences out of 15000+600000
- 606965 words of font info for 100 fonts, out of 8000000 for 9000
+ 606966 words of font info for 100 fonts, out of 8000000 for 9000
 1141 hyphenation exceptions out of 8191
 84i,16n,131p,2100b,5180s stack positions out of 10000i,1000n,20000p,200000b,200000s
 </Users/victormylle/Library/texlive/2023/texmf-var/fonts/pk/ljfour/public/bbm/bbm12.600pk></usr/local/texlive/2023/texmf-dist/fonts/type1/public/libertinust1math/LibertinusT1Math.pfb></usr/local/texlive/2023/texmf-dist/fonts/type1/public/libertine/LinBiolinumT.pfb></usr/local/texlive/2023/texmf-dist/fonts/type1/public/libertine/LinBiolinumTB.pfb></usr/local/texlive/2023/texmf-dist/fonts/type1/public/libertine/LinLibertineT.pfb></usr/local/texlive/2023/texmf-dist/fonts/type1/public/libertine/LinLibertineTB.pfb></usr/local/texlive/2023/texmf-dist/fonts/type1/public/libertine/LinLibertineTI.pfb></usr/local/texlive/2023/texmf-dist/fonts/type1/public/stix/stix-mathcal.pfb>
-Output written on verslag.pdf (45 pages, 8231760 bytes).
+Output written on verslag.pdf (46 pages, 8233151 bytes).
 PDF statistics:
- 627 PDF objects out of 1000 (max. 8388607)
- 464 compressed objects within 5 object streams
- 115 named destinations out of 1000 (max. 500000)
+ 632 PDF objects out of 1000 (max. 8388607)
+ 468 compressed objects within 5 object streams
+ 116 named destinations out of 1000 (max. 500000)
 526 words of extra memory for PDF output out of 10000 (max. 10000000)

--- a/Reports/Thesis/verslag.pdf
+++ b/Reports/Thesis/verslag.pdf
--- a/Reports/Thesis/verslag.synctex.gz
+++ b/Reports/Thesis/verslag.synctex.gz
--- a/Reports/Thesis/verslag.toc
+++ b/Reports/Thesis/verslag.toc
@@ -27,7 +27,7 @@
 \contentsline {subsubsection}{\numberline {6.2.2}Non-Linear Model}{29}{subsubsection.6.2.2}%
 \contentsline {subsubsection}{\numberline {6.2.3}GRU Model}{32}{subsubsection.6.2.3}%
 \contentsline {subsection}{\numberline {6.3}Diffusion}{36}{subsection.6.3}%
-\contentsline {subsection}{\numberline {6.4}Comparison}{38}{subsection.6.4}%
-\contentsline {section}{\numberline {7}Policies for battery optimization}{41}{section.7}%
-\contentsline {subsection}{\numberline {7.1}Baselines}{41}{subsection.7.1}%
-\contentsline {subsection}{\numberline {7.2}Policy using generated NRV samples}{42}{subsection.7.2}%
+\contentsline {subsection}{\numberline {6.4}Comparison}{39}{subsection.6.4}%
+\contentsline {section}{\numberline {7}Policies for battery optimization}{42}{section.7}%
+\contentsline {subsection}{\numberline {7.1}Baselines}{42}{subsection.7.1}%
+\contentsline {subsection}{\numberline {7.2}Policy using generated NRV samples}{43}{subsection.7.2}%
--- a/src/training_scripts/autoregressive_quantiles.py
+++ b/src/training_scripts/autoregressive_quantiles.py
@@ -52,7 +52,7 @@ data_processor.set_full_day_skip(False)
 #### Hyperparameters ####
 data_processor.set_output_size(1)
 inputDim = data_processor.get_input_size()
-epochs = 300
+epochs = 16

 # add parameters to clearml
 quantiles = task.get_parameter("general/quantiles", cast=True)
--- a/src/training_scripts/diffusion_training.py
+++ b/src/training_scripts/diffusion_training.py
@@ -2,7 +2,7 @@ from src.utils.clearml import ClearMLHelper

 clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast")
 task = clearml_helper.get_task(
-    task_name="Diffusion Training: hidden_sizes=[1024, 1024, 1024, 1024] (300 steps), lr=0.0001, time_dim=8"
+    task_name="Diffusion Training: hidden_sizes=[2048, 2048, 2048, 2048] (300 steps), lr=0.0001, time_dim=8"
 )
 task.execute_remotely(queue_name="default", exit_process=True)

@@ -42,7 +42,7 @@ print("Input dim: ", inputDim)
 model_parameters = {
    "epochs": 15000,
    "learning_rate": 0.0001,
-    "hidden_sizes": [1024, 1024, 1024, 1024],
+    "hidden_sizes": [2048, 2048, 2048, 2048],
    "time_dim": 8,
 }