Quarter embedding using trigonometry + more thesis writing

This commit is contained in:
2024-04-17 21:48:13 +02:00
parent 6b02c9aab8
commit 8fb2a7fc7b
18 changed files with 3467 additions and 55 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 391 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 368 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 425 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 423 KiB

View File

@@ -0,0 +1,151 @@
\relax
\providecommand\hyper@newdestlabel[2]{}
\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}{section.1}\protected@file@percent }
\@setckpt{sections/introduction}{
\setcounter{page}{2}
\setcounter{equation}{0}
\setcounter{enumi}{0}
\setcounter{enumii}{0}
\setcounter{enumiii}{0}
\setcounter{enumiv}{0}
\setcounter{footnote}{0}
\setcounter{mpfootnote}{0}
\setcounter{part}{0}
\setcounter{section}{1}
\setcounter{subsection}{0}
\setcounter{subsubsection}{0}
\setcounter{paragraph}{0}
\setcounter{subparagraph}{0}
\setcounter{figure}{0}
\setcounter{table}{0}
\setcounter{parentequation}{0}
\setcounter{float@type}{4}
\setcounter{tabx@nest}{0}
\setcounter{listtotal}{0}
\setcounter{listcount}{0}
\setcounter{liststart}{0}
\setcounter{liststop}{0}
\setcounter{citecount}{0}
\setcounter{citetotal}{0}
\setcounter{multicitecount}{0}
\setcounter{multicitetotal}{0}
\setcounter{instcount}{0}
\setcounter{maxnames}{2}
\setcounter{minnames}{1}
\setcounter{maxitems}{999}
\setcounter{minitems}{1}
\setcounter{citecounter}{0}
\setcounter{maxcitecounter}{0}
\setcounter{savedcitecounter}{0}
\setcounter{uniquelist}{0}
\setcounter{uniquename}{0}
\setcounter{refsection}{0}
\setcounter{refsegment}{0}
\setcounter{maxextratitle}{0}
\setcounter{maxextratitleyear}{0}
\setcounter{maxextraname}{0}
\setcounter{maxextradate}{0}
\setcounter{maxextraalpha}{0}
\setcounter{abbrvpenalty}{50}
\setcounter{highnamepenalty}{50}
\setcounter{lownamepenalty}{25}
\setcounter{maxparens}{3}
\setcounter{parenlevel}{0}
\setcounter{blx@maxsection}{0}
\setcounter{mincomprange}{10}
\setcounter{maxcomprange}{100000}
\setcounter{mincompwidth}{1}
\setcounter{afterword}{0}
\setcounter{savedafterword}{0}
\setcounter{annotator}{0}
\setcounter{savedannotator}{0}
\setcounter{author}{0}
\setcounter{savedauthor}{0}
\setcounter{bookauthor}{0}
\setcounter{savedbookauthor}{0}
\setcounter{commentator}{0}
\setcounter{savedcommentator}{0}
\setcounter{editor}{0}
\setcounter{savededitor}{0}
\setcounter{editora}{0}
\setcounter{savededitora}{0}
\setcounter{editorb}{0}
\setcounter{savededitorb}{0}
\setcounter{editorc}{0}
\setcounter{savededitorc}{0}
\setcounter{foreword}{0}
\setcounter{savedforeword}{0}
\setcounter{holder}{0}
\setcounter{savedholder}{0}
\setcounter{introduction}{0}
\setcounter{savedintroduction}{0}
\setcounter{namea}{0}
\setcounter{savednamea}{0}
\setcounter{nameb}{0}
\setcounter{savednameb}{0}
\setcounter{namec}{0}
\setcounter{savednamec}{0}
\setcounter{translator}{0}
\setcounter{savedtranslator}{0}
\setcounter{shortauthor}{0}
\setcounter{savedshortauthor}{0}
\setcounter{shorteditor}{0}
\setcounter{savedshorteditor}{0}
\setcounter{narrator}{0}
\setcounter{savednarrator}{0}
\setcounter{execproducer}{0}
\setcounter{savedexecproducer}{0}
\setcounter{execdirector}{0}
\setcounter{savedexecdirector}{0}
\setcounter{with}{0}
\setcounter{savedwith}{0}
\setcounter{labelname}{0}
\setcounter{savedlabelname}{0}
\setcounter{institution}{0}
\setcounter{savedinstitution}{0}
\setcounter{lista}{0}
\setcounter{savedlista}{0}
\setcounter{listb}{0}
\setcounter{savedlistb}{0}
\setcounter{listc}{0}
\setcounter{savedlistc}{0}
\setcounter{listd}{0}
\setcounter{savedlistd}{0}
\setcounter{liste}{0}
\setcounter{savedliste}{0}
\setcounter{listf}{0}
\setcounter{savedlistf}{0}
\setcounter{location}{0}
\setcounter{savedlocation}{0}
\setcounter{organization}{0}
\setcounter{savedorganization}{0}
\setcounter{origlocation}{0}
\setcounter{savedoriglocation}{0}
\setcounter{origpublisher}{0}
\setcounter{savedorigpublisher}{0}
\setcounter{publisher}{0}
\setcounter{savedpublisher}{0}
\setcounter{language}{0}
\setcounter{savedlanguage}{0}
\setcounter{origlanguage}{0}
\setcounter{savedoriglanguage}{0}
\setcounter{citation}{0}
\setcounter{savedcitation}{0}
\setcounter{pageref}{0}
\setcounter{savedpageref}{0}
\setcounter{textcitecount}{0}
\setcounter{textcitetotal}{0}
\setcounter{textcitemaxnames}{0}
\setcounter{biburlbigbreakpenalty}{100}
\setcounter{biburlbreakpenalty}{200}
\setcounter{biburlnumpenalty}{0}
\setcounter{biburlucpenalty}{0}
\setcounter{biburllcpenalty}{0}
\setcounter{smartand}{1}
\setcounter{bbx:relatedcount}{0}
\setcounter{bbx:relatedtotal}{0}
\setcounter{section@level}{0}
\setcounter{Item}{0}
\setcounter{Hfootnote}{0}
\setcounter{bookmark@seq@number}{1}
}

View File

@@ -0,0 +1,155 @@
\relax
\providecommand\hyper@newdestlabel[2]{}
\@writefile{toc}{\contentsline {section}{\numberline {3}Literature Study}{9}{section.3}\protected@file@percent }
\@setckpt{sections/literature_study}{
\setcounter{page}{10}
\setcounter{equation}{0}
\setcounter{enumi}{0}
\setcounter{enumii}{0}
\setcounter{enumiii}{0}
\setcounter{enumiv}{0}
\setcounter{footnote}{0}
\setcounter{mpfootnote}{0}
\setcounter{part}{0}
\setcounter{section}{3}
\setcounter{subsection}{0}
\setcounter{subsubsection}{0}
\setcounter{paragraph}{0}
\setcounter{subparagraph}{0}
\setcounter{figure}{1}
\setcounter{table}{2}
\setcounter{parentequation}{0}
\setcounter{float@type}{4}
\setcounter{caption@flags}{2}
\setcounter{continuedfloat}{0}
\setcounter{subfigure}{0}
\setcounter{subtable}{0}
\setcounter{tabx@nest}{0}
\setcounter{listtotal}{0}
\setcounter{listcount}{0}
\setcounter{liststart}{0}
\setcounter{liststop}{0}
\setcounter{citecount}{0}
\setcounter{citetotal}{0}
\setcounter{multicitecount}{0}
\setcounter{multicitetotal}{0}
\setcounter{instcount}{0}
\setcounter{maxnames}{2}
\setcounter{minnames}{1}
\setcounter{maxitems}{999}
\setcounter{minitems}{1}
\setcounter{citecounter}{0}
\setcounter{maxcitecounter}{0}
\setcounter{savedcitecounter}{0}
\setcounter{uniquelist}{0}
\setcounter{uniquename}{0}
\setcounter{refsection}{0}
\setcounter{refsegment}{0}
\setcounter{maxextratitle}{0}
\setcounter{maxextratitleyear}{0}
\setcounter{maxextraname}{0}
\setcounter{maxextradate}{0}
\setcounter{maxextraalpha}{0}
\setcounter{abbrvpenalty}{50}
\setcounter{highnamepenalty}{50}
\setcounter{lownamepenalty}{25}
\setcounter{maxparens}{3}
\setcounter{parenlevel}{0}
\setcounter{blx@maxsection}{0}
\setcounter{mincomprange}{10}
\setcounter{maxcomprange}{100000}
\setcounter{mincompwidth}{1}
\setcounter{afterword}{0}
\setcounter{savedafterword}{0}
\setcounter{annotator}{0}
\setcounter{savedannotator}{0}
\setcounter{author}{0}
\setcounter{savedauthor}{0}
\setcounter{bookauthor}{0}
\setcounter{savedbookauthor}{0}
\setcounter{commentator}{0}
\setcounter{savedcommentator}{0}
\setcounter{editor}{0}
\setcounter{savededitor}{0}
\setcounter{editora}{0}
\setcounter{savededitora}{0}
\setcounter{editorb}{0}
\setcounter{savededitorb}{0}
\setcounter{editorc}{0}
\setcounter{savededitorc}{0}
\setcounter{foreword}{0}
\setcounter{savedforeword}{0}
\setcounter{holder}{0}
\setcounter{savedholder}{0}
\setcounter{introduction}{0}
\setcounter{savedintroduction}{0}
\setcounter{namea}{0}
\setcounter{savednamea}{0}
\setcounter{nameb}{0}
\setcounter{savednameb}{0}
\setcounter{namec}{0}
\setcounter{savednamec}{0}
\setcounter{translator}{0}
\setcounter{savedtranslator}{0}
\setcounter{shortauthor}{0}
\setcounter{savedshortauthor}{0}
\setcounter{shorteditor}{0}
\setcounter{savedshorteditor}{0}
\setcounter{narrator}{0}
\setcounter{savednarrator}{0}
\setcounter{execproducer}{0}
\setcounter{savedexecproducer}{0}
\setcounter{execdirector}{0}
\setcounter{savedexecdirector}{0}
\setcounter{with}{0}
\setcounter{savedwith}{0}
\setcounter{labelname}{0}
\setcounter{savedlabelname}{0}
\setcounter{institution}{0}
\setcounter{savedinstitution}{0}
\setcounter{lista}{0}
\setcounter{savedlista}{0}
\setcounter{listb}{0}
\setcounter{savedlistb}{0}
\setcounter{listc}{0}
\setcounter{savedlistc}{0}
\setcounter{listd}{0}
\setcounter{savedlistd}{0}
\setcounter{liste}{0}
\setcounter{savedliste}{0}
\setcounter{listf}{0}
\setcounter{savedlistf}{0}
\setcounter{location}{0}
\setcounter{savedlocation}{0}
\setcounter{organization}{0}
\setcounter{savedorganization}{0}
\setcounter{origlocation}{0}
\setcounter{savedoriglocation}{0}
\setcounter{origpublisher}{0}
\setcounter{savedorigpublisher}{0}
\setcounter{publisher}{0}
\setcounter{savedpublisher}{0}
\setcounter{language}{0}
\setcounter{savedlanguage}{0}
\setcounter{origlanguage}{0}
\setcounter{savedoriglanguage}{0}
\setcounter{citation}{0}
\setcounter{savedcitation}{0}
\setcounter{pageref}{0}
\setcounter{savedpageref}{0}
\setcounter{textcitecount}{0}
\setcounter{textcitetotal}{0}
\setcounter{textcitemaxnames}{0}
\setcounter{biburlbigbreakpenalty}{100}
\setcounter{biburlbreakpenalty}{200}
\setcounter{biburlnumpenalty}{0}
\setcounter{biburlucpenalty}{0}
\setcounter{biburllcpenalty}{0}
\setcounter{smartand}{1}
\setcounter{bbx:relatedcount}{0}
\setcounter{bbx:relatedtotal}{0}
\setcounter{section@level}{0}
\setcounter{Item}{0}
\setcounter{Hfootnote}{0}
\setcounter{bookmark@seq@number}{9}
}

View File

@@ -0,0 +1,4 @@
\section{Literature Study}
% - Literatuur forecasting imbalance price
% - Literatuur policies adhv forecasts
Forecasting the electricity price is a challenging task that has been researched extensively. Knowing the future electricity price is crucial for market participants to make informed decisions and optimize their operations and profit.

View File

@@ -67,7 +67,7 @@ TODO: reduce the use of the world NRV and cumulative distribution function
The NRV value for a quarter can be sampled from the reconstructed cumulative distribution function. A full-day prediction of the NRV consists of 96 values. The cumulative distribution function that is sampled applies only to a specific quarter.
\\\\
TODO: Explain non autoregressive and autoregressive models\\\\
Two methods exist to sample full-day NRV values. The first method is a non-autoregressive model. This model outputs the quantiles for every quarter. For each quarter, the cumulative distribution function is reconstructed and sampled. The model is conditioned on the NRV timeline of the previous day. This consists of 96 values. The second method is an autoregressive model. This model outputs the quantiles for the next quarter for which the NRV distribution is wanted. This model is conditioned on the 96 previous NRV values. When a full-day sample of the NRV is wanted, the model is used recursively. The model predicts the quantiles for the next quarter, the cumulative distribution function is reconstructed and the NRV value is sampled. This value can then be used as input for the next quarter. This process is repeated until a full-day sample is obtained.
Two methods exist to sample full-day NRV values. The first method is a non-autoregressive model. This model outputs the quantiles for every quarter. For each quarter, the cumulative distribution function is reconstructed and sampled. The model is conditioned on the NRV timeline of the previous day. This consists of 96 values. The second method is an autoregressive model. This model outputs the quantiles for the next quarter for which the NRV distribution is wanted. This model is conditioned on the 96 previous NRV values. When a full-day sample of the NRV is wanted, the model is used recursively. The model predicts the quantiles for the next quarter, the cumulative distribution function is reconstructed and the NRV value is sampled. This value can then be used as input for the next quarter. This process is repeated until a full-day sample is obtained. Autoregressive models suffer from the drawback that errors in the prediction of early quarters propagate through the model and can lead to larger errors in later quarters.
\\\\
\subsubsection{Training}
The quantile regression model is trained using the pinball loss function, also known as the quantile loss. The model outputs the quantile values for the NRV. The quantile values themselves are not available in the training data. Only the real NRV values are known. The loss function is defined as:
@@ -162,20 +162,99 @@ where:
\item \(\beta_{0,\tau}, \beta_{1,\tau}, \beta_{2,\tau}, \ldots, \beta_{n,\tau} \) are the coefficients including the bias
\end{itemize}
The linear model outputs the values for the chosen quantiles. The total amount of parameters depends on the input features and the number of chosen quantiles. Assuming the input features are the 96 previous NRV values and 13 quantiles are chosen, the total amount of parameters is $96 * 13 + 13 = 1261$.
% TODO: is it necessary to provide the parameter calculation?
The linear model outputs the values for the chosen quantiles. The total number of parameters depends on the input features and the number of chosen quantiles. Assuming the input features are the 96 previous NRV values and 13 quantiles are chosen, the total number of parameters is $96 \times 13 + 13 = 1261$. The linear model is trained using the Adam optimizer with a learning rate of 1e-4. Early stopping is used with a patience of 5 epochs. Different sets of input features are experimented with and are compared to each other based on the previously mentioned metrics. All results are shown in Table~\ref{tab:autoregressive_linear_model_baseline_results}.
\\\\
% TODO: ask Jonas: add number of parameters to this table?
\begin{table}[ht]
\centering
\begin{tabular}{@{}lccccc@{}} % Corrected to six columns
\begin{tabular}{@{}lcccccc@{}}
\toprule
& \multicolumn{2}{c}{MSE} & \multicolumn{2}{c}{MAE} & CRPS \\
\cmidrule(lr){2-3} \cmidrule(lr){4-5} \cmidrule(lr){6-6}
& Train & Test & Train & Test & Test \\
& \multicolumn{2}{c}{MSE} & \multicolumn{2}{c}{MAE} & \multicolumn{2}{c}{CRPS} \\
\cmidrule(lr){2-3} \cmidrule(lr){4-5} \cmidrule(lr){6-7}
& Train & Test & Train & Test & Train & Test \\
\midrule
Model 1 & 0.012 & 0.015 & 0.085 & 0.090 & 0.070 \\
Model 2 & 0.010 & 0.013 & 0.080 & 0.085 & 0.065 \\
Model 3 & 0.008 & 0.011 & 0.075 & 0.080 & 0.060 \\
NRV & 35307.34 & 39222.41 & 146.38 & 152.49 & 88.46 & 91.56 \\
NRV + Load & 34274.70 & 39266.29 & 144.20 & 152.54 & 85.84 & 90.36 \\
NRV + PV & 33099.80 & 37489.68 & 141.57 & 149.32 & 84.87 & 89.42 \\
NRV + Load + PV & 32777.86 & 37642.66 & 141.01 & 149.90 & 83.40 & 89.34 \\
NRV + Load + Wind & 33009.05 & 39284.68 & 141.20 & 152.32 & 82.46 & 88.60 \\
NRV + Load + PV + Wind & 29816.77 & 36134.87 & 133.89 & 146.22 & 78.19 & 84.56 \\
NRV + Load + Wind + Net Position & 31634.27 & 37890.66 & 137.87 & 149.37 & 81.17 & 86.19 \\
NRV + Load + PV + Wind + Net Position & 29034.53 & \textbf{35725.42} & 131.87 & \textbf{145.64} & 76.23 & \textbf{83.30} \\
\bottomrule
\end{tabular}
\caption{Performance Metrics of the Model}
\end{table}
\caption{Autoregressive linear model results}
\label{tab:autoregressive_linear_model_baseline_results}
\end{table}
The linear model outputs the quantiles for the next quarter based on the given input features. The input features consist of previous history values of a certain feature or forecasts of a certain feature. The model, however, does not know which quarter of the day it is modeling.
\\\\
Multiple methods exist to provide such information to the model. The quarter of the day can be provided as a one-hot encoded vector. The cyclic nature of the quarter would not be captured using a one-hot encoded vector. The vectors for quarter 0 and quarter 95 would be very different while they should be very close to each other. Other methods exist that do take the cyclic property of the quarter into account. Trigonometric functions can be used to provide the quarter of the day information. The quarter of the day can be mapped to a sine and cosine value which can be used as input features. The sine and cosine values are calculated as follows:
\begin{equation}
\sin\left(\frac{2\pi}{96} \times \text{quarter}\right) \quad \text{and} \quad \cos\left(\frac{2\pi}{96} \times \text{quarter}\right)
\end{equation}
The sine and cosine values are then concatenated with the input features. Another method that can be used is adding an embedding layer to the model. The discrete quarter-of-the-day value can then be mapped to a vector. The embedding layer itself is learned during the training process, which allows the model to learn patterns between quarters. The length of the embedding vector can be chosen and experimented with. The quarter-of-the-day information is then concatenated with the input features. Other information (e.g., day of the week, month, year) can also easily be added to the model using this method by simply increasing the size of the embedding layer. The results of the linear model with the quarter embeddings are shown in Table~\ref{tab:autoregressive_linear_model_quarter_embedding_baseline_results}. The results show that adding the quarter embedding to the model improves the performance of the linear model.
\\\\
% TODO: Ask Jonas: Find cleaner way to present this table (remove repetition)
% TODO: Add more time information like day of week, month
\begin{table}[ht]
\centering
\begin{tabular}{@{}lcccccc@{}}
\toprule
& \multicolumn{2}{c}{MSE} & \multicolumn{2}{c}{MAE} & \multicolumn{2}{c}{CRPS} \\
\cmidrule(lr){2-3} \cmidrule(lr){4-5} \cmidrule(lr){6-7}
& Train & Test & Train & Test & Train & Test \\
\midrule
NRV & 35307.34 & 39222.41 & 146.38 & 152.49 & 88.46 & 91.56 \\
NRV + Quarter Embedding \textbf{(2 dim)} & 34412.60 & \textbf{38216.27} & 144.35 & \textbf{150.41} & 86.59 & \textbf{89.69} \\
NRV + Quarter Embedding \textbf{(5 dim)} & 34801.22 & 38617.17 & 145.22 & 151.20 & 86.64 & 89.72 \\
NRV + Quarter Embedding \textbf{(8 dim)} & 34558.29 & 38423.30 & 144.73 & 150.89 & 86.59 & 89.81 \\
\midrule
NRV + Load + PV + Wind + Net Position & 29034.53 & 35725.42 & 131.87 & 145.64 & 76.23 & 83.30 \\
NRV + Load + PV + Wind + Net Position \\ + Quarter Embedding \textbf{(2 dim)} & 28667.13 & 35746.01 & 131.20 & 146.01 & 77.28 & 85.54 \\
NRV + Load + PV + Wind + Net Position \\ + Quarter Embedding \textbf{(5 dim)} & 27407.41 & \textbf{34031.71} & 128.31 & \textbf{142.29} & 72.06 & \textbf{79.99} \\
\bottomrule
\end{tabular}
\caption{Autoregressive linear model results with time features}
\label{tab:autoregressive_linear_model_quarter_embedding_baseline_results}
\end{table}
Some examples of sampled full-day NRV sequences are shown in Figure~\ref{fig:autoregressive_linear_model_samples}. The figure shows the real NRV values and the confidence intervals calculated based on 1000 full-day NRV samples. The mean of these samples is also plotted in the figure. The confidence intervals show the uncertainty of the NRV values. When the confidence interval is large, the model is not very certain about the NRV value and samples the NRV for that quarter with a high variance. The confidence intervals seen in the figure are quite narrow and do not always capture the real NRV value.
% TODO: explain more what we see in the figure, we see the small peaks every hour. Talk about the problem of autoregression (error propagation)
\begin{figure}[ht]
\centering
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/linear_model_samples/AQR_NRV_Load_Wind_PV_NP_QE-Sample_864.png}
\caption{Sample 1}
\label{fig:autoregressive_linear_model_sample_1}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/linear_model_samples/AQR_NRV_Load_Wind_PV_NP_QE-Sample_4320.png}
\caption{Sample 2}
\label{fig:autoregressive_linear_model_sample_2}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/linear_model_samples/AQR_NRV_Load_Wind_PV_NP_QE-Sample_6336.png}
\caption{Sample 3}
\label{fig:autoregressive_linear_model_sample_3}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/linear_model_samples/AQR_NRV_Load_Wind_PV_NP_QE-Sample_7008.png}
\caption{Sample 4}
\label{fig:autoregressive_linear_model_sample_4}
\end{subfigure}
\caption{Test examples of the autoregressive linear model. The plots show the confidence intervals calculated from 1000 generated full-day NRV samples.}
\label{fig:autoregressive_linear_model_samples}
\end{figure}
The linear model is a simple model and can be used as a baseline to compare the more complex models.
\newpage
\subsection{Diffusion}

View File

@@ -0,0 +1,56 @@
\relax
\providecommand\babel@aux[2]{}
\@nameuse{bbl@beforestart}
\abx@aux@refcontext{nyt/global//global/global}
\providecommand\hyper@newdestlabel[2]{}
\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
\global\let\oldnewlabel\newlabel
\gdef\newlabel#1#2{\newlabelxx{#1}#2}
\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
\AtEndDocument{\ifx\hyper@anchor\@undefined
\let\newlabel\oldnewlabel
\fi}
\fi}
\global\let\hyper@last\relax
\gdef\HyperFirstAtBeginDocument#1{#1}
\providecommand\HyField@AuxAddToFields[1]{}
\providecommand\HyField@AuxAddToCoFields[2]{}
\babel@aux{english}{}
\@input{sections/introduction.aux}
\@input{sections/background.aux}
\abx@aux@refcontext{nyt/apasortcite//global/global}
\abx@aux@cite{0}{ho2020denoising}
\abx@aux@segm{0}{0}{ho2020denoising}
\@writefile{toc}{\contentsline {section}{\numberline {3}Literature Study}{8}{section.3}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {4}TODO: Better title for this section}{8}{section.4}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {5}NRV Prediction}{8}{section.5}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Data}{8}{subsection.5.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {5.2}Quantile Regression}{10}{subsection.5.2}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Example of quantiles}}{10}{figure.2}\protected@file@percent }
\newlabel{fig:quantile_example}{{2}{10}{Example of quantiles}{figure.2}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Example of quantile regression output for one-quarter of the NRV, showing interpolated values for quantiles at 1\%, 5\%, 10\%, 15\%, 30\%, 40\%, 50\%, 60\%, 70\%, 85\%, 90\%, 95\%, and 99\%. These quantiles are used to reconstruct the cumulative distribution function.}}{11}{figure.3}\protected@file@percent }
\newlabel{fig:quantile_regression_example}{{3}{11}{Example of quantile regression output for one-quarter of the NRV, showing interpolated values for quantiles at 1\%, 5\%, 10\%, 15\%, 30\%, 40\%, 50\%, 60\%, 70\%, 85\%, 90\%, 95\%, and 99\%. These quantiles are used to reconstruct the cumulative distribution function}{figure.3}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.2.1}Training}{12}{subsubsection.5.2.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.2.2}Evaluation}{13}{subsubsection.5.2.2}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Visualization of the CRPS metric}}{14}{figure.4}\protected@file@percent }
\newlabel{fig:crps_visualization}{{4}{14}{Visualization of the CRPS metric}{figure.4}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {5.2.3}Models}{14}{subsubsection.5.2.3}\protected@file@percent }
\newlabel{tab:autoregressive_linear_model_baseline_results}{{5.2.3}{15}{Models}{equation.5.6}{}}
\@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces Autoregressive linear model results}}{15}{table.3}\protected@file@percent }
\newlabel{tab:autoregressive_linear_model_quarter_embedding_baseline_results}{{5.2.3}{16}{Models}{equation.5.7}{}}
\@writefile{lot}{\contentsline {table}{\numberline {4}{\ignorespaces Autoregressive linear model results with time features}}{16}{table.4}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {5.3}Diffusion}{18}{subsection.5.3}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Sample 1}}{19}{figure.5}\protected@file@percent }
\newlabel{fig:autoregressive_linear_model_sample_1}{{5}{19}{Sample 1}{figure.5}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Sample 2}}{19}{figure.6}\protected@file@percent }
\newlabel{fig:autoregressive_linear_model_sample_2}{{6}{19}{Sample 2}{figure.6}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces Sample 3}}{19}{figure.7}\protected@file@percent }
\newlabel{fig:autoregressive_linear_model_sample_3}{{7}{19}{Sample 3}{figure.7}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces Sample 4}}{19}{figure.8}\protected@file@percent }
\newlabel{fig:autoregressive_linear_model_sample_4}{{8}{19}{Sample 4}{figure.8}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces Test examples of the autoregressive linear model. The plots show the confidence intervals calculated from 1000 generated full-day NRV samples.}}{19}{figure.9}\protected@file@percent }
\newlabel{fig:autoregressive_linear_model_samples}{{9}{19}{Test examples of the autoregressive linear model. The plots show the confidence intervals calculated from 1000 generated full-day NRV samples}{figure.9}{}}
\abx@aux@read@bbl@mdfivesum{nohash}
\abx@aux@read@bblrerun
\gdef \@abspage@last{20}

2873
Reports/Thesis/verslag.bcf Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,17 @@
\BOOKMARK [1][-]{section.1}{\376\377\000I\000n\000t\000r\000o\000d\000u\000c\000t\000i\000o\000n}{}% 1
\BOOKMARK [1][-]{section.2}{\376\377\000B\000a\000c\000k\000g\000r\000o\000u\000n\000d}{}% 2
\BOOKMARK [2][-]{subsection.2.1}{\376\377\000E\000l\000e\000c\000t\000r\000i\000c\000i\000t\000y\000\040\000m\000a\000r\000k\000e\000t}{section.2}% 3
\BOOKMARK [2][-]{subsection.2.2}{\376\377\000G\000e\000n\000e\000r\000a\000t\000i\000v\000e\000\040\000m\000o\000d\000e\000l\000i\000n\000g}{section.2}% 4
\BOOKMARK [2][-]{subsection.2.3}{\376\377\000D\000i\000f\000f\000u\000s\000i\000o\000n\000\040\000m\000o\000d\000e\000l\000s}{section.2}% 5
\BOOKMARK [3][-]{subsubsection.2.3.1}{\376\377\000O\000v\000e\000r\000v\000i\000e\000w}{subsection.2.3}% 6
\BOOKMARK [3][-]{subsubsection.2.3.2}{\376\377\000A\000p\000p\000l\000i\000c\000a\000t\000i\000o\000n\000s}{subsection.2.3}% 7
\BOOKMARK [3][-]{subsubsection.2.3.3}{\376\377\000G\000e\000n\000e\000r\000a\000t\000i\000o\000n\000\040\000p\000r\000o\000c\000e\000s\000s}{subsection.2.3}% 8
\BOOKMARK [1][-]{section.3}{\376\377\000L\000i\000t\000e\000r\000a\000t\000u\000r\000e\000\040\000S\000t\000u\000d\000y}{}% 9
\BOOKMARK [1][-]{section.4}{\376\377\000T\000O\000D\000O\000:\000\040\000B\000e\000t\000t\000e\000r\000\040\000t\000i\000t\000l\000e\000\040\000f\000o\000r\000\040\000t\000h\000i\000s\000\040\000s\000e\000c\000t\000i\000o\000n}{}% 10
\BOOKMARK [1][-]{section.5}{\376\377\000N\000R\000V\000\040\000P\000r\000e\000d\000i\000c\000t\000i\000o\000n}{}% 11
\BOOKMARK [2][-]{subsection.5.1}{\376\377\000D\000a\000t\000a}{section.5}% 12
\BOOKMARK [2][-]{subsection.5.2}{\376\377\000Q\000u\000a\000n\000t\000i\000l\000e\000\040\000R\000e\000g\000r\000e\000s\000s\000i\000o\000n}{section.5}% 13
\BOOKMARK [3][-]{subsubsection.5.2.1}{\376\377\000T\000r\000a\000i\000n\000i\000n\000g}{subsection.5.2}% 14
\BOOKMARK [3][-]{subsubsection.5.2.2}{\376\377\000E\000v\000a\000l\000u\000a\000t\000i\000o\000n}{subsection.5.2}% 15
\BOOKMARK [3][-]{subsubsection.5.2.3}{\376\377\000M\000o\000d\000e\000l\000s}{subsection.5.2}% 16
\BOOKMARK [2][-]{subsection.5.3}{\376\377\000D\000i\000f\000f\000u\000s\000i\000o\000n}{section.5}% 17

Binary file not shown.

Binary file not shown.

View File

@@ -46,7 +46,7 @@ class NrvDataset(Dataset):
if self.data_config.LOAD_HISTORY:
self.history_features.append("total_load")
if self.data_config.PV_HISTORY:
self.history_features.append("pv_gen_forecast")
self.history_features.append("pv_history")
if self.data_config.WIND_HISTORY:
self.history_features.append("wind_history")
if self.data_config.NOMINAL_NET_POSITION:
@@ -56,7 +56,7 @@ class NrvDataset(Dataset):
if self.data_config.LOAD_FORECAST:
self.forecast_features.append("load_forecast")
if self.data_config.PV_FORECAST:
self.forecast_features.append("pv_gen_forecast")
self.forecast_features.append("pv_forecast")
if self.data_config.WIND_FORECAST:
self.forecast_features.append("wind_forecast")
if self.data_config.NOMINAL_NET_POSITION:

View File

@@ -40,7 +40,7 @@ class DataConfig:
class DataProcessor:
def __init__(self, data_config: DataConfig, lstm: bool = False, path:str="./"):
def __init__(self, data_config: DataConfig, lstm: bool = False, path: str = "./"):
self.batch_size = 2048
self.path = path
self.lstm = lstm
@@ -55,20 +55,21 @@ class DataProcessor:
self.history_features = self.get_nrv_history()
self.future_features = self.get_load_forecast()
# self.pv_forecast = self.get_pv_forecast()
self.pv_forecast = self.get_pv_forecast()
self.wind_forecast = self.get_wind_forecast()
self.all_features = self.history_features.merge(
self.future_features, on="datetime", how="left"
)
# self.all_features = self.all_features.merge(
# self.pv_forecast, on="datetime", how="left"
# )
self.all_features = self.all_features.merge(
self.pv_forecast, on="datetime", how="left"
)
self.all_features = self.all_features.merge(
self.wind_forecast, on="datetime", how="left"
)
self.all_features = self.all_features.merge(
self.get_nominal_net_position(), on="datetime", how="left"
)
@@ -86,6 +87,7 @@ class DataProcessor:
self.nrv_scaler = MinMaxScaler(feature_range=(-1, 1))
self.load_forecast_scaler = MinMaxScaler(feature_range=(-1, 1))
self.pv_forecast_scaler = MinMaxScaler(feature_range=(-1, 1))
self.wind_forecast_scaler = MinMaxScaler(feature_range=(-1, 1))
self.nominal_net_position_scaler = MinMaxScaler(feature_range=(-1, 1))
@@ -151,10 +153,19 @@ class DataProcessor:
def get_pv_forecast(self):
df = pd.read_csv(self.path + pv_forecast_data_path, delimiter=";")
df = df[df["region"] == "Belgium"]
df = df.rename(
columns={"dayahead11hforecast": "pv_forecast", "Datetime": "datetime"}
columns={
"dayahead11hforecast": "pv_forecast",
"Datetime": "datetime",
"measured": "pv_history",
}
)
df = df[["datetime", "pv_forecast"]]
df = df[["datetime", "pv_forecast", "pv_history"]]
# replace nan by zero
df = df.fillna(0)
df = df.groupby("datetime").mean().reset_index()
df["datetime"] = pd.to_datetime(df["datetime"], utc=True)
@@ -165,7 +176,11 @@ class DataProcessor:
df = pd.read_csv(self.path + wind_forecast_data_path, delimiter=";")
df = df.rename(
columns={"measured": "wind_history", "dayaheadforecast": "wind_forecast", "datetime": "datetime"}
columns={
"measured": "wind_history",
"dayaheadforecast": "wind_forecast",
"datetime": "datetime",
}
)
df = df[["datetime", "wind_forecast", "wind_history"]]
@@ -198,8 +213,6 @@ class DataProcessor:
df = df.set_index("datetime").resample("15min").ffill().reset_index()
return df
def set_batch_size(self, batch_size: int):
self.batch_size = batch_size
@@ -233,15 +246,26 @@ class DataProcessor:
train_df["total_load"] = self.load_forecast_scaler.transform(
train_df["total_load"].values.reshape(-1, 1)
).reshape(-1)
train_df["pv_forecast"] = self.pv_forecast_scaler.fit_transform(
train_df["pv_forecast"].values.reshape(-1, 1)
).reshape(-1)
train_df["pv_history"] = self.pv_forecast_scaler.transform(
train_df["pv_history"].values.reshape(-1, 1)
).reshape(-1)
train_df["wind_forecast"] = self.wind_forecast_scaler.fit_transform(
train_df["wind_forecast"].values.reshape(-1, 1)
).reshape(-1)
train_df["wind_history"] = self.wind_forecast_scaler.transform(
train_df["wind_history"].values.reshape(-1, 1)
).reshape(-1)
train_df["nominal_net_position"] = self.nominal_net_position_scaler.fit_transform(
train_df["nominal_net_position"].values.reshape(-1, 1)
).reshape(-1)
train_df["nominal_net_position"] = (
self.nominal_net_position_scaler.fit_transform(
train_df["nominal_net_position"].values.reshape(-1, 1)
).reshape(-1)
)
train_dataset = NrvDataset(
train_df,
@@ -253,7 +277,10 @@ class DataProcessor:
return self.get_dataloader(train_dataset, shuffle=shuffle)
def get_test_dataloader(
self, transform: bool = True, predict_sequence_length: int = 96, full_day_skip: bool = False
self,
transform: bool = True,
predict_sequence_length: int = 96,
full_day_skip: bool = False,
):
test_df = self.all_features.copy()
@@ -273,16 +300,26 @@ class DataProcessor:
test_df["total_load"] = self.load_forecast_scaler.transform(
test_df["total_load"].values.reshape(-1, 1)
).reshape(-1)
test_df["pv_forecast"] = self.pv_forecast_scaler.transform(
test_df["pv_forecast"].values.reshape(-1, 1)
).reshape(-1)
test_df["pv_history"] = self.pv_forecast_scaler.transform(
test_df["pv_history"].values.reshape(-1, 1)
).reshape(-1)
test_df["wind_forecast"] = self.wind_forecast_scaler.transform(
test_df["wind_forecast"].values.reshape(-1, 1)
).reshape(-1)
test_df["wind_history"] = self.wind_forecast_scaler.transform(
test_df["wind_history"].values.reshape(-1, 1)
).reshape(-1)
test_df["nominal_net_position"] = self.nominal_net_position_scaler.transform(
test_df["nominal_net_position"].values.reshape(-1, 1)
).reshape(-1)
test_df["nominal_net_position"] = (
self.nominal_net_position_scaler.transform(
test_df["nominal_net_position"].values.reshape(-1, 1)
).reshape(-1)
)
test_dataset = NrvDataset(
test_df,
@@ -294,12 +331,17 @@ class DataProcessor:
return self.get_dataloader(test_dataset, shuffle=False)
def get_dataloaders(
self, transform: bool = True, predict_sequence_length: int = 96, full_day_skip: bool = False
self,
transform: bool = True,
predict_sequence_length: int = 96,
full_day_skip: bool = False,
):
return self.get_train_dataloader(
transform=transform, predict_sequence_length=predict_sequence_length
), self.get_test_dataloader(
transform=transform, predict_sequence_length=predict_sequence_length, full_day_skip=full_day_skip
transform=transform,
predict_sequence_length=predict_sequence_length,
full_day_skip=full_day_skip,
)
def inverse_transform(self, input_data):
@@ -338,7 +380,7 @@ class DataProcessor:
time_feature_size *= 96
if self.data_config.DAY_OF_WEEK:
time_feature_size *= 7
if time_feature_size == 1:
return 0
return time_feature_size

View File

@@ -1,5 +1,7 @@
from torch import nn
import torch
import numpy as np
class TimeEmbedding(nn.Module):
def __init__(self, time_features: int, embedding_dim: int):
@@ -17,9 +19,10 @@ class TimeEmbedding(nn.Module):
# Embed these time features
embedded_time = self.embedding(time_feature)
# Concatenate the embedded features with the original input (minus the last 'time feature')
return torch.cat((x[..., :-1], embedded_time), dim=-1) # Use -1 to specify the last dimension
return torch.cat(
(x[..., :-1], embedded_time), dim=-1
) # Use -1 to specify the last dimension
def output_dim(self, input_dim):
if self.time_features == 0:
return input_dim
@@ -30,3 +33,32 @@ class TimeEmbedding(nn.Module):
# Convert the list back to a torch.Size object
output_dim = torch.Size(input_dim_list)
return output_dim
class TrigonometricTimeEmbedding(nn.Module):
def __init__(self, time_features: int):
super().__init__()
self.time_features = time_features
def forward(self, x):
if self.time_features == 0:
return x
time_feature = x[..., -1] # Use ellipsis to access the last dimension
time_feature = time_feature.int()
# Calculate the sine and cosine of the time feature
sin_time = torch.sin(2 * np.pi * time_feature.float() / self.time_features)
cos_time = torch.cos(2 * np.pi * time_feature.float() / self.time_features)
# Stack the sine and cosine features
time_embedding = torch.stack((sin_time, cos_time), dim=-1)
# Concatenate the embedded features with the original input (minus the last 'time feature')
return torch.cat(
(x[..., :-1], time_embedding), dim=-1
) # Use -1 to specify the last dimension
def output_dim(self, input_dim):
if self.time_features == 0:
return input_dim
input_dim_list = list(input_dim)
input_dim_list[-1] = input_dim_list[-1] - 1 + 2
output_dim = torch.Size(input_dim_list)
return output_dim

View File

@@ -262,13 +262,11 @@ class Trainer:
self.model.eval()
# set full day skip
_, test_loader = self.data_processor.get_dataloaders(
train_loader, test_loader = self.data_processor.get_dataloaders(
predict_sequence_length=self.model.output_size
)
# if not hasattr(self, "plot_quantile_percentages"):
# self.log_final_metrics(task, train_loader, train=True)
self.log_final_metrics(task, train_loader, train=True)
self.log_final_metrics(task, test_loader, train=False)
def test(self, test_loader: torch.utils.data.DataLoader):

View File

@@ -2,7 +2,7 @@ from src.utils.clearml import ClearMLHelper
#### ClearML ####
clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast")
task = clearml_helper.get_task(task_name="AQR: Linear Baseline")
task = clearml_helper.get_task(task_name="AQR: Linear Baseline + Quarter Trigonometric")
task.execute_remotely(queue_name="default", exit_process=True)
from src.policies.PolicyEvaluator import PolicyEvaluator
@@ -20,7 +20,7 @@ from src.losses import *
import torch
from torch.nn import MSELoss, L1Loss
import torch.nn as nn
from src.models.time_embedding_layer import TimeEmbedding
from src.models.time_embedding_layer import TimeEmbedding, TrigonometricTimeEmbedding
#### Data Processor ####
@@ -30,18 +30,21 @@ data_config.NRV_HISTORY = True
data_config.LOAD_HISTORY = True
data_config.LOAD_FORECAST = True
data_config.WIND_FORECAST = False
data_config.WIND_HISTORY = False
data_config.WIND_FORECAST = True
data_config.WIND_HISTORY = True
data_config.QUARTER = False
data_config.PV_FORECAST = True
data_config.PV_HISTORY = True
data_config.QUARTER = True
data_config.DAY_OF_WEEK = False
data_config.NOMINAL_NET_POSITION = False
data_config.NOMINAL_NET_POSITION = True
data_config = task.connect(data_config, name="data_features")
data_processor = DataProcessor(data_config, path="", lstm=True)
data_processor = DataProcessor(data_config, path="", lstm=False)
data_processor.set_batch_size(512)
data_processor.set_full_day_skip(False)
@@ -67,7 +70,7 @@ model_parameters = {
"hidden_size": 256,
"num_layers": 2,
"dropout": 0.2,
"time_feature_embedding": 8,
"time_feature_embedding": 2,
}
model_parameters = task.connect(model_parameters, name="model_parameters")
@@ -76,6 +79,8 @@ model_parameters = task.connect(model_parameters, name="model_parameters")
# data_processor.get_time_feature_size(), model_parameters["time_feature_embedding"]
# )
time_embedding = TrigonometricTimeEmbedding(data_processor.get_time_feature_size())
# lstm_model = GRUModel(
# time_embedding.output_dim(inputDim),
# len(quantiles),
@@ -92,11 +97,11 @@ model_parameters = task.connect(model_parameters, name="model_parameters")
# dropout=model_parameters["dropout"],
# )
# linear_model = LinearRegression(time_embedding.output_dim(inputDim), len(quantiles))
linear_model = LinearRegression(inputDim, len(quantiles))
linear_model = LinearRegression(time_embedding.output_dim(inputDim), len(quantiles))
# linear_model = LinearRegression(inputDim, len(quantiles))
# model = nn.Sequential(time_embedding, lstm_model)
model = linear_model
model = nn.Sequential(time_embedding, linear_model)
# model = linear_model
model.output_size = 1
optimizer = torch.optim.Adam(model.parameters(), lr=model_parameters["learning_rate"])
@@ -121,7 +126,7 @@ trainer.add_metrics_to_track(
[PinballLoss(quantiles), MSELoss(), L1Loss(), CRPSLoss(quantiles)]
)
trainer.early_stopping(patience=5)
trainer.plot_every(2)
trainer.plot_every(15)
trainer.train(task=task, epochs=epochs, remotely=True)
### Policy Evaluation ###