diff --git a/Reports/Thesis/sections/results/linear.tex b/Reports/Thesis/sections/results/linear.tex index c8d4bc1..02dcd75 100644 --- a/Reports/Thesis/sections/results/linear.tex +++ b/Reports/Thesis/sections/results/linear.tex @@ -1,4 +1,4 @@ -\subsubsection{Linear Model} +\subsection{Linear Model} % TODO: explainedi nsection reference ? The simplest model to be trained for the NRV modeling is the linear model. The linear model is trained using the pinball loss function explained in the section above. The outputs of the model are values for the chosen quantiles. The linear model can be trained in an autoregressive and non-autoregressive way. Both methods will be compared to each other. The linear model is trained using the Adam optimizer with a learning rate of 1e-4. Early stopping is used with a patience of 5 epochs. The linear model is evaluated using the mean squared error (MSE), mean absolute error (MAE), and continuous ranked probability score (CRPS). The influence of the input features is also evaluated by training the models with different input feature sets. diff --git a/Reports/Thesis/sections/results/non-linear.tex b/Reports/Thesis/sections/results/non-linear.tex index 9673ee6..d67420c 100644 --- a/Reports/Thesis/sections/results/non-linear.tex +++ b/Reports/Thesis/sections/results/non-linear.tex @@ -1,4 +1,4 @@ -\subsubsection{Non-Linear Model} +\subsection{Non-Linear Model} Adding nonlinearity to the model can be done by adding some non-linear activations between linear layers. This improves the model's ability to learn more complex patterns in the data. The model is trained the same way as the linear model for quantile regression using the pinball loss. Because a non-linear model is more complex, it is more prone to overfitting the training data. Because of this, dropout layers are added to the model to prevent overfitting. The architecture of the non-linear model is illustrated in Table \ref{tab:non_linear_model_architecture}. The autoregressive model begins with an input layer that converts the quarter of the day into an embedding. This layer concatenates the other input features with the quarter embedding. These combined features are then processed through a sequence of layers: @@ -45,21 +45,21 @@ While this non-linear model is still quite simple, it offers the flexibility in & & & Train & Test & Train & Test & Train & Test \\ \midrule NRV & & & & & & & & \\ - & 2 & 256 & 32982.64 & 38117.43 & 138.92 & 147.55 & 82.10 & 86.42 \\ - & 4 & 256 & 33317.10 & 37817.78 & 139.42 & 146.90 & 82.17 & 85.63 \\ - & 8 & 256 & 32727.90 & 36346.57 & 139.21 & 144.80 & 81.86 & 84.51 \\ - & 16 & 256 & 35076.57 & 38624.83 & 143.28 & 148.61 & 84.70 & 87.05 \\ + & 2 & 256 & 38117.43 & 41574.38 & 147.55 & 153.83 & 86.42 & 75.61 \\ + & 4 & 256 & 37817.78 & 40200.92 & 146.90 & 152.00 & 85.63 & 74.37 \\ + & 8 & 256 & 36346.57 & 38746.81 & 144.80 & 148.82 & 84.51 & 74.55 \\ + & 16 & 256 & 38624.83 & 39328.47 & 148.61 & 149.19 & 87.05 & 75.38 \\ \midrule NRV + Load + PV\\ + Wind & & & & & & & & \\ - & 2 & 256 & 28860.10 & 42983.21 & 130.46 & 156.65 & 75.47 & 92.15 \\ + & 2 & 256 & 42983.21 & 42950.17 & 156.65 & 156.88 & 92.15 & 76.21 \\ \midrule NRV + Load + PV\\ + Wind + Net Position\\ + QE (dim 5) & & & & & & & & \\ - & 2 & 256 & 25064.82 & 37785.49 & 121.45 & 146.99 & 70.47 & 85.22 \\ - & 4 & 256 & 24333.62 & 34232.57 & 119.16 & 139.78 & 68.60 & 80.14 \\ - & 8 & 256 & 26399.20 & \textbf{32447.41} & 124.75 & \textbf{137.24} & 72.07 & \textbf{79.22} \\ - & 2 & 512 & 28608.20 & 44281.20 & 12x9.41 & 158.63 & 75.54 & 91.82 \\ - & 4 & 512 & 24564.89 & 34839.79 & 119.74 & 140.67 & 69.02 & 80.21 \\ - & 8 & 512 & 24523.61 & 34925.46 & 119.90 & 141.11 & 69.26 & 81.11 \\ + & 2 & 256 & 37785.49 & 42828.61 & 146.99 & 157.03 & 85.22 & 76.36 \\ + & 4 & 256 & 34232.57 & 42588.16 & 139.78 & 157.20 & 80.14 & 73.75 \\ + & 8 & 256 & \textbf{32447.41} & 40541.92 & \textbf{137.24} & 151.60 & \textbf{79.22} & 75.52 \\ + & 2 & 512 & 44281.20 & 44018.79 & 158.63 & 159.06 & 91.82 & 77.99 \\ + & 4 & 512 & 34839.79 & 41999.79 & 140.67 & 154.86 & 80.21 & 75.70 \\ + & 8 & 512 & 34925.46 & 39774.38 & 141.11 & 150.62 & 81.11 & 74.67 \\ \bottomrule \end{tabular} diff --git a/Reports/Thesis/verslag.aux b/Reports/Thesis/verslag.aux index 8128e54..5fac949 100644 --- a/Reports/Thesis/verslag.aux +++ b/Reports/Thesis/verslag.aux @@ -47,7 +47,7 @@ \abx@aux@page{5}{19} \@writefile{toc}{\contentsline {section}{\numberline {6}Results \& Discussion}{20}{section.6}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {6.1}Data}{20}{subsection.6.1}\protected@file@percent } -\@writefile{toc}{\contentsline {subsubsection}{\numberline {6.1.1}Linear Model}{21}{subsubsection.6.1.1}\protected@file@percent } +\@writefile{toc}{\contentsline {subsection}{\numberline {6.2}Linear Model}{21}{subsection.6.2}\protected@file@percent } \@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces Linear model results\relax }}{22}{table.caption.9}\protected@file@percent } \newlabel{tab:linear_model_baseline_results}{{3}{22}{Linear model results\relax }{table.caption.9}{}} \@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Mean and standard deviation of the NRV values over the quarter of the day\relax }}{23}{figure.caption.10}\protected@file@percent } @@ -60,19 +60,19 @@ \newlabel{fig:linear_model_samples_comparison}{{8}{26}{Samples for two examples from the test set for the autoregressive and non-autoregressive linear model. The real NRV is shown in orange.\relax }{figure.caption.13}{}} \@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces Over/underestimation of the quantiles for the autoregressive and non-autoregressive linear models. Both the quantile performance for the training and test set are shown. The plots are generated using the input features NRV, Load, Wind, PV, Net Position, and the quarter embedding (only for the autoregressive model).\relax }}{27}{figure.caption.14}\protected@file@percent } \newlabel{fig:linear_model_quantile_over_underestimation}{{9}{27}{Over/underestimation of the quantiles for the autoregressive and non-autoregressive linear models. Both the quantile performance for the training and test set are shown. The plots are generated using the input features NRV, Load, Wind, PV, Net Position, and the quarter embedding (only for the autoregressive model).\relax }{figure.caption.14}{}} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {6.1.2}Non-Linear Model}{27}{subsubsection.6.1.2}\protected@file@percent } +\@writefile{toc}{\contentsline {subsection}{\numberline {6.3}Non-Linear Model}{27}{subsection.6.3}\protected@file@percent } \@writefile{lot}{\contentsline {table}{\numberline {5}{\ignorespaces Non-linear Quantile Regression Model Architecture\relax }}{28}{table.caption.15}\protected@file@percent } \newlabel{tab:non_linear_model_architecture}{{5}{28}{Non-linear Quantile Regression Model Architecture\relax }{table.caption.15}{}} \@writefile{lot}{\contentsline {table}{\numberline {6}{\ignorespaces Autoregressive non-linear quantile regression model results. All the models used a dropout of 0.2 .\relax }}{29}{table.caption.16}\protected@file@percent } \newlabel{tab:non_linear_model_results}{{6}{29}{Autoregressive non-linear quantile regression model results. All the models used a dropout of 0.2 .\relax }{table.caption.16}{}} \@writefile{lof}{\contentsline {figure}{\numberline {10}{\ignorespaces Comparison for examples from test set between the autoregressive linear and non-linear models. The plots show the confidence intervals calculated from 1000 generated full-day NRV samples. The samples were generated using input features NRV, Load, Wind, PV, Net Position and the quarter embedding. The non-linear model used 8 layers with a hidden size of 256 and a dropout rate of 0.2.\relax }}{30}{figure.caption.17}\protected@file@percent } \newlabel{fig:linear_non_linear_sample_comparison}{{10}{30}{Comparison for examples from test set between the autoregressive linear and non-linear models. The plots show the confidence intervals calculated from 1000 generated full-day NRV samples. The samples were generated using input features NRV, Load, Wind, PV, Net Position and the quarter embedding. The non-linear model used 8 layers with a hidden size of 256 and a dropout rate of 0.2.\relax }{figure.caption.17}{}} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {6.1.3}GRU Model}{30}{subsubsection.6.1.3}\protected@file@percent } +\@writefile{toc}{\contentsline {subsubsection}{\numberline {6.3.1}GRU Model}{30}{subsubsection.6.3.1}\protected@file@percent } \@writefile{lot}{\contentsline {table}{\numberline {7}{\ignorespaces GRU Model Architecture\relax }}{31}{table.caption.18}\protected@file@percent } \newlabel{tab:gru_model_architecture}{{7}{31}{GRU Model Architecture\relax }{table.caption.18}{}} \@writefile{lot}{\contentsline {table}{\numberline {8}{\ignorespaces Autoregressive GRU quantile regression model results. All the models used a dropout of 0.2 .\relax }}{32}{table.caption.19}\protected@file@percent } \newlabel{tab:autoregressive_gru_model_results}{{8}{32}{Autoregressive GRU quantile regression model results. All the models used a dropout of 0.2 .\relax }{table.caption.19}{}} -\@writefile{toc}{\contentsline {subsection}{\numberline {6.2}Diffusion}{33}{subsection.6.2}\protected@file@percent } +\@writefile{toc}{\contentsline {subsection}{\numberline {6.4}Diffusion}{33}{subsection.6.4}\protected@file@percent } \@writefile{toc}{\contentsline {section}{\numberline {7}Policies for battery optimization}{33}{section.7}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {7.1}Baselines}{33}{subsection.7.1}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {7.2}Policies using NRV predictions}{33}{subsection.7.2}\protected@file@percent } diff --git a/Reports/Thesis/verslag.log b/Reports/Thesis/verslag.log index 78a23d8..2f04ce9 100644 --- a/Reports/Thesis/verslag.log +++ b/Reports/Thesis/verslag.log @@ -1,4 +1,4 @@ -This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) (preloaded format=pdflatex 2023.9.17) 4 MAY 2024 16:47 +This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) (preloaded format=pdflatex 2023.9.17) 5 MAY 2024 02:15 entering extended mode restricted \write18 enabled. file:line:error style messages enabled. @@ -1411,7 +1411,7 @@ Underfull \hbox (badness 10000) in paragraph at lines 168--176 LaTeX Warning: There were undefined references. Package rerunfilecheck Info: File `verslag.out' has not changed. -(rerunfilecheck) Checksum: F09B5AD300DAC40E6FB795819625DD24;4349. +(rerunfilecheck) Checksum: 69591E981E96AB0B0521BC78DAAADEA0;4329. Package biblatex Warning: Please (re)run Biber on the file: (biblatex) verslag @@ -1423,14 +1423,14 @@ Package logreq Info: Writing requests to 'verslag.run.xml'. ) Here is how much of TeX's memory you used: 26915 strings out of 476025 - 492086 string characters out of 5790017 + 492066 string characters out of 5790017 1882388 words of memory out of 5000000 46809 multiletter control sequences out of 15000+600000 603223 words of font info for 88 fonts, out of 8000000 for 9000 1141 hyphenation exceptions out of 8191 83i,16n,131p,2100b,5180s stack positions out of 10000i,1000n,20000p,200000b,200000s -Output written on verslag.pdf (35 pages, 5303237 bytes). +Output written on verslag.pdf (35 pages, 5303263 bytes). PDF statistics: 508 PDF objects out of 1000 (max. 8388607) 395 compressed objects within 4 object streams diff --git a/Reports/Thesis/verslag.out b/Reports/Thesis/verslag.out index 825ee8e..0829f4e 100644 --- a/Reports/Thesis/verslag.out +++ b/Reports/Thesis/verslag.out @@ -20,10 +20,10 @@ \BOOKMARK [2][-]{subsection.5.2}{\376\377\000P\000o\000l\000i\000c\000i\000e\000s\000\040\000f\000o\000r\000\040\000B\000a\000t\000t\000e\000r\000y\000\040\000O\000p\000t\000i\000m\000i\000z\000a\000t\000i\000o\000n}{section.5}% 20 \BOOKMARK [1][-]{section.6}{\376\377\000R\000e\000s\000u\000l\000t\000s\000\040\000\046\000\040\000D\000i\000s\000c\000u\000s\000s\000i\000o\000n}{}% 21 \BOOKMARK [2][-]{subsection.6.1}{\376\377\000D\000a\000t\000a}{section.6}% 22 -\BOOKMARK [3][-]{subsubsection.6.1.1}{\376\377\000L\000i\000n\000e\000a\000r\000\040\000M\000o\000d\000e\000l}{subsection.6.1}% 23 -\BOOKMARK [3][-]{subsubsection.6.1.2}{\376\377\000N\000o\000n\000-\000L\000i\000n\000e\000a\000r\000\040\000M\000o\000d\000e\000l}{subsection.6.1}% 24 -\BOOKMARK [3][-]{subsubsection.6.1.3}{\376\377\000G\000R\000U\000\040\000M\000o\000d\000e\000l}{subsection.6.1}% 25 -\BOOKMARK [2][-]{subsection.6.2}{\376\377\000D\000i\000f\000f\000u\000s\000i\000o\000n}{section.6}% 26 +\BOOKMARK [2][-]{subsection.6.2}{\376\377\000L\000i\000n\000e\000a\000r\000\040\000M\000o\000d\000e\000l}{section.6}% 23 +\BOOKMARK [2][-]{subsection.6.3}{\376\377\000N\000o\000n\000-\000L\000i\000n\000e\000a\000r\000\040\000M\000o\000d\000e\000l}{section.6}% 24 +\BOOKMARK [3][-]{subsubsection.6.3.1}{\376\377\000G\000R\000U\000\040\000M\000o\000d\000e\000l}{subsection.6.3}% 25 +\BOOKMARK [2][-]{subsection.6.4}{\376\377\000D\000i\000f\000f\000u\000s\000i\000o\000n}{section.6}% 26 \BOOKMARK [1][-]{section.7}{\376\377\000P\000o\000l\000i\000c\000i\000e\000s\000\040\000f\000o\000r\000\040\000b\000a\000t\000t\000e\000r\000y\000\040\000o\000p\000t\000i\000m\000i\000z\000a\000t\000i\000o\000n}{}% 27 \BOOKMARK [2][-]{subsection.7.1}{\376\377\000B\000a\000s\000e\000l\000i\000n\000e\000s}{section.7}% 28 \BOOKMARK [2][-]{subsection.7.2}{\376\377\000P\000o\000l\000i\000c\000i\000e\000s\000\040\000u\000s\000i\000n\000g\000\040\000N\000R\000V\000\040\000p\000r\000e\000d\000i\000c\000t\000i\000o\000n\000s}{section.7}% 29 diff --git a/Reports/Thesis/verslag.pdf b/Reports/Thesis/verslag.pdf index b6d46dc..f109d49 100644 Binary files a/Reports/Thesis/verslag.pdf and b/Reports/Thesis/verslag.pdf differ diff --git a/Reports/Thesis/verslag.synctex.gz b/Reports/Thesis/verslag.synctex.gz index ce17fbf..aaadf5e 100644 Binary files a/Reports/Thesis/verslag.synctex.gz and b/Reports/Thesis/verslag.synctex.gz differ diff --git a/Reports/Thesis/verslag.toc b/Reports/Thesis/verslag.toc index 2ab34d0..fa37762 100644 --- a/Reports/Thesis/verslag.toc +++ b/Reports/Thesis/verslag.toc @@ -21,10 +21,10 @@ \contentsline {subsection}{\numberline {5.2}Policies for Battery Optimization}{19}{subsection.5.2}% \contentsline {section}{\numberline {6}Results \& Discussion}{20}{section.6}% \contentsline {subsection}{\numberline {6.1}Data}{20}{subsection.6.1}% -\contentsline {subsubsection}{\numberline {6.1.1}Linear Model}{21}{subsubsection.6.1.1}% -\contentsline {subsubsection}{\numberline {6.1.2}Non-Linear Model}{27}{subsubsection.6.1.2}% -\contentsline {subsubsection}{\numberline {6.1.3}GRU Model}{30}{subsubsection.6.1.3}% -\contentsline {subsection}{\numberline {6.2}Diffusion}{33}{subsection.6.2}% +\contentsline {subsection}{\numberline {6.2}Linear Model}{21}{subsection.6.2}% +\contentsline {subsection}{\numberline {6.3}Non-Linear Model}{27}{subsection.6.3}% +\contentsline {subsubsection}{\numberline {6.3.1}GRU Model}{30}{subsubsection.6.3.1}% +\contentsline {subsection}{\numberline {6.4}Diffusion}{33}{subsection.6.4}% \contentsline {section}{\numberline {7}Policies for battery optimization}{33}{section.7}% \contentsline {subsection}{\numberline {7.1}Baselines}{33}{subsection.7.1}% \contentsline {subsection}{\numberline {7.2}Policies using NRV predictions}{33}{subsection.7.2}% diff --git a/src/training_scripts/autoregressive_quantiles.py b/src/training_scripts/autoregressive_quantiles.py index 7580344..30762a5 100644 --- a/src/training_scripts/autoregressive_quantiles.py +++ b/src/training_scripts/autoregressive_quantiles.py @@ -2,9 +2,7 @@ from src.utils.clearml import ClearMLHelper #### ClearML #### clearml_helper = ClearMLHelper(project_name="Thesis/NrvForecast") -task = clearml_helper.get_task( - task_name="AQR: GRU (8 - 512) + Load + PV + Wind + NP + QE (dim 5)" -) +task = clearml_helper.get_task(task_name="AQR: Linear + QE (dim 2)") # task.execute_remotely(queue_name="default", exit_process=True) from src.policies.PolicyEvaluator import PolicyEvaluator @@ -29,24 +27,24 @@ data_config = DataConfig() data_config.NRV_HISTORY = True -data_config.LOAD_HISTORY = True -data_config.LOAD_FORECAST = True +data_config.LOAD_HISTORY = False +data_config.LOAD_FORECAST = False -data_config.WIND_FORECAST = True -data_config.WIND_HISTORY = True +data_config.WIND_FORECAST = False +data_config.WIND_HISTORY = False -data_config.PV_FORECAST = True -data_config.PV_HISTORY = True +data_config.PV_FORECAST = False +data_config.PV_HISTORY = False data_config.QUARTER = True data_config.DAY_OF_WEEK = False -data_config.NOMINAL_NET_POSITION = True +data_config.NOMINAL_NET_POSITION = False data_config = task.connect(data_config, name="data_features") -data_processor = DataProcessor(data_config, path="", lstm=True) +data_processor = DataProcessor(data_config, path="", lstm=False) data_processor.set_batch_size(512) data_processor.set_full_day_skip(False) @@ -72,7 +70,7 @@ model_parameters = { "hidden_size": 512, "num_layers": 8, "dropout": 0.2, - "time_feature_embedding": 5, + "time_feature_embedding": 2, } model_parameters = task.connect(model_parameters, name="model_parameters") @@ -83,13 +81,13 @@ time_embedding = TimeEmbedding( # time_embedding = TrigonometricTimeEmbedding(data_processor.get_time_feature_size()) -lstm_model = GRUModel( - time_embedding.output_dim(inputDim), - len(quantiles), - hidden_size=model_parameters["hidden_size"], - num_layers=model_parameters["num_layers"], - dropout=model_parameters["dropout"], -) +# lstm_model = GRUModel( +# time_embedding.output_dim(inputDim), +# len(quantiles), +# hidden_size=model_parameters["hidden_size"], +# num_layers=model_parameters["num_layers"], +# dropout=model_parameters["dropout"], +# ) # non_linear_model = NonLinearRegression( # time_embedding.output_dim(inputDim), @@ -99,9 +97,9 @@ lstm_model = GRUModel( # dropout=model_parameters["dropout"], # ) -# linear_model = LinearRegression(time_embedding.output_dim(inputDim), len(quantiles)) +linear_model = LinearRegression(time_embedding.output_dim(inputDim), len(quantiles)) -model = nn.Sequential(time_embedding, lstm_model) +model = nn.Sequential(time_embedding, linear_model) model.output_size = 1 optimizer = torch.optim.Adam(model.parameters(), lr=model_parameters["learning_rate"]) diff --git a/src/training_scripts/non_autoregressive_quantiles.py b/src/training_scripts/non_autoregressive_quantiles.py index e96a7c1..b2afd2e 100644 --- a/src/training_scripts/non_autoregressive_quantiles.py +++ b/src/training_scripts/non_autoregressive_quantiles.py @@ -2,7 +2,9 @@ from src.utils.clearml import ClearMLHelper #### ClearML #### clearml_helper = ClearMLHelper(project_name="Thesis/NAQR: Non-Linear") -task = clearml_helper.get_task(task_name="NAQR: Non-Linear (2 - 256)") +task = clearml_helper.get_task( + task_name="NAQR: Non-Linear (8 - 512) + NRV + LOAD + PV + WIND + NP" +) task.execute_remotely(queue_name="default", exit_process=True) from src.policies.PolicyEvaluator import PolicyEvaluator @@ -27,16 +29,16 @@ from src.models.time_embedding_layer import TimeEmbedding data_config = DataConfig() data_config.NRV_HISTORY = True -data_config.LOAD_HISTORY = False -data_config.LOAD_FORECAST = False +data_config.LOAD_HISTORY = True +data_config.LOAD_FORECAST = True -data_config.WIND_FORECAST = False +data_config.WIND_FORECAST = True data_config.WIND_HISTORY = True -data_config.PV_FORECAST = False -data_config.PV_HISTORY = False +data_config.PV_FORECAST = True +data_config.PV_HISTORY = True -data_config.NOMINAL_NET_POSITION = False +data_config.NOMINAL_NET_POSITION = True data_config = task.connect(data_config, name="data_features") @@ -64,8 +66,8 @@ else: model_parameters = { "learning_rate": 0.0001, - "hidden_size": 256, - "num_layers": 2, + "hidden_size": 512, + "num_layers": 8, "dropout": 0.2, }