Compare commits

57 Commits

Author SHA1 Message Date
db8a527949 Updated thesis 2024-05-11 02:10:35 +02:00
70a57eacb6 Updated thesis 2024-05-11 02:09:17 +02:00
934d4951ff Updated thesis 2024-05-11 02:06:07 +02:00
1b2b3518e2 Updated thesis 2024-05-10 15:52:57 +02:00
0bcaa2f63f Updated thesis 2024-05-10 00:29:12 +02:00
Victor Mylle
2db604d2da Updated thesis 2024-05-09 20:46:54 +00:00
fa03debf86 Updated thesis 2024-05-09 13:59:18 +02:00
907f62d9cd Updated thesis 2024-05-09 13:25:22 +02:00
4c4914e227 Merge branch 'main' of https://git.victormylle.be/VictorMylle/Thesis 2024-05-08 17:56:59 +02:00
8a2e1ce7d5 Worked further on thesis 2024-05-08 17:53:19 +02:00
Victor Mylle
3fba166dc5 Wrote more about workings of diffusion models and fixed intermediate samples 2024-05-07 22:04:34 +00:00
Victor Mylle
d9b6f34e97 Added GRU results to thesis + intermediate samples of diffusion model 2024-05-06 23:28:42 +00:00
d7f4c1849b Non autoregressive gru model load 2024-05-06 16:11:15 +02:00
19ab597ae6 Non linear results section done 2024-05-06 14:23:10 +02:00
177fa1ad86 Added non-autoregressive non-linear results to thesis 2024-05-05 02:17:04 +02:00
75b35bb2c9 Non-linear non autoregressive experiments 2024-05-04 16:51:26 +02:00
e0c42797e0 Updated thesis and added quantile performance plots to non autoregressive quantiles 2024-05-04 14:13:37 +02:00
074e654b8a Updated thesis 2024-05-03 18:13:01 +02:00
2597577d3d Other changes 2024-04-25 14:09:09 +02:00
361414cd41 Updated thesis 2024-04-25 14:08:43 +02:00
c7bcd5be55 Updated diffusion section of thesis + adding metrics to diffusion experiments 2024-04-24 22:06:30 +02:00
f691ab384b Increased patience for AQR 2024-04-23 21:02:40 +02:00
12bff03d69 Started writing about GRU model 2024-04-22 15:54:25 +02:00
ac08707369 Adding intermediate table with non linear model results 2024-04-20 18:49:26 +02:00
3a40959a32 Wrote more about non-autoregressive linear quantile regression 2024-04-19 23:28:08 +02:00
e0fbf54347 Updated non autoregressive images in thesis 2024-04-19 16:19:18 +02:00
afa70fc3b3 Trying to reduce white space for saved matplotlib images 2024-04-19 16:00:37 +02:00
2680973baf Added non autoregressive examples to thesis 2024-04-19 15:28:20 +02:00
0817f60e72 Fixed issue with clearml report image 2024-04-19 15:09:55 +02:00
2cdd2257a0 Fixed some accidental mistake xs 2024-04-19 14:09:23 +02:00
46c7c6f7e5 Saving samples plot as png at end of training 2024-04-19 14:05:20 +02:00
4e713ef564 Added non autoregressive quantile results + changing sample plots 2024-04-19 12:35:27 +02:00
Victor Mylle
98a7244995 Fixed the non autoregressive final metric calculations 2024-04-18 16:53:17 +00:00
dc102926fa Non-autoregressive Linear baseline update + wrote further at thesis 2024-04-18 00:30:25 +02:00
8fb2a7fc7b Quarter embedding using trigonometry + more thesis writing 2024-04-17 21:48:13 +02:00
6b02c9aab8 Trying out more linear baselines 2024-04-17 12:55:46 +02:00
0edcc91e65 Made more changes 2024-04-16 22:07:53 +02:00
937b6abc0b Updated Thesis and linear baseline 2024-04-16 21:19:19 +02:00
ef094c659c Added baseline with perfect predictions 2024-03-28 14:56:28 +01:00
65ec8fcd54 Not resetting state of charge 2024-03-23 19:18:55 +01:00
e780b46af7 Updated some stuff 2024-03-20 22:16:19 +01:00
dad64d00be Updated some stuff 2024-03-20 22:14:18 +01:00
acaa8ff054 Added non autoregressive quantiles training scripts 2024-03-20 16:59:22 +01:00
ba3b3cf882 Added background information about Electricity market in Belgium 2024-03-20 16:48:07 +01:00
1a8e735cbc Updated training scripts 2024-03-18 12:15:06 +01:00
34335cd9fe Fixed policy evaluation for autoregressive 2024-02-29 23:23:11 +01:00
fe1e388ffb Added crps + profit logging and updated plots for non autoregressive models 2024-02-28 17:12:51 +01:00
420c9dc6ac Added yesterday policy evaluator 2024-02-26 18:21:06 +01:00
ca120e5715 Finished baseline policy evaluator 2024-02-26 18:20:53 +01:00
be38536758 Adding baseline policy evaluator 2024-02-26 16:26:03 +01:00
f1b54df2c9 Policy evaluation during training 2024-02-25 22:13:00 +01:00
Victor Mylle
90751866a4 Fixed git lfs issue 2024-02-22 16:52:03 +01:00
Victor Mylle
4ad3336b98 Set training script to execute remotely 2024-02-21 18:13:51 +01:00
Victor Mylle
f8823f7efa Autoregressive Quantile Training with Policy evaluation 2024-02-21 18:11:38 +01:00
Victor Mylle
2b22b6935e Merge branch 'February-Report' into main 2024-02-19 15:49:15 +01:00
Victor Mylle
174a82fab2 Plots to compare between quantile regression and diffusion 2024-02-18 19:21:59 +01:00
Victor Mylle
bd250a664b Fixed diffusion confidence interval plot 2024-02-18 16:01:18 +01:00
153 changed files with 8983 additions and 1686 deletions

1
.gitattributes vendored
View File

@@ -1 +0,0 @@
*.csv filter=lfs diff=lfs merge=lfs -text

View File

@@ -2,6 +2,7 @@ FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime
RUN apt-get update
RUN apt-get install -y git
# RUN apt-get install texlive-latex-base texlive-fonts-recommended texlive-fonts-extra texlive-bibtex-extra
COPY requirements.txt /tmp/requirements.txt

View File

@@ -1,31 +0,0 @@
\relax
\providecommand\babel@aux[2]{}
\@nameuse{bbl@beforestart}
\abx@aux@refcontext{nyt/global//global/global}
\providecommand\hyper@newdestlabel[2]{}
\providecommand\HyperFirstAtBeginDocument{\AtBeginDocument}
\HyperFirstAtBeginDocument{\ifx\hyper@anchor\@undefined
\global\let\oldnewlabel\newlabel
\gdef\newlabel#1#2{\newlabelxx{#1}#2}
\gdef\newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
\AtEndDocument{\ifx\hyper@anchor\@undefined
\let\newlabel\oldnewlabel
\fi}
\fi}
\global\let\hyper@last\relax
\gdef\HyperFirstAtBeginDocument#1{#1}
\providecommand\HyField@AuxAddToFields[1]{}
\providecommand\HyField@AuxAddToCoFields[2]{}
\babel@aux{english}{}
\@writefile{toc}{\contentsline {section}{\numberline {1}Intermediate Results}{1}{section.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {1.1}Previous day as forecast}{1}{subsection.1.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {1.2}All Zeros}{1}{subsection.1.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {1.3}Linear Model}{1}{subsection.1.3}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Results of the linear model with different ranges of training data}}{2}{table.1}\protected@file@percent }
\newlabel{tab:linear_model}{{1}{2}{Results of the linear model with different ranges of training data}{table.1}{}}
\@writefile{toc}{\contentsline {section}{\numberline {2}Schedule next months}{3}{section.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Other input features}{3}{subsection.2.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}More complex models}{3}{subsection.2.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.3}Reinforcement learning}{3}{subsection.2.3}\protected@file@percent }
\abx@aux@read@bbl@mdfivesum{nobblfile}
\gdef \@abspage@last{4}

View File

@@ -0,0 +1,20 @@
% $ biblatex auxiliary file $
% $ biblatex bbl format version 3.2 $
% Do not modify the above lines!
%
% This is an auxiliary file used by the 'biblatex' package.
% This file may safely be deleted. It will be recreated by
% biber as required.
%
\begingroup
\makeatletter
\@ifundefined{ver@biblatex.sty}
{\@latex@error
{Missing 'biblatex' package}
{The bibliography requires the 'biblatex' package.}
\aftergroup\endinput}
{}
\endgroup
\endinput

File diff suppressed because it is too large Load Diff

View File

@@ -1,8 +0,0 @@
\BOOKMARK [1][-]{section.1}{\376\377\000I\000n\000t\000e\000r\000m\000e\000d\000i\000a\000t\000e\000\040\000R\000e\000s\000u\000l\000t\000s}{}% 1
\BOOKMARK [2][-]{subsection.1.1}{\376\377\000P\000r\000e\000v\000i\000o\000u\000s\000\040\000d\000a\000y\000\040\000a\000s\000\040\000f\000o\000r\000e\000c\000a\000s\000t}{section.1}% 2
\BOOKMARK [2][-]{subsection.1.2}{\376\377\000A\000l\000l\000\040\000Z\000e\000r\000o\000s}{section.1}% 3
\BOOKMARK [2][-]{subsection.1.3}{\376\377\000L\000i\000n\000e\000a\000r\000\040\000M\000o\000d\000e\000l}{section.1}% 4
\BOOKMARK [1][-]{section.2}{\376\377\000S\000c\000h\000e\000d\000u\000l\000e\000\040\000n\000e\000x\000t\000\040\000m\000o\000n\000t\000h\000s}{}% 5
\BOOKMARK [2][-]{subsection.2.1}{\376\377\000O\000t\000h\000e\000r\000\040\000i\000n\000p\000u\000t\000\040\000f\000e\000a\000t\000u\000r\000e\000s}{section.2}% 6
\BOOKMARK [2][-]{subsection.2.2}{\376\377\000M\000o\000r\000e\000\040\000c\000o\000m\000p\000l\000e\000x\000\040\000m\000o\000d\000e\000l\000s}{section.2}% 7
\BOOKMARK [2][-]{subsection.2.3}{\376\377\000R\000e\000i\000n\000f\000o\000r\000c\000e\000m\000e\000n\000t\000\040\000l\000e\000a\000r\000n\000i\000n\000g}{section.2}% 8

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,47 @@
% Training methods
\DeclareAcronym{QR}{
short = QR,
long = Quantile Regression
}
\DeclareAcronym{AQR}{
short = AQR,
long = Autoregressive Quantile Regression
}
\DeclareAcronym{NAQR}{
short = NAQR,
long = Non-Autoregressive Quantile Regression
}
% Metrics
\DeclareAcronym{MSE}{
short = MSE,
long = Mean Squared Error
}
\DeclareAcronym{MAE}{
short = MAE,
long = Mean Absolute Error
}
\DeclareAcronym{CRPS}{
short = CRPS,
long = Continuous Ranked Probability Score
}
% Electricity Market Terms
\DeclareAcronym{NRV}{
short = NRV,
long = Net Regulation Volume
}
\DeclareAcronym{PV}{
short = PV,
long = Photovoltaic
}
\DeclareAcronym{NP}{
short = NP,
long = Implicit Net Position
}

View File

@@ -0,0 +1,67 @@
<mxfile host="Electron" modified="2024-04-21T19:27:00.133Z" agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/21.6.8 Chrome/114.0.5735.289 Electron/25.5.0 Safari/537.36" etag="iP2ZMkiDESp7J3viivZF" version="21.6.8" type="device">
<diagram name="Page-1" id="FePbCUh0FgINugyzgCKY">
<mxGraphModel dx="1834" dy="806" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="0" pageScale="1" pageWidth="827" pageHeight="1169" math="0" shadow="0">
<root>
<mxCell id="0" />
<mxCell id="1" parent="0" />
<mxCell id="XPfo9y-A51sPOulxTgxz-3" value="" style="group" vertex="1" connectable="0" parent="1">
<mxGeometry x="-40" y="280" width="200" height="140" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-1" value="&lt;div style=&quot;text-align: left;&quot;&gt;&lt;span style=&quot;background-color: initial;&quot;&gt;- Real NRV &lt;b&gt;(T-96)&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style=&quot;text-align: left;&quot;&gt;&lt;span style=&quot;background-color: initial;&quot;&gt;- Real Load &lt;b&gt;(T-95)&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style=&quot;text-align: left;&quot;&gt;&lt;span style=&quot;background-color: initial;&quot;&gt;- Real PV &lt;b&gt;(T-95)&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style=&quot;text-align: left;&quot;&gt;&lt;span style=&quot;background-color: initial;&quot;&gt;- Real Wind &lt;b&gt;(T-95)&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style=&quot;text-align: left;&quot;&gt;&lt;span style=&quot;background-color: initial;&quot;&gt;- Real Net Position &lt;b&gt;(T-95)&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style=&quot;text-align: left;&quot;&gt;&lt;span style=&quot;background-color: initial;&quot;&gt;- QE &lt;b&gt;(T-96)&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;" style="rounded=1;whiteSpace=wrap;html=1;fillColor=default;strokeWidth=2;" vertex="1" parent="XPfo9y-A51sPOulxTgxz-3">
<mxGeometry y="30" width="200" height="110" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-2" value="&lt;font style=&quot;font-size: 14px;&quot;&gt;Quarter T-96&lt;/font&gt;" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;" vertex="1" parent="XPfo9y-A51sPOulxTgxz-3">
<mxGeometry x="52.941764705882356" width="94.11764705882354" height="30" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-4" value="" style="group" vertex="1" connectable="0" parent="1">
<mxGeometry x="400" y="280" width="200" height="140" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-5" value="&lt;div style=&quot;text-align: left;&quot;&gt;&lt;div style=&quot;border-color: var(--border-color);&quot;&gt;- Real NRV (T-1)&lt;/div&gt;&lt;div style=&quot;border-color: var(--border-color);&quot;&gt;&lt;span style=&quot;border-color: var(--border-color); background-color: initial;&quot;&gt;- Forecast Load (T)&lt;/span&gt;&lt;/div&gt;&lt;div style=&quot;border-color: var(--border-color);&quot;&gt;&lt;span style=&quot;border-color: var(--border-color); background-color: initial;&quot;&gt;- Forecast&amp;nbsp;PV (T)&lt;/span&gt;&lt;/div&gt;&lt;div style=&quot;border-color: var(--border-color);&quot;&gt;&lt;span style=&quot;border-color: var(--border-color); background-color: initial;&quot;&gt;- Forecast&amp;nbsp;Wind (T)&lt;/span&gt;&lt;/div&gt;&lt;div style=&quot;border-color: var(--border-color);&quot;&gt;- Forecast&amp;nbsp;Net Position &lt;b&gt;(T)&lt;/b&gt;&lt;/div&gt;&lt;div style=&quot;border-color: var(--border-color);&quot;&gt;&lt;span style=&quot;background-color: initial;&quot;&gt;- QE &lt;b&gt;(T-1)&lt;/b&gt;&lt;/span&gt;&lt;br&gt;&lt;/div&gt;&lt;/div&gt;" style="rounded=1;whiteSpace=wrap;html=1;fillColor=default;strokeWidth=2;" vertex="1" parent="XPfo9y-A51sPOulxTgxz-4">
<mxGeometry y="30" width="200" height="110" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-6" value="&lt;font style=&quot;font-size: 14px;&quot;&gt;Quarter T-1&lt;/font&gt;" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;" vertex="1" parent="XPfo9y-A51sPOulxTgxz-4">
<mxGeometry x="38.46153846153845" width="123.07692307692304" height="30" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-7" value="" style="endArrow=none;dashed=1;html=1;dashPattern=1 3;strokeWidth=4;rounded=0;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="XPfo9y-A51sPOulxTgxz-1" target="XPfo9y-A51sPOulxTgxz-5">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="110" y="410" as="sourcePoint" />
<mxPoint x="160" y="360" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-15" value="RNN" style="rounded=1;whiteSpace=wrap;html=1;strokeWidth=2;fontStyle=1;fontSize=14;" vertex="1" parent="1">
<mxGeometry x="-40" y="190" width="640" height="40" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-16" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.156;entryY=1.075;entryDx=0;entryDy=0;entryPerimeter=0;strokeWidth=2;" edge="1" parent="1" source="XPfo9y-A51sPOulxTgxz-2" target="XPfo9y-A51sPOulxTgxz-15">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-17" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=0;exitDx=0;exitDy=0;entryX=0.844;entryY=1.05;entryDx=0;entryDy=0;entryPerimeter=0;strokeWidth=2;" edge="1" parent="1" source="XPfo9y-A51sPOulxTgxz-6" target="XPfo9y-A51sPOulxTgxz-15">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-18" value="" style="endArrow=classic;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="61" y="190" as="sourcePoint" />
<mxPoint x="61" y="140" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-19" value="NRV Quantiles &lt;b&gt;(T-95)&lt;/b&gt;" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=14;" vertex="1" parent="1">
<mxGeometry x="10" y="100" width="100" height="30" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-20" value="Inputs" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=15;fontStyle=1" vertex="1" parent="1">
<mxGeometry x="-150" y="350" width="60" height="30" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-21" value="Outputs" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=15;fontStyle=1" vertex="1" parent="1">
<mxGeometry x="-150" y="100" width="60" height="30" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-22" value="" style="endArrow=classic;html=1;rounded=0;strokeWidth=2;exitX=0.844;exitY=0;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1" source="XPfo9y-A51sPOulxTgxz-15">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="500" y="180" as="sourcePoint" />
<mxPoint x="500" y="140" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-24" value="NRV Quantiles &lt;b&gt;(T)&lt;/b&gt;" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=14;" vertex="1" parent="1">
<mxGeometry x="450" y="100" width="100" height="30" as="geometry" />
</mxCell>
</root>
</mxGraphModel>
</diagram>
</mxfile>

View File

@@ -0,0 +1,67 @@
<mxfile host="Electron" modified="2024-04-21T19:39:13.066Z" agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/21.6.8 Chrome/114.0.5735.289 Electron/25.5.0 Safari/537.36" etag="Yo_Te6-BRO91UZdKIgwU" version="21.6.8" type="device">
<diagram name="Page-1" id="FePbCUh0FgINugyzgCKY">
<mxGraphModel dx="1834" dy="806" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="0" pageScale="1" pageWidth="827" pageHeight="1169" math="0" shadow="0">
<root>
<mxCell id="0" />
<mxCell id="1" parent="0" />
<mxCell id="XPfo9y-A51sPOulxTgxz-3" value="" style="group" vertex="1" connectable="0" parent="1">
<mxGeometry x="-40" y="280" width="200" height="140" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-1" value="&lt;div style=&quot;text-align: left;&quot;&gt;&lt;span style=&quot;background-color: initial;&quot;&gt;- Real NRV &lt;b&gt;(T-96)&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style=&quot;text-align: left;&quot;&gt;&lt;span style=&quot;background-color: initial;&quot;&gt;- Real Load &lt;b&gt;(T-95)&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style=&quot;text-align: left;&quot;&gt;&lt;span style=&quot;background-color: initial;&quot;&gt;- Real PV &lt;b&gt;(T-95)&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style=&quot;text-align: left;&quot;&gt;&lt;span style=&quot;background-color: initial;&quot;&gt;- Real Wind &lt;b&gt;(T-95)&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style=&quot;text-align: left;&quot;&gt;&lt;span style=&quot;background-color: initial;&quot;&gt;- Real Net Position &lt;b&gt;(T-95)&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style=&quot;text-align: left;&quot;&gt;&lt;span style=&quot;background-color: initial;&quot;&gt;- QE &lt;b&gt;(T-96)&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;" style="rounded=1;whiteSpace=wrap;html=1;fillColor=default;strokeWidth=2;" vertex="1" parent="XPfo9y-A51sPOulxTgxz-3">
<mxGeometry y="30" width="200" height="110" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-2" value="&lt;font style=&quot;font-size: 14px;&quot;&gt;Quarter T-96&lt;/font&gt;" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;" vertex="1" parent="XPfo9y-A51sPOulxTgxz-3">
<mxGeometry x="52.941764705882356" width="94.11764705882354" height="30" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-4" value="" style="group" vertex="1" connectable="0" parent="1">
<mxGeometry x="400" y="280" width="200" height="140" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-5" value="&lt;div style=&quot;text-align: left;&quot;&gt;&lt;div style=&quot;border-color: var(--border-color);&quot;&gt;- Real NRV &lt;b&gt;(T-1)&lt;/b&gt;&lt;/div&gt;&lt;div style=&quot;border-color: var(--border-color);&quot;&gt;&lt;span style=&quot;border-color: var(--border-color); background-color: initial;&quot;&gt;- Forecast Load &lt;b&gt;(T)&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style=&quot;border-color: var(--border-color);&quot;&gt;&lt;span style=&quot;border-color: var(--border-color); background-color: initial;&quot;&gt;- Forecast&amp;nbsp;PV &lt;b&gt;(T)&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style=&quot;border-color: var(--border-color);&quot;&gt;&lt;span style=&quot;border-color: var(--border-color); background-color: initial;&quot;&gt;- Forecast&amp;nbsp;Wind &lt;b&gt;(T)&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style=&quot;border-color: var(--border-color);&quot;&gt;- Forecast&amp;nbsp;Net Position &lt;b&gt;(T)&lt;/b&gt;&lt;/div&gt;&lt;div style=&quot;border-color: var(--border-color);&quot;&gt;&lt;span style=&quot;background-color: initial;&quot;&gt;- QE &lt;b&gt;(T-1)&lt;/b&gt;&lt;/span&gt;&lt;br&gt;&lt;/div&gt;&lt;/div&gt;" style="rounded=1;whiteSpace=wrap;html=1;fillColor=default;strokeWidth=2;" vertex="1" parent="XPfo9y-A51sPOulxTgxz-4">
<mxGeometry y="30" width="200" height="110" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-6" value="&lt;font style=&quot;font-size: 14px;&quot;&gt;Quarter T-1&lt;/font&gt;" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;" vertex="1" parent="XPfo9y-A51sPOulxTgxz-4">
<mxGeometry x="38.46153846153845" width="123.07692307692304" height="30" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-7" value="" style="endArrow=none;dashed=1;html=1;dashPattern=1 3;strokeWidth=4;rounded=0;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="XPfo9y-A51sPOulxTgxz-1" target="XPfo9y-A51sPOulxTgxz-5">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="110" y="410" as="sourcePoint" />
<mxPoint x="160" y="360" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-15" value="RNN" style="rounded=1;whiteSpace=wrap;html=1;strokeWidth=2;fontStyle=1;fontSize=14;" vertex="1" parent="1">
<mxGeometry x="-40" y="190" width="640" height="40" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-16" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.156;entryY=1.075;entryDx=0;entryDy=0;entryPerimeter=0;strokeWidth=2;" edge="1" parent="1" source="XPfo9y-A51sPOulxTgxz-2" target="XPfo9y-A51sPOulxTgxz-15">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-17" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=0;exitDx=0;exitDy=0;entryX=0.844;entryY=1.05;entryDx=0;entryDy=0;entryPerimeter=0;strokeWidth=2;" edge="1" parent="1" source="XPfo9y-A51sPOulxTgxz-6" target="XPfo9y-A51sPOulxTgxz-15">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-18" value="" style="endArrow=classic;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="61" y="190" as="sourcePoint" />
<mxPoint x="61" y="140" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-19" value="NRV Quantiles &lt;b&gt;(T-95)&lt;/b&gt;" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=14;" vertex="1" parent="1">
<mxGeometry x="10" y="100" width="100" height="30" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-20" value="Inputs" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=15;fontStyle=1" vertex="1" parent="1">
<mxGeometry x="-150" y="350" width="60" height="30" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-21" value="Outputs" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=15;fontStyle=1" vertex="1" parent="1">
<mxGeometry x="-150" y="100" width="60" height="30" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-22" value="" style="endArrow=classic;html=1;rounded=0;strokeWidth=2;exitX=0.844;exitY=0;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1" source="XPfo9y-A51sPOulxTgxz-15">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="500" y="180" as="sourcePoint" />
<mxPoint x="500" y="140" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-24" value="NRV Quantiles &lt;b&gt;(T)&lt;/b&gt;" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=14;" vertex="1" parent="1">
<mxGeometry x="450" y="100" width="100" height="30" as="geometry" />
</mxCell>
</root>
</mxGraphModel>
</diagram>
</mxfile>

View File

@@ -0,0 +1,67 @@
<mxfile host="Electron" modified="2024-04-21T19:39:08.279Z" agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/21.6.8 Chrome/114.0.5735.289 Electron/25.5.0 Safari/537.36" etag="UVK9ORhi1HUABHBPMAua" version="21.6.8" type="device">
<diagram name="Page-1" id="FePbCUh0FgINugyzgCKY">
<mxGraphModel dx="1834" dy="806" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="0" pageScale="1" pageWidth="827" pageHeight="1169" math="0" shadow="0">
<root>
<mxCell id="0" />
<mxCell id="1" parent="0" />
<mxCell id="XPfo9y-A51sPOulxTgxz-3" value="" style="group" vertex="1" connectable="0" parent="1">
<mxGeometry x="-40" y="280" width="200" height="140" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-1" value="&lt;div style=&quot;text-align: left;&quot;&gt;&lt;span style=&quot;background-color: initial;&quot;&gt;- Real NRV &lt;b&gt;(T-96)&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style=&quot;text-align: left;&quot;&gt;&lt;span style=&quot;background-color: initial;&quot;&gt;- Real Load &lt;b&gt;(T-95)&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style=&quot;text-align: left;&quot;&gt;&lt;span style=&quot;background-color: initial;&quot;&gt;- Real PV &lt;b&gt;(T-95)&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style=&quot;text-align: left;&quot;&gt;&lt;span style=&quot;background-color: initial;&quot;&gt;- Real Wind &lt;b&gt;(T-95)&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style=&quot;text-align: left;&quot;&gt;&lt;span style=&quot;background-color: initial;&quot;&gt;- Real Net Position &lt;b&gt;(T-95)&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style=&quot;text-align: left;&quot;&gt;&lt;span style=&quot;background-color: initial;&quot;&gt;- QE &lt;b&gt;(T-96)&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;" style="rounded=1;whiteSpace=wrap;html=1;fillColor=default;strokeWidth=2;" vertex="1" parent="XPfo9y-A51sPOulxTgxz-3">
<mxGeometry y="30" width="200" height="110" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-2" value="&lt;font style=&quot;font-size: 14px;&quot;&gt;Quarter T-96&lt;/font&gt;" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;" vertex="1" parent="XPfo9y-A51sPOulxTgxz-3">
<mxGeometry x="52.941764705882356" width="94.11764705882354" height="30" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-4" value="" style="group" vertex="1" connectable="0" parent="1">
<mxGeometry x="400" y="280" width="200" height="140" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-5" value="&lt;div style=&quot;text-align: left;&quot;&gt;&lt;div style=&quot;border-color: var(--border-color);&quot;&gt;- Real NRV &lt;b&gt;(T-1)&lt;/b&gt;&lt;/div&gt;&lt;div style=&quot;border-color: var(--border-color);&quot;&gt;&lt;span style=&quot;border-color: var(--border-color); background-color: initial;&quot;&gt;- Forecast Load &lt;b&gt;(T)&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style=&quot;border-color: var(--border-color);&quot;&gt;&lt;span style=&quot;border-color: var(--border-color); background-color: initial;&quot;&gt;- Forecast&amp;nbsp;PV &lt;b&gt;(T)&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style=&quot;border-color: var(--border-color);&quot;&gt;&lt;span style=&quot;border-color: var(--border-color); background-color: initial;&quot;&gt;- Forecast&amp;nbsp;Wind &lt;b&gt;(T)&lt;/b&gt;&lt;/span&gt;&lt;/div&gt;&lt;div style=&quot;border-color: var(--border-color);&quot;&gt;- Forecast&amp;nbsp;Net Position &lt;b&gt;(T)&lt;/b&gt;&lt;/div&gt;&lt;div style=&quot;border-color: var(--border-color);&quot;&gt;&lt;span style=&quot;background-color: initial;&quot;&gt;- QE &lt;b&gt;(T-1)&lt;/b&gt;&lt;/span&gt;&lt;br&gt;&lt;/div&gt;&lt;/div&gt;" style="rounded=1;whiteSpace=wrap;html=1;fillColor=default;strokeWidth=2;" vertex="1" parent="XPfo9y-A51sPOulxTgxz-4">
<mxGeometry y="30" width="200" height="110" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-6" value="&lt;font style=&quot;font-size: 14px;&quot;&gt;Quarter T-1&lt;/font&gt;" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;" vertex="1" parent="XPfo9y-A51sPOulxTgxz-4">
<mxGeometry x="38.46153846153845" width="123.07692307692304" height="30" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-7" value="" style="endArrow=none;dashed=1;html=1;dashPattern=1 3;strokeWidth=4;rounded=0;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="XPfo9y-A51sPOulxTgxz-1" target="XPfo9y-A51sPOulxTgxz-5">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="110" y="410" as="sourcePoint" />
<mxPoint x="160" y="360" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-15" value="RNN" style="rounded=1;whiteSpace=wrap;html=1;strokeWidth=2;fontStyle=1;fontSize=14;" vertex="1" parent="1">
<mxGeometry x="-40" y="190" width="640" height="40" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-16" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.156;entryY=1.075;entryDx=0;entryDy=0;entryPerimeter=0;strokeWidth=2;" edge="1" parent="1" source="XPfo9y-A51sPOulxTgxz-2" target="XPfo9y-A51sPOulxTgxz-15">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-17" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=0;exitDx=0;exitDy=0;entryX=0.844;entryY=1.05;entryDx=0;entryDy=0;entryPerimeter=0;strokeWidth=2;" edge="1" parent="1" source="XPfo9y-A51sPOulxTgxz-6" target="XPfo9y-A51sPOulxTgxz-15">
<mxGeometry relative="1" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-18" value="" style="endArrow=classic;html=1;rounded=0;strokeWidth=2;" edge="1" parent="1">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="61" y="190" as="sourcePoint" />
<mxPoint x="61" y="140" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-19" value="NRV Quantiles &lt;b&gt;(T-95)&lt;/b&gt;" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=14;" vertex="1" parent="1">
<mxGeometry x="10" y="100" width="100" height="30" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-20" value="Inputs" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=15;fontStyle=1" vertex="1" parent="1">
<mxGeometry x="-150" y="350" width="60" height="30" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-21" value="Outputs" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=15;fontStyle=1" vertex="1" parent="1">
<mxGeometry x="-150" y="100" width="60" height="30" as="geometry" />
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-22" value="" style="endArrow=classic;html=1;rounded=0;strokeWidth=2;exitX=0.844;exitY=0;exitDx=0;exitDy=0;exitPerimeter=0;" edge="1" parent="1" source="XPfo9y-A51sPOulxTgxz-15">
<mxGeometry width="50" height="50" relative="1" as="geometry">
<mxPoint x="500" y="180" as="sourcePoint" />
<mxPoint x="500" y="140" as="targetPoint" />
</mxGeometry>
</mxCell>
<mxCell id="XPfo9y-A51sPOulxTgxz-24" value="NRV Quantiles &lt;b&gt;(T)&lt;/b&gt;" style="text;html=1;strokeColor=none;fillColor=none;align=center;verticalAlign=middle;whiteSpace=wrap;rounded=0;fontSize=14;" vertex="1" parent="1">
<mxGeometry x="450" y="100" width="100" height="30" as="geometry" />
</mxCell>
</root>
</mxGraphModel>
</diagram>
</mxfile>

BIN
Reports/Thesis/ea-en.pdf Normal file

Binary file not shown.

BIN
Reports/Thesis/ea-nl.pdf Normal file

Binary file not shown.

BIN
Reports/Thesis/eb-en.pdf Normal file

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 388 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.0 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 214 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 204 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 176 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 165 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 215 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 207 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 178 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 176 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 165 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 192 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 189 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 51 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 196 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 188 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 185 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 192 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 201 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 243 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 225 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 214 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 246 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 242 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 103 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 88 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 88 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 88 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 89 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 88 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 89 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 87 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 88 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 88 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 89 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 88 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 88 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

View File

@@ -0,0 +1,781 @@
@online{noauthor_zotero_nodate,
title = {Zotero {\textbar} Connectors},
url = {https://www.zotero.org/download/connectors},
urldate = {2022-10-12},
file = {Zotero | Connectors:/Users/victormylle/Zotero/storage/EPF3ZZRA/connectors.html:text/html},
}
@online{noauthor_elia_nodate,
title = {Elia: de electriciteitsmarkt en -systeem},
url = {https://www.elia.be/nl/elektriciteitsmarkt-en-systeem},
shorttitle = {Elia},
abstract = {Elia deelt de Europese ambitie om een geïntegreerde elektriciteitsmarkt tot stand te brengen en verschillende marktspelers aan te moedigen tot het aanbieden van systeemdiensten.},
urldate = {2023-06-23},
langid = {dutch},
file = {Snapshot:/Users/victormylle/Zotero/storage/7QY94WTW/elektriciteitsmarkt-en-systeem.html:text/html},
}
@misc{gao_easy--hard_2023,
title = {Easy-to-Hard Learning for Information Extraction},
url = {http://arxiv.org/abs/2305.09193},
abstract = {Information extraction ({IE}) systems aim to automatically extract structured information, such as named entities, relations between entities, and events, from unstructured texts. While most existing work addresses a particular {IE} task, universally modeling various {IE} tasks with one model has achieved great success recently. Despite their success, they employ a one-stage learning strategy, i.e., directly learning to extract the target structure given the input text, which contradicts the human learning process. In this paper, we propose a unified easy-to-hard learning framework consisting of three stages, i.e., the easy stage, the hard stage, and the main stage, for {IE} by mimicking the human learning process. By breaking down the learning process into multiple stages, our framework facilitates the model to acquire general {IE} task knowledge and improve its generalization ability. Extensive experiments across four {IE} tasks demonstrate the effectiveness of our framework. We achieve new state-of-the-art results on 13 out of 17 datasets. Our code is available at https://github.com/DAMO-NLP-SG/IE-E2H.},
number = {{arXiv}:2305.09193},
publisher = {{arXiv}},
author = {Gao, Chang and Zhang, Wenxuan and Lam, Wai and Bing, Lidong},
urldate = {2023-07-10},
date = {2023-05-19},
eprinttype = {arxiv},
eprint = {2305.09193},
eprintclass = {cs},
keywords = {Computer Science - Computation and Language},
file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/5YBG5XYS/2305.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/D8LIDUE8/Gao et al. - 2023 - Easy-to-Hard Learning for Information Extraction.pdf:application/pdf},
}
@article{gaur_semi-supervised_2021,
title = {Semi-supervised deep learning based named entity recognition model to parse education section of resumes},
volume = {33},
issn = {1433-3058},
url = {https://doi.org/10.1007/s00521-020-05351-2},
doi = {10.1007/s00521-020-05351-2},
abstract = {A job seeker's resume contains several sections, including educational qualifications. Educational qualifications capture the knowledge and skills relevant to the job. Machine processing of the education sections of resumes has been a difficult task. In this paper, we attempt to identify educational institutions' names and degrees from a resume's education section. Usually, a significant amount of annotated data is required for neural network-based named entity recognition techniques. A semi-supervised approach is used to overcome the lack of large annotated data. We trained a deep neural network model on an initial (seed) set of resume education sections. This model is used to predict entities of unlabeled education sections and is rectified using a correction module. The education sections containing the rectified entities are augmented to the seed set. The updated seed set is used for retraining, leading to better accuracy than the previously trained model. This way, it can provide a high overall accuracy without the need of large annotated data. Our model has achieved an accuracy of 92.06\% on the named entity recognition task.},
pages = {5705--5718},
number = {11},
journaltitle = {Neural Computing and Applications},
shortjournal = {Neural Comput \& Applic},
author = {Gaur, Bodhvi and Saluja, Gurpreet Singh and Sivakumar, Hamsa Bharathi and Singh, Sanjay},
urldate = {2023-07-10},
date = {2021-06-01},
langid = {english},
keywords = {Deep learning models, Named entity recognition ({NER}), Natural language processing, Resume information extraction, Semi-supervised learning},
file = {Full Text PDF:/Users/victormylle/Zotero/storage/4NK6IXHZ/Gaur et al. - 2021 - Semi-supervised deep learning based named entity r.pdf:application/pdf},
}
@article{landolsi_information_2023,
title = {Information extraction from electronic medical documents: state of the art and future research directions},
volume = {65},
issn = {0219-3116},
url = {https://doi.org/10.1007/s10115-022-01779-1},
doi = {10.1007/s10115-022-01779-1},
shorttitle = {Information extraction from electronic medical documents},
abstract = {In the medical field, a doctor must have a comprehensive knowledge by reading and writing narrative documents, and he is responsible for every decision he takes for patients. Unfortunately, it is very tiring to read all necessary information about drugs, diseases and patients due to the large amount of documents that are increasing every day. Consequently, so many medical errors can happen and even kill people. Likewise, there is such an important field that can handle this problem, which is the information extraction. There are several important tasks in this field to extract the important and desired information from unstructured text written in natural language. The main principal tasks are named entity recognition and relation extraction since they can structure the text by extracting the relevant information. However, in order to treat the narrative text we should use natural language processing techniques to extract useful information and features. In our paper, we introduce and discuss the several techniques and solutions used in these tasks. Furthermore, we outline the challenges in information extraction from medical documents. In our knowledge, this is the most comprehensive survey in the literature with an experimental analysis and a suggestion for some uncovered directions.},
pages = {463--516},
number = {2},
journaltitle = {Knowledge and Information Systems},
shortjournal = {Knowl Inf Syst},
author = {Landolsi, Mohamed Yassine and Hlaoua, Lobna and Ben Romdhane, Lotfi},
urldate = {2023-07-10},
date = {2023-02-01},
langid = {english},
keywords = {Electronic medical records, Information extraction, Medical named entities recognition, Medical relation extraction, Section detection},
file = {Full Text PDF:/Users/victormylle/Zotero/storage/KRTKZW3M/Landolsi et al. - 2023 - Information extraction from electronic medical doc.pdf:application/pdf},
}
@inproceedings{fu_spanner_2021,
location = {Online},
title = {{SpanNER}: Named Entity Re-/Recognition as Span Prediction},
url = {https://aclanthology.org/2021.acl-long.558},
doi = {10.18653/v1/2021.acl-long.558},
shorttitle = {{SpanNER}},
abstract = {Recent years have seen the paradigm shift of Named Entity Recognition ({NER}) systems from sequence labeling to span prediction. Despite its preliminary effectiveness, the span prediction model's architectural bias has not been fully understood. In this paper, we first investigate the strengths and weaknesses when the span prediction model is used for named entity recognition compared with the sequence labeling framework and how to further improve it, which motivates us to make complementary advantages of systems based on different paradigms. We then reveal that span prediction, simultaneously, can serve as a system combiner to re-recognize named entities from different systems' outputs. We experimentally implement 154 systems on 11 datasets, covering three languages, comprehensive results show the effectiveness of span prediction models that both serve as base {NER} systems and system combiners. We make all codes and datasets available: https://github.com/neulab/spanner, as well as an online system demo: http://spanner.sh. Our model also has been deployed into the {ExplainaBoard} platform, which allows users to flexibly perform a system combination of top-scoring systems in an interactive way: http://explainaboard.nlpedia.ai/leaderboard/task-ner/.},
eventtitle = {{ACL}-{IJCNLP} 2021},
pages = {7183--7195},
booktitle = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
publisher = {Association for Computational Linguistics},
author = {Fu, Jinlan and Huang, Xuanjing and Liu, Pengfei},
urldate = {2023-07-10},
date = {2021-08},
file = {Full Text PDF:/Users/victormylle/Zotero/storage/6JU4DR5Y/Fu et al. - 2021 - SpanNER Named Entity Re-Recognition as Span Pred.pdf:application/pdf},
}
@inproceedings{li_unified_2020,
location = {Online},
title = {A Unified {MRC} Framework for Named Entity Recognition},
url = {https://aclanthology.org/2020.acl-main.519},
doi = {10.18653/v1/2020.acl-main.519},
abstract = {The task of named entity recognition ({NER}) is normally divided into nested {NER} and flat {NER} depending on whether named entities are nested or not. Models are usually separately developed for the two tasks, since sequence labeling models, the most widely used backbone for flat {NER}, are only able to assign a single label to a particular token, which is unsuitable for nested {NER} where a token may be assigned several labels. In this paper, we propose a unified framework that is capable of handling both flat and nested {NER} tasks. Instead of treating the task of {NER} as a sequence labeling problem, we propose to formulate it as a machine reading comprehension ({MRC}) task. For example, extracting entities with the per label is formalized as extracting answer spans to the question “which person is mentioned in the text”. This formulation naturally tackles the entity overlapping issue in nested {NER}: the extraction of two overlapping entities with different categories requires answering two independent questions. Additionally, since the query encodes informative prior knowledge, this strategy facilitates the process of entity extraction, leading to better performances for not only nested {NER}, but flat {NER}. We conduct experiments on both nested and flat {NER} datasets. Experiment results demonstrate the effectiveness of the proposed formulation. We are able to achieve a vast amount of performance boost over current {SOTA} models on nested {NER} datasets, i.e., +1.28, +2.55, +5.44, +6.37, respectively on {ACE}04, {ACE}05, {GENIA} and {KBP}17, along with {SOTA} results on flat {NER} datasets, i.e., +0.24, +1.95, +0.21, +1.49 respectively on English {CoNLL} 2003, English {OntoNotes} 5.0, Chinese {MSRA} and Chinese {OntoNotes} 4.0.},
eventtitle = {{ACL} 2020},
pages = {5849--5859},
booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
publisher = {Association for Computational Linguistics},
author = {Li, Xiaoya and Feng, Jingrong and Meng, Yuxian and Han, Qinghong and Wu, Fei and Li, Jiwei},
urldate = {2023-07-10},
date = {2020-07},
file = {Full Text PDF:/Users/victormylle/Zotero/storage/TIVIKNGN/Li et al. - 2020 - A Unified MRC Framework for Named Entity Recogniti.pdf:application/pdf},
}
@misc{decorte_jobbert_2021,
title = {{JobBERT}: Understanding Job Titles through Skills},
url = {http://arxiv.org/abs/2109.09605},
shorttitle = {{JobBERT}},
abstract = {Job titles form a cornerstone of today's human resources ({HR}) processes. Within online recruitment, they allow candidates to understand the contents of a vacancy at a glance, while internal {HR} departments use them to organize and structure many of their processes. As job titles are a compact, convenient, and readily available data source, modeling them with high accuracy can greatly benefit many {HR} tech applications. In this paper, we propose a neural representation model for job titles, by augmenting a pre-trained language model with co-occurrence information from skill labels extracted from vacancies. Our {JobBERT} method leads to considerable improvements compared to using generic sentence encoders, for the task of job title normalization, for which we release a new evaluation benchmark.},
number = {{arXiv}:2109.09605},
publisher = {{arXiv}},
author = {Decorte, Jens-Joris and Van Hautte, Jeroen and Demeester, Thomas and Develder, Chris},
urldate = {2023-07-20},
date = {2021-09-20},
eprinttype = {arxiv},
eprint = {2109.09605},
eprintclass = {cs},
keywords = {Computer Science - Computation and Language},
}
@misc{sun_retentive_2023,
title = {Retentive Network: A Successor to Transformer for Large Language Models},
url = {http://arxiv.org/abs/2307.08621},
shorttitle = {Retentive Network},
abstract = {In this work, we propose Retentive Network ({RetNet}) as a foundation architecture for large language models, simultaneously achieving training parallelism, low-cost inference, and good performance. We theoretically derive the connection between recurrence and attention. Then we propose the retention mechanism for sequence modeling, which supports three computation paradigms, i.e., parallel, recurrent, and chunkwise recurrent. Specifically, the parallel representation allows for training parallelism. The recurrent representation enables low-cost \$O(1)\$ inference, which improves decoding throughput, latency, and {GPU} memory without sacrificing performance. The chunkwise recurrent representation facilitates efficient long-sequence modeling with linear complexity, where each chunk is encoded parallelly while recurrently summarizing the chunks. Experimental results on language modeling show that {RetNet} achieves favorable scaling results, parallel training, low-cost deployment, and efficient inference. The intriguing properties make {RetNet} a strong successor to Transformer for large language models. Code will be available at https://aka.ms/retnet.},
number = {{arXiv}:2307.08621},
publisher = {{arXiv}},
author = {Sun, Yutao and Dong, Li and Huang, Shaohan and Ma, Shuming and Xia, Yuqing and Xue, Jilong and Wang, Jianyong and Wei, Furu},
urldate = {2023-07-25},
date = {2023-07-19},
eprinttype = {arxiv},
eprint = {2307.08621},
eprintclass = {cs},
keywords = {Computer Science - Computation and Language, Computer Science - Machine Learning},
}
@misc{zhang_generation-driven_2023,
title = {Generation-driven Contrastive Self-training for Zero-shot Text Classification with Instruction-tuned {GPT}},
url = {http://arxiv.org/abs/2304.11872},
abstract = {Moreover, {GPT}-based zero-shot classification models tend to make independent predictions over test instances, which can be sub-optimal as the instance correlations and the decision boundaries in the target space are ignored. To address these difficulties and limitations, we propose a new approach to zero-shot text classification, namely {GenCo}, which leverages the strong generative power of {GPT} to assist in training a smaller, more adaptable, and efficient sentence encoder classifier with contrastive self-training. Specifically, {GenCo} applies {GPT} in two ways: firstly, it generates multiple augmented texts for each input instance to enhance the semantic embedding of the instance and improve the mapping to relevant labels; secondly, it generates augmented texts conditioned on the predicted label during self-training, which makes the generative process tailored to the decision boundaries in the target space. In our experiments, {GenCo} outperforms previous state-of-the-art methods on multiple benchmark datasets, even when only limited in-domain text data is available.},
number = {{arXiv}:2304.11872},
publisher = {{arXiv}},
author = {Zhang, Ruohong and Wang, Yau-Shian and Yang, Yiming},
urldate = {2023-08-01},
date = {2023-04-24},
eprinttype = {arxiv},
eprint = {2304.11872},
eprintclass = {cs},
keywords = {Computer Science - Computation and Language, Computer Science - Artificial Intelligence, interesting},
file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/2ULMRMN5/2304.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/D98MRNHP/Zhang et al. - 2023 - Generation-driven Contrastive Self-training for Ze.pdf:application/pdf},
}
@misc{zhang_clusterllm_2023,
title = {{ClusterLLM}: Large Language Models as a Guide for Text Clustering},
url = {http://arxiv.org/abs/2305.14871},
shorttitle = {{ClusterLLM}},
abstract = {We introduce {ClusterLLM}, a novel text clustering framework that leverages feedback from an instruction-tuned large language model, such as {ChatGPT}. Compared with traditional unsupervised methods that builds upon "small" embedders, {ClusterLLM} exhibits two intriguing advantages: (1) it enjoys the emergent capability of {LLM} even if its embeddings are inaccessible; and (2) it understands the user's preference on clustering through textual instruction and/or a few annotated data. First, we prompt {ChatGPT} for insights on clustering perspective by constructing hard triplet questions {\textless}does A better correspond to B than C{\textgreater}, where A, B and C are similar data points that belong to different clusters according to small embedder. We empirically show that this strategy is both effective for fine-tuning small embedder and cost-efficient to query {ChatGPT}. Second, we prompt {ChatGPT} for helps on clustering granularity by carefully designed pairwise questions {\textless}do A and B belong to the same category{\textgreater}, and tune the granularity from cluster hierarchies that is the most consistent with the {ChatGPT} answers. Extensive experiments on 14 datasets show that {ClusterLLM} consistently improves clustering quality, at an average cost of {\textasciitilde}\$0.6 per dataset.},
number = {{arXiv}:2305.14871},
publisher = {{arXiv}},
author = {Zhang, Yuwei and Wang, Zihan and Shang, Jingbo},
urldate = {2023-08-08},
date = {2023-05-24},
eprinttype = {arxiv},
eprint = {2305.14871},
eprintclass = {cs},
keywords = {Computer Science - Computation and Language},
}
@misc{zhang_clusterllm_2023-1,
title = {{ClusterLLM}: Large Language Models as a Guide for Text Clustering},
url = {http://arxiv.org/abs/2305.14871},
shorttitle = {{ClusterLLM}},
abstract = {We introduce {ClusterLLM}, a novel text clustering framework that leverages feedback from an instruction-tuned large language model, such as {ChatGPT}. Compared with traditional unsupervised methods that builds upon "small" embedders, {ClusterLLM} exhibits two intriguing advantages: (1) it enjoys the emergent capability of {LLM} even if its embeddings are inaccessible; and (2) it understands the user's preference on clustering through textual instruction and/or a few annotated data. First, we prompt {ChatGPT} for insights on clustering perspective by constructing hard triplet questions {\textless}does A better correspond to B than C{\textgreater}, where A, B and C are similar data points that belong to different clusters according to small embedder. We empirically show that this strategy is both effective for fine-tuning small embedder and cost-efficient to query {ChatGPT}. Second, we prompt {ChatGPT} for helps on clustering granularity by carefully designed pairwise questions {\textless}do A and B belong to the same category{\textgreater}, and tune the granularity from cluster hierarchies that is the most consistent with the {ChatGPT} answers. Extensive experiments on 14 datasets show that {ClusterLLM} consistently improves clustering quality, at an average cost of {\textasciitilde}\$0.6 per dataset.},
number = {{arXiv}:2305.14871},
publisher = {{arXiv}},
author = {Zhang, Yuwei and Wang, Zihan and Shang, Jingbo},
urldate = {2023-08-08},
date = {2023-05-24},
eprinttype = {arxiv},
eprint = {2305.14871},
eprintclass = {cs},
keywords = {Computer Science - Computation and Language},
}
@misc{zhang_clusterllm_2023-2,
title = {{ClusterLLM}: Large Language Models as a Guide for Text Clustering},
url = {http://arxiv.org/abs/2305.14871},
shorttitle = {{ClusterLLM}},
abstract = {We introduce {ClusterLLM}, a novel text clustering framework that leverages feedback from an instruction-tuned large language model, such as {ChatGPT}. Compared with traditional unsupervised methods that builds upon "small" embedders, {ClusterLLM} exhibits two intriguing advantages: (1) it enjoys the emergent capability of {LLM} even if its embeddings are inaccessible; and (2) it understands the user's preference on clustering through textual instruction and/or a few annotated data. First, we prompt {ChatGPT} for insights on clustering perspective by constructing hard triplet questions {\textless}does A better correspond to B than C{\textgreater}, where A, B and C are similar data points that belong to different clusters according to small embedder. We empirically show that this strategy is both effective for fine-tuning small embedder and cost-efficient to query {ChatGPT}. Second, we prompt {ChatGPT} for helps on clustering granularity by carefully designed pairwise questions {\textless}do A and B belong to the same category{\textgreater}, and tune the granularity from cluster hierarchies that is the most consistent with the {ChatGPT} answers. Extensive experiments on 14 datasets show that {ClusterLLM} consistently improves clustering quality, at an average cost of {\textasciitilde}\$0.6 per dataset.},
number = {{arXiv}:2305.14871},
publisher = {{arXiv}},
author = {Zhang, Yuwei and Wang, Zihan and Shang, Jingbo},
urldate = {2023-08-08},
date = {2023-05-24},
eprinttype = {arxiv},
eprint = {2305.14871},
eprintclass = {cs},
keywords = {Computer Science - Computation and Language},
file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/68L6AESY/2305.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/VGWL9LRC/Zhang et al. - 2023 - ClusterLLM Large Language Models as a Guide for T.pdf:application/pdf},
}
@article{vijeikis_efficient_2022,
title = {Efficient Violence Detection in Surveillance},
volume = {22},
rights = {http://creativecommons.org/licenses/by/3.0/},
issn = {1424-8220},
url = {https://www.mdpi.com/1424-8220/22/6/2216},
doi = {10.3390/s22062216},
abstract = {Intelligent video surveillance systems are rapidly being introduced to public places. The adoption of computer vision and machine learning techniques enables various applications for collected video features; one of the major is safety monitoring. The efficacy of violent event detection is measured by the efficiency and accuracy of violent event detection. In this paper, we present a novel architecture for violence detection from video surveillance cameras. Our proposed model is a spatial feature extracting a U-Net-like network that uses {MobileNet} V2 as an encoder followed by {LSTM} for temporal feature extraction and classification. The proposed model is computationally light and still achieves good results—experiments showed that an average accuracy is 0.82 ± 2\% and average precision is 0.81 ± 3\% using a complex real-world security camera footage dataset based on {RWF}-2000.},
pages = {2216},
number = {6},
journaltitle = {Sensors},
author = {Vijeikis, Romas and Raudonis, Vidas and Dervinis, Gintaras},
urldate = {2023-08-08},
date = {2022-01},
langid = {english},
note = {Number: 6
Publisher: Multidisciplinary Digital Publishing Institute},
keywords = {computer vision, deep learning, intelligent video surveillance, {LSTM}, U-Net, violence detection, violent behavior},
file = {Full Text PDF:/Users/victormylle/Zotero/storage/PSYA8YSJ/Vijeikis et al. - 2022 - Efficient Violence Detection in Surveillance.pdf:application/pdf},
}
@article{toubeau_interpretable_2022,
title = {Interpretable Probabilistic Forecasting of Imbalances in Renewable-Dominated Electricity Systems},
volume = {13},
issn = {1949-3029, 1949-3037},
url = {https://ieeexplore.ieee.org/document/9464660/},
doi = {10.1109/TSTE.2021.3092137},
abstract = {High penetration of renewable energy such as wind power and photovoltaic ({PV}) requires large amounts of flexibility to balance their inherent variability. Making an accurate prediction of the future power system imbalance is an efficient approach to reduce these balancing costs. However, the imbalance is affected not only by renewables but also by complex market dynamics and technology constraints, for which the dependence structure is unknown. Therefore, this paper introduces a new architecture of sequence-to-sequence recurrent neural networks to efficiently process time-based information in an interpretable fashion. To that end, the selection of relevant variables is internalized into the model, which provides insights on the relative importance of individual inputs, while bypassing the cumbersome need for data preprocessing. Then, the model is further enriched with an attention mechanism that is tailored to focus on the relevant contextual information, which is useful to better understand the underlying dynamics such as seasonal patterns. Outcomes show that adding modules to generate explainable forecasts makes the model more efficient and robust, thus leading to enhanced performance.},
pages = {1267--1277},
number = {2},
journaltitle = {{IEEE} Transactions on Sustainable Energy},
shortjournal = {{IEEE} Trans. Sustain. Energy},
author = {Toubeau, Jean-Francois and Bottieau, Jeremie and Wang, Yi and Vallee, Francois},
urldate = {2023-09-28},
date = {2022-04},
langid = {english},
file = {Toubeau et al. - 2022 - Interpretable Probabilistic Forecasting of Imbalan.pdf:/Users/victormylle/Zotero/storage/WA7DZBXX/Toubeau et al. - 2022 - Interpretable Probabilistic Forecasting of Imbalan.pdf:application/pdf},
}
@online{noauthor_deep_nodate,
title = {Deep Generative Modelling: A Comparative Review of {VAEs}, {GANs}, Normalizing Flows, Energy-Based and Autoregressive Models {\textbar} {IEEE} Journals \& Magazine {\textbar} {IEEE} Xplore},
url = {https://ieeexplore.ieee.org/document/9555209},
urldate = {2023-10-11},
}
@article{bond-taylor_deep_2022,
title = {Deep Generative Modelling: A Comparative Review of {VAEs}, {GANs}, Normalizing Flows, Energy-Based and Autoregressive Models},
volume = {44},
issn = {0162-8828, 2160-9292, 1939-3539},
url = {https://ieeexplore.ieee.org/document/9555209/},
doi = {10.1109/TPAMI.2021.3116668},
shorttitle = {Deep Generative Modelling},
abstract = {Deep generative models are a class of techniques that train deep neural networks to model the distribution of training samples. Research has fragmented into various interconnected approaches, each of which make trade-offs including run-time, diversity, and architectural restrictions. In particular, this compendium covers energy-based models, variational autoencoders, generative adversarial networks, autoregressive models, normalizing flows, in addition to numerous hybrid approaches. These techniques are compared and contrasted, explaining the premises behind each and how they are interrelated, while reviewing current state-of-the-art advances and implementations.},
pages = {7327--7347},
number = {11},
journaltitle = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
shortjournal = {{IEEE} Trans. Pattern Anal. Mach. Intell.},
author = {Bond-Taylor, Sam and Leach, Adam and Long, Yang and Willcocks, Chris G.},
urldate = {2023-10-11},
date = {2022-11-01},
langid = {english},
file = {Bond-Taylor et al. - 2022 - Deep Generative Modelling A Comparative Review of.pdf:/Users/victormylle/Zotero/storage/UNAST9UC/Bond-Taylor et al. - 2022 - Deep Generative Modelling A Comparative Review of.pdf:application/pdf},
}
@article{lecun_tutorial_nodate,
title = {A Tutorial on Energy-Based Learning},
abstract = {Energy-Based Models ({EBMs}) capture dependencies between variables by associating a scalar energy to each configuration of the variables. Inference consists in clamping the value of observed variables and finding configurations of the remaining variables that minimize the energy. Learning consists in finding an energy function in which observed configurations of the variables are given lower energies than unobserved ones. The {EBM} approach provides a common theoretical framework for many learning models, including traditional discriminative and generative approaches, as well as graph-transformer networks, conditional random fields, maximum margin Markov networks, and several manifold learning methods.},
author = {{LeCun}, Yann and Chopra, Sumit and Hadsell, Raia and Ranzato, MarcAurelio and Huang, Fu Jie},
langid = {english},
file = {LeCun et al. - A Tutorial on Energy-Based Learning.pdf:/Users/victormylle/Zotero/storage/8932975Z/LeCun et al. - A Tutorial on Energy-Based Learning.pdf:application/pdf},
}
@article{gatta_neural_2022,
title = {Neural networks generative models for time series},
volume = {34},
issn = {1319-1578},
url = {https://www.sciencedirect.com/science/article/pii/S1319157822002361},
doi = {10.1016/j.jksuci.2022.07.010},
abstract = {Nowadays, time series are a widely-exploited methodology to describe phenomena belonging to different fields. In fact, electrical consumption can be explained, from a data analysis perspective, with a time series, as for healthcare, financial index, air pollution or parking occupancy rate. Applying time series to different areas of interest has contributed to the exponential rise in interest by both practitioners and academics. On the other side, especially regarding static data, a new trend is acquiring even more relevance in the data analysis community, namely neural network generative approaches. Generative approaches aim to generate new, fake samples given a dataset of real data by implicitly learning the probability distribution underlining data. In this way, several tasks can be addressed, such as data augmentation, class imbalance, anomaly detection or privacy. However, even if this topic is relatively well-established in the literature related to static data regarding time series, the debate is still open. This paper contributes to this debate by comparing four neural network-based generative approaches for time series belonging to the state-of-the-art methodologies in literature. The comparison has been carried out on five public and private datasets and on different time granularities, with a total number of 13 experimental scenario. Our work aims to provide a wide overview of the performances of the compared methodologies when working in different conditions like seasonality, strong autoregressive components and long or short sequences.},
pages = {7920--7939},
number = {10},
journaltitle = {Journal of King Saud University - Computer and Information Sciences},
shortjournal = {Journal of King Saud University - Computer and Information Sciences},
author = {Gatta, Federico and Giampaolo, Fabio and Prezioso, Edoardo and Mei, Gang and Cuomo, Salvatore and Piccialli, Francesco},
urldate = {2023-10-11},
date = {2022-11-01},
keywords = {Deep learning, Generative adversarial networks, Healthcare, Industry 4.0, Time series},
file = {Full Text:/Users/victormylle/Zotero/storage/ZU6BCM28/Gatta et al. - 2022 - Neural networks generative models for time series.pdf:application/pdf;ScienceDirect Snapshot:/Users/victormylle/Zotero/storage/2HSHCJN7/S1319157822002361.html:text/html},
}
@article{dumas_deep_2022,
title = {A deep generative model for probabilistic energy forecasting in power systems: normalizing flows},
volume = {305},
issn = {03062619},
url = {https://linkinghub.elsevier.com/retrieve/pii/S0306261921011909},
doi = {10.1016/j.apenergy.2021.117871},
shorttitle = {A deep generative model for probabilistic energy forecasting in power systems},
abstract = {Greater direct electrification of end-use sectors with a higher share of renewables is one of the pillars to power a carbon-neutral society by 2050. However, in contrast to conventional power plants, renewable energy is subject to uncertainty raising challenges for their interaction with power systems. Scenario-based probabilistic forecasting models have become a vital tool to equip decision-makers. This paper presents to the power systems forecasting practitioners a recent deep learning technique, the normalizing flows, to produce accurate scenario-based probabilistic forecasts that are crucial to face the new challenges in power systems applications. The strength of this technique is to directly learn the stochastic multivariate distribution of the underlying process by maximizing the likelihood. Through comprehensive empirical evaluations using the open data of the Global Energy Forecasting Competition 2014, we demonstrate that this methodology is competitive with other state-of-the-art deep learning generative models: generative adversarial networks and variational autoencoders. The models producing weather-based wind, solar power, and load scenarios are properly compared in terms of forecast value by considering the case study of an energy retailer and quality using several complementary metrics. The numerical experiments are simple and easily reproducible. Thus, we hope it will encourage other forecasting practitioners to test and use normalizing flows in power system applications such as bidding on electricity markets, scheduling power systems with high renewable energy sources penetration, energy management of virtual power plan or microgrids, and unit commitment.},
pages = {117871},
journaltitle = {Applied Energy},
shortjournal = {Applied Energy},
author = {Dumas, Jonathan and Wehenkel, Antoine and Lanaspeze, Damien and Cornélusse, Bertrand and Sutera, Antonio},
urldate = {2023-10-11},
date = {2022-01},
langid = {english},
file = {Dumas et al. - 2022 - A deep generative model for probabilistic energy f.pdf:/Users/victormylle/Zotero/storage/3CW249QI/Dumas et al. - 2022 - A deep generative model for probabilistic energy f.pdf:application/pdf},
}
@article{lu_scenarios_2022,
title = {Scenarios modelling for forecasting day-ahead electricity prices: Case studies in Australia},
volume = {308},
issn = {0306-2619},
url = {https://www.sciencedirect.com/science/article/pii/S0306261921015555},
doi = {10.1016/j.apenergy.2021.118296},
shorttitle = {Scenarios modelling for forecasting day-ahead electricity prices},
abstract = {Electricity prices in spot markets are volatile and can be affected by various factors, such as generation and demand, system contingencies, local weather patterns, bidding strategies of market participants, and uncertain renewable energy outputs. Because of these factors, electricity price forecasting is challenging. This paper proposes a scenario modeling approach to improve forecasting accuracy, conditioning time series generative adversarial networks on external factors. After data pre-processing and condition selection, a conditional {TSGAN} or {CTSGAN} is designed to forecast electricity prices. Wasserstein Distance, weights limitation, and {RMSProp} optimizer are used to ensure that the {CTGAN} training process is stable. By changing the dimensionality of random noise input, the point forecasting model can be transformed into a probabilistic forecasting model. For electricity price point forecasting, the proposed {CTSGAN} model has better accuracy and has better generalization ability than the {TSGAN} and other deep learning methods. For probabilistic forecasting, the proposed {CTSGAN} model can significantly improve the continuously ranked probability score and Winkler score. The effectiveness and superiority of the proposed {CTSGAN} forecasting model are verified by case studies.},
pages = {118296},
journaltitle = {Applied Energy},
shortjournal = {Applied Energy},
author = {Lu, Xin and Qiu, Jing and Lei, Gang and Zhu, Jianguo},
urldate = {2023-10-13},
date = {2022-02-15},
keywords = {Generative adversarial networks, Conditions, Electricity Price, Point forecasting, Probabilistic forecasting},
file = {Lu et al. - 2022 - Scenarios modelling for forecasting day-ahead elec.pdf:/Users/victormylle/Zotero/storage/3XL3T253/Lu et al. - 2022 - Scenarios modelling for forecasting day-ahead elec.pdf:application/pdf;ScienceDirect Snapshot:/Users/victormylle/Zotero/storage/9K2RFGGU/S0306261921015555.html:text/html},
}
@article{gabrielli_data-driven_2022,
title = {Data-driven modeling for long-term electricity price forecasting},
volume = {244},
issn = {03605442},
url = {https://linkinghub.elsevier.com/retrieve/pii/S036054422200010X},
doi = {10.1016/j.energy.2022.123107},
abstract = {Estimating the financial viability of renewable energy investments requires the availability of long-term, finely-resolved electricity prices over the investment lifespan. This entails, however, two major challenges: (i) the combination of extensive time horizons and fine time resolutions, and (ii) the prediction of out-of-sample electricity prices in future energy and market scenarios, or shifts in pricing regime, that were not observed in the past. This paper tackles such challenges by proposing a data-driven model for the long-term prediction of electricity market prices that is based on Fourier analysis. The electricity price is decomposed into components leading to its base evolution, which are described through the amplitudes of the main frequencies of the Fourier series, and components leading to high price volatility, which are described by the residual frequencies. The former are predicted via a regression model that uses as input annual values of relevant energy and market quantities, such as electricity generation, prices and demands. The proposed method shows capable of (i) predicting the most relevant dynamics of the electricity price; (ii) generalization by capturing the market mechanisms of previously unseen electricity markets. These findings support the relevance and validity of data-driven, finely-resolved, long-term predictions and highlight the potential for hybrid data-driven and market-based models.},
pages = {123107},
journaltitle = {Energy},
shortjournal = {Energy},
author = {Gabrielli, Paolo and Wüthrich, Moritz and Blume, Steffen and Sansavini, Giovanni},
urldate = {2023-10-15},
date = {2022-04},
langid = {english},
file = {Gabrielli et al. - 2022 - Data-driven modeling for long-term electricity pri.pdf:/Users/victormylle/Zotero/storage/YHDVP399/Gabrielli et al. - 2022 - Data-driven modeling for long-term electricity pri.pdf:application/pdf},
}
@misc{kollovieh_predict_2023,
title = {Predict, Refine, Synthesize: Self-Guiding Diffusion Models for Probabilistic Time Series Forecasting},
url = {http://arxiv.org/abs/2307.11494},
shorttitle = {Predict, Refine, Synthesize},
abstract = {Diffusion models have achieved state-of-the-art performance in generative modeling tasks across various domains. Prior works on time series diffusion models have primarily focused on developing conditional models tailored to specific forecasting or imputation tasks. In this work, we explore the potential of task-agnostic, unconditional diffusion models for several time series applications. We propose {TSDiff}, an unconditionally trained diffusion model for time series. Our proposed self-guidance mechanism enables conditioning {TSDiff} for downstream tasks during inference, without requiring auxiliary networks or altering the training procedure. We demonstrate the effectiveness of our method on three different time series tasks: forecasting, refinement, and synthetic data generation. First, we show that {TSDiff} is competitive with several task-specific conditional forecasting methods (predict). Second, we leverage the learned implicit probability density of {TSDiff} to iteratively refine the predictions of base forecasters with reduced computational overhead over reverse diffusion (refine). Notably, the generative performance of the model remains intact -- downstream forecasters trained on synthetic samples from {TSDiff} outperform forecasters that are trained on samples from other state-of-the-art generative time series models, occasionally even outperforming models trained on real data (synthesize).},
number = {{arXiv}:2307.11494},
publisher = {{arXiv}},
author = {Kollovieh, Marcel and Ansari, Abdul Fatir and Bohlke-Schneider, Michael and Zschiegner, Jasper and Wang, Hao and Wang, Yuyang},
urldate = {2023-10-15},
date = {2023-07-21},
eprinttype = {arxiv},
eprint = {2307.11494},
eprintclass = {cs, stat},
keywords = {Computer Science - Machine Learning, Computer Science - Artificial Intelligence, Statistics - Machine Learning, {TODO}},
file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/PBVHEPD9/2307.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/QIBWKG57/Kollovieh et al. - 2023 - Predict, Refine, Synthesize Self-Guiding Diffusio.pdf:application/pdf},
}
@misc{rasul_autoregressive_2021,
title = {Autoregressive Denoising Diffusion Models for Multivariate Probabilistic Time Series Forecasting},
url = {http://arxiv.org/abs/2101.12072},
abstract = {In this work, we propose \texttt{{TimeGrad}}, an autoregressive model for multivariate probabilistic time series forecasting which samples from the data distribution at each time step by estimating its gradient. To this end, we use diffusion probabilistic models, a class of latent variable models closely connected to score matching and energy-based methods. Our model learns gradients by optimizing a variational bound on the data likelihood and at inference time converts white noise into a sample of the distribution of interest through a Markov chain using Langevin sampling. We demonstrate experimentally that the proposed autoregressive denoising diffusion model is the new state-of-the-art multivariate probabilistic forecasting method on real-world data sets with thousands of correlated dimensions. We hope that this method is a useful tool for practitioners and lays the foundation for future research in this area.},
number = {{arXiv}:2101.12072},
publisher = {{arXiv}},
author = {Rasul, Kashif and Seward, Calvin and Schuster, Ingmar and Vollgraf, Roland},
urldate = {2023-10-15},
date = {2021-02-02},
eprinttype = {arxiv},
eprint = {2101.12072},
eprintclass = {cs},
keywords = {Computer Science - Machine Learning, Computer Science - Artificial Intelligence},
file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/8LIRWZ4G/2101.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/QPPFJVR5/Rasul et al. - 2021 - Autoregressive Denoising Diffusion Models for Mult.pdf:application/pdf},
}
@online{noauthor_spacy_nodate,
title = {{spaCy} · Industrial-strength Natural Language Processing in Python},
url = {https://spacy.io/},
abstract = {{spaCy} is a free open-source library for Natural Language Processing in Python. It features {NER}, {POS} tagging, dependency parsing, word vectors and more.},
urldate = {2023-10-17},
langid = {english},
file = {Snapshot:/Users/victormylle/Zotero/storage/8WWDDEH4/spacy.io.html:text/html},
}
@online{noauthor_intfloatmultilingual-e5-base_nodate,
title = {intfloat/multilingual-e5-base · Hugging Face},
url = {https://huggingface.co/intfloat/multilingual-e5-base},
abstract = {We're on a journey to advance and democratize artificial intelligence through open source and open science.},
urldate = {2023-10-17},
file = {Snapshot:/Users/victormylle/Zotero/storage/LYSDP8CD/multilingual-e5-base.html:text/html},
}
@online{noauthor_googleflan-t5-base_nodate,
title = {google/flan-t5-base · Hugging Face},
url = {https://huggingface.co/google/flan-t5-base},
urldate = {2023-10-17},
file = {flan-t5-base · Hugging Face:/Users/victormylle/Zotero/storage/284DLNVT/flan-t5-base.html:text/html},
}
@online{noauthor_openai_nodate,
title = {{OpenAI} Platform},
url = {https://platform.openai.com},
abstract = {Explore developer resources, tutorials, {API} docs, and dynamic examples to get the most out of {OpenAI}'s platform.},
urldate = {2023-10-17},
langid = {english},
file = {Snapshot:/Users/victormylle/Zotero/storage/9NFW3FCP/gpt-3-5.html:text/html},
}
@article{cramer_normalizing_2022,
title = {Normalizing flow-based day-ahead wind power scenario generation for profitable and reliable delivery commitments by wind farm operators},
volume = {166},
issn = {0098-1354},
url = {https://www.sciencedirect.com/science/article/pii/S0098135422002617},
doi = {10.1016/j.compchemeng.2022.107923},
abstract = {We present a specialized scenario generation method that utilizes forecast information to generate scenarios for day-ahead scheduling problems. In particular, we use normalizing flows to generate wind power scenarios by sampling from a conditional distribution that uses wind speed forecasts to tailor the scenarios to a specific day. We apply the generated scenarios in a stochastic day-ahead bidding problem of a wind electricity producer and analyze whether the scenarios yield profitable decisions. Compared to Gaussian copulas and Wasserstein-generative adversarial networks, the normalizing flow successfully narrows the range of scenarios around the daily trends while maintaining a diverse variety of possible realizations. In the stochastic day-ahead bidding problem, the conditional scenarios from all methods lead to significantly more stable profitable results compared to an unconditional selection of historical scenarios. The normalizing flow consistently obtains the highest profits, even for small sets scenarios.},
pages = {107923},
journaltitle = {Computers \& Chemical Engineering},
shortjournal = {Computers \& Chemical Engineering},
author = {Cramer, Eike and Paeleke, Leonard and Mitsos, Alexander and Dahmen, Manuel},
urldate = {2023-10-18},
date = {2022-10-01},
keywords = {Scenario generation, Stability, Stochastic programming, Wind power},
file = {ScienceDirect Snapshot:/Users/victormylle/Zotero/storage/PT76E9DL/S0098135422002617.html:text/html;Submitted Version:/Users/victormylle/Zotero/storage/M9KFSG3M/Cramer et al. - 2022 - Normalizing flow-based day-ahead wind power scenar.pdf:application/pdf},
}
@inproceedings{zhang_diffusion_2021,
title = {Diffusion Normalizing Flow},
volume = {34},
url = {https://proceedings.neurips.cc/paper/2021/hash/876f1f9954de0aa402d91bb988d12cd4-Abstract.html},
abstract = {We present a novel generative modeling method called diffusion normalizing flow based on stochastic differential equations ({SDEs}). The algorithm consists of two neural {SDEs}: a forward {SDE} that gradually adds noise to the data to transform the data into Gaussian random noise, and a backward {SDE} that gradually removes the noise to sample from the data distribution. By jointly training the two neural {SDEs} to minimize a common cost function that quantifies the difference between the two, the backward {SDE} converges to a diffusion process the starts with a Gaussian distribution and ends with the desired data distribution. Our method is closely related to normalizing flow and diffusion probabilistic models, and can be viewed as a combination of the two. Compared with normalizing flow, diffusion normalizing flow is able to learn distributions with sharp boundaries. Compared with diffusion probabilistic models, diffusion normalizing flow requires fewer discretization steps and thus has better sampling efficiency. Our algorithm demonstrates competitive performance in both high-dimension data density estimation and image generation tasks.},
pages = {16280--16291},
booktitle = {Advances in Neural Information Processing Systems},
publisher = {Curran Associates, Inc.},
author = {Zhang, Qinsheng and Chen, Yongxin},
urldate = {2023-10-18},
date = {2021},
keywords = {{TODO}},
file = {Full Text PDF:/Users/victormylle/Zotero/storage/U45EUFZU/Zhang and Chen - 2021 - Diffusion Normalizing Flow.pdf:application/pdf},
}
@misc{rezende_variational_2016,
title = {Variational Inference with Normalizing Flows},
url = {http://arxiv.org/abs/1505.05770},
abstract = {The choice of approximate posterior distribution is one of the core problems in variational inference. Most applications of variational inference employ simple families of posterior approximations in order to allow for efficient inference, focusing on mean-field or other simple structured approximations. This restriction has a significant impact on the quality of inferences made using variational methods. We introduce a new approach for specifying flexible, arbitrarily complex and scalable approximate posterior distributions. Our approximations are distributions constructed through a normalizing flow, whereby a simple initial density is transformed into a more complex one by applying a sequence of invertible transformations until a desired level of complexity is attained. We use this view of normalizing flows to develop categories of finite and infinitesimal flows and provide a unified view of approaches for constructing rich posterior approximations. We demonstrate that the theoretical advantages of having posteriors that better match the true posterior, combined with the scalability of amortized variational approaches, provides a clear improvement in performance and applicability of variational inference.},
number = {{arXiv}:1505.05770},
publisher = {{arXiv}},
author = {Rezende, Danilo Jimenez and Mohamed, Shakir},
urldate = {2023-10-18},
date = {2016-06-14},
eprinttype = {arxiv},
eprint = {1505.05770},
eprintclass = {cs, stat},
note = {version: 6},
keywords = {Computer Science - Machine Learning, Computer Science - Artificial Intelligence, Statistics - Machine Learning, Statistics - Computation, Statistics - Methodology},
file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/2J7MPVV5/1505.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/GQWIFAAN/Rezende and Mohamed - 2016 - Variational Inference with Normalizing Flows.pdf:application/pdf},
}
@misc{gruver_large_2023,
title = {Large Language Models Are Zero-Shot Time Series Forecasters},
url = {http://arxiv.org/abs/2310.07820},
doi = {10.48550/arXiv.2310.07820},
abstract = {By encoding time series as a string of numerical digits, we can frame time series forecasting as next-token prediction in text. Developing this approach, we find that large language models ({LLMs}) such as {GPT}-3 and {LLaMA}-2 can surprisingly zero-shot extrapolate time series at a level comparable to or exceeding the performance of purpose-built time series models trained on the downstream tasks. To facilitate this performance, we propose procedures for effectively tokenizing time series data and converting discrete distributions over tokens into highly flexible densities over continuous values. We argue the success of {LLMs} for time series stems from their ability to naturally represent multimodal distributions, in conjunction with biases for simplicity, and repetition, which align with the salient features in many time series, such as repeated seasonal trends. We also show how {LLMs} can naturally handle missing data without imputation through non-numerical text, accommodate textual side information, and answer questions to help explain predictions. While we find that increasing model size generally improves performance on time series, we show {GPT}-4 can perform worse than {GPT}-3 because of how it tokenizes numbers, and poor uncertainty calibration, which is likely the result of alignment interventions such as {RLHF}.},
number = {{arXiv}:2310.07820},
author = {Gruver, Nate and Finzi, Marc and Qiu, Shikai and Wilson, Andrew Gordon},
urldate = {2023-10-13},
date = {2023-10-11},
eprinttype = {arxiv},
eprint = {2310.07820},
eprintclass = {cs},
keywords = {Computer Science - Machine Learning},
file = {Gruver et al. - 2023 - Large Language Models Are Zero-Shot Time Series Forecasters.pdf:/Users/victormylle/Zotero/storage/T5XZ227W/Gruver et al. - 2023 - Large Language Models Are Zero-Shot Time Series Forecasters.pdf:application/pdf},
}
@article{sweidan_probabilistic_nodate,
title = {Probabilistic Prediction in scikit-learn},
abstract = {Adding confidence measures to predictive models should increase the trustworthiness, but only if the models are well-calibrated. Historically, some algorithms like logistic regression, but also neural networks, have been considered to produce well-calibrated probability estimates off-the-shelf. Other techniques, like decision trees and Naive Bayes, on the other hand, are infamous for being significantly overconfident in their probabilistic predictions. In this paper, a large experimental study is conducted to investigate how well-calibrated models produced by a number of algorithms in the scikit-learn library are out-of-the-box, but also if either the built-in calibration techniques Platt scaling and isotonic regression, or Venn-Abers, can be used to improve the calibration. The results show that of the seven algorithms evaluated, the only one obtaining well-calibrated models without the external calibration is logistic regression. All other algorithms, i.e., decision trees, adaboost, gradient boosting, {kNN}, naive Bayes and random forest benefit from using any of the calibration techniques. In particular, decision trees, Naive Bayes and the boosted models are substantially improved using external calibration. From a practitioner's perspective, the obvious recommendation becomes to incorporate calibration when using probabilistic prediction. Comparing the different calibration techniques, Platt scaling and {VennAbers} generally outperform isotonic regression, on these rather small datasets. Finally, the unique ability of Venn-Abers to output not only well-calibrated probability estimates, but also the confidence in these estimates is demonstrated.},
author = {Sweidan, Dirar and Johansson, Ulf},
langid = {english},
file = {Sweidan and Johansson - Probabilistic Prediction in scikit-learn.pdf:/Users/victormylle/Zotero/storage/8LDMB83T/Sweidan and Johansson - Probabilistic Prediction in scikit-learn.pdf:application/pdf},
}
@article{baskan_scenario-based_2023,
title = {A Scenario-Based Model Comparison for Short-Term Day-Ahead Electricity Prices in Times of Economic and Political Tension},
volume = {16},
issn = {1999-4893},
url = {https://www.mdpi.com/1999-4893/16/4/177},
doi = {10.3390/a16040177},
abstract = {In recent years, energy prices have become increasingly volatile, making it more challenging to predict them accurately. This uncertain market trend behavior makes it harder for market participants, e.g., power plant dispatchers, to make reliable decisions. Machine learning ({ML}) has recently emerged as a powerful artificial intelligence ({AI}) technique to get reliable predictions in particularly volatile and unforeseeable situations. This development makes {ML} models an attractive complement to other approaches that require more extensive human modeling effort and assumptions about market mechanisms. This study investigates the application of machine and deep learning approaches to predict day-ahead electricity prices for a 7-day horizon on the German spot market to give power plants enough time to ramp up or down. A qualitative and quantitative analysis is conducted, assessing model performance concerning the forecast horizon and their robustness depending on the selected hyperparameters. For evaluation purposes, three test scenarios with different characteristics are manually chosen. Various models are trained, optimized, and compared with each other using common performance metrics. This study shows that deep learning models outperform tree-based and statistical models despite or because of the volatile energy prices.},
pages = {177},
number = {4},
journaltitle = {Algorithms},
shortjournal = {Algorithms},
author = {Baskan, Denis E. and Meyer, Daniel and Mieck, Sebastian and Faubel, Leonhard and Klöpper, Benjamin and Strem, Nika and Wagner, Johannes A. and Koltermann, Jan J.},
urldate = {2023-10-22},
date = {2023-03-24},
langid = {english},
file = {Baskan et al. - 2023 - A Scenario-Based Model Comparison for Short-Term D.pdf:/Users/victormylle/Zotero/storage/TU5JX5D4/Baskan et al. - 2023 - A Scenario-Based Model Comparison for Short-Term D.pdf:application/pdf},
}
@online{tsaprounis_metrics_2023,
title = {Metrics for Distributional Forecasts},
url = {https://medium.com/trusted-data-science-haleon/metrics-for-distributional-forecasts-60e156c60177},
abstract = {How to evaluate distributional/probabilistic time series forecasts in Python.},
titleaddon = {Trusted Data Science @ Haleon},
author = {Tsaprounis, Leonidas},
urldate = {2023-10-24},
date = {2023-02-27},
langid = {english},
}
@misc{roy_recent_2021,
title = {Recent Trends in Named Entity Recognition ({NER})},
url = {http://arxiv.org/abs/2101.11420},
doi = {10.48550/arXiv.2101.11420},
abstract = {The availability of large amounts of computer-readable textual data and hardware that can process the data has shifted the focus of knowledge projects towards deep learning architecture. Natural Language Processing, particularly the task of Named Entity Recognition is no exception. The bulk of the learning methods that have produced state-of-the-art results have changed the deep learning model, the training method used, the training data itself or the encoding of the output of the {NER} system. In this paper, we review significant learning methods that have been employed for {NER} in the recent past and how they came about from the linear learning methods of the past. We also cover the progress of related tasks that are upstream or downstream to {NER}, e.g., sequence tagging, entity linking, etc., wherever the processes in question have also improved {NER} results.},
number = {{arXiv}:2101.11420},
publisher = {{arXiv}},
author = {Roy, Arya},
urldate = {2023-10-24},
date = {2021-01-25},
eprinttype = {arxiv},
eprint = {2101.11420 [cs]},
keywords = {Computer Science - Computation and Language},
file = {arXiv Fulltext PDF:/Users/victormylle/Zotero/storage/AAZ3I43G/Roy - 2021 - Recent Trends in Named Entity Recognition (NER).pdf:application/pdf;arXiv.org Snapshot:/Users/victormylle/Zotero/storage/DWNPFLCX/2101.html:text/html},
}
@online{noauthor_sentencetransformers_nodate,
title = {{SentenceTransformers} Documentation — Sentence-Transformers documentation},
url = {https://www.sbert.net/},
urldate = {2023-10-29},
file = {SentenceTransformers Documentation — Sentence-Transformers documentation:/Users/victormylle/Zotero/storage/7ZPK2DIZ/www.sbert.net.html:text/html},
}
@online{noauthor_hugging_2023,
title = {Hugging Face – The {AI} community building the future.},
url = {https://huggingface.co/},
abstract = {We're on a journey to advance and democratize artificial intelligence through open source and open science.},
urldate = {2023-10-29},
date = {2023-10-22},
file = {Snapshot:/Users/victormylle/Zotero/storage/8U9I2BD9/huggingface.co.html:text/html},
}
@misc{narayan_regularization_2021,
title = {Regularization Strategies for Quantile Regression},
url = {http://arxiv.org/abs/2102.05135},
abstract = {We investigate different methods for regularizing quantile regression when predicting either a subset of quantiles or the full inverse {CDF}. We show that minimizing an expected pinball loss over a continuous distribution of quantiles is a good regularizer even when only predicting a specific quantile. For predicting multiple quantiles, we propose achieving the classic goal of non-crossing quantiles by using deep lattice networks that treat the quantile as a monotonic input feature, and we discuss why monotonicity on other features is an apt regularizer for quantile regression. We show that lattice models enable regularizing the predicted distribution to a location-scale family. Lastly, we propose applying rate constraints to improve the calibration of the quantile predictions on specific subsets of interest and improve fairness metrics. We demonstrate our contributions on simulations, benchmark datasets, and real quantile regression problems.},
number = {{arXiv}:2102.05135},
publisher = {{arXiv}},
author = {Narayan, Taman and Wang, Serena and Canini, Kevin and Gupta, Maya},
urldate = {2023-11-14},
date = {2021-02-09},
eprinttype = {arxiv},
eprint = {2102.05135 [cs, stat]},
note = {version: 1},
keywords = {Computer Science - Machine Learning, Statistics - Machine Learning, Statistics - Methodology},
file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/DQZGHBIS/2102.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/W6WTUZQ3/Narayan et al. - 2021 - Regularization Strategies for Quantile Regression.pdf:application/pdf},
}
@misc{chung_beyond_2021,
title = {Beyond Pinball Loss: Quantile Methods for Calibrated Uncertainty Quantification},
url = {http://arxiv.org/abs/2011.09588},
shorttitle = {Beyond Pinball Loss},
abstract = {Among the many ways of quantifying uncertainty in a regression setting, specifying the full quantile function is attractive, as quantiles are amenable to interpretation and evaluation. A model that predicts the true conditional quantiles for each input, at all quantile levels, presents a correct and efficient representation of the underlying uncertainty. To achieve this, many current quantile-based methods focus on optimizing the so-called pinball loss. However, this loss restricts the scope of applicable regression models, limits the ability to target many desirable properties (e.g. calibration, sharpness, centered intervals), and may produce poor conditional quantiles. In this work, we develop new quantile methods that address these shortcomings. In particular, we propose methods that can apply to any class of regression model, allow for selecting a trade-off between calibration and sharpness, optimize for calibration of centered intervals, and produce more accurate conditional quantiles. We provide a thorough experimental evaluation of our methods, which includes a high dimensional uncertainty quantification task in nuclear fusion.},
number = {{arXiv}:2011.09588},
publisher = {{arXiv}},
author = {Chung, Youngseog and Neiswanger, Willie and Char, Ian and Schneider, Jeff},
urldate = {2023-12-14},
date = {2021-12-09},
eprinttype = {arxiv},
eprint = {2011.09588 [cs, stat]},
keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/WWFHI3UN/2011.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/SHMRZ3Q7/Chung et al. - 2021 - Beyond Pinball Loss Quantile Methods for Calibrat.pdf:application/pdf},
}
@misc{van_hautte_bad_2019,
title = {Bad Form: Comparing Context-Based and Form-Based Few-Shot Learning in Distributional Semantic Models},
url = {http://arxiv.org/abs/1910.00275},
shorttitle = {Bad Form},
abstract = {Word embeddings are an essential component in a wide range of natural language processing applications. However, distributional semantic models are known to struggle when only a small number of context sentences are available. Several methods have been proposed to obtain higher-quality vectors for these words, leveraging both this context information and sometimes the word forms themselves through a hybrid approach. We show that the current tasks do not suffice to evaluate models that use word-form information, as such models can easily leverage word forms in the training data that are related to word forms in the test data. We introduce 3 new tasks, allowing for a more balanced comparison between models. Furthermore, we show that hyperparameters that have largely been ignored in previous work can consistently improve the performance of both baseline and advanced models, achieving a new state of the art on 4 out of 6 tasks.},
number = {{arXiv}:1910.00275},
publisher = {{arXiv}},
author = {Van Hautte, Jeroen and Emerson, Guy and Rei, Marek},
urldate = {2024-03-09},
date = {2019-10-01},
eprinttype = {arxiv},
eprint = {1910.00275 [cs]},
keywords = {Computer Science - Computation and Language, Computer Science - Machine Learning},
file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/PUBS4DRK/1910.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/VY5YGVXU/Van Hautte et al. - 2019 - Bad Form Comparing Context-Based and Form-Based F.pdf:application/pdf},
}
@misc{decorte_jobbert_2021-1,
title = {{JobBERT}: Understanding Job Titles through Skills},
url = {http://arxiv.org/abs/2109.09605},
shorttitle = {{JobBERT}},
abstract = {Job titles form a cornerstone of today's human resources ({HR}) processes. Within online recruitment, they allow candidates to understand the contents of a vacancy at a glance, while internal {HR} departments use them to organize and structure many of their processes. As job titles are a compact, convenient, and readily available data source, modeling them with high accuracy can greatly benefit many {HR} tech applications. In this paper, we propose a neural representation model for job titles, by augmenting a pre-trained language model with co-occurrence information from skill labels extracted from vacancies. Our {JobBERT} method leads to considerable improvements compared to using generic sentence encoders, for the task of job title normalization, for which we release a new evaluation benchmark.},
number = {{arXiv}:2109.09605},
publisher = {{arXiv}},
author = {Decorte, Jens-Joris and Van Hautte, Jeroen and Demeester, Thomas and Develder, Chris},
urldate = {2024-03-09},
date = {2021-09-20},
eprinttype = {arxiv},
eprint = {2109.09605 [cs]},
keywords = {Computer Science - Computation and Language},
file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/6FMYQ68Y/2109.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/2SD3P252/Decorte et al. - 2021 - JobBERT Understanding Job Titles through Skills.pdf:application/pdf},
}
@misc{van_hautte_leveraging_2020,
title = {Leveraging the Inherent Hierarchy of Vacancy Titles for Automated Job Ontology Expansion},
url = {http://arxiv.org/abs/2004.02814},
abstract = {Machine learning plays an ever-bigger part in online recruitment, powering intelligent matchmaking and job recommendations across many of the world's largest job platforms. However, the main text is rarely enough to fully understand a job posting: more often than not, much of the required information is condensed into the job title. Several organised efforts have been made to map job titles onto a hand-made knowledge base as to provide this information, but these only cover around 60\% of online vacancies. We introduce a novel, purely data-driven approach towards the detection of new job titles. Our method is conceptually simple, extremely efficient and competitive with traditional {NER}-based approaches. Although the standalone application of our method does not outperform a finetuned {BERT} model, it can be applied as a preprocessing step as well, substantially boosting accuracy across several architectures.},
number = {{arXiv}:2004.02814},
publisher = {{arXiv}},
author = {Van Hautte, Jeroen and Schelstraete, Vincent and Wornoo, Mikaël},
urldate = {2024-03-09},
date = {2020-04-06},
eprinttype = {arxiv},
eprint = {2004.02814 [cs]},
keywords = {Computer Science - Computation and Language, Computer Science - Machine Learning},
file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/6FAKZYDM/2004.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/EY3RNC6S/Van Hautte et al. - 2020 - Leveraging the Inherent Hierarchy of Vacancy Title.pdf:application/pdf},
}
@misc{decorte_design_2022,
title = {Design of Negative Sampling Strategies for Distantly Supervised Skill Extraction},
url = {http://arxiv.org/abs/2209.05987},
abstract = {Skills play a central role in the job market and many human resources ({HR}) processes. In the wake of other digital experiences, today's online job market has candidates expecting to see the right opportunities based on their skill set. Similarly, enterprises increasingly need to use data to guarantee that the skills within their workforce remain future-proof. However, structured information about skills is often missing, and processes building on self- or manager-assessment have shown to struggle with issues around adoption, completeness, and freshness of the resulting data. Extracting skills is a highly challenging task, given the many thousands of possible skill labels mentioned either explicitly or merely described implicitly and the lack of finely annotated training corpora. Previous work on skill extraction overly simplifies the task to an explicit entity detection task or builds on manually annotated training data that would be infeasible if applied to a complete vocabulary of skills. We propose an end-to-end system for skill extraction, based on distant supervision through literal matching. We propose and evaluate several negative sampling strategies, tuned on a small validation dataset, to improve the generalization of skill extraction towards implicitly mentioned skills, despite the lack of such implicit skills in the distantly supervised data. We observe that using the {ESCO} taxonomy to select negative examples from related skills yields the biggest improvements, and combining three different strategies in one model further increases the performance, up to 8 percentage points in {RP}@5. We introduce a manually annotated evaluation benchmark for skill extraction based on the {ESCO} taxonomy, on which we validate our models. We release the benchmark dataset for research purposes to stimulate further research on the task.},
number = {{arXiv}:2209.05987},
publisher = {{arXiv}},
author = {Decorte, Jens-Joris and Van Hautte, Jeroen and Deleu, Johannes and Develder, Chris and Demeester, Thomas},
urldate = {2024-03-09},
date = {2022-09-13},
eprinttype = {arxiv},
eprint = {2209.05987 [cs]},
keywords = {Computer Science - Computation and Language},
file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/E79F2EV8/2209.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/SBEAYV66/Decorte et al. - 2022 - Design of Negative Sampling Strategies for Distant.pdf:application/pdf},
}
@misc{decorte_extreme_2023,
title = {Extreme Multi-Label Skill Extraction Training using Large Language Models},
url = {http://arxiv.org/abs/2307.10778},
abstract = {Online job ads serve as a valuable source of information for skill requirements, playing a crucial role in labor market analysis and e-recruitment processes. Since such ads are typically formatted in free text, natural language processing ({NLP}) technologies are required to automatically process them. We specifically focus on the task of detecting skills (mentioned literally, or implicitly described) and linking them to a large skill ontology, making it a challenging case of extreme multi-label classification ({XMLC}). Given that there is no sizable labeled (training) dataset are available for this specific {XMLC} task, we propose techniques to leverage general Large Language Models ({LLMs}). We describe a cost-effective approach to generate an accurate, fully synthetic labeled dataset for skill extraction, and present a contrastive learning strategy that proves effective in the task. Our results across three skill extraction benchmarks show a consistent increase of between 15 to 25 percentage points in \textit{R-Precision@5} compared to previously published results that relied solely on distant supervision through literal matches.},
number = {{arXiv}:2307.10778},
publisher = {{arXiv}},
author = {Decorte, Jens-Joris and Verlinden, Severine and Van Hautte, Jeroen and Deleu, Johannes and Develder, Chris and Demeester, Thomas},
urldate = {2024-03-09},
date = {2023-07-20},
eprinttype = {arxiv},
eprint = {2307.10778 [cs]},
keywords = {Computer Science - Computation and Language},
file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/VVZZQW45/2307.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/8U7P43IE/Decorte et al. - 2023 - Extreme Multi-Label Skill Extraction Training usin.pdf:application/pdf},
}
@misc{decorte_career_2023,
title = {Career Path Prediction using Resume Representation Learning and Skill-based Matching},
url = {http://arxiv.org/abs/2310.15636},
abstract = {The impact of person-job fit on job satisfaction and performance is widely acknowledged, which highlights the importance of providing workers with next steps at the right time in their career. This task of predicting the next step in a career is known as career path prediction, and has diverse applications such as turnover prevention and internal job mobility. Existing methods to career path prediction rely on large amounts of private career history data to model the interactions between job titles and companies. We propose leveraging the unexplored textual descriptions that are part of work experience sections in resumes. We introduce a structured dataset of 2,164 anonymized career histories, annotated with {ESCO} occupation labels. Based on this dataset, we present a novel representation learning approach, {CareerBERT}, specifically designed for work history data. We develop a skill-based model and a text-based model for career path prediction, which achieve 35.24\% and 39.61\% recall@10 respectively on our dataset. Finally, we show that both approaches are complementary as a hybrid approach achieves the strongest result with 43.01\% recall@10.},
number = {{arXiv}:2310.15636},
publisher = {{arXiv}},
author = {Decorte, Jens-Joris and Van Hautte, Jeroen and Deleu, Johannes and Develder, Chris and Demeester, Thomas},
urldate = {2024-03-09},
date = {2023-10-24},
eprinttype = {arxiv},
eprint = {2310.15636 [cs]},
keywords = {Computer Science - Computation and Language, Computer Science - Artificial Intelligence},
file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/I6AMKGVA/2310.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/AKTKCWBR/Decorte et al. - 2023 - Career Path Prediction using Resume Representation.pdf:application/pdf},
}
@online{noauthor_liberalised_nodate,
title = {The liberalised electricity market includes many parties who all have to work together and at the same time try to make a profit. An overview of the most...},
url = {https://www.next-kraftwerke.be/en/knowledge-hub/players-in-the-belgian-power-market/},
abstract = {The liberalised electricity market includes many parties who all have to work together and at the same time try to make a profit. An overview of the most...},
urldate = {2024-03-20},
file = {Snapshot:/Users/victormylle/Zotero/storage/M9XWVY6F/players-in-the-belgian-power-market.html:text/html},
}
@misc{ho_denoising_2020,
title = {Denoising Diffusion Probabilistic Models},
url = {http://arxiv.org/abs/2006.11239},
doi = {10.48550/arXiv.2006.11239},
abstract = {We present high quality image synthesis results using diffusion probabilistic models, a class of latent variable models inspired by considerations from nonequilibrium thermodynamics. Our best results are obtained by training on a weighted variational bound designed according to a novel connection between diffusion probabilistic models and denoising score matching with Langevin dynamics, and our models naturally admit a progressive lossy decompression scheme that can be interpreted as a generalization of autoregressive decoding. On the unconditional {CIFAR}10 dataset, we obtain an Inception score of 9.46 and a state-of-the-art {FID} score of 3.17. On 256x256 {LSUN}, we obtain sample quality similar to {ProgressiveGAN}. Our implementation is available at https://github.com/hojonathanho/diffusion},
number = {{arXiv}:2006.11239},
publisher = {{arXiv}},
author = {Ho, Jonathan and Jain, Ajay and Abbeel, Pieter},
urldate = {2024-04-02},
date = {2020-12-16},
eprinttype = {arxiv},
eprint = {2006.11239 [cs, stat]},
keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
file = {arXiv Fulltext PDF:/Users/victormylle/Zotero/storage/CYMHCMUT/Ho et al. - 2020 - Denoising Diffusion Probabilistic Models.pdf:application/pdf;arXiv.org Snapshot:/Users/victormylle/Zotero/storage/CE8R84V5/2006.html:text/html},
}
@inproceedings{dumas_probabilistic_2019,
title = {Probabilistic Forecasting of Imbalance Prices in the Belgian Context},
url = {http://arxiv.org/abs/2106.07361},
doi = {10.1109/EEM.2019.8916375},
abstract = {Forecasting imbalance prices is essential for strategic participation in the short-term energy markets. A novel two-step probabilistic approach is proposed, with a particular focus on the Belgian case. The first step consists of computing the net regulation volume state transition probabilities. It is modeled as a matrix computed using historical data. This matrix is then used to infer the imbalance prices since the net regulation volume can be related to the level of reserves activated and the corresponding marginal prices for each activation level are published by the Belgian Transmission System Operator one day before electricity delivery. This approach is compared to a deterministic model, a multi-layer perceptron, and a widely used probabilistic technique, Gaussian Processes.},
pages = {1--7},
booktitle = {2019 16th International Conference on the European Energy Market ({EEM})},
author = {Dumas, Jonathan and Boukas, Ioannis and de Villena, Miguel Manuel and Mathieu, Sébastien and Cornélusse, Bertrand},
urldate = {2024-04-17},
date = {2019-09},
eprinttype = {arxiv},
eprint = {2106.07361 [cs, eess, q-fin]},
keywords = {Computer Science - Machine Learning, Electrical Engineering and Systems Science - Signal Processing, Quantitative Finance - Statistical Finance},
file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/3N56FPYP/2106.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/958MBH5M/Dumas et al. - 2019 - Probabilistic Forecasting of Imbalance Prices in t.pdf:application/pdf},
}
@online{noauthor_ghent_nodate,
title = {Ghent University: Master of Science in Computer Science Engineering},
url = {https://studiekiezer.ugent.be/2024/master-of-science-in-computer-science-engineering-en},
urldate = {2024-04-17},
file = {master-of-science-in-computer-science-engineering-en:/Users/victormylle/Zotero/storage/JCELQ9VV/master-of-science-in-computer-science-engineering-en.html:text/html},
}
@article{gunduz_transfer_2023,
title = {Transfer learning for electricity price forecasting},
volume = {34},
issn = {2352-4677},
url = {https://www.sciencedirect.com/science/article/pii/S2352467723000048},
doi = {10.1016/j.segan.2023.100996},
abstract = {Electricity price forecasting is an essential task in all the deregulated markets of the world. The accurate prediction of day-ahead electricity prices is an active research field and available data from various markets can be used as input for forecasting. A collection of models have been proposed for this task, but the fundamental question on how to use the available big data is often neglected. In this paper, we propose to use transfer learning as a tool for utilizing information from other electricity price markets for forecasting. We pre-train a neural network model on source markets and finally do a fine-tuning for the target market. Moreover, we test different ways to use the rich input data from various electricity price markets to forecast 24 steps ahead in hourly frequency. Our experiments on four different day-ahead markets indicate that transfer learning improves the electricity price forecasting performance in a statistically significant manner. Furthermore, we compare our results with state-of-the-art methods in a rolling window scheme to demonstrate the performance of the transfer learning approach. Our method improves the performance of the state-of-the-art algorithms by 7\% for the French market and 3\% for the German market.},
pages = {100996},
journaltitle = {Sustainable Energy, Grids and Networks},
shortjournal = {Sustainable Energy, Grids and Networks},
author = {Gunduz, Salih and Ugurlu, Umut and Oksuz, Ilkay},
urldate = {2024-04-17},
date = {2023-06-01},
keywords = {Artificial neural networks, Electricity price forecasting, Market integration, Transfer learning},
file = {ScienceDirect Snapshot:/Users/victormylle/Zotero/storage/BWI5FHS4/S2352467723000048.html:text/html;Submitted Version:/Users/victormylle/Zotero/storage/62FHBWJ8/Gunduz et al. - 2023 - Transfer learning for electricity price forecastin.pdf:application/pdf},
}
@article{lago_forecasting_2018,
title = {Forecasting spot electricity prices: Deep learning approaches and empirical comparison of traditional algorithms},
volume = {221},
issn = {0306-2619},
url = {https://www.sciencedirect.com/science/article/pii/S030626191830196X},
doi = {10.1016/j.apenergy.2018.02.069},
shorttitle = {Forecasting spot electricity prices},
abstract = {In this paper, a novel modeling framework for forecasting electricity prices is proposed. While many predictive models have been already proposed to perform this task, the area of deep learning algorithms remains yet unexplored. To fill this scientific gap, we propose four different deep learning models for predicting electricity prices and we show how they lead to improvements in predictive accuracy. In addition, we also consider that, despite the large number of proposed methods for predicting electricity prices, an extensive benchmark is still missing. To tackle that, we compare and analyze the accuracy of 27 common approaches for electricity price forecasting. Based on the benchmark results, we show how the proposed deep learning models outperform the state-of-the-art methods and obtain results that are statistically significant. Finally, using the same results, we also show that: (i) machine learning methods yield, in general, a better accuracy than statistical models; (ii) moving average terms do not improve the predictive accuracy; (iii) hybrid models do not outperform their simpler counterparts.},
pages = {386--405},
journaltitle = {Applied Energy},
shortjournal = {Applied Energy},
author = {Lago, Jesus and De Ridder, Fjo and De Schutter, Bart},
urldate = {2024-04-17},
date = {2018-07-01},
keywords = {Deep learning, Electricity price forecasting, Benchmark study},
file = {Full Text:/Users/victormylle/Zotero/storage/SZAAF5RK/Lago et al. - 2018 - Forecasting spot electricity prices Deep learning.pdf:application/pdf;ScienceDirect Snapshot:/Users/victormylle/Zotero/storage/5JH9JLSM/S030626191830196X.html:text/html},
}
@article{weron_electricity_2014,
title = {Electricity price forecasting: A review of the state-of-the-art with a look into the future},
volume = {30},
issn = {0169-2070},
url = {https://www.sciencedirect.com/science/article/pii/S0169207014001083},
doi = {10.1016/j.ijforecast.2014.08.008},
shorttitle = {Electricity price forecasting},
abstract = {A variety of methods and ideas have been tried for electricity price forecasting ({EPF}) over the last 15 years, with varying degrees of success. This review article aims to explain the complexity of available solutions, their strengths and weaknesses, and the opportunities and threats that the forecasting tools offer or that may be encountered. The paper also looks ahead and speculates on the directions {EPF} will or should take in the next decade or so. In particular, it postulates the need for objective comparative {EPF} studies involving (i) the same datasets, (ii) the same robust error evaluation procedures, and (iii) statistical testing of the significance of one model's outperformance of another.},
pages = {1030--1081},
number = {4},
journaltitle = {International Journal of Forecasting},
shortjournal = {International Journal of Forecasting},
author = {Weron, Rafał},
urldate = {2024-05-02},
date = {2014-10-01},
keywords = {Autoregression, Day-ahead market, Electricity price forecasting, Factor model, Forecast combination, Neural network, Probabilistic forecast, Seasonality},
file = {ScienceDirect Snapshot:/Users/victormylle/Zotero/storage/DDGF263F/S0169207014001083.html:text/html},
}
@article{poggi_electricity_2023,
title = {Electricity Price Forecasting via Statistical and Deep Learning Approaches: The German Case},
volume = {3},
rights = {http://creativecommons.org/licenses/by/3.0/},
issn = {2673-9909},
url = {https://www.mdpi.com/2673-9909/3/2/18},
doi = {10.3390/appliedmath3020018},
shorttitle = {Electricity Price Forecasting via Statistical and Deep Learning Approaches},
abstract = {Our research involves analyzing the latest models used for electricity price forecasting, which include both traditional inferential statistical methods and newer deep learning techniques. Through our analysis of historical data and the use of multiple weekday dummies, we have proposed an innovative solution for forecasting electricity spot prices. This solution involves breaking down the spot price series into two components: a seasonal trend component and a stochastic component. By utilizing this approach, we are able to provide highly accurate predictions for all considered time frames.},
pages = {316--342},
number = {2},
journaltitle = {{AppliedMath}},
author = {Poggi, Aurora and Di Persio, Luca and Ehrhardt, Matthias},
urldate = {2024-05-02},
date = {2023-06},
langid = {english},
note = {Number: 2
Publisher: Multidisciplinary Digital Publishing Institute},
keywords = {autoregressive, deep learning, electricity price forecasting, machine learning, neural network, statistical method, univariate model},
file = {Full Text PDF:/Users/victormylle/Zotero/storage/3IR29RU3/Poggi et al. - 2023 - Electricity Price Forecasting via Statistical and .pdf:application/pdf},
}

View File

@@ -0,0 +1,192 @@
\relax
\providecommand\hyper@newdestlabel[2]{}
\@writefile{toc}{\contentsline {section}{\numberline {2}Electricity market}{3}{section.2}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Overview of the most important parties in the electricity market\relax }}{3}{table.caption.1}\protected@file@percent }
\providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}}
\newlabel{tab:parties}{{1}{3}{Overview of the most important parties in the electricity market\relax }{table.caption.1}{}}
\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Prices paid by the BRPs\relax }}{5}{table.caption.2}\protected@file@percent }
\newlabel{tab:imbalance_price}{{2}{5}{Prices paid by the BRPs\relax }{table.caption.2}{}}
\@writefile{toc}{\contentsline {section}{\numberline {3}Generative modeling}{5}{section.3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Quantile Regression}{6}{subsection.3.1}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Example of quantiles\relax }}{6}{figure.caption.3}\protected@file@percent }
\newlabel{fig:quantile_example}{{1}{6}{Example of quantiles\relax }{figure.caption.3}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Example of quantile regression output for one-quarter of the NRV, showing interpolated values for quantiles at 1\%, 5\%, 10\%, 15\%, 30\%, 40\%, 50\%, 60\%, 70\%, 85\%, 90\%, 95\%, and 99\%. These quantiles are used to reconstruct the cumulative distribution function.\relax }}{7}{figure.caption.4}\protected@file@percent }
\newlabel{fig:quantile_regression_example}{{2}{7}{Example of quantile regression output for one-quarter of the NRV, showing interpolated values for quantiles at 1\%, 5\%, 10\%, 15\%, 30\%, 40\%, 50\%, 60\%, 70\%, 85\%, 90\%, 95\%, and 99\%. These quantiles are used to reconstruct the cumulative distribution function.\relax }{figure.caption.4}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Autoregressive vs Non-Autoregressive models}{8}{subsection.3.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {3.3}Model Types}{9}{subsection.3.3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.3.1}Linear Model}{9}{subsubsection.3.3.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.3.2}Non-Linear Model}{10}{subsubsection.3.3.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.3.3}Recurrent Neural Network (RNN)}{10}{subsubsection.3.3.3}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces RNN model input and output visualization\relax }}{11}{figure.caption.5}\protected@file@percent }
\newlabel{fig:rnn_model_visualization}{{3}{11}{RNN model input and output visualization\relax }{figure.caption.5}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.4}Diffusion models}{11}{subsection.3.4}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.4.1}Overview}{12}{subsubsection.3.4.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.4.2}Applications}{12}{subsubsection.3.4.2}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Example of the diffusion process. The image of a cat is generated by starting from noise and iteratively denoising the image.\relax }}{12}{figure.caption.6}\protected@file@percent }
\newlabel{fig:diffusion_example}{{4}{12}{Example of the diffusion process. The image of a cat is generated by starting from noise and iteratively denoising the image.\relax }{figure.caption.6}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.4.3}Generation process}{12}{subsubsection.3.4.3}\protected@file@percent }
\newlabel{fig:diffusion_process}{{\caption@xref {fig:diffusion_process}{ on input line 281}}{14}{Generation process}{figure.caption.7}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.5}Evaluation}{14}{subsection.3.5}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Visualization of the CRPS metric\relax }}{16}{figure.caption.8}\protected@file@percent }
\newlabel{fig:crps_visualization}{{5}{16}{Visualization of the CRPS metric\relax }{figure.caption.8}{}}
\@setckpt{sections/background}{
\setcounter{page}{17}
\setcounter{equation}{7}
\setcounter{enumi}{0}
\setcounter{enumii}{0}
\setcounter{enumiii}{0}
\setcounter{enumiv}{0}
\setcounter{footnote}{0}
\setcounter{mpfootnote}{0}
\setcounter{part}{0}
\setcounter{section}{3}
\setcounter{subsection}{5}
\setcounter{subsubsection}{0}
\setcounter{paragraph}{0}
\setcounter{subparagraph}{0}
\setcounter{figure}{5}
\setcounter{table}{2}
\setcounter{parentequation}{0}
\setcounter{float@type}{4}
\setcounter{caption@flags}{2}
\setcounter{continuedfloat}{0}
\setcounter{subfigure}{0}
\setcounter{subtable}{0}
\setcounter{tabx@nest}{0}
\setcounter{listtotal}{0}
\setcounter{listcount}{0}
\setcounter{liststart}{0}
\setcounter{liststop}{0}
\setcounter{citecount}{0}
\setcounter{citetotal}{0}
\setcounter{multicitecount}{0}
\setcounter{multicitetotal}{0}
\setcounter{instcount}{0}
\setcounter{maxnames}{2}
\setcounter{minnames}{1}
\setcounter{maxitems}{999}
\setcounter{minitems}{1}
\setcounter{citecounter}{0}
\setcounter{maxcitecounter}{0}
\setcounter{savedcitecounter}{0}
\setcounter{uniquelist}{0}
\setcounter{uniquename}{0}
\setcounter{refsection}{0}
\setcounter{refsegment}{0}
\setcounter{maxextratitle}{0}
\setcounter{maxextratitleyear}{0}
\setcounter{maxextraname}{0}
\setcounter{maxextradate}{0}
\setcounter{maxextraalpha}{0}
\setcounter{abbrvpenalty}{50}
\setcounter{highnamepenalty}{50}
\setcounter{lownamepenalty}{25}
\setcounter{maxparens}{3}
\setcounter{parenlevel}{0}
\setcounter{blx@maxsection}{0}
\setcounter{mincomprange}{10}
\setcounter{maxcomprange}{100000}
\setcounter{mincompwidth}{1}
\setcounter{afterword}{0}
\setcounter{savedafterword}{0}
\setcounter{annotator}{0}
\setcounter{savedannotator}{0}
\setcounter{author}{0}
\setcounter{savedauthor}{0}
\setcounter{bookauthor}{0}
\setcounter{savedbookauthor}{0}
\setcounter{commentator}{0}
\setcounter{savedcommentator}{0}
\setcounter{editor}{0}
\setcounter{savededitor}{0}
\setcounter{editora}{0}
\setcounter{savededitora}{0}
\setcounter{editorb}{0}
\setcounter{savededitorb}{0}
\setcounter{editorc}{0}
\setcounter{savededitorc}{0}
\setcounter{foreword}{0}
\setcounter{savedforeword}{0}
\setcounter{holder}{0}
\setcounter{savedholder}{0}
\setcounter{introduction}{0}
\setcounter{savedintroduction}{0}
\setcounter{namea}{0}
\setcounter{savednamea}{0}
\setcounter{nameb}{0}
\setcounter{savednameb}{0}
\setcounter{namec}{0}
\setcounter{savednamec}{0}
\setcounter{translator}{0}
\setcounter{savedtranslator}{0}
\setcounter{shortauthor}{0}
\setcounter{savedshortauthor}{0}
\setcounter{shorteditor}{0}
\setcounter{savedshorteditor}{0}
\setcounter{narrator}{0}
\setcounter{savednarrator}{0}
\setcounter{execproducer}{0}
\setcounter{savedexecproducer}{0}
\setcounter{execdirector}{0}
\setcounter{savedexecdirector}{0}
\setcounter{with}{0}
\setcounter{savedwith}{0}
\setcounter{labelname}{0}
\setcounter{savedlabelname}{0}
\setcounter{institution}{0}
\setcounter{savedinstitution}{0}
\setcounter{lista}{0}
\setcounter{savedlista}{0}
\setcounter{listb}{0}
\setcounter{savedlistb}{0}
\setcounter{listc}{0}
\setcounter{savedlistc}{0}
\setcounter{listd}{0}
\setcounter{savedlistd}{0}
\setcounter{liste}{0}
\setcounter{savedliste}{0}
\setcounter{listf}{0}
\setcounter{savedlistf}{0}
\setcounter{location}{0}
\setcounter{savedlocation}{0}
\setcounter{organization}{0}
\setcounter{savedorganization}{0}
\setcounter{origlocation}{0}
\setcounter{savedoriglocation}{0}
\setcounter{origpublisher}{0}
\setcounter{savedorigpublisher}{0}
\setcounter{publisher}{0}
\setcounter{savedpublisher}{0}
\setcounter{language}{0}
\setcounter{savedlanguage}{0}
\setcounter{origlanguage}{0}
\setcounter{savedoriglanguage}{0}
\setcounter{citation}{0}
\setcounter{savedcitation}{0}
\setcounter{pageref}{0}
\setcounter{savedpageref}{0}
\setcounter{textcitecount}{0}
\setcounter{textcitetotal}{0}
\setcounter{textcitemaxnames}{0}
\setcounter{biburlbigbreakpenalty}{100}
\setcounter{biburlbreakpenalty}{200}
\setcounter{biburlnumpenalty}{0}
\setcounter{biburlucpenalty}{0}
\setcounter{biburllcpenalty}{0}
\setcounter{smartand}{1}
\setcounter{bbx:relatedcount}{0}
\setcounter{bbx:relatedtotal}{0}
\setcounter{section@level}{0}
\setcounter{Item}{0}
\setcounter{Hfootnote}{0}
\setcounter{bookmark@seq@number}{14}
\setcounter{g@acro@QR@int}{0}
\setcounter{g@acro@AQR@int}{0}
\setcounter{g@acro@NAQR@int}{0}
\setcounter{g@acro@MSE@int}{0}
\setcounter{g@acro@MAE@int}{0}
\setcounter{g@acro@CRPS@int}{0}
\setcounter{g@acro@NRV@int}{0}
\setcounter{g@acro@PV@int}{0}
\setcounter{g@acro@NP@int}{0}
}

View File

@@ -0,0 +1,328 @@
% Achtergrond informatie
% Generatief modelleren
% -> enkel forecast is vaak brak -> reinforcement learning is lastig -> generatief modelleren, veel generaties om mee te trainen
% - Achtergrond electrititetismarkt
% - Achtergrond Generatief modelleren (van NRV)
% - TODO: Achtergrond RNN?
\section{Electricity market}
The electricity market consists of many different parties who all work together and want to make a profit in the end. An overview of the most important parties can be found in Table \ref{tab:parties}.
% table
\begin{table}[h]
\centering
\begin{tabularx}{\textwidth}{|C|C|}
\hline
\textbf{Party} & \textbf{Description} \\
\hline
Producers & Generate electricity. The electricity can be generated using coal, nuclear energy, wind parks etc. \\
\hline
Consumers & Uses electricity. This can be normal households, companies but also industry. \\
\hline
Transmission system operator (TSO) & Party responsible for reliable transmission of electricity from generation plants to local distribution networks. This is done over the high-voltage grid. In Belgium, this party is Elia.\\
\hline
Distribution system operator (DSO) & Party responsible for the distribution of electricity to the end users. Here, the electricity is transported over the low-voltage grid. \\
\hline
Balance responsible party (BRP) & These parties forecast the electricity consumption and generation of their clients. They make balanced nominations to Elia.
\\
\hline
Balancing Service Provider (BSP) & Parties that provide the TSO (Elia) with balancing services. They submit Balancing Energy Bids to Elia. If needed, they will provide balancing energy at a set price. \\
\hline
\end{tabularx}
\caption{Overview of the most important parties in the electricity market}
\label{tab:parties}
\end{table}
Elia, the Transmission system operator (TSO) in Belgium, is responsible for keeping the grid stable. They do this by balancing the electricity consumption and generation. If there is an imbalance, Elia will use reserves to balance the grid. These reserves are expensive and are paid for by the market participants. The price paid for the activation of these reserves is called the imbalance price. Keeping the grid balanced is a very important but also a very difficult task. If the grid is not balanced, this can lead to blackouts as well as other problems such as damage to equipment.
Balance responsible parties (BRPs) forecast the electricity consumption and generation of their portfolio to effectively manage the balance between supply and demand within the grid they operate in. They submit a daily balance schedule for their portfolio the day before to the transmission system operator. This consists of the expected physical injections and offtakes from the grid and the commercial power trades. The power trades can be purchases and sales between BRPs or they can even be trades with other countries. BRPs must provide and deploy all reasonable resources to be balanced on a quarter-hourly basis. They can exchange electricity with other BRPs for the following day or the same day. There is one exception where a BRP can deviate from the balance schedule. This is when the grid is not balanced and they can help Elia to stabilize the grid. In this case, they will receive a compensation for their help. When a BRP deviates from the balance schedule in a way that destabilizes the grid, it will need to pay the imbalance price for the deviation.
The imbalance price is determined based on which reserves Elia needs to activate to stabilize the grid. The imbalance of a BRP is the quarter-hourly difference between total injections and offtakes from the grid. The Net Regulation Volume (NRV) is the net control volume of energy that Elia applies to maintain balance in the Elia control area. The Area Control Error is the current difference between the scheduled values and the actual values of power exchanged in the Belgian control area. The imbalance of the system (SI) is the Area Control Error minus the NRV. Using the System Imbalance, the imbalance price is calculated.
Elia, the Transmission System Operator (TSO) in Belgium, maintains grid stability by activating three types of reserves, each designed to address specific conditions of imbalance. These reserves are crucial for ensuring that the electricity supply continuously meets the demand, thereby maintaining the frequency within the required operational limits. The reserves include:
1) \textbf{Frequency Containment Reserve (FCR)} \\
FCR is a reserve that responds automatically to frequency deviations in the grid. The reserve responds automatically in seconds and provides a proportional response to the frequency deviation. Elia must provide a minimal share of this volume within the Belgian control area. This type of volume can also be offered by the BSPs.
2) \textbf{Automatic Frequency Restoration Reserve (aFRR)} \\
aFRR is the second reserve that Elia can activate to restore the frequency to 50Hz. The aFRR is activated when the FCR is not sufficient to restore the frequency. Every 4 seconds, Elia sends a set-point to the BSPs. The BSPs use this set-point to adjust their production or consumption. The BSPs have a 7.5-minute window to activate the full requested energy volume.
3) \textbf{Manual Frequency Restoration Reserve (mFRR)} \\
Sometimes the FCR and aFRR are not enough to restore the imbalance between generation and consumption. Elia activates the mFRR manually and the requested energy volume is to be activated in 15 minutes.
The order in which the reserves are activated is as follows: FCR, aFRR and mFRR. BSPs provide bids for the aFRR and mFRR volumes. The provided bids consist of the type (aFRR or mFRR), bid volume (MW), bid price (per MWh) and start price (per MWh).
The start price is used to cover the costs of starting a unit.
Elia selects the bids based on the order of activation and then the price. The highest marginal price paid for upward or downward activation determines the imbalance price. This means that the last bid that is activated determines the imbalance price. This price is paid by the BRPs that are not balanced. The imbalance price calculation is shown in Table \ref{tab:imbalance_price}.
\begin{table}[h]
\centering
\begin{tabular}{|c|c|c|}
\hline
& \multicolumn{2}{c|}{\textbf{System Imbalance}} \\
\cline{2-3}
\textbf{Imbalance of the balance responsible party} & \textbf{Positive} & \textbf{Negative or zero} \\
\hline
\textbf{Positive} & MDP - \(\alpha\) & MIP + \(\alpha\) \\
\hline
\textbf{Negative} & MDP - \(\alpha\) & MIP + \(\alpha\) \\
\hline
\end{tabular}
\caption{Prices paid by the BRPs}
\label{tab:imbalance_price}
\end{table}
The imbalance price calculation includes the following variables: \\
- MDP: Marginal price of downward activation \\
- MIP: Marginal price of upward activation \\
- \(\alpha\): Extra parameter dependent on System Imbalance \\
\\
TODO: Add more information about the imbalance price calculation, alpha?
The imbalance price can be reconstructed given the bids of a certain quarter/day and the System Imbalance. During this thesis, the system imbalance is assumed to be almost the same as the Net Regulation Volume. This is a simplification but it is a good approximation. The goal of this thesis is to model the Net Regulation Volume which can then be used to reconstruct the imbalance price and to make decisions on when to buy or sell electricity.
\section{Generative modeling}
Simple forecasting of the NRV is often not accurate and defining a policy using this forecast will lead to wrong decisions. A better method would be to try to model the NRV and sample multiple generations of the NRV for a whole day. This can give a better understanding of the uncertainty of the NRV. Better decisions can then be made based on multiple generations of the NRV.
Generative modeling is a type of machine learning that is used to generate new data samples that look like the training data. The goal of generative modeling is to learn the true data distribution and use this distribution to generate new samples. Generative modeling is used in many different fields including image generation, text generation etc.
In this thesis, generative modeling can be used to model the NRV of the Belgian electricity market using different conditional input features like the weather, the load forecast etc. The model can then be used to generate new samples of the NRV.
There exist many different types of generative models. Some of the most popular ones are:
\begin{itemize}
\item Generative Adversarial Networks (GANs)
\item Variational Autoencoders (VAEs)
\item Normalizing Flows
\item Diffusion models
\end{itemize}
\subsection{Quantile Regression}
Another method can be used to use any feedforward neural network as a generative model. This method is called quantile regression. This method enables the model to output values to reconstruct the distribution of the target variable instead of a single value for a quarter. This distribution can then be used to sample the NRV value for a quarter. The sampling allows for multiple full-day generations of the NRV.
When quantile regression is used, the model outputs the values for multiple quantiles for the target value of a certain quarter. A quantile is a statistical value of a random variable below which a certain proportion of observations fall. Figure \ref{fig:quantile_example} shows the cumulative distribution function of a normal distribution. The figure shows the 25\%, 50\% and 75\% quantiles. The 25\% quantile is the value below which 25\% of the observations fall. In the example, this value is $-0.67$. The other quantiles work in the same way.
\begin{figure}[H]
\centering
\includegraphics[width=0.8\textwidth]{images/quantile_regression/cdf_quantiles_example.png}
\caption{Example of quantiles}
\label{fig:quantile_example}
\end{figure}
Using the outputted quantiles, the cumulative distribution function can be reconstructed and used to sample the NRV value for the quarter to predict. An example of the output of a quantile regression model is shown in Figure \ref{fig:quantile_regression_example}. The output values of the different quantiles are plotted and these are interpolated to get the cumulative distribution function. In this thesis, the quantiles used are 1\%, 5\%, 10\%, 15\%, 30\%, 40\%, 50\%, 60\%, 70\%, 85\%, 90\%, 95\%, and 99\%. These are chosen to get a good approximation of the cumulative distribution function. More quantiles at the tails of the distribution are used because the edges of the distribution are more important for the imbalance price calculation. The outputted quantile values are then interpolated using a cubic spline and samples can be drawn from the reconstructed cumulative distribution function.
TODO: figure goes under 0, maybe use other values or other interpolation? + inverse the values to real values
\begin{figure}[H]
\centering
\includegraphics[width=0.8\textwidth]{images/quantile_regression/reconstructed_cdf.png}
\caption{Example of quantile regression output for one-quarter of the NRV, showing interpolated values for quantiles at 1\%, 5\%, 10\%, 15\%, 30\%, 40\%, 50\%, 60\%, 70\%, 85\%, 90\%, 95\%, and 99\%. These quantiles are used to reconstruct the cumulative distribution function.}
\label{fig:quantile_regression_example}
\end{figure}
The NRV value for a quarter can be sampled from the reconstructed cumulative distribution function. A full-day prediction for the NRV consists of 96 values. This means 96 cumulative distributions need to be reconstructed and samples need to be drawn from each of the distributions.
The quantile regression model is trained using the pinball loss function, also known as the quantile loss. The model outputs the quantile values for the NRV. The quantile values themselves are not available in the training data. Only the real NRV values are known. The loss function is defined as:
\begin{equation}
L_\tau(y, \hat{y}) = \begin{cases}
\tau(y - \hat{y}) & \text{if } y \geq \hat{y} \\
(1 - \tau)(\hat{y} - y) & \text{if } y < \hat{y}
\end{cases}
\end{equation}
\begin{align*}
\textbf{Where:} \\
\tau & = \text{Quantile of interest} \\
y & = \text{Actual observed value of NRV} \\
\hat{y} & = \text{Predicted quantile value of NRV} \\
\end{align*}
The loss function works by penalizing underestimation and overestimation differently. When a quantile is predicted that is lower than or equal to the actual value, the loss is calculated as the difference between the actual value and the predicted quantile value multiplied by the quantile of interest. This means that underestimations for high quantiles are penalized higher than for lower quantiles.
When the quantile value prediction is higher than the real NRV value, the loss is calculated as the difference between the predicted quantile value and the real NRV multiplied by $(1-\tau)$. This means that overestimations are penalized less for high quantiles of interest.
\begin{equation}
L = \frac{1}{N \, |T|} \sum_{i=1}^{N} \sum_{\tau \in T} L_\tau(y_i, \hat{y}_{i,\tau})
\end{equation}
\begin{align*}
\textbf{Where:} \\
N & = \text{Number of samples} \\
T & = \text{Set of quantiles of interest} \\
y_i & = \text{Actual observed value of NRV for sample } i \\
\hat{y}_{i,\tau} & = \text{Predicted value of quantile } \tau \text{ of the NRV for sample } i \\
\end{align*}
To calculate the pinball loss, the mean over the quantiles of interest and the samples needs to be taken. This gives a scalar loss value which can be used to do backpropagation. The lower this value, the better the NRV distribution is modeled.
\subsection{Autoregressive vs Non-Autoregressive models}
Two types of generative models exist: autoregressive and non-autoregressive models. Autoregressive models generate samples by sampling from the model one step at a time. The model generates the next value based on the previous values. This means that the model generates samples sequentially. Non-autoregressive models, on the other hand, generate samples in one step. The model generates the whole sample, consisting of multiple values, at once. This means that the model can generate samples in parallel, which is much faster than with autoregressive models. The downside of non-autoregressive models is that the model itself is more complex and harder to train. It needs to predict all values at once which can be harder than predicting one value at a time.
The quantile regression method can be used with both types of models. The autoregressive model will only output the quantiles for the next quarter based on the given input features. The cumulative distribution function can be reconstructed from these and be used to sample the NRV value. To obtain a full-day sample, the model needs to be run 96 times sequentially. The sample for the next quarter depends on the sample of the previous quarter.
The non-autoregressive model will output the quantiles for all quarters of the day based on the input features. The cumulative distribution functions all need to be reconstructed and samples can be drawn from each of the distributions. When sampling from the distributions at once, the samples are independent of each other. The sample for the next quarter does not depend on the sample of the previous quarter which can result in some unrealistic samples.
The input features for autoregressive and non-autoregressive also differ. When forecasted features are used, the autoregressive model only uses the forecasted values for the next quarter while the non-autoregressive model uses the forecasted values for all quarters of the day. In theory, the autoregressive model should also be able to use forecasted values for quarters further in the future but this makes it harder to use in practice. When the last quarter of a day needs to be predicted, the forecasted values for the next day are needed which are not available. For simplicity, during this thesis, the autoregressive model will only be provided with the forecasted values for the next quarter.
\subsection{Model Types}
\subsubsection{Linear Model}
A simple linear model can be used as a baseline against which the more complex models are compared. This model assumes a linear relation exists between the input features and the output. The relationship is modeled using the following formula:
\begin{equation}
y = \beta_0 + \beta_1 x_1 + \beta_2 x_2 + ... + \beta_n x_n
\end{equation}
\begin{align*}
\textbf{Where:} \\
y & = \text{Output value} \\
\beta_0 & = \text{Intercept} \\
\beta_1, ..., \beta_n & = \text{Coefficients} \\
x_1, ..., x_n & = \text{Input features} \\
\end{align*}
This model needs to be adapted to be used for quantile regression. The model needs to output the quantiles for the target value. This can be done by training a separate linear model for each of the quantiles. The model can be trained using the pinball loss function. The number of parameters in this model is quite low which makes it easier and faster to train. The downside of this model is that it is very simple and might not be able to capture the complexity of the data. The number of parameters of this model is $\text{number of quantiles} \times (\text{number of input features} + 1)$.
\begin{equation}
\hat{y}_\tau = \beta_{0, \tau} + \beta_{1, \tau} x_1 + \beta_{2, \tau} x_2 + ... + \beta_{n, \tau} x_n
\end{equation}
\begin{align*}
\textbf{Where:} \\
\tau & = \text{Quantile of interest} \\
\hat{y}_\tau & = \text{Predicted quantile value for the target value} \\
\beta_{0, \tau} & = \text{Intercept for the quantile of interest} \\
\beta_{1, \tau}, ..., \beta_{n, \tau} & = \text{Coefficients for the quantile of interest} \\
x_1, ..., x_n & = \text{Input features} \\
\end{align*}
\subsubsection{Non-Linear Model}
A more complex model can be used to model the NRV. A feedforward neural network with multiple hidden layers and activation functions can be used. This model can then capture the non-linear relationships between the input features and the output. This model has more parameters and is harder to train than the linear model. The non-linear model also has some hyperparameters that need to be chosen like the number of hidden layers, the number of neurons in each layer, the activation function etc. The model can be trained to output the quantiles for the NRV based on the input features. The same pinball loss function can be used to train the model.
\subsubsection{Recurrent Neural Network (RNN)}
Another more complex model that can be used is a Recurrent Neural Network (RNN). The RNN can be used to model the NRV data because of the sequential nature of the input features. The RNN keeps a hidden state that is updated at every time step using the new input data. The hidden state contains information about the previous time steps and can be used to make predictions for the next time step. These models are used in multiple fields like natural language processing, time series forecasting etc.
The RNN model can be used to model the NRV data. The input features are structured in a way that the model can learn the sequential patterns in the data. The model can be trained to output the quantiles for the NRV based on the input features using the pinball loss function.
Multiple types of RNN models exist. The two most common types of RNNs are the Long Short-Term Memory (LSTM) and the Gated Recurrent Unit (GRU). The GRU is a simpler version of the LSTM. The GRU has fewer parameters which results in faster training times. The GRU still can capture long-term dependencies in the data and can achieve similar performance to the LSTM. The GRU model has two gates, the reset gate and the update gate. The reset gate determines how much of the past information to forget, and the update gate determines how much of the new information to keep.
\begin{figure}[H]
\centering
\includegraphics[width=0.8\textwidth]{images/quantile_regression/rnn/RNN_diagram.png}
\caption{RNN model input and output visualization}
\label{fig:rnn_model_visualization}
\end{figure}
The input features for the RNN model are carefully structured to capture the relevant information from the previous quarters and the forecasted values. Each input feature vector represents a quarter and consists of the following components:
\begin{itemize}
\item The actual NRV value from the current quarter (T-1), which provides the model with the historical context of the NRV.
\item The forecasted or real values for the next quarter (T), including load, PV, wind, and net position. If the next quarter is not the quarter to predict, the real values for that quarter are used. If the next quarter is the quarter to predict, the forecasted values are used.
\item A quarter embedding vector representing the current quarter (T-1). The embedding vector gives the model information about the time of day, which can help it learn the daily patterns in the NRV data.
\end{itemize}
The input feature structure is designed to provide the model with a comprehensive view of the previous quarters and the forecasted values for the next quarter. By incorporating both historical and forecasted information sequentially, the model can learn to predict the NRV quantiles for the next quarter more accurately.
\subsection{Diffusion models}
TODO: reference the paper
The "Denoising Diffusion Probabilistic Models" (DDPM)
\subsubsection{Overview}
Diffusion models are a type of probabilistic model designed to generate high-quality, diverse samples from complex data distributions. The way this type of model is trained is unique. The model is trained to reverse an iterative noise process that is applied to the data. This process is called the diffusion process. The model denoises the data in each iteration. During the training, the model learns to reverse the diffusion process. A training sample is transformed into a noise sample by applying the diffusion process. The model is then trained to recover the original sample from the noise sample. The model is trained to maximize the likelihood of the data given the noise. By doing this, the model learns to generate samples from the data distribution. Starting from the noise, the model can generate samples that look like the data. The model can also be conditioned on additional information to generate samples that follow other distributions.
\subsubsection{Applications}
Diffusion models gained popularity in the field of computer vision. They are used for inpainting, super-resolution, image generation, image editing, etc. The paper introducing ``Denoising Diffusion Probabilistic Models'' (DDPM) showed that diffusion models can achieve state-of-the-art results in image generation. This type of model was then applied to other fields like text generation, audio generation, etc. The most popular application of diffusion models is still image generation. Many different models and products exist that make use of diffusion models to generate images. Some examples are DALL·E, Stable Diffusion, and Midjourney. These models can generate or edit images based on a given text description.
This method can also be applied to other fields like audio generation, text generation etc. In this thesis, diffusion models are explored to model time series data conditioned on additional information. A small example of the diffusion process is shown in Figure \ref{fig:diffusion_example}. An image of a cat is generated by starting from noise and iteratively denoising the image.
\begin{figure}[H]
\centering
\includegraphics[width=0.8\textwidth]{images/diffusion/Generation-with-Diffusion-Models.png}
\caption{Example of the diffusion process. The image of a cat is generated by starting from noise and iteratively denoising the image.}
\label{fig:diffusion_example}
\end{figure}
\subsubsection{Generation process}
The generation process is quite different compared to other generative models. For example, GANs and VAEs generate samples by sampling from a noise distribution and then transforming the noise into a sample that looks like the training data in one step using a generator network. Diffusion models generate samples by starting from a noise distribution and then applying a series of denoising steps to the noise. The main aspects of diffusion models are discussed below: the forward process, the reverse process, the training objective and conditioning.
\begin{itemize}
\item \textbf{Forward process} \\
This forward process is a Markov chain that starts from the data and applies a series of diffusion steps to the data. During this process, Gaussian noise is added to the data in each of the T time steps according to a variance schedule $\beta_1, ..., \beta_T$.
$q(\mathbf{x}_{1:T}|\mathbf{x}_0) \coloneqq \prod_{t=1}^{T} q(\mathbf{x}_t|\mathbf{x}_{t-1}) \quad$ with $\quad q(\mathbf{x}_t|\mathbf{x}_{t-1}) \coloneqq \mathcal{N}(\mathbf{x}_t; \sqrt{1-\beta_t}\mathbf{x}_{t-1}, \beta_t\mathbf{I})$
This formula shows that the joint distribution of the noisy samples $\mathbf{x}_1, \dots, \mathbf{x}_T$ given the data $\mathbf{x}_0$ is the product of the transition probabilities at each step $t$. Each transition is a Gaussian distribution with mean $\sqrt{1-\beta_t}\mathbf{x}_{t-1}$ and variance $\beta_t\mathbf{I}$: the previous sample is slightly scaled down and Gaussian noise with variance $\beta_t$ is added. The variance schedule $\beta_1, \dots, \beta_T$ is a hyperparameter that needs to be chosen or optimized during training.
\item \textbf{Reverse process} \\
The diffusion process must then be reversed. The model is trained to model the noise distribution given the data and timestep.
$p_{\theta}(\mathbf{x}_{0:T}) \coloneqq p(\mathbf{x}_T) \prod_{t=1}^{T} p_{\theta}(\mathbf{x}_{t-1}|\mathbf{x}_t) \quad$ with $\quad p_{\theta}(\mathbf{x}_{t-1}|\mathbf{x}_t) \coloneqq \mathcal{N}(\mathbf{x}_{t-1}; \mu_{\theta}(\mathbf{x}_t, t), \Sigma_{\theta}(\mathbf{x}_t, t))$
In the reverse process, each step aims to undo the diffusion by estimating what the previous, less noisy state might have been. This is done using a series of conditional Gaussian distributions $p_{\theta}(\mathbf{x}_{t-1}|\mathbf{x}_t)$. For each of these Gaussians, a neural network with parameters $\theta$ is used to estimate the mean $\mu_{\theta}(\mathbf{x}_t, t)$ and the covariance $\Sigma_{\theta}(\mathbf{x}_t, t)$ of the distribution. The joint distribution $p_{\theta}(\mathbf{x}_{0:T})$ is then the product of the marginal distribution of the last timestep $p(\mathbf{x}_T)$ and the conditional distributions $p_{\theta}(\mathbf{x}_{t-1}|\mathbf{x}_t)$ for each timestep.
\item \textbf{Training} \\
TODO: explain better! \\
The model is trained by maximizing a variational lower bound on the log-likelihood of the data. This bound is also called the evidence lower bound (ELBO) in the context of generative models.
\begin{align*}
\log p(x) \geq & \mathbb{E}_q \left[ \log p_{\theta} (x_0 | x_1) | x_1 , x_0 \right] \\
& - D_{KL} \left( q(x_T | x_0) || p(x_T) \right) \\
& - \sum_{t=2}^{T} \mathbb{E}_q \left[ D_{KL} \left( q(x_{t-1} | x_t, x_0) || p_{\theta}(x_{t-1} | x_t) \right) | x_t, x_0 \right] \\
= & L_0 - L_T - \sum_{t=2}^{T} L_{t-1}
\end{align*}
The formula shows that maximizing the likelihood can be done by minimizing, for each timestep, the KL divergence between the forward process posterior $q(x_{t-1} | x_t, x_0)$ and the learned reverse transition $p_{\theta}(x_{t-1} | x_t)$. Because both distributions are Gaussian, it can be shown that this objective can be simplified further to minimizing the mean squared error between the noise predicted by the model and the actual noise added in each timestep.
\item \textbf{Conditioning} \\
The model can be conditioned on additional information. This can be used to guide the generation process. In the context of image generation, this can be used to generate images of a certain class or with certain attributes. This requires some changes in the model architecture and training process. A simple way to condition the model is to add additional information to the input of the model. This can be done by concatenating the additional information to the input of the model. The model can then learn to generate samples that follow the distribution of the data conditioned on the additional information.
\end{itemize}
The diffusion process can be seen in Figure \ref{fig:diffusion_process}. The model is trained to reverse this process. Starting from the noise, the model learns to generate samples that look like the data.
\begin{figure}[h]
\centering
\includegraphics[width=0.8\textwidth]{images/diffusion/diffusion_graphical_model.png}
% TODO: fix citation
%\caption[Diffusion process]{Diffusion process (adapted from \cite{ho2020denoising}).}
\caption{Graphical model of the diffusion process.}
\label{fig:diffusion_process}
\end{figure}
\subsection{Evaluation}
To evaluate the performance of the quantile regression models, multiple metrics can be used. The pinball loss itself can be used to compare models on the test set. Other metrics that can be used are the mean absolute error (MAE) and the mean squared error (MSE). This can be done by generating multiple full-day NRV samples for each day of the test set and calculating the error metrics for each of the samples. The mean can then be taken over the different samples to get a single value for the error metrics.
MAE does not consider the direction of the error. It is the average of the absolute differences between the predicted and actual values. The formula in our case with full-day NRV samples is:
\begin{equation}
MAE = \frac{1}{N} \sum_{i=1}^{N} \frac{1}{96} \sum_{j=1}^{96} |y_{ij} - \hat{y}_{ij}|
\end{equation}
\begin{align*}
\textbf{Where:} \\
N & = \text{Number of samples} \\
y_{ij} & = \text{Actual observed value of NRV for sample i and quarter j} \\
\hat{y}_{ij} & = \text{Sampled value of NRV for sample i and quarter j} \\
\end{align*}
MSE is more sensitive to outliers than MAE because it squares the error between the predicted and actual values. The formula in our case with full-day NRV samples is:
\begin{equation}
MSE = \frac{1}{N} \sum_{i=1}^{N} \frac{1}{96} \sum_{j=1}^{96} (y_{ij} - \hat{y}_{ij})^2
\end{equation}
The MAE and MSE metrics do not compare the predicted distribution of the NRV to the real NRV value but only take into account the sampled values. Evaluating the predicted distribution of the NRV must be done differently. The Continuous Ranked Probability Score (CRPS) can be used to evaluate the predicted distribution against the real NRV value. It measures the accuracy of the predicted cumulative distribution function and can be seen as a generalization of the MAE for probabilistic forecasts. The formula for the CRPS is:
\begin{equation}
CRPS(F, x) = \int_{-\infty}^{\infty} (F(y) - \mathbbm{1}(y - x))^2 \, dy
\end{equation}
\begin{align*}
\textbf{Where:} \\
F & = \text{Predicted cumulative distribution function} \\
x & = \text{Real NRV value} \\
\mathbbm{1}(z) & = \text{Heaviside function} = \begin{cases}
1 & \text{if } z \geq 0 \\
0 & \text{if } z < 0
\end{cases} \\
\end{align*}
The mean CRPS can be calculated over the different days to get a single value. The lower this value, the better the NRV is modeled. The CRPS metric can be visualized as shown in Figure \ref{fig:crps_visualization}. The CRPS corresponds to the area between the predicted cumulative distribution function and the Heaviside function, where the difference between the two curves is squared before integrating. The smaller the area between the curves, the better the NRV is modeled.
TODO: improve visualisation? -> real NRV + y as cumulative probability
\begin{figure}[H]
\centering
\includegraphics[width=0.8\textwidth]{images/quantile_regression/crps_visualization.png}
\caption{Visualization of the CRPS metric}
\label{fig:crps_visualization}
\end{figure}

View File

@@ -0,0 +1,164 @@
\relax
\providecommand\hyper@newdestlabel[2]{}
\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{2}{section.1}\protected@file@percent }
\@setckpt{sections/introduction}{
\setcounter{page}{3}
\setcounter{equation}{0}
\setcounter{enumi}{0}
\setcounter{enumii}{0}
\setcounter{enumiii}{0}
\setcounter{enumiv}{0}
\setcounter{footnote}{0}
\setcounter{mpfootnote}{0}
\setcounter{part}{0}
\setcounter{section}{1}
\setcounter{subsection}{0}
\setcounter{subsubsection}{0}
\setcounter{paragraph}{0}
\setcounter{subparagraph}{0}
\setcounter{figure}{0}
\setcounter{table}{0}
\setcounter{parentequation}{0}
\setcounter{float@type}{4}
\setcounter{caption@flags}{0}
\setcounter{continuedfloat}{0}
\setcounter{subfigure}{0}
\setcounter{subtable}{0}
\setcounter{tabx@nest}{0}
\setcounter{listtotal}{0}
\setcounter{listcount}{0}
\setcounter{liststart}{0}
\setcounter{liststop}{0}
\setcounter{citecount}{0}
\setcounter{citetotal}{0}
\setcounter{multicitecount}{0}
\setcounter{multicitetotal}{0}
\setcounter{instcount}{0}
\setcounter{maxnames}{2}
\setcounter{minnames}{1}
\setcounter{maxitems}{999}
\setcounter{minitems}{1}
\setcounter{citecounter}{0}
\setcounter{maxcitecounter}{0}
\setcounter{savedcitecounter}{0}
\setcounter{uniquelist}{0}
\setcounter{uniquename}{0}
\setcounter{refsection}{0}
\setcounter{refsegment}{0}
\setcounter{maxextratitle}{0}
\setcounter{maxextratitleyear}{0}
\setcounter{maxextraname}{0}
\setcounter{maxextradate}{0}
\setcounter{maxextraalpha}{0}
\setcounter{abbrvpenalty}{50}
\setcounter{highnamepenalty}{50}
\setcounter{lownamepenalty}{25}
\setcounter{maxparens}{3}
\setcounter{parenlevel}{0}
\setcounter{blx@maxsection}{0}
\setcounter{mincomprange}{10}
\setcounter{maxcomprange}{100000}
\setcounter{mincompwidth}{1}
\setcounter{afterword}{0}
\setcounter{savedafterword}{0}
\setcounter{annotator}{0}
\setcounter{savedannotator}{0}
\setcounter{author}{0}
\setcounter{savedauthor}{0}
\setcounter{bookauthor}{0}
\setcounter{savedbookauthor}{0}
\setcounter{commentator}{0}
\setcounter{savedcommentator}{0}
\setcounter{editor}{0}
\setcounter{savededitor}{0}
\setcounter{editora}{0}
\setcounter{savededitora}{0}
\setcounter{editorb}{0}
\setcounter{savededitorb}{0}
\setcounter{editorc}{0}
\setcounter{savededitorc}{0}
\setcounter{foreword}{0}
\setcounter{savedforeword}{0}
\setcounter{holder}{0}
\setcounter{savedholder}{0}
\setcounter{introduction}{0}
\setcounter{savedintroduction}{0}
\setcounter{namea}{0}
\setcounter{savednamea}{0}
\setcounter{nameb}{0}
\setcounter{savednameb}{0}
\setcounter{namec}{0}
\setcounter{savednamec}{0}
\setcounter{translator}{0}
\setcounter{savedtranslator}{0}
\setcounter{shortauthor}{0}
\setcounter{savedshortauthor}{0}
\setcounter{shorteditor}{0}
\setcounter{savedshorteditor}{0}
\setcounter{narrator}{0}
\setcounter{savednarrator}{0}
\setcounter{execproducer}{0}
\setcounter{savedexecproducer}{0}
\setcounter{execdirector}{0}
\setcounter{savedexecdirector}{0}
\setcounter{with}{0}
\setcounter{savedwith}{0}
\setcounter{labelname}{0}
\setcounter{savedlabelname}{0}
\setcounter{institution}{0}
\setcounter{savedinstitution}{0}
\setcounter{lista}{0}
\setcounter{savedlista}{0}
\setcounter{listb}{0}
\setcounter{savedlistb}{0}
\setcounter{listc}{0}
\setcounter{savedlistc}{0}
\setcounter{listd}{0}
\setcounter{savedlistd}{0}
\setcounter{liste}{0}
\setcounter{savedliste}{0}
\setcounter{listf}{0}
\setcounter{savedlistf}{0}
\setcounter{location}{0}
\setcounter{savedlocation}{0}
\setcounter{organization}{0}
\setcounter{savedorganization}{0}
\setcounter{origlocation}{0}
\setcounter{savedoriglocation}{0}
\setcounter{origpublisher}{0}
\setcounter{savedorigpublisher}{0}
\setcounter{publisher}{0}
\setcounter{savedpublisher}{0}
\setcounter{language}{0}
\setcounter{savedlanguage}{0}
\setcounter{origlanguage}{0}
\setcounter{savedoriglanguage}{0}
\setcounter{citation}{0}
\setcounter{savedcitation}{0}
\setcounter{pageref}{0}
\setcounter{savedpageref}{0}
\setcounter{textcitecount}{0}
\setcounter{textcitetotal}{0}
\setcounter{textcitemaxnames}{0}
\setcounter{biburlbigbreakpenalty}{100}
\setcounter{biburlbreakpenalty}{200}
\setcounter{biburlnumpenalty}{0}
\setcounter{biburlucpenalty}{0}
\setcounter{biburllcpenalty}{0}
\setcounter{smartand}{1}
\setcounter{bbx:relatedcount}{0}
\setcounter{bbx:relatedtotal}{0}
\setcounter{section@level}{0}
\setcounter{Item}{0}
\setcounter{Hfootnote}{0}
\setcounter{bookmark@seq@number}{1}
\setcounter{g@acro@QR@int}{0}
\setcounter{g@acro@AQR@int}{0}
\setcounter{g@acro@NAQR@int}{0}
\setcounter{g@acro@MSE@int}{0}
\setcounter{g@acro@MAE@int}{0}
\setcounter{g@acro@CRPS@int}{0}
\setcounter{g@acro@NRV@int}{0}
\setcounter{g@acro@PV@int}{0}
\setcounter{g@acro@NP@int}{0}
}

View File

@@ -0,0 +1,12 @@
\section{Introduction}
The electricity market is a complex system influenced by numerous factors. The rise of renewable energy sources adds to this complexity, introducing greater volatility compared to traditional energy sources. Renewables, with their unpredictable nature, exacerbate the challenge of maintaining a stable balance between supply and demand. This critical balance is managed by the transmission system operator, Elia, which utilizes reserves to mitigate any potential shortages or surpluses, directly influencing electricity prices.
(TODO: Market participants met flexible assets (Groot genoeg), zij willen grote winst maken. Elia moet minder eigen reserves gebruiken -> goedkoper voor iedereen)
Forecasting the imbalance price is vital for market participants engaged in buying or selling electricity. It enables them to make informed decisions on the optimal times to buy or sell, aiming to maximize their profits. However, current industry practices often rely on simplistic policies, such as adhering to a fixed price for transactions. This approach is not optimal and overlooks the potential benefits of adaptive policies that consider the forecasted imbalance prices.
The goal of this thesis is to generatively model the Belgian electricity market. This allows the reconstruction of the imbalance price for a given day which can then be used by other simple policies to make decisions on when to buy or sell electricity. These policies can then be compared to the current industry practices to assess their performance.
Forecasting the system imbalance will become increasingly important as the share of renewable energy sources continues to grow.
This thesis can be divided into two main parts. The first part focuses on modeling the Net Regulation Volume (NRV) of the Belgian electricity market for the next day. This modeling is conditioned on multiple inputs that can be obtained from Elia (TODO: add citation to the open data of Elia). The second part of the thesis focuses on optimizing a simple policy using the NRV generations for the next day. The policy tries to maximize profit by charging and discharging a battery and thereby buying and selling electricity on the market. Multiple models are trained and tested to model the NRV and compared to each other based on their profit optimization.

View File

@@ -0,0 +1,166 @@
\relax
\providecommand\hyper@newdestlabel[2]{}
\@writefile{toc}{\contentsline {section}{\numberline {5}Literature Study}{19}{section.5}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Electricity Price Forecasting}{19}{subsection.5.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {5.2}Policies for Battery Optimization}{20}{subsection.5.2}\protected@file@percent }
\@setckpt{sections/literature_study}{
\setcounter{page}{21}
\setcounter{equation}{7}
\setcounter{enumi}{0}
\setcounter{enumii}{0}
\setcounter{enumiii}{0}
\setcounter{enumiv}{0}
\setcounter{footnote}{0}
\setcounter{mpfootnote}{0}
\setcounter{part}{0}
\setcounter{section}{5}
\setcounter{subsection}{2}
\setcounter{subsubsection}{0}
\setcounter{paragraph}{0}
\setcounter{subparagraph}{0}
\setcounter{figure}{5}
\setcounter{table}{2}
\setcounter{parentequation}{0}
\setcounter{float@type}{4}
\setcounter{caption@flags}{2}
\setcounter{continuedfloat}{0}
\setcounter{subfigure}{0}
\setcounter{subtable}{0}
\setcounter{tabx@nest}{0}
\setcounter{listtotal}{0}
\setcounter{listcount}{0}
\setcounter{liststart}{0}
\setcounter{liststop}{0}
\setcounter{citecount}{0}
\setcounter{citetotal}{0}
\setcounter{multicitecount}{0}
\setcounter{multicitetotal}{0}
\setcounter{instcount}{5}
\setcounter{maxnames}{2}
\setcounter{minnames}{1}
\setcounter{maxitems}{999}
\setcounter{minitems}{1}
\setcounter{citecounter}{0}
\setcounter{maxcitecounter}{0}
\setcounter{savedcitecounter}{0}
\setcounter{uniquelist}{0}
\setcounter{uniquename}{0}
\setcounter{refsection}{0}
\setcounter{refsegment}{0}
\setcounter{maxextratitle}{0}
\setcounter{maxextratitleyear}{0}
\setcounter{maxextraname}{0}
\setcounter{maxextradate}{0}
\setcounter{maxextraalpha}{0}
\setcounter{abbrvpenalty}{50}
\setcounter{highnamepenalty}{50}
\setcounter{lownamepenalty}{25}
\setcounter{maxparens}{3}
\setcounter{parenlevel}{0}
\setcounter{blx@maxsection}{0}
\setcounter{mincomprange}{10}
\setcounter{maxcomprange}{100000}
\setcounter{mincompwidth}{1}
\setcounter{afterword}{0}
\setcounter{savedafterword}{0}
\setcounter{annotator}{0}
\setcounter{savedannotator}{0}
\setcounter{author}{0}
\setcounter{savedauthor}{0}
\setcounter{bookauthor}{0}
\setcounter{savedbookauthor}{0}
\setcounter{commentator}{0}
\setcounter{savedcommentator}{0}
\setcounter{editor}{0}
\setcounter{savededitor}{0}
\setcounter{editora}{0}
\setcounter{savededitora}{0}
\setcounter{editorb}{0}
\setcounter{savededitorb}{0}
\setcounter{editorc}{0}
\setcounter{savededitorc}{0}
\setcounter{foreword}{0}
\setcounter{savedforeword}{0}
\setcounter{holder}{0}
\setcounter{savedholder}{0}
\setcounter{introduction}{0}
\setcounter{savedintroduction}{0}
\setcounter{namea}{0}
\setcounter{savednamea}{0}
\setcounter{nameb}{0}
\setcounter{savednameb}{0}
\setcounter{namec}{0}
\setcounter{savednamec}{0}
\setcounter{translator}{0}
\setcounter{savedtranslator}{0}
\setcounter{shortauthor}{0}
\setcounter{savedshortauthor}{0}
\setcounter{shorteditor}{0}
\setcounter{savedshorteditor}{0}
\setcounter{narrator}{0}
\setcounter{savednarrator}{0}
\setcounter{execproducer}{0}
\setcounter{savedexecproducer}{0}
\setcounter{execdirector}{0}
\setcounter{savedexecdirector}{0}
\setcounter{with}{0}
\setcounter{savedwith}{0}
\setcounter{labelname}{0}
\setcounter{savedlabelname}{0}
\setcounter{institution}{0}
\setcounter{savedinstitution}{0}
\setcounter{lista}{0}
\setcounter{savedlista}{0}
\setcounter{listb}{0}
\setcounter{savedlistb}{0}
\setcounter{listc}{0}
\setcounter{savedlistc}{0}
\setcounter{listd}{0}
\setcounter{savedlistd}{0}
\setcounter{liste}{0}
\setcounter{savedliste}{0}
\setcounter{listf}{0}
\setcounter{savedlistf}{0}
\setcounter{location}{0}
\setcounter{savedlocation}{0}
\setcounter{organization}{0}
\setcounter{savedorganization}{0}
\setcounter{origlocation}{0}
\setcounter{savedoriglocation}{0}
\setcounter{origpublisher}{0}
\setcounter{savedorigpublisher}{0}
\setcounter{publisher}{0}
\setcounter{savedpublisher}{0}
\setcounter{language}{0}
\setcounter{savedlanguage}{0}
\setcounter{origlanguage}{0}
\setcounter{savedoriglanguage}{0}
\setcounter{citation}{0}
\setcounter{savedcitation}{0}
\setcounter{pageref}{0}
\setcounter{savedpageref}{0}
\setcounter{textcitecount}{0}
\setcounter{textcitetotal}{0}
\setcounter{textcitemaxnames}{0}
\setcounter{biburlbigbreakpenalty}{100}
\setcounter{biburlbreakpenalty}{200}
\setcounter{biburlnumpenalty}{0}
\setcounter{biburlucpenalty}{0}
\setcounter{biburllcpenalty}{0}
\setcounter{smartand}{1}
\setcounter{bbx:relatedcount}{0}
\setcounter{bbx:relatedtotal}{0}
\setcounter{section@level}{0}
\setcounter{Item}{0}
\setcounter{Hfootnote}{0}
\setcounter{bookmark@seq@number}{20}
\setcounter{g@acro@QR@int}{0}
\setcounter{g@acro@AQR@int}{0}
\setcounter{g@acro@NAQR@int}{0}
\setcounter{g@acro@MSE@int}{0}
\setcounter{g@acro@MAE@int}{0}
\setcounter{g@acro@CRPS@int}{0}
\setcounter{g@acro@NRV@int}{0}
\setcounter{g@acro@PV@int}{0}
\setcounter{g@acro@NP@int}{0}
}

View File

@@ -0,0 +1,15 @@
\section{Literature Study}
% - Literatuur forecasting imbalance price
% - Literatuur policies adhv forecasts
\subsection{Electricity Price Forecasting}
Forecasting the electricity price is a challenging task that has been researched extensively. Knowing the future electricity price is crucial for market participants to make informed decisions and optimize their operations and profit. Already since the early 2000s, researchers have been trying to predict the electricity price. The first models were based on time series analysis, but with the rise of machine learning, more advanced models have been developed. A rise in publications on this topic can be observed since 2005. This is described in the literature review by \parencite{weron_electricity_2014}. An overview is given of the evolution of the methods used for electricity price forecasting. A significant shift can be observed towards integrating machine learning techniques with traditional statistical methods. The earliest models were based on time series analysis involving methods like autoregression, moving averages and their combinations (ARMA, ARIMA). These methods are not always able to capture the complex patterns in the electricity price. Therefore, researchers started to use more advanced models like neural networks, support vector machines, and random forests. The combination of statistical and machine learning models is more accurate. The statistical models are used to capture the linear patterns, while the machine learning models are used to capture the more complex non-linear patterns. This results in a more accurate and robust model. The more recent paper \parencite{poggi_electricity_2023} compares the performance of statistical and machine learning methods for electricity price forecasting. The authors use ARIMA and SARIMA as statistical methods and XGBoost as a machine learning method. They also compare the performance of Long Short-Term Memory (LSTM) networks for electricity price forecasting.
Because forecasting the electricity price is a challenging task with a lot of uncertainty, generative methods to model the electricity price were also researched. Generative modeling is a type of unsupervised learning that can be used to generate new samples from the same distribution as the training data. This can be used to generate new electricity price samples. The authors of \parencite{lu_scenarios_2022} use Generative Adversarial Networks (GANs) to generate new electricity price scenarios. They introduce a deep learning framework called Conditional Time Series Generative Adversarial Networks (CTSGAN) to generate electricity price scenarios. This enhances the traditional forecasting models by allowing the generation of a diverse set of potential future scenarios. This capability allows the modeling of the uncertainty in the electricity price. The authors show that the CTSGAN model outperforms traditional forecasting models in terms of forecasting accuracy. Other generative models like normalizing flows can also be used to generate new electricity price samples. The authors of \parencite{dumas_deep_2022} use normalizing flows to generate new electricity price samples. They show that normalizing flow models for electricity price forecasting generate higher-quality samples than other generative models like GANs and Variational Autoencoders (VAEs). Not a lot of research has been done on using diffusion models for electricity price forecasting. The authors of \parencite{rasul_autoregressive_2021}, however, show that autoregressive diffusion models can be used for time series forecasting and achieve good results. They apply the model to multiple datasets, including an electricity price dataset. The use of diffusion models for NRV modeling is further explored in this thesis.
Most research on forecasting for the electricity market focuses on the electricity price for consumers. Another important aspect of the electricity market is the imbalance price. Not many papers have been published on forecasting the imbalance price. One paper \parencite{dumas_deep_2022} describes the forecasting of the imbalance price. They do not forecast the price itself but rather forecast the NRV and use this to reconstruct the imbalance price. This approach will also be used in this thesis.
TODO: more information?
\subsection{Policies for Battery Optimization}

View File

@@ -0,0 +1,167 @@
\relax
\providecommand\hyper@newdestlabel[2]{}
\@writefile{toc}{\contentsline {section}{\numberline {4}Policies}{17}{section.4}\protected@file@percent }
\newlabel{sec:policies}{{4}{17}{Policies}{section.4}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Baselines}{17}{subsection.4.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}Policies based on NRV generations}{18}{subsection.4.2}\protected@file@percent }
\@setckpt{sections/policies}{
\setcounter{page}{19}
\setcounter{equation}{7}
\setcounter{enumi}{0}
\setcounter{enumii}{0}
\setcounter{enumiii}{0}
\setcounter{enumiv}{0}
\setcounter{footnote}{0}
\setcounter{mpfootnote}{0}
\setcounter{part}{0}
\setcounter{section}{4}
\setcounter{subsection}{2}
\setcounter{subsubsection}{0}
\setcounter{paragraph}{0}
\setcounter{subparagraph}{0}
\setcounter{figure}{5}
\setcounter{table}{2}
\setcounter{parentequation}{0}
\setcounter{float@type}{4}
\setcounter{caption@flags}{2}
\setcounter{continuedfloat}{0}
\setcounter{subfigure}{0}
\setcounter{subtable}{0}
\setcounter{tabx@nest}{0}
\setcounter{listtotal}{0}
\setcounter{listcount}{0}
\setcounter{liststart}{0}
\setcounter{liststop}{0}
\setcounter{citecount}{0}
\setcounter{citetotal}{0}
\setcounter{multicitecount}{0}
\setcounter{multicitetotal}{0}
\setcounter{instcount}{0}
\setcounter{maxnames}{2}
\setcounter{minnames}{1}
\setcounter{maxitems}{999}
\setcounter{minitems}{1}
\setcounter{citecounter}{0}
\setcounter{maxcitecounter}{0}
\setcounter{savedcitecounter}{0}
\setcounter{uniquelist}{0}
\setcounter{uniquename}{0}
\setcounter{refsection}{0}
\setcounter{refsegment}{0}
\setcounter{maxextratitle}{0}
\setcounter{maxextratitleyear}{0}
\setcounter{maxextraname}{0}
\setcounter{maxextradate}{0}
\setcounter{maxextraalpha}{0}
\setcounter{abbrvpenalty}{50}
\setcounter{highnamepenalty}{50}
\setcounter{lownamepenalty}{25}
\setcounter{maxparens}{3}
\setcounter{parenlevel}{0}
\setcounter{blx@maxsection}{0}
\setcounter{mincomprange}{10}
\setcounter{maxcomprange}{100000}
\setcounter{mincompwidth}{1}
\setcounter{afterword}{0}
\setcounter{savedafterword}{0}
\setcounter{annotator}{0}
\setcounter{savedannotator}{0}
\setcounter{author}{0}
\setcounter{savedauthor}{0}
\setcounter{bookauthor}{0}
\setcounter{savedbookauthor}{0}
\setcounter{commentator}{0}
\setcounter{savedcommentator}{0}
\setcounter{editor}{0}
\setcounter{savededitor}{0}
\setcounter{editora}{0}
\setcounter{savededitora}{0}
\setcounter{editorb}{0}
\setcounter{savededitorb}{0}
\setcounter{editorc}{0}
\setcounter{savededitorc}{0}
\setcounter{foreword}{0}
\setcounter{savedforeword}{0}
\setcounter{holder}{0}
\setcounter{savedholder}{0}
\setcounter{introduction}{0}
\setcounter{savedintroduction}{0}
\setcounter{namea}{0}
\setcounter{savednamea}{0}
\setcounter{nameb}{0}
\setcounter{savednameb}{0}
\setcounter{namec}{0}
\setcounter{savednamec}{0}
\setcounter{translator}{0}
\setcounter{savedtranslator}{0}
\setcounter{shortauthor}{0}
\setcounter{savedshortauthor}{0}
\setcounter{shorteditor}{0}
\setcounter{savedshorteditor}{0}
\setcounter{narrator}{0}
\setcounter{savednarrator}{0}
\setcounter{execproducer}{0}
\setcounter{savedexecproducer}{0}
\setcounter{execdirector}{0}
\setcounter{savedexecdirector}{0}
\setcounter{with}{0}
\setcounter{savedwith}{0}
\setcounter{labelname}{0}
\setcounter{savedlabelname}{0}
\setcounter{institution}{0}
\setcounter{savedinstitution}{0}
\setcounter{lista}{0}
\setcounter{savedlista}{0}
\setcounter{listb}{0}
\setcounter{savedlistb}{0}
\setcounter{listc}{0}
\setcounter{savedlistc}{0}
\setcounter{listd}{0}
\setcounter{savedlistd}{0}
\setcounter{liste}{0}
\setcounter{savedliste}{0}
\setcounter{listf}{0}
\setcounter{savedlistf}{0}
\setcounter{location}{0}
\setcounter{savedlocation}{0}
\setcounter{organization}{0}
\setcounter{savedorganization}{0}
\setcounter{origlocation}{0}
\setcounter{savedoriglocation}{0}
\setcounter{origpublisher}{0}
\setcounter{savedorigpublisher}{0}
\setcounter{publisher}{0}
\setcounter{savedpublisher}{0}
\setcounter{language}{0}
\setcounter{savedlanguage}{0}
\setcounter{origlanguage}{0}
\setcounter{savedoriglanguage}{0}
\setcounter{citation}{0}
\setcounter{savedcitation}{0}
\setcounter{pageref}{0}
\setcounter{savedpageref}{0}
\setcounter{textcitecount}{0}
\setcounter{textcitetotal}{0}
\setcounter{textcitemaxnames}{0}
\setcounter{biburlbigbreakpenalty}{100}
\setcounter{biburlbreakpenalty}{200}
\setcounter{biburlnumpenalty}{0}
\setcounter{biburlucpenalty}{0}
\setcounter{biburllcpenalty}{0}
\setcounter{smartand}{1}
\setcounter{bbx:relatedcount}{0}
\setcounter{bbx:relatedtotal}{0}
\setcounter{section@level}{0}
\setcounter{Item}{0}
\setcounter{Hfootnote}{0}
\setcounter{bookmark@seq@number}{17}
\setcounter{g@acro@QR@int}{0}
\setcounter{g@acro@AQR@int}{0}
\setcounter{g@acro@NAQR@int}{0}
\setcounter{g@acro@MSE@int}{0}
\setcounter{g@acro@MAE@int}{0}
\setcounter{g@acro@CRPS@int}{0}
\setcounter{g@acro@NRV@int}{0}
\setcounter{g@acro@PV@int}{0}
\setcounter{g@acro@NP@int}{0}
}

View File

@@ -0,0 +1,18 @@
\section{Policies}
\label{sec:policies}
Organizations that own a battery and are active in the electricity market have to make decisions on when to charge and discharge their battery. These decisions are based on the current state of the battery, the current state of the market, and the future state of the market. The future state of the market can be predicted using generative models like the ones discussed in previous sections. The organizations want to maximize their profit by buying electricity when it is cheap and selling electricity when it is expensive. The policies used decide when to charge and discharge the battery. Another important aspect of these policies is to keep the battery in a healthy state. Charging and discharging a battery too much can reduce its lifetime. The policies have to take this into account.
In this thesis, a simple policy is used to optimize the profit made by charging and discharging a battery. The policy is based on the Net Regulation Volume (NRV) predictions for the next day. This shows the potential of using NRV predictions to optimize the policy. In the real world, more complex policies can be used to optimize the profit. These policies can be trained using reinforcement learning or other optimization techniques. Multiple baseline policies are defined to compare the performance of the policy based on NRV predictions.
The simple policy uses two thresholds to decide when to charge and discharge the battery based on the imbalance price. When the imbalance price is below the charging threshold, the battery is fully charged. When the imbalance price is above the discharging threshold, the battery is fully discharged again. This policy is very simple and does not take into account some important aspects.
\subsection{Baselines}
% Baseline fixed thresholds
The most simple baseline policy is to define two fixed thresholds for charging and discharging the battery. These thresholds can be determined by the historical data of the imbalance price. The thresholds can be found by doing a simple grid search for the best thresholds. The thresholds that maximize the profit on the historical data are used as the fixed thresholds. During the optimization, a penalty parameter can be added to the profit function to penalize when the battery is charged or discharged too much.
% Baseline thresholds determined on the previous day
Another baseline policy is to determine the thresholds for charging and discharging the battery based on the NRV of the previous day. This policy is based on the assumption that the NRV of the next day will be similar to the NRV of the previous day. The NRV of the previous day can be seen as the NRV prediction for the next day. The thresholds can then be determined by doing a simple grid search for the best thresholds over the NRV prediction. The same penalty parameter can be added to the profit function to reduce the charge cycles of the battery.
\subsection{Policies based on NRV generations}
% Policy based on NRV generations
The simple baseline policy can be used with the NRV predictions for the next day. First, multiple full-day NRV samples are generated using a generative model. Each of these samples will be seen as a prediction for the NRV of the next day. The charge and discharge thresholds are determined for each of these samples using a simple grid search like in the baseline policy. The mean is taken over all the thresholds to determine the final thresholds for the next day. This results in a policy that uses the NRV samples of the generative model. This policy also uses the penalty parameter to reduce the charge cycles of the battery.

View File

@@ -0,0 +1,62 @@
\section{Results \& Discussion}
As discussed in the background information, the imbalance prices are based on the Net Regulation Volume (NRV). This means that the imbalance prices can be reconstructed from the sampled NRV. Multiple baselines and models that forecast and model the NRV will be compared using different metrics. The data utilized in this thesis is provided by Elia. Elia makes a lot of data public and provides it in quarter-hour or minute intervals. The data used in this thesis has a quarter-hour resolution. This makes the number of input features and output features far more manageable and makes the training more computationally efficient. A full-day sample of the NRV consists of 96 values, one for every quarter-hour. Further research could be done using smaller data intervals to see if this improves the models.
\subsection{Data}
Elia offers a lot of different data on their website (TODO: open data citation). They provide data for the following categories:
(TODO: Relevant? or too much information?)
\begin{itemize}
\item Balancing
\item Transmission
\item Power generations
\item Congestion management
\item Load
\item Studies
\end{itemize}
The data useful to model the NRV is scattered over multiple categories. The data used in this thesis is the following:
TODO: ask Jonas: add urls to the correct data? via citation?
\begin{itemize}
\item \textbf{Imbalance prices per quarter-hour (Historical data) } \\
% https://opendata.elia.be/explore/dataset/ods047/information/?sort=datetime
This dataset contains the NRV and system imbalance in a quarter-hour interval. The data is available from 01-01-2015 to the present day. The NRV is used as the target variable that needs to be modeled but can also be used as input features. The next day NRV modeling can be conditioned on the real NRV of the previous day.
\item \textbf{Measured and forecasted total load on the Belgian grid (Historical data)} \\
% https://opendata.elia.be/explore/dataset/ods001/table/?sort=datetime
Elia publishes what the total load on the Belgian grid is. This data is also provided in a quarter-hour interval. This data consists of the real load for a certain quarter but also the different forecasted loads. There are day-ahead and week-ahead forecasts available. The total load on the Belgian grid can be used as input features for the NRV modeling. The data is also available from 01-01-2015 to the present day.
\item \textbf{Photovoltaic power production estimation and forecast on Belgian grid (Historical)} \\
% https://opendata.elia.be/explore/dataset/ods032/table/?sort=datetime
The photovoltaic power production is also available in a quarter-hour interval. The production is also forecasted day-ahead and week-ahead. The data is provided for each of the provinces in Belgium. Forecasts are also available for the 3 Belgian regions (Flanders, Wallonia, Brussels) and the total Belgian production. The photovoltaic data has been provided since 01-04-2018 and is available to the present day.
\item \textbf{Wind power production estimation and forecast on Belgian grid (Historical)} \\
% https://opendata.elia.be/explore/dataset/ods031/information/
Just like the photovoltaic power production data, wind power production is available in a quarter-hour interval for each of the provinces and regions in Belgium. This data also includes the real production and the forecasts. An additional column is available that shows if the power is generated offshore or onshore. During this thesis, the offshore and onshore data will be combined. The wind power production data has been provided since 01-01-2015 and is available to the present day.
\item \textbf{Day-ahead implicit net position (Belgium's balance)} \\
% https://opendata.elia.be/explore/dataset/ods022/information/?sort=datetime
The day-ahead implicit net position shows the total amount of electricity that will be imported or exported to neighboring countries. The trades are done on the day-ahead market and are thus known in advance. This data is available in a quarter-hour interval and has been provided since 01-11-2020 and is available to the present day. The data before 01-11-2020 is also available but only in hourly intervals.
\end{itemize}
A lot of data is available but only the most relevant data needs to be used. Experiments will be done to identify which data and features improve the NRV modeling. The data will be split into a training and test set. The training dataset starts depending on which data features are used but ends on 31-12-2022. The test set starts on 01-01-2023 and ends on (TODO: check the end date). This makes sure enough data is available to train the models and the test set is large enough to evaluate the models. The year 2023 is chosen as the test set because it is the most recent data available when the thesis experiments were conducted. Using data from 2022 in the test set also does not make a lot of sense because the trained models would be used to predict the future. Data from 2022 is not relevant anymore to evaluate the models.
\subsection{Quantile Regression}
\input{sections/results/models/linear}
\input{sections/results/models/non-linear}
\input{sections/results/models/gru}
\input{sections/results/models/diffusion}
\input{sections/results/models/comparison}
\newpage
\section{Policies for battery optimization}
The goal of this thesis is to model the NRV data and use this to optimize the buying and selling of electricity to make a profit. Different models and methods can be used to model the NRV data which can all lead to different results. To evaluate the performance of the models, the generated profit on the test set can be used as a metric. First of all, baselines are needed to determine whether adding NRV predictions to the policies improves the profit. The baselines are already discussed in the background section. It is very important to compare the baselines and other policies fairly. The profit depends on the number of charge cycles that are used. The more charge cycles a policy uses, the more profit it will be able to make. Using too many charge cycles is bad for the health of the battery. A penalty parameter can be used to penalize the policy when too many charge cycles are used in a day. To fairly compare the policies with different models and baselines, a maximum number of charge cycles is determined for the test period. The test period starts on 01-01-2023 and ends on (TODO: check the end date). Assuming a maximum of 400 charge cycles can be used in a year, only 293 charge cycles can be used during the test period. The penalty parameter is optimized using a simple gradient descent approach to make sure only 293 charge cycles are used during the test period. The profit is then calculated using the optimized penalty parameter.
To evaluate the policies, a battery of 2 MWh is used with a maximum charge and discharge power of 1 MW. The battery is charged and discharged in quarter-hour intervals at the price of that quarter-hour.
\input{sections/results/policies/baselines}
\input{sections/results/policies/nrv_samples_policy}

View File

@@ -0,0 +1,80 @@
\subsection{Comparison}
After training the different models and experimenting with various hyperparameters, the performance differences between the model architectures and methods can be compared using the \ac{MSE}, \ac{MAE}, and \ac{CRPS} metrics. Visual comparisons of some examples are also provided.
% Updated table using acronyms
\begin{table}[H]
\centering
\begin{adjustbox}{width=\textwidth}
\begin{tabular}{@{}clcccccc@{}}
\toprule
Features & Method & Model & \ac{MSE} & \ac{MAE} & \ac{CRPS} & Parameters \\
\midrule
NRV & & & & & \\
& \acs{AQR} & Linear & 39222.41 & 152.49 & 91.56 & 1,261 \\
& \acs{NAQR} & Linear & 41219.98 & 152.26 & 73.97 & 121,056 \\
& & & & & \\
& \acs{AQR} & Non-Linear & 36346.57 & 144.80 & 84.51 & 422,925 \\
& \acs{NAQR} & Non-Linear & 40200.92 & 152.00 & 74.37 & 501,728 \\
& & & & & \\
& \acs{AQR} & GRU & 37681.71 & 146.62 & 83.08 & 11,829,261 \\
& \acs{NAQR} & GRU & 40917.24 & 152.04 & 76.06 & 3,007,200 \\
\midrule
NRV + Load + Wind + PV + NP (+ QE) & & & & & \\
& \acs{AQR} & Linear & 34031.71 & 142.29 & 79.99 & 6,850 \\
& \acs{NAQR} & Linear & 49132.26 & 167.37 & 78.75 & 1,079,520 \\
& & & & & \\
& \acs{AQR} & Non-Linear & 32447.41 & 137.24 & 79.22 & 524,013 \\
& \acs{NAQR} & Non-Linear & 42588.16 & 157.20 & 73.75 & 673,760 \\
& Diffusion & Non-Linear & 47178.91 & 166.89 & 80.30 & 3,116,896 \\
& & & & & \\
& \acs{AQR} & GRU & 35238.98 & 141.02 & 80.92 & 11,843,565 \\
& \acs{NAQR} & GRU & 40613.54 & 151.17 & 75.33 & 6,165,216 \\
\bottomrule
\end{tabular}
\end{adjustbox}
\caption{Comparison of the different models using the \ac{MSE}, \ac{MAE} and \ac{CRPS} metrics. The best-performing models for a certain type are selected based on the \ac{CRPS}.}
\label{tab:model_comparison}
\end{table}
A first recurring conclusion that can be made is that the \ac{NAQR} models have higher \ac{MSE} and \ac{MAE} errors but a lower \ac{CRPS}. The reason for this behavior is not immediately clear. One reason for this could be the way the autoregressive quantile regression works. Autoregressive models use the previous predicted value as input to predict the next value. The autoregressive model does not know that it will be used to predict multiple values in the future. In the case of NRV modeling, a value is sampled as input for the next quarter. The sampling introduces an error which the model is not trained for. Because of this, the NRV distribution outputted by the model will be further away from the expected distribution. This error propagates further in the full-day NRV samples which results in a higher CRPS for the autoregressive models. The non-autoregressive models do not have this problem because they predict all values at once. The non-autoregressive models, however, have a higher \ac{MSE} and \ac{MAE} error. The model outputs a distribution for each quarter of the day. The full-day NRV sample is then generated by sampling from each of the distributions. The sampled values are independent of each other. This can result in unrealistic samples with large peaks which impact the \ac{MSE} and \ac{MAE} metrics.
Comparing the Linear model with the GRU model, the GRU model has a better performance when only using the NRV data. The autoregressive linear quantile regression model, however, outperforms the GRU model when all available features are used. Some examples of the test set are shown in Figure \ref{fig:ar_linear_gru_comparison}. A comparison is made between the autoregressive linear and GRU models. A clear difference in the confidence intervals can be observed. For the linear model, the confidence intervals have almost the same width over the whole day. This is not the case for the GRU model, whose confidence intervals are wider in the middle of the day. This gives a more realistic insight into the uncertainty.
\begin{figure}[H]
\centering
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/aqr_linear_model_samples/AQR_NRV_Load_Wind_PV_NP_QE-Sample_864.png}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/aqr_gru_model_examples/AQR_GRU_NRV_Load_Wind_PV_NP_QE-Sample_864.png}
\end{subfigure}
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/aqr_linear_model_samples/AQR_NRV_Load_Wind_PV_NP_QE-Sample_4320.png}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/aqr_gru_model_examples/AQR_GRU_NRV_Load_Wind_PV_NP_QE-Sample_4320.png}
\end{subfigure}
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/aqr_linear_model_samples/AQR_NRV_Load_Wind_PV_NP_QE-Sample_6336.png}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/aqr_gru_model_examples/AQR_GRU_NRV_Load_Wind_PV_NP_QE-Sample_6336.png}
\end{subfigure}
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/aqr_linear_model_samples/AQR_NRV_Load_Wind_PV_NP_QE-Sample_7008.png}
\caption{Autoregressive linear model}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/aqr_gru_model_examples/AQR_GRU_NRV_Load_Wind_PV_NP_QE-Sample_7008.png}
\caption{Autoregressive GRU model}
\end{subfigure}
\caption{Comparison of the autoregressive linear and GRU model}
\label{fig:ar_linear_gru_comparison}
\end{figure}
% other conclusion:

View File

@@ -0,0 +1,85 @@
\subsection{Diffusion}
Another type of model that can be used to generatively model the NRV is the diffusion model. This type of model is very popular for image generation. In the context of images, the diffusion model is trained by iteratively adding noise to a training image until there is only noise left. From this noise, the model tries to reverse the diffusion process to get the original image back. To sample new images using this model, a noise vector is sampled and iteratively denoised by the model. This process results in a new image.
This training process can also be used for other data types. An image is just a 2D grid of data points. A time series can be seen as a 1D sequence of data points. The diffusion model can thus be trained on the NRV data to generate new samples for a certain day based on a given input.
Once the diffusion model is trained, it can be used efficiently to generate new samples. The model can generate samples in parallel, which is not possible with autoregressive models. It combines the parallel sample generation of the non-autoregressive models with quarter NRV values that still depend on each other. A batch of noise vectors can be sampled and passed through the model in one batch to generate the new samples. The generated samples contain the 96 NRV values for the next day without needing to sample every quarter sequentially.
The model is trained in a completely different way than the quantile regression models. A simple implementation of the Denoising Diffusion Probabilistic Model (DDPM) is used to perform the experiments. More complex implementations with more advanced techniques could be used to improve the results. This is out of the scope of this thesis. The goal is to show that more recent generative models can also be used to model the NRV data. These results can then be compared to the quantile regression models to see if the diffusion model can generate better samples.
% TODO: In background information?
First of all, the model architecture needs to be chosen. The model takes multiple inputs which include the noisy NRV time series, the positional encoding of the current denoising step and the conditional input features. The model needs to predict the noise in the current time series. The time series can then be denoised by subtracting the predicted noise in every denoising step. Multiple model architectures can be used as long as the model can predict the noise in the time series. A simple feedforward neural network is used. The neural network consists of multiple linear layers with ReLU activation functions. To predict the noise in a noisy time series, the current denoising step index must also be provided. This integer is then transformed into a vector using sine and cosine functions. The positional encoding is then concatenated with the noisy time series and the conditional input features. This tensor is then passed through the first linear layer and activation function of the neural network. This results in a tensor of the hidden size that was chosen. Before passing this tensor to the next layer, the positional encoding and conditional input features are concatenated again. This process is repeated until the last layer is reached. This provides every layer in the neural network with the necessary information to predict the noise in the time series. The output of the last layer is then the predicted noise in the time series. The model is trained by minimizing the mean squared error between the predicted noise and the real noise in the time series.
Other hyperparameters that need to be chosen are the number of denoising steps, number of layers and hidden size of the neural network. Experiments are performed to get an insight into the influence these parameters have on the model performance. Results are shown in Table \ref{tab:diffusion_results}.
\begin{figure}[h]
\centering
\begin{tikzpicture}
% First row
% Node for Image 1
\node (img1) {\includegraphics[width=0.45\textwidth]{images/diffusion/results/intermediates/Testing Intermediates 864_Sample intermediate 1_00000000.jpeg}};
% Node for Image 2 with an arrow from Image 1
\node[right=of img1] (img2) {\includegraphics[width=0.45\textwidth]{images/diffusion/results/intermediates/Testing Intermediates 864_Sample intermediate 2_00000000.jpeg}};
\draw[-latex] (img1) -- (img2);
% Second row
% Node for Image 3 below Image 1 with an arrow from Image 2
\node[below=of img1] (img3) {\includegraphics[width=0.45\textwidth]{images/diffusion/results/intermediates/Testing Intermediates 864_Sample intermediate 3_00000000.jpeg}};
% Node for Image 4 with an arrow from Image 3
\node[right=of img3] (img4) {\includegraphics[width=0.45\textwidth]{images/diffusion/results/intermediates/Testing Intermediates 864_Sample intermediate 4_00000000.jpeg}};
\draw[-latex] (img3) -- (img4);
% Complex arrow from Image 2 to Image 3
% Calculate midpoint for the horizontal segment
\coordinate (Middle) at ($(img2.south)!0.5!(img3.north)$);
\draw[-latex] (img2.south) |- (Middle) -| (img3.north);
\end{tikzpicture}
\caption{Intermediate steps of the diffusion model for example 864 from the test set. The confidence intervals shown in the plots are made using 100 samples.}
\label{fig:diffusion_intermediates}
\end{figure}
In Figure \ref{fig:diffusion_intermediates}, multiple intermediate steps of the denoising process are shown as an example from the test set. The model starts with noisy full-day NRV samples which can be seen in the first steps. These noisy samples are then denoised in multiple steps until realistic samples are generated. This can be seen in the last image in the figure. It can be observed that the confidence intervals get more narrow over time as the noise is removed from the samples.
\begin{table}[H]
\centering
\begin{adjustbox}{width=\textwidth,center}
\begin{tabular}{@{}cccccccc@{}}
\toprule
Features & Diffusion Steps & Layers & Hidden Size & MSE & MAE & CRPS \\
\midrule
NRV & & & & & & & \\
& 300 & 2 & 256 & 57129.71 & 185.56 & 81.00 \\
& 300 & 2 & 512 & 48364.77 & 169.39 & 79.13 \\
& 300 & 2 & 1024 & 43540.50 & 159.17 & 78.27 \\
& 300 & 2 & 2048 & 41946.52 & 155.85 & 78.19 \\
& 300 & 3 & 256 & 52741.73 & 177.09 & 79.55 \\
& 300 & 3 & 512 & 45048.05 & 161.89 & 78.46 \\
& 300 & 3 & 1024 & 42089.13 & 155.97 & 78.25 \\
& 300 & 3 & 2048 & 41797.63 & 154.69 & 78.05 \\
& 300 & 3 & 4096 & 39943.93 & 151.62 & 77.59 \\
& 300 & 4 & 256 & 56939.68 & 185.07 & 81.16 \\
& 300 & 4 & 512 & 46225.72 & 164.74 & 79.19 \\
& 300 & 4 & 1024 & 42984.02 & 157.54 & 77.92 \\
& 300 & 4 & 2048 & 41145.32 & 154.14 & 78.18 \\
\midrule
NRV + Load + Wind + PV + NP & & & & & & & \\
& 300 & 2 & 256 & 63337.36 & 196.21 & 84.29 \\
& 300 & 2 & 512 & 52745.92 & 177.16 & 81.57 \\
& 300 & 2 & 1024 & 47178.91 & 166.89 & 80.30 \\
& 300 & 3 & 256 & 66148.13 & 200.34 & 85.31 \\
& 300 & 3 & 512 & 53159.99 & 178.46 & 81.95 \\
& 300 & 3 & 1024 & 47815.13 & 167.22 & 81.16 \\
& 300 & 3 & 2048 & 46448.90 & 164.50 & 81.06 \\
& 300 & 4 & 1024 & 47483.05 & 166.97 & 81.32 \\
& 300 & 4 & 2048 & 47076.77 & 166.06 & 81.06 \\
\bottomrule
\end{tabular}
\end{adjustbox}
\caption{Simple diffusion model results.}
\label{tab:diffusion_results}
\end{table}
In Table \ref{tab:diffusion_results}, the results of the experiments for the diffusion model can be seen. The diffusion model that was used is a simple implementation of the Denoising Diffusion Probabilistic Model (DDPM). The model itself consists of multiple linear layers with ReLU activation functions. The diffusion steps were set to 300 for the experiments. This number was determined by doing a few experiments with more and fewer steps. The model performance did not improve when more steps were used. This parameter could be further optimized together with the other parameters to find the best-performing model. This would take a lot of time and is not the goal of this thesis.
The first observation that can be made is the higher error metrics when more input features are used. This is counterintuitive because the model has more information to generate the samples. The reason for this behavior is not immediately clear. One reason could be that the model conditioning is not optimal. Currently, the input features are passed to every layer of the model together with the time series that needs to be denoised. The model could be improved by using a more advanced conditioning mechanism such as classifier guidance or classifier-free guidance.

View File

@@ -0,0 +1,136 @@
\subsubsection{GRU Model}
Another popular architecture to model sequential data is a recurrent neural network. There exist two main types of recurrent neural networks, the Long Short-Term Memory (LSTM) and the Gated Recurrent Unit (GRU). The GRU is a simplified version of the LSTM, which has fewer parameters and is computationally less expensive. The GRU model can be trained for quantile regression in the same way as the linear and non-linear models using the pinball loss. There is, however, a difference in how the input data is structured and provided to the model. For linear and non-linear models, the data is provided in the shape of $(batch\_size, num\_features)$. The recurrent neural network, on the other hand, expects the input data to be structured as $(batch\_size, time\_steps, num\_features\_per\_timestep)$. This is also explained in the background section about the recurrent neural network.
The GRU model architecture to predict the NRV quantiles is shown in Table \ref{tab:gru_model_architecture}. The model starts with an embedding layer that converts the quarter of the day into an embedding. This layer concatenates the other input features with the quarter embedding. The input of the TimeEmbedding is of shape (Batch Size, Time Steps, Input Features Size). The output of this layer is then passed to the GRU layer. The GRU layer outputs the hidden state for every time step. This results in a tensor of shape (Batch Size, Time Steps, Hidden Size). Only the last hidden state is relevant for the prediction of the NRV quantiles for the next quarter. The last hidden state should contain all the necessary information from the previous quarters to make the prediction. The last hidden state is then passed through a linear layer to output the quantiles for the NRV prediction. The input and output of the model depend on whether the model is trained in an autoregressive or non-autoregressive way. The non-autoregressive variant of the GRU model has two days' worth of time steps. This results in $96*2$ time steps. The model then needs to output $(96 * \text{number\_of\_quantiles})$ NRV quantile values.
TODO: Zielige visualisatie van model nu
\begin{table}[H]
\centering
\begin{tabularx}{\textwidth}{Xr} % Set the table width to the text width
\toprule
\textbf{Layer (Type)} & \textbf{Output Shape} \\ \midrule
\midrule
Time Embedding & [B, Time Steps, Input + Time Embedding Size] \\
\midrule
GRU & [B, Time Steps, Hidden Size] \\
\multicolumn{2}{c}{\textit{Last state of GRU passed [B, Hidden Size]}} \\
Linear & [B, Number of quantiles] \\
\bottomrule
\end{tabularx}
\caption{GRU Model Architecture}
\label{tab:gru_model_architecture}
\end{table}
Multiple experiments are conducted to find which hyperparameters and input features work best for the GRU model. The results of the GRU model are shown in Table \ref{tab:autoregressive_gru_model_results}.
\begin{table}[H]
\centering
\begin{adjustbox}{width=\textwidth,center}
\begin{tabular}{@{}cccccccccc@{}}
\toprule
Features & Layers & Hidden Size & \multicolumn{2}{c}{MSE} & \multicolumn{2}{c}{MAE} & \multicolumn{2}{c}{CRPS} \\
\cmidrule(lr){4-5} \cmidrule(lr){6-7} \cmidrule(lr){8-9}
& & & AR & NAR & AR & NAR & AR & NAR \\
\midrule
NRV & & & & & & & & \\
& 2 & 256 & 39838.35 & 40097.62 & 150.81 & 150.37 & 85.04 & 76.12 \\
& 4 & 256 & 39506.55 & 39968.96 & 149.81 & 150.04 & 85.46 & 76.07 \\
& 8 & 256 & 37747.11 & 40400.37 & 146.67 & 151.03 & 83.67 & 76.59 \\
& 2 & 512 & 39955.79 & 40917.24 & 150.77 & 152.04 & 87.88 & 76.06 \\
& 4 & 512 & 43301.13 & 39954.62 & 156.73 & 150.14 & 89.78 & 76.25 \\
& 8 & 512 & 37681.71 & 40379.14 & 146.62 & 151.05 & 83.08 & 76.42 \\
\midrule
NRV + Load & & & & & & & & & \\
& 2 & 256 & 38427.91 & 40024.14 & 147.27 & 150.06 & 84.17 & 76.04 \\
& 4 & 256 & 38984.44 & 40480.73 & 147.91 & 151.24 & 85.91 & 75.82 \\
& 8 & 256 & 38343.98 & 39135.60 & 146.44 & 148.85 & 84.22 & 76.19 \\
& 2 & 512 & 41496.77 & 40808.04 & 153.53 & 151.89 & 88.26 & 75.43 \\
& 4 & 512 & 38000.40 & 40260.01 & 146.10 & 150.57 & 83.99 & 75.38 \\
& 8 & 512 & 41104.28 & 39907.44 & 152.13 & 150.11 & 89.13 & 76.42 \\
\midrule
NRV + Load + PV\\ + Wind & & & & & & & & & \\
& 4 & 256 & 39872.46 & 40708.93 & 149.34 & 151.32 & 85.91 & 75.93 \\
& 8 & 256 & 39704.37 & 40292.25 & 148.59 & 151.19 & 85.62 & 75.94 \\
& 4 & 512 & 39024.27 & 41580.29 & 147.91 & 153.39 & 84.18 & 75.84 \\
& 8 & 512 & 42397.86 & 41043.88 & 154.00 & 152.63 & 89.87 & 76.35 \\
\midrule
NRV + Load + PV\\ + Wind + Net Position \\+ QE (5 dim) & & & & & & & & & \\
& 4 & 256 & 39906.53 & 40881.92 & 149.78 & 152.34 & 84.88 & 76.15 \\
& 8 & 256 & 37675.15 & 40159.91 & 145.39 & 150.42 & 83.37 & 75.89 \\
& 4 & 512 & & 40613.54 & & 151.17 & & 75.33 \\
& 8 & 512 & 35238.98 & 39896.57 & 141.02 & 149.96 & 80.92 & 75.92 \\
\bottomrule
\end{tabular}
\end{adjustbox}
\caption{Autoregressive (AR) and non-autoregressive (NAR) GRU quantile regression model results. All the models used a dropout of 0.2.}
\label{tab:autoregressive_gru_model_results}
\end{table}
The results show the same behavior for the GRU model as for the linear and non-linear models. The performance of the autoregressive model increases a bit when more features are added. The performance of the non-autoregressive model does not increase that much when adding new features. The reason for this is the same as for the linear and non-linear models. There is a large input size for the non-autoregressive model, which makes it harder to learn the dependencies between the features. The non-autoregressive model has to predict 96 quarters, which is a complex task. When comparing the results of the autoregressive GRU model and the non-autoregressive GRU model, the same observation can be made as for the linear and non-linear models. The CRPS is always lower for the non-autoregressive model while the MSE and MAE are higher most of the time.
% TODO: explain from which models the examples come from
\begin{figure}[H]
\centering
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/aqr_gru_model_examples/AQR_GRU_NRV_Load_Wind_PV_NP_QE-Sample_864.png}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/naqr_gru_model_examples/NAQR_GRU_NRV_Load_Wind_PV_NP_QE-Sample_864.png}
\end{subfigure}
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/aqr_gru_model_examples/AQR_GRU_NRV_Load_Wind_PV_NP_QE-Sample_4320.png}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/naqr_gru_model_examples/NAQR_GRU_NRV_Load_Wind_PV_NP_QE-Sample_4320.png}
\end{subfigure}
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/aqr_gru_model_examples/AQR_GRU_NRV_Load_Wind_PV_NP_QE-Sample_6336.png}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/naqr_gru_model_examples/NAQR_GRU_NRV_Load_Wind_PV_NP_QE-Sample_6336.png}
\end{subfigure}
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/aqr_gru_model_examples/AQR_GRU_NRV_Load_Wind_PV_NP_QE-Sample_7008.png}
\caption{Autoregressive GRU model}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/naqr_gru_model_examples/NAQR_GRU_NRV_Load_Wind_PV_NP_QE-Sample_7008.png}
\caption{Non-autoregressive GRU model}
\end{subfigure}
\caption{Comparison of the autoregressive and non-autoregressive GRU model examples.}
\label{fig:gru_model_sample_comparison}
\end{figure}
The examples from the test set using the GRU models are shown in Figure \ref{fig:gru_model_sample_comparison}. Again, the same behavior can be observed as for the linear and non-linear models. The non-autoregressive examples stay around zero and do not follow the trend of the real NRV values. The autoregressive examples look a lot better visually and follow the trend of the real NRV values much more.
\begin{figure}[ht]
\centering
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/quantile_performance/AQR_GRU_QP_Train.jpeg}
\caption{AR - Train}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/quantile_performance/AQR_GRU_QP_Test.jpeg}
\caption{AR - Test}
\end{subfigure}
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/quantile_performance/NAQR_GRU_QP_Train.jpeg}
\caption{NAR - Train}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/quantile_performance/NAQR_GRU_QP_Test.jpeg}
\caption{NAR - Test}
\end{subfigure}
\caption{Over/underestimation of the quantiles for the autoregressive and non-autoregressive GRU models. Both the quantile performance for the training and test set are shown. The plots are generated using the input features NRV, Load, Wind, PV, Net Position, and the quarter embedding (only for the autoregressive model).}
\label{fig:gru_model_quantile_over_underestimation}
\end{figure}
The plots in Figure \ref{fig:gru_model_quantile_over_underestimation} show the over/underestimation of the learned quantiles for the GRU models. The fraction of real NRV values under the predicted quantiles of the training set is very close to the ideal fraction. The autoregressive model, however, shows a slight underestimation for almost all quantiles. Looking at the test set, the lower quantiles are overestimated for the autoregressive model while the higher quantiles are underestimated. The quantile predictions of the test set for the non-autoregressive model are all underestimated. This means a lower fraction of real NRV values is below the quantiles than wanted.

View File

@@ -0,0 +1,167 @@
\relax
\providecommand\hyper@newdestlabel[2]{}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {6.1.1}Linear Model}{22}{subsubsection.6.1.1}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces Linear model results\relax }}{22}{table.caption.9}\protected@file@percent }
\newlabel{tab:linear_model_baseline_results}{{3}{22}{Linear model results\relax }{table.caption.9}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Mean and standard deviation of the NRV values over the quarter of the day\relax }}{24}{figure.caption.10}\protected@file@percent }
\newlabel{fig:nrv_mean_std_over_quarter}{{6}{24}{Mean and standard deviation of the NRV values over the quarter of the day\relax }{figure.caption.10}{}}
\@writefile{lot}{\contentsline {table}{\numberline {4}{\ignorespaces Autoregressive linear model results with time features\relax }}{25}{table.caption.11}\protected@file@percent }
\newlabel{tab:autoregressive_linear_model_quarter_embedding_baseline_results}{{4}{25}{Autoregressive linear model results with time features\relax }{table.caption.11}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces Comparison of the autoregressive and non-autoregressive linear model samples.\relax }}{25}{figure.caption.12}\protected@file@percent }
\newlabel{fig:linear_model_sample_comparison}{{7}{25}{Comparison of the autoregressive and non-autoregressive linear model samples.\relax }{figure.caption.12}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces Samples for two examples from the test set for the autoregressive and non-autoregressive linear model. The real NRV is shown in orange.\relax }}{26}{figure.caption.13}\protected@file@percent }
\newlabel{fig:linear_model_samples_comparison}{{8}{26}{Samples for two examples from the test set for the autoregressive and non-autoregressive linear model. The real NRV is shown in orange.\relax }{figure.caption.13}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces Over/underestimation of the quantiles for the autoregressive and non-autoregressive linear models. Both the quantile performance for the training and test set are shown. The plots are generated using the input features NRV, Load, Wind, PV, Net Position, and the quarter embedding (only for the autoregressive model).\relax }}{27}{figure.caption.14}\protected@file@percent }
\newlabel{fig:linear_model_quantile_over_underestimation}{{9}{27}{Over/underestimation of the quantiles for the autoregressive and non-autoregressive linear models. Both the quantile performance for the training and test set are shown. The plots are generated using the input features NRV, Load, Wind, PV, Net Position, and the quarter embedding (only for the autoregressive model).\relax }{figure.caption.14}{}}
\@setckpt{sections/results/linear}{
\setcounter{page}{29}
\setcounter{equation}{8}
\setcounter{enumi}{0}
\setcounter{enumii}{0}
\setcounter{enumiii}{0}
\setcounter{enumiv}{0}
\setcounter{footnote}{0}
\setcounter{mpfootnote}{0}
\setcounter{part}{0}
\setcounter{section}{6}
\setcounter{subsection}{1}
\setcounter{subsubsection}{1}
\setcounter{paragraph}{0}
\setcounter{subparagraph}{0}
\setcounter{figure}{9}
\setcounter{table}{4}
\setcounter{parentequation}{0}
\setcounter{float@type}{4}
\setcounter{caption@flags}{6}
\setcounter{continuedfloat}{0}
\setcounter{subfigure}{4}
\setcounter{subtable}{0}
\setcounter{tabx@nest}{0}
\setcounter{listtotal}{0}
\setcounter{listcount}{0}
\setcounter{liststart}{0}
\setcounter{liststop}{0}
\setcounter{citecount}{0}
\setcounter{citetotal}{0}
\setcounter{multicitecount}{0}
\setcounter{multicitetotal}{0}
\setcounter{instcount}{5}
\setcounter{maxnames}{2}
\setcounter{minnames}{1}
\setcounter{maxitems}{999}
\setcounter{minitems}{1}
\setcounter{citecounter}{0}
\setcounter{maxcitecounter}{0}
\setcounter{savedcitecounter}{0}
\setcounter{uniquelist}{0}
\setcounter{uniquename}{0}
\setcounter{refsection}{0}
\setcounter{refsegment}{0}
\setcounter{maxextratitle}{0}
\setcounter{maxextratitleyear}{0}
\setcounter{maxextraname}{0}
\setcounter{maxextradate}{0}
\setcounter{maxextraalpha}{0}
\setcounter{abbrvpenalty}{50}
\setcounter{highnamepenalty}{50}
\setcounter{lownamepenalty}{25}
\setcounter{maxparens}{3}
\setcounter{parenlevel}{0}
\setcounter{blx@maxsection}{0}
\setcounter{mincomprange}{10}
\setcounter{maxcomprange}{100000}
\setcounter{mincompwidth}{1}
\setcounter{afterword}{0}
\setcounter{savedafterword}{0}
\setcounter{annotator}{0}
\setcounter{savedannotator}{0}
\setcounter{author}{0}
\setcounter{savedauthor}{0}
\setcounter{bookauthor}{0}
\setcounter{savedbookauthor}{0}
\setcounter{commentator}{0}
\setcounter{savedcommentator}{0}
\setcounter{editor}{0}
\setcounter{savededitor}{0}
\setcounter{editora}{0}
\setcounter{savededitora}{0}
\setcounter{editorb}{0}
\setcounter{savededitorb}{0}
\setcounter{editorc}{0}
\setcounter{savededitorc}{0}
\setcounter{foreword}{0}
\setcounter{savedforeword}{0}
\setcounter{holder}{0}
\setcounter{savedholder}{0}
\setcounter{introduction}{0}
\setcounter{savedintroduction}{0}
\setcounter{namea}{0}
\setcounter{savednamea}{0}
\setcounter{nameb}{0}
\setcounter{savednameb}{0}
\setcounter{namec}{0}
\setcounter{savednamec}{0}
\setcounter{translator}{0}
\setcounter{savedtranslator}{0}
\setcounter{shortauthor}{0}
\setcounter{savedshortauthor}{0}
\setcounter{shorteditor}{0}
\setcounter{savedshorteditor}{0}
\setcounter{narrator}{0}
\setcounter{savednarrator}{0}
\setcounter{execproducer}{0}
\setcounter{savedexecproducer}{0}
\setcounter{execdirector}{0}
\setcounter{savedexecdirector}{0}
\setcounter{with}{0}
\setcounter{savedwith}{0}
\setcounter{labelname}{0}
\setcounter{savedlabelname}{0}
\setcounter{institution}{0}
\setcounter{savedinstitution}{0}
\setcounter{lista}{0}
\setcounter{savedlista}{0}
\setcounter{listb}{0}
\setcounter{savedlistb}{0}
\setcounter{listc}{0}
\setcounter{savedlistc}{0}
\setcounter{listd}{0}
\setcounter{savedlistd}{0}
\setcounter{liste}{0}
\setcounter{savedliste}{0}
\setcounter{listf}{0}
\setcounter{savedlistf}{0}
\setcounter{location}{0}
\setcounter{savedlocation}{0}
\setcounter{organization}{0}
\setcounter{savedorganization}{0}
\setcounter{origlocation}{0}
\setcounter{savedoriglocation}{0}
\setcounter{origpublisher}{0}
\setcounter{savedorigpublisher}{0}
\setcounter{publisher}{0}
\setcounter{savedpublisher}{0}
\setcounter{language}{0}
\setcounter{savedlanguage}{0}
\setcounter{origlanguage}{0}
\setcounter{savedoriglanguage}{0}
\setcounter{citation}{0}
\setcounter{savedcitation}{0}
\setcounter{pageref}{0}
\setcounter{savedpageref}{0}
\setcounter{textcitecount}{0}
\setcounter{textcitetotal}{0}
\setcounter{textcitemaxnames}{0}
\setcounter{biburlbigbreakpenalty}{100}
\setcounter{biburlbreakpenalty}{200}
\setcounter{biburlnumpenalty}{0}
\setcounter{biburlucpenalty}{0}
\setcounter{biburllcpenalty}{0}
\setcounter{smartand}{1}
\setcounter{bbx:relatedcount}{0}
\setcounter{bbx:relatedtotal}{0}
\setcounter{section@level}{0}
\setcounter{Item}{0}
\setcounter{Hfootnote}{0}
\setcounter{bookmark@seq@number}{23}
}

View File

@@ -0,0 +1,176 @@
\subsubsection{Linear Model}
% TODO: explained in section reference?
The simplest model to be trained for the NRV modeling is the linear model. The linear model is trained using the pinball loss function explained in the section above. The outputs of the model are values for the chosen quantiles. The linear model can be trained in an autoregressive and non-autoregressive way. Both methods will be compared to each other. The linear model is trained using the Adam optimizer with a learning rate of 1e-4. Early stopping is used with a patience of 5 epochs. The linear model is evaluated using the mean squared error (MSE), mean absolute error (MAE), and continuous ranked probability score (CRPS). The influence of the input features is also evaluated by training the models with different input feature sets.
There is a big difference in the number of parameters between the autoregressive linear model and the non-autoregressive linear model. The autoregressive model only needs to output the NRV quantiles for one value while the non-autoregressive model needs to output the NRV quantiles for all the quarters of the day. Assuming thirteen quantiles are used, the autoregressive has 13 output parameters while the non-autoregressive model has 13 * 96 = 1248 output parameters. The total number of parameters for the autoregressive model is 13 * (number of input features + 1) while the total number of parameters for the non-autoregressive model is 13 * 96 * (number of input features + 1). Assuming only the NRV history of the previous day is used as input features, the autoregressive model has 1261 trainable parameters while the non-autoregressive model has 121056 parameters. This is a huge difference in the number of parameters and thus the complexity of the model.
\begin{table}[ht]
\centering
\begin{tabular}{@{}lcccccc@{}}
\toprule
& \multicolumn{2}{c}{MSE} & \multicolumn{2}{c}{MAE} & \multicolumn{2}{c}{CRPS} \\
\cmidrule(lr){2-3} \cmidrule(lr){4-5} \cmidrule(lr){6-7}
& AR & NAR & AR & NAR & AR & NAR \\
\midrule
NRV & 39222.41 & 41219.98 & 152.49 & 152.26 & 91.56 & \textbf{73.97} \\
NRV + Load & 39266.29 & 47045.17 & 152.54 & 163.24 & 90.36 & 79.72 \\
% NRV + PV & 37489.68 & & 149.32 & & 89.42 & \\ No NAR experiment
NRV + Load + PV & 37642.66 & 46404.63 & 149.90 & 161.82 & 89.34 & 79.74 \\
NRV + Load + Wind & 39284.68 & 48148.10 & 152.32 & 164.84 & 88.60 & 79.51 \\
NRV + Load + PV + Wind & 36134.87 & 50312.85 & 146.22 & 169.06 & 84.56 & 79.85 \\
NRV + Load + Wind + NP & 37890.66 & 49442.48 & 149.37 & 167.90 & 86.19 & 76.72 \\
NRV + Load + PV + Wind + NP & \textbf{35725.42} & 49132.26 & \textbf{145.64} & 167.37 & 83.30 & 78.75 \\
\bottomrule
\end{tabular}
\caption{Linear model results}
\label{tab:linear_model_baseline_results}
\end{table}
Comparing the results of the autoregressive and non-autoregressive linear models, it can be seen that the non-autoregressive model has a higher MSE and MAE on the test set. The CRPS is, however, lower for the non-autoregressive model. The CRPS is calculated using the outputted quantiles while the MSE and MAE are calculated by sampling from the reconstructed distributions. Because of error propagation in the autoregressive model, the outputted quantiles also contain more error which leads to a higher CRPS. The non-autoregressive model does not suffer from this problem. During the training of the autoregressive model, the model does not take into account that it will be used to generate full-day samples and thus the error is propagated. This is one possible explanation for the higher CRPS of the autoregressive model.
The MSE and MAE of the non-autoregressive model are higher than the autoregressive model. This can be explained by the fact that the non-autoregressive model does not take into account the previous sampled value. Sampling is done for every quarter of the day independently. This can lead to large differences between the sampled values and thus can increase the MSE and MAE. The autoregressive model does take into account the previous sampled value and can adapt its quantile predictions based on this value so a smoother and more accurate sample can be generated.
% TODO: Check listing of features -> hoofdletters en shit
Another thing to note is the influence of the input features on the non-autoregressive linear model. When increasing the number of input features, the evaluation metrics are a lot worse in comparison with only using the NRV history of the previous day. A reason for this behavior could be that the model is not able to capture the patterns in the data because of the huge number of input parameters. When using the NRV, Load, Photovoltaic power production, Wind power production, and the Net Position as input features, the non-autoregressive model has an input size of 864. This increases the complexity of the model as well. The total number of trainable parameters becomes 1,079,520. This is a huge number of parameters and the model is not able to learn the patterns in the data anymore.
The performance of the autoregressive linear model, however, improves with the addition of more input features. When using the NRV, Load, Photovoltaic power production, Wind power production, and the Net Position as input features, the autoregressive model has an input size of 484. This is almost half the input size of the non-autoregressive model. The total number of trainable parameters becomes 6,305, which is far fewer than for the non-autoregressive model.
An important thing to note is that the autoregressive model needs an additional feature to know which quarter of the day it is modeling. The quarter of the day also influences the value of the NRV. This can easily be seen in Figure \ref{fig:nrv_mean_std_over_quarter}. The figure shows the mean and standard deviation of the NRV values over the quarter of the day. These values change over the day which means the quarter is very valuable information for the model. The non-autoregressive model, on the other hand, does not need this information because it models all the quarters of the day at once.
\begin{figure}[ht]
\centering
\includegraphics[width=\textwidth]{images/quantile_regression/nrv_mean_std_over_quarter.png}
\caption{Mean and standard deviation of the NRV values over the quarter of the day}
\label{fig:nrv_mean_std_over_quarter}
\end{figure}
Providing the autoregressive model with the quarter of the day can be done in multiple ways. The quarter of the day can be provided as a one-hot encoded vector. The cyclic nature of the quarter would not be captured using a one-hot encoded vector. The vectors for quarter 0 and quarter 95 would be very different while they should be very close to each other. Other methods exist that do take the cyclic property of the quarter into account. Trigonometric functions can be used to provide the quarter of the day information. The quarter of the day can be mapped to a sine and cosine value which can be used as input features. The sine and cosine values are calculated as follows:
\begin{equation}
\text{sin}(\frac{2\pi}{96} \times \text{quarter}) \quad \text{and} \quad \text{cos}(\frac{2\pi}{96} \times \text{quarter})
\end{equation}
The sine and cosine values are then concatenated with the input features. Another method that can be used is adding an embedding layer to the model. The discrete quarter of the day value can then be mapped to a vector. The embedding layer itself is learned during the training process which allows the model to learn patterns between quarters. The length of the embedding vector can be chosen and experimented with. The quarter-of-the-day information is then concatenated with the input features. Other information (e.g., day of the week, month, year) can also easily be added to the model using this method by just increasing the size of the embedding layer. The results of the linear model with the quarter information are shown in Table \ref{tab:autoregressive_linear_model_quarter_embedding_baseline_results}.
% TODO: Ask Jonas: Find cleaner way to present this table (remove repetition)
% TODO: Add more time information like day of week, month
\begin{table}[ht]
\centering
\begin{tabular}{@{}lccc@{}}
\toprule
& \multicolumn{1}{c}{MSE} & \multicolumn{1}{c}{MAE} & \multicolumn{1}{c}{CRPS} \\
\midrule
NRV & 39222.41 & 152.49 & 91.56 \\
NRV + QT & 39069.96 & 152.06 & 90.90 \\
NRV + QE \textbf{(2 dim)} & \textbf{38216.27} & \textbf{150.41} & \textbf{89.69} \\
NRV + QE \textbf{(5 dim)} & 38617.17 & 151.20 & 89.72 \\
NRV + QE \textbf{(8 dim)} & 38423.30 & 150.89 & 89.81 \\
\midrule
NRV + Load + PV + Wind + NP & 35725.42 & 145.64 & 83.30 \\
NRV + Load + PV + Wind + NP + QT & 34783.13 & 143.98 & 84.21 \\
NRV + Load + PV + Wind + NP + QE \textbf{(2 dim)} & 35746.01 & 146.01 & 85.54 \\
NRV + Load + PV + Wind + NP + QE \textbf{(5 dim)} & \textbf{34031.71} & \textbf{142.29} & \textbf{79.99} \\
\bottomrule
\end{tabular}
\caption{Autoregressive linear model results with time features}
\label{tab:autoregressive_linear_model_quarter_embedding_baseline_results}
\end{table}
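The results show that adding quarter information generally improves the autoregressive linear model, although not in every configuration. When only the NRV history is used, both the trigonometric quarter features (QT) and the quarter embedding (QE) improve all evaluation metrics. When all input features are used, the 2-dimensional embedding does not improve on the model without quarter information, while the 5-dimensional embedding achieves the best MSE, MAE, and CRPS. With a suitable embedding size, the quarter embedding is a valuable feature for the model.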
Some examples of the generated full-day NRV samples are shown in Figure \ref{fig:linear_model_sample_comparison}. The examples are taken from the test set. The figure shows the confidence intervals of the NRV generations and the mean NRV prediction. The confidence intervals and mean are calculated based on 1000 generated full-day NRV samples. The samples were generated using the input features NRV, Load, Wind, PV, Net Position, and the quarter embedding for the autoregressive model.
\begin{figure}[H]
\centering
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/aqr_linear_model_samples/AQR_NRV_Load_Wind_PV_NP_QE-Sample_864.png}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/naqr_linear_model_samples/NAQR_NRV_Load_Wind_PV_NP-Sample_864.png}
\end{subfigure}
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/aqr_linear_model_samples/AQR_NRV_Load_Wind_PV_NP_QE-Sample_4320.png}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/naqr_linear_model_samples/NAQR_NRV_Load_Wind_PV_NP-Sample_4320.png}
\end{subfigure}
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/aqr_linear_model_samples/AQR_NRV_Load_Wind_PV_NP_QE-Sample_6336.png}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/naqr_linear_model_samples/NAQR_NRV_Load_Wind_PV_NP-Sample_6336.png}
\end{subfigure}
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/aqr_linear_model_samples/AQR_NRV_Load_Wind_PV_NP_QE-Sample_7008.png}
\caption{Autoregressive linear model}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/naqr_linear_model_samples/NAQR_NRV_Load_Wind_PV_NP-Sample_7008.png}
\caption{Non-autoregressive linear model}
\end{subfigure}
\caption{Comparison of the autoregressive and non-autoregressive linear model samples.}
\label{fig:linear_model_sample_comparison}
\end{figure}
When looking at the examples in Figure \ref{fig:linear_model_sample_comparison}, it can be seen that the autoregressive linear model is already modeling the NRV quite well. The confidence intervals are quite small and the mean of the samples follows the trend of the real NRV. The mean of the samples, however, is way smoother than the real NRV. The real NRV has more peaks and fluctuations. The examples of the non-autoregressive model show another behavior. The confidence intervals are not as contained as those of the autoregressive model but fluctuate a lot more. A lot of peaks can be observed in the examples. The reason for this behavior is that the non-autoregressive model does not take into account the previous sampled value. The sampled value of the next quarter is not dependent on the sampled value of the previous quarter. This can lead to a large difference between these values which results in samples with a high variance. The mean of the samples of the non-autoregressive model, however, does not follow the trend of the real NRV as well as the autoregressive model. The mean stays in a narrow range around zero.
Some samples for the examples from the test set are visualized in Figure \ref{fig:linear_model_samples_comparison}. For the autoregressive model, the samples largely follow the trend of the real NRV while the non-autoregressive model has a lot of fluctuations and peaks. By visually looking at the samples themselves, the samples of the autoregressive model are more realistic than the samples of the non-autoregressive model. \\
\begin{figure}[ht]
\centering
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/aqr_linear_model_samples/AQR_NRV_Load_Wind_PV_NP-QE-Example_864_samples.png}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/naqr_linear_model_samples/NAQR_NRV_Load_Wind_PV_NP-Example_864_samples.png}
\end{subfigure}
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/aqr_linear_model_samples/AQR_NRV_Load_Wind_PV_NP-QE-Example_4320_samples.png}
\caption{Autoregressive linear model}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/naqr_linear_model_samples/NAQR_NRV_Load_Wind_PV_NP-Example_4320_samples.png}
\caption{Non-autoregressive linear model}
\end{subfigure}
\caption{Samples for two examples from the test set for the autoregressive and non-autoregressive linear model. The real NRV is shown in orange.}
\label{fig:linear_model_samples_comparison}
\end{figure}
% TODO: Talk about the over/underestimation of the quantiles for the models. Plots have been made for this.
Another way to evaluate the performance of the models is to look at the over/underestimation of the quantiles. For each day and every quarter in the test set, the quantiles are predicted by the model. Then for every quantile, it is checked how many times the real NRV is below the predicted quantile. For example, for the 10\% quantile, around 10\% of the real NRV values should be below the predicted quantile. This can be plotted for every quantile. These can be seen in Figure \ref{fig:linear_model_quantile_over_underestimation}. The plots show the over/underestimation of the quantiles for the autoregressive and non-autoregressive linear models.
\begin{figure}[ht]
\centering
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/quantile_performance/AQR_Quantile_Performance_Training.jpeg}
\caption{AR - Train}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/quantile_performance/AQR_Quantile_Performance_Test.jpeg}
\caption{AR - Test}
\end{subfigure}
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/quantile_performance/NAQR_Quantile_Performance_Training.jpeg}
\caption{NAR - Train}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/quantile_performance/NAQR_Quantile_Performance_Test.jpeg}
\caption{NAR - Test}
\end{subfigure}
\caption{Over/underestimation of the quantiles for the autoregressive and non-autoregressive linear models. Both the quantile performance for the training and test set are shown. The plots are generated using the input features NRV, Load, Wind, PV, Net Position, and the quarter embedding (only for the autoregressive model).}
\label{fig:linear_model_quantile_over_underestimation}
\end{figure}
% TODO: Over estimation and under estimation used correctly?
Multiple observations can be made when looking at the quantile performances in Figure \ref{fig:linear_model_quantile_over_underestimation}. The fraction of the real NRV values that are below the predicted quantiles is very close to the expected fraction for the non-autoregressive model on the training set. The autoregressive model has a bit more trouble in the quantile range of 0.4 to 0.6. There, the model overestimates the quantiles. This means the model is predicting the quantile values too high which results in a bigger fraction of the real NRV under the quantile prediction. The test set shows a similar behavior for the autoregressive model with an additional small overestimation at the 0.95 and 0.99 quantiles. The non-autoregressive model has another behavior on the test set. There it can be observed that the model is underestimating the quantiles in the quantile range of 0.15 to 0.99. The reason for this different behavior in comparison with the training set can be overfitting.
Overall, the linear model is a good baseline to compare more complex models. It is, however, not able to capture the complex patterns in the data. In particular, the non-autoregressive model has a lot of trouble when more input features are added and the complexity of the model increases.

View File

@@ -0,0 +1,140 @@
\subsubsection{Non-Linear Model}
Adding nonlinearity to the model can be done by adding some non-linear activations between linear layers. This improves the model's ability to learn more complex patterns in the data. The model is trained the same way as the linear model for quantile regression using the pinball loss. Because a non-linear model is more complex, it is more prone to overfitting the training data. Because of this, dropout layers are added to the model to prevent overfitting.
The architecture of the non-linear model is illustrated in Table \ref{tab:non_linear_model_architecture}. The autoregressive model begins with an input layer that converts the quarter of the day into an embedding. This layer concatenates the other input features with the quarter embedding. These combined features are then processed through a sequence of layers:
\begin{itemize}
\item Linear layer: Transforms input features to higher-dimensional space defined by hidden size.
\item ReLU Activation Function: Introduces non-linearity to the model to learn complex patterns. This also helps with the vanishing gradient problems with deep neural networks.
\item Dropout Layer: Regularizes the model to prevent overfitting. During training, random neurons are set to zero.
\end{itemize}
This sequence of layers is repeated N times to increase the depth of the model and enhance its ability to learn complex patterns. The final layer of the network is a linear layer that outputs the quantiles for the NRV prediction. For an autoregressive model, this is just the quantiles for a single quarter, whereas for a non-autoregressive model, the quantiles for every quarter of the day are outputted. The number of outputs is then the number of quarters in a day multiplied by the number of quantiles used.
\begin{table}[H]
\centering
\begin{tabularx}{\textwidth}{Xr} % Set the table width to the text width
\toprule
\textbf{Layer (Type)} & \textbf{Output Shape} \\ \midrule
\multicolumn{2}{c}{\textit{Only for autoregressive model}} \\
Time Embedding (Embedding) & [B, Input Features Size + Time Embedding Size] \\
\midrule
% Repeated Block
\multicolumn{2}{c}{\textit{Repeated Block (N times)}} \\
Linear (Linear) & [B, Hidden Size] \\
ReLU (Activation) & [B, Hidden Size] \\
Dropout (Regularization) & [B, Hidden Size] \\
% End of Repeated Block
\midrule
Linear (Linear) & [B, Number of quantiles] \\
\bottomrule
\end{tabularx}
\caption{Non-linear Quantile Regression Model Architecture}
\label{tab:non_linear_model_architecture}
\end{table}
While this non-linear model is still quite simple, it offers flexibility in tuning a limited set of hyperparameters. The hidden size of the linear layers and the number of layers can be experimented with, which can significantly influence the model's performance. The experiments are executed with the same quantiles as the linear model. Multiple experiments are executed with different hyperparameters and input features. All results are shown in Table \ref{tab:non_linear_model_results}.
\begin{table}[H]
\centering
\begin{adjustbox}{width=\textwidth,center}
\begin{tabular}{@{}ccccccccc@{}}
\toprule
Features & Layers & Hidden Size & \multicolumn{2}{c}{MSE} & \multicolumn{2}{c}{MAE} & \multicolumn{2}{c}{CRPS} \\
\cmidrule(lr){4-5} \cmidrule(lr){6-7} \cmidrule(lr){8-9}
& & & AR & NAR & AR & NAR & AR & NAR \\
\midrule
NRV & & & & & & & & \\
& 2 & 256 & 38117.43 & 41574.38 & 147.55 & 153.83 & 86.42 & 75.61 \\
& 4 & 256 & 37817.78 & 40200.92 & 146.90 & 152.00 & 85.63 & 74.37 \\
& 8 & 256 & 36346.57 & 38746.81 & 144.80 & 148.82 & 84.51 & 74.55 \\
& 16 & 256 & 38624.83 & 39328.47 & 148.61 & 149.19 & 87.05 & 75.38 \\
\midrule
NRV + Load + PV\\ + Wind & & & & & & & & \\
& 2 & 256 & 42983.21 & 42950.17 & 156.65 & 156.88 & 92.15 & 76.21 \\
\midrule
NRV + Load + PV\\ + Wind + Net Position\\ + QE (dim 5) & & & & & & & & \\
& 2 & 256 & 37785.49 & 42828.61 & 146.99 & 157.03 & 85.22 & 76.36 \\
& 4 & 256 & 34232.57 & 42588.16 & 139.78 & 157.20 & 80.14 & 73.75 \\
& 8 & 256 & \textbf{32447.41} & 40541.92 & \textbf{137.24} & 151.60 & \textbf{79.22} & 75.52 \\
& 2 & 512 & 44281.20 & 44018.79 & 158.63 & 159.06 & 91.82 & 77.99 \\
& 4 & 512 & 34839.79 & 41999.79 & 140.67 & 154.86 & 80.21 & 75.70 \\
& 8 & 512 & 34925.46 & 39774.38 & 141.11 & 150.62 & 81.11 & 74.67 \\
\bottomrule
\end{tabular}
\end{adjustbox}
\caption{Non-linear quantile regression model results. All the models used a dropout of 0.2.}
\label{tab:non_linear_model_results}
\end{table}
The same behavior as the linear model is observed when looking at the metric differences between the autoregressive and non-autoregressive models. The autoregressive model performs better in terms of MSE and MAE, while the non-autoregressive model performs better in terms of CRPS. The results also give insight into the importance of the input features and hyperparameters. The addition of more input features improves the performance of the autoregressive model. The non-autoregressive model, on the other hand, does not benefit from this. The metrics are worse when more input features are added. This was also seen in the linear model. A reason for this behavior could be that the non-autoregressive model is not able to learn the complex patterns in the large set of input features. The non-autoregressive model is provided with all the values for each quarter for which the quantiles need to be predicted. This increases the input size by 96 values each time a new forecast feature is added. Capturing patterns in this large input space can be a challenging task.
% TODO: talk about hyperparameters?
\begin{figure}[H]
\centering
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/aqr_non_linear_model_samples/AQR_NL_NRV_Load_Wind_PV_NP_QE-Sample_864.png}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/naqr_non_linear_model_samples/NAQR_NL_NRV_Load_Wind_PV_NP_QE-Sample_864.png}
\end{subfigure}
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/aqr_non_linear_model_samples/AQR_NL_NRV_Load_Wind_PV_NP_QE-Sample_4320.png}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/naqr_non_linear_model_samples/NAQR_NL_NRV_Load_Wind_PV_NP_QE-Sample_4320.png}
\end{subfigure}
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/aqr_non_linear_model_samples/AQR_NL_NRV_Load_Wind_PV_NP_QE-Sample_6336.png}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/naqr_non_linear_model_samples/NAQR_NL_NRV_Load_Wind_PV_NP_QE-Sample_6336.png}
\end{subfigure}
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/aqr_non_linear_model_samples/AQR_NL_NRV_Load_Wind_PV_NP_QE-Sample_7008.png}
\caption{Autoregressive non-linear model}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/naqr_non_linear_model_samples/NAQR_NL_NRV_Load_Wind_PV_NP_QE-Sample_7008.png}
\caption{Non-autoregressive non-linear model}
\end{subfigure}
\caption{Comparison of the autoregressive and non-autoregressive non-linear model examples.}
\label{fig:non_linear_model_examples}
\end{figure}
The examples from the test set for the non-linear model are shown in Figure \ref{fig:non_linear_model_examples}. A big difference can be observed between the examples of the autoregressive and non-autoregressive models. The autoregressive model examples follow the actual NRV trend more closely than the non-autoregressive model. The mean of the samples generated by the non-autoregressive model is around zero for every quarter of the day. No clear trend can be observed in the samples. This is a clear indication that the non-autoregressive model is not able to learn the patterns in the data despite having a lower CRPS.
\begin{figure}[ht]
\centering
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/quantile_performance/AQR_NL_Quantile_Performance_Training.jpeg}
\caption{AR - Train}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/quantile_performance/AQR_NL_Quantile_Performance_Test.jpeg}
\caption{AR - Test}
\end{subfigure}
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/quantile_performance/NAQR_NL_Quantile_Performance_Training.jpeg}
\caption{NAR - Train}
\end{subfigure}
\hfill
\begin{subfigure}[b]{0.49\textwidth}
\includegraphics[width=\textwidth]{images/quantile_regression/quantile_performance/NAQR_NL_Quantile_Performance_Test.jpeg}
\caption{NAR - Test}
\end{subfigure}
\caption{Over/underestimation of the quantiles for the autoregressive and non-autoregressive non-linear models. Both the quantile performance for the training and test set are shown. The plots are generated using the input features NRV, Load, Wind, PV, Net Position, and the quarter embedding (only for the autoregressive model).}
\label{fig:non-linear_model_quantile_over_underestimation}
\end{figure}
% TODO: correct use of overestimation
The plots in Figure \ref{fig:non-linear_model_quantile_over_underestimation} show the over/underestimation of the quantiles outputted by the non-linear models. Looking at the plots for the autoregressive models, the observation can be made that the fraction of the real NRV values under the quantiles is too big most of the time in comparison with the ideal fraction. This means the model is estimating the quantiles too high which results in a bigger fraction of NRV values below this value. The model overestimates the quantiles. The non-autoregressive model also suffers from this problem for the training set. For the test set, the lower quantiles are estimated too high and the higher quantiles are estimated too low. The quantiles in the middle are estimated quite accurately.

View File

@@ -0,0 +1,20 @@
\subsection{Baselines}
As discussed earlier, the simplest baseline that can be used is choosing two fixed thresholds. One threshold is used to buy electricity and the other threshold is used to sell electricity. The thresholds can be chosen based on the available historical data. A simple grid search is done over multiple combinations of thresholds. For each combination, the penalty parameter is optimized to make sure only 400 charge cycles are used in a year. The thresholds that achieve the highest profit on the training set are then used to evaluate the policy on the test set. The thresholds that maximize the profit on the training set are €100/MWh for buying and €200/MWh for selling. Evaluating these thresholds on the test set results in a profit of €266,294.15, but this uses 492.0 charge cycles, which is more than the 400 charge cycles the penalty parameter was tuned for. This result can therefore not be used for a fair comparison with other policies. The thresholds can also be determined on the test set itself to find the maximum possible profit of the fixed thresholds policy. This cannot be used in practice because the thresholds are then determined on future data. It is, however, useful for the comparison with the other policies. The best thresholds found on the test set are €200/MWh for buying and €250/MWh for selling. The profit achieved using these thresholds is €143,004.34 with 287.12 charge cycles.
Another baseline uses the real \ac{NRV} data of the previous day to determine the buying and selling thresholds for the next day. The \ac{NRV} data of the previous day can be seen as a prediction for the \ac{NRV} data of the next day. The thresholds are then no longer fixed but are determined based on the \ac{NRV} predictions. The thresholds are determined separately for each day using a simple grid search, which allows the policy to adapt to the changing \ac{NRV} data. The penalty parameter is also optimized to make sure only around 283 charge cycles are used. The profit achieved using this policy is €198,807.09 using 283.5 charge cycles.
The baseline policy using the \ac{NRV} data of the day before as a prediction can be adapted to use the real \ac{NRV} data of the day that is being optimized. This policy can show the maximum profit that can be achieved if a perfect \ac{NRV} prediction is made. The profit for the policy using a perfect prediction of the \ac{NRV} is €230,317.84 using 282.5 charge cycles. This policy can be seen as the upper bound for the other policies. To achieve better profit results than this, more complex policies need to be used. \\
\begin{table}[H]
\centering
\begin{tabular}{|l|c|c|}
\hline
\textbf{Baseline Policy} & \textbf{Profit (€)} & \textbf{Charge cycles} \\ \hline
Fixed thresholds & 143,004.34 & 287.12\\ \hline
Yesterday \ac{NRV} & 198,807.09 & 283.5\\ \hline
Perfect \ac{NRV} & 230,317.84 & 282.5\\ \hline
\end{tabular}
\caption{Results of the baseline policies on the test set. }
\label{tab:fixed_thresholds}
\end{table}

View File

@@ -0,0 +1,8 @@
\subsection{Policy using generated NRV samples}
The generated full-day samples can be used to improve the profit of the policy. For each day, the generated samples can be used to determine the buying and selling thresholds. Assume that there are 100 generated full-day NRV samples for the day for which the profit needs to be optimized. The thresholds are determined for each generated sample separately using a simple grid search. All these thresholds can then be aggregated by taking the mean, which yields one value for the buying threshold and one value for the selling threshold. Again, the penalty parameter is optimized for the test set to make sure around 283 charge cycles are used for a fair comparison. The policy is evaluated for the different types of models that were trained and discussed in the previous sections.
% TODO: Explain the results of the different models
% TODO: Compare with baselines
% TODO: explain further
An observation that can be made is that the metrics used to evaluate the NRV predictions do not necessarily correlate with the profit that can be made using the predictions. This means the best model in terms of the metrics does not necessarily result in the best profit. Ideally, the profit would be used as a metric to evaluate the models during training, but this is computationally expensive. Because of this, the policy is only evaluated after the training is done. The resulting profit gives better insight into whether the model improves on the baselines.

View File

@@ -0,0 +1,185 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Ghent University document class
% Created by DF Benoit, December 15, 2022
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\NeedsTeXFormat{LaTeX2e}
\ProvidesClass{ugent-doc}
% Required packages
% kvoptions: key=value class options (\SetupKeyvalOptions, \Declare...Option)
% geometry:  page margins (\geometry, \newgeometry, \restoregeometry)
% calc:      length arithmetic and \maxof in the grid length definitions
% graphicx:  \includegraphics for the logos on the title page
% xcolor:    \definecolor, \textcolor and \colorbox for the UGent colours
\RequirePackage{kvoptions}
\RequirePackage{geometry}
\RequirePackage{calc}
\RequirePackage{graphicx}
\RequirePackage{xcolor}
% ugent-doc specific options (kvoptions)
\SetupKeyvalOptions{family=ugd,prefix=ugd@} % string options end up in \ugd@<option>, the boolean in \ifugd@sftitles
% Declare the class specific options
% For string options the leading [..] is the initial value and the trailing
% [..] is the value used when the key is given without '=value'.
% faculty:  two-letter faculty code; selects the faculty logo file and the
%           ugent-<faculty> colour on the title page
% language: language code used in the logo file names
% doctype:  name of the parent class that is loaded below
% sftitles: sans serif section titles and title page
\DeclareStringOption[eb]{faculty}[eb]
\DeclareStringOption[en]{language}[en]
\DeclareStringOption[article]{doctype}[article]
\DeclareBoolOption[true]{sftitles} % Default: true
\ProcessKeyvalOptions*
% Pass options not specified above to the parent class
% NOTE(review): \@unusedoptionlist is believed to be a LaTeX kernel macro that
% kvoptions keeps up to date, rather than a macro defined by kvoptions - confirm
\LoadClass[\@unusedoptionlist]{\ugd@doctype}
% All sections, subsections and subsubsections in sans serif
\ifugd@sftitles
\RequirePackage[sf]{titlesec}
\fi
% Define UGent colors
%= = = = = = = = = = =
% Naming contract: every faculty colour is called ugent-<code>, where <code>
% is a value accepted by the 'faculty' class option. \maketitle builds the
% colour name as ugent-\ugd@faculty, so a faculty code without a matching
% \definecolor below results in an undefined-colour error on the title page.
% Base colors
% UGent blue
\definecolor{ugentblue}{RGB}{30,100,200}
% UGent yellow
\definecolor{ugentyellow}{RGB}{255,210,0}
% UGent white
\definecolor{ugentwhite}{RGB}{255,255,255}
% UGent black
\definecolor{ugentblack}{RGB}{0,0,0}
% Faculty specific colors
% Faculty of Literature & Philosophy
\definecolor{ugent-lw}{RGB}{241,164,43}
% Faculty of Law
\definecolor{ugent-re}{RGB}{220,78,40}
% Faculty of Science
\definecolor{ugent-we}{RGB}{45,140,168}
% Faculty of Medicine and Health Sciences
\definecolor{ugent-ge}{RGB}{232,94,113}
% Faculty of Engineering and Architecture
\definecolor{ugent-ea}{RGB}{139,190,232}
% Faculty of Economics and Business Administration
\definecolor{ugent-eb}{RGB}{174,176,80}
% Faculty of Veterinary Medicine
\definecolor{ugent-di}{RGB}{130,84,145}
% Faculty of Psychology and Educational Sciences
\definecolor{ugent-pp}{RGB}{251,126,58}
% Faculty of Bioscience Engineering
\definecolor{ugent-bw}{RGB}{39,171,173}
% Faculty of Pharmaceutical Sciences
\definecolor{ugent-fw}{RGB}{190,81,144}
% Faculty of Political and Social Sciences
\definecolor{ugent-ps}{RGB}{113,168,96}
% Define new commands
% User-facing setters for the title page content. Each one stores its argument
% in an internal macro that \maketitle reads:
%   \thetitle{..}     -> \@thetitle     (main title)
%   \thesubtitle{..}  -> \@thesubtitle  (subtitle, typeset below the title)
%   \infoboxa{..} .. \infoboxd{..} -> \@infoboxa .. \@infoboxd
%                     (optional boxes stacked at the bottom of the colour box)
\def\thetitle#1{\def\@thetitle{#1}}
\def\thesubtitle#1{\def\@thesubtitle{#1}}
\def\infoboxa#1{\def\@infoboxa{#1}}
\def\infoboxb#1{\def\@infoboxb{#1}}
\def\infoboxc#1{\def\@infoboxc{#1}}
\def\infoboxd#1{\def\@infoboxd{#1}}
% Initialize new commands as 'empty'
% \maketitle compares the info boxes against \empty with \ifx, so a box that
% was never set (or was set to an empty argument) is skipped entirely.
\def\@thetitle{}
\def\@thesubtitle{}
\def\@infoboxa{}
\def\@infoboxb{}
\def\@infoboxc{}
\def\@infoboxd{}
% Define lengths based on UGent document grid
% See: https://styleguide.ugent.be/basic-principles/grid-and-layout.html
% The arithmetic inside \setlength below (\maxof, /, -, n\gridunit) relies on
% the calc package.
% \longedge: the larger of the paper height and the paper width
\newlength{\longedge}
\setlength{\longedge}{\maxof{\paperheight}{\paperwidth}}
% \gridunit: base unit of the layout grid, 1/28 of the long edge
\newlength{\gridunit}
\setlength{\gridunit}{\longedge/28} %Divide long edge by 7 and next by 4
% \subpaperheight: height of the colour box on the title page
\newlength{\subpaperheight}
\setlength{\subpaperheight}{\paperheight-7\gridunit} %Type area: 3 units for faculty logo, 4 units for UGent logo
% \subpaperwidth: width of the colour box on the title page
\newlength{\subpaperwidth}
\setlength{\subpaperwidth}{\paperwidth-\gridunit} %Left margin of 1 gridunit
% Define strut based on \gridunit
% \mystrut[<factor>]: invisible rule (0pt wide, 0pt high) raised by
% <factor>\gridunit; <factor> defaults to -.5. Used in \maketitle for vertical
% spacing around the title and the info boxes.
\newcommand{\mystrut}[1][-.5]{\rule[#1\gridunit]{0pt}{0pt}}
% Set default page layout
% Can be overwritten in preamble of document
\renewcommand{\baselinestretch}{1.15} % line spacing
\geometry{bottom=2.5cm,top=2.5cm,left=3cm,right=2cm} % margins
% Redefine the titlepage in accordance with UGent styleguide
% Layout, top to bottom: the faculty logo (3 grid units high), a tinted colour
% box holding the title, the subtitle and up to four optional info boxes, and
% the UGent logo (4 grid units high). The content comes from \thetitle,
% \thesubtitle and \infoboxa..\infoboxd; the logo file names and the box colour
% are derived from the 'faculty' and 'language' class options.
% The commented-out \fbox{% ... %} pairs are left in place; presumably they are
% debugging aids to make the minipage outlines visible.
\renewcommand\maketitle{\begin{titlepage}%
\thispagestyle{empty} % by default, the pagestyle of title page is plain
\newgeometry{top=0cm, bottom=0cm, left=0cm, right=0cm} % set special margins
\setlength{\parindent}{0cm} % necessary to put minipages/boxes at extreme left of page
\setlength{\parsep}{0cm} % necessary to stack minipages/boxes without space
\setlength{\fboxsep}{0cm} % no padding between the content and the edge of \colorbox/\fbox
\setlength{\parskip}{0cm}
\setlength{\lineskip}{0cm}
\ifugd@sftitles
\sffamily % Titlepage in sans serif font
\fi
% Faculty logo, read from <faculty>-<language>.pdf (eb-en.pdf with the default options)
\includegraphics[height=3\gridunit]{\ugd@faculty-\ugd@language.pdf}%
\makebox[\gridunit]{}% Left margin of 1 gridunit
% Colour box in the faculty colour, mixed down to 30% with xcolor's '!30'
\colorbox{ugent-\ugd@faculty!30}{%
%\colorbox{ugentwhite}{%
\begin{minipage}[c][\subpaperheight][t]{\subpaperwidth}%
\vskip 5\gridunit % top margin within minipage
\hskip \gridunit % left margin of 1 within the colorbox
%\fbox{%
\begin{minipage}{\subpaperwidth-2\gridunit} % title minipage, right margin of 1
\raggedright\bfseries\huge
\textcolor{ugentblue}{\mystrut\@thetitle}\newline
\Large\textcolor{ugentblue}{\@thesubtitle}
\mystrut[1]
\end{minipage}%}
\vskip\fill % Push down to bottom of minipage
% Info boxes a-d: each one is typeset only when its macro is non-empty. Boxes
% a-c are followed by a default \mystrut as separator; box d has none because
% the \mystrut[-1] after it provides the bottom margin.
\ifx\@infoboxa\empty\else % only put box if not empty
\hskip\gridunit % left margin of infobox
%\fbox{%
\begin{minipage}[b]{\subpaperwidth-3\gridunit} % right margin of 1
\@infoboxa
\end{minipage}%}
\baselineskip0pt\mystrut
\fi
\ifx\@infoboxb\empty\else % only put box if not empty
\hskip\gridunit % left margin of infobox
%\fbox{%
\begin{minipage}[b]{\subpaperwidth-3\gridunit} % right margin of 1
\@infoboxb
\end{minipage}%}
\baselineskip0pt\mystrut
\fi
\ifx\@infoboxc\empty\else % only put box if not empty
\hskip\gridunit % left margin of infobox
%\fbox{%
\begin{minipage}[b]{\subpaperwidth-3\gridunit} % right margin of 1
\@infoboxc
\end{minipage}%}
\baselineskip0pt\mystrut
\fi
\ifx\@infoboxd\empty\else % only put box if not empty
\hskip\gridunit % left margin of infobox
%\fbox{%
\begin{minipage}[b]{\subpaperwidth-3\gridunit} % right margin of 1
\@infoboxd
\end{minipage}%}
\fi
\baselineskip0pt\mystrut[-1]
\end{minipage}
}%
% UGent logo, read from ugent-<language>.pdf
\includegraphics[height=4\gridunit]{ugent-\ugd@language.pdf}%
\end{titlepage}
% Return to the margins that were active before \newgeometry
\restoregeometry
}

BIN
Reports/Thesis/ugent-en.pdf Normal file

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show More