782 lines
84 KiB
BibTeX
782 lines
84 KiB
BibTeX
|
||
% Web page: Zotero Connectors download page.
@online{noauthor_zotero_nodate,
  title = {Zotero {\textbar} Connectors},
  url = {https://www.zotero.org/download/connectors},
  urldate = {2022-10-12},
  file = {Zotero | Connectors:/Users/victormylle/Zotero/storage/EPF3ZZRA/connectors.html:text/html},
}
|
||
|
||
% Web page (Dutch): Elia, electricity market and system.
@online{noauthor_elia_nodate,
  title = {Elia: de electriciteitsmarkt en -systeem},
  url = {https://www.elia.be/nl/elektriciteitsmarkt-en-systeem},
  shorttitle = {Elia},
  abstract = {Elia deelt de Europese ambitie om een geïntegreerde elektriciteitsmarkt tot stand te brengen en verschillende marktspelers aan te moedigen tot het aanbieden van systeemdiensten.},
  urldate = {2023-06-23},
  langid = {dutch},
  file = {Snapshot:/Users/victormylle/Zotero/storage/7QY94WTW/elektriciteitsmarkt-en-systeem.html:text/html},
}
|
||
|
||
% arXiv preprint. The eprint field holds the bare arXiv identifier so the
% generated arXiv link resolves; the subject class lives in eprintclass.
% NOTE(review): cs.CL is inferred from the keywords field - confirm on arXiv.
@misc{gao_easy--hard_2023,
  title = {Easy-to-Hard Learning for Information Extraction},
  url = {http://arxiv.org/abs/2305.09193},
  abstract = {Information extraction ({IE}) systems aim to automatically extract structured information, such as named entities, relations between entities, and events, from unstructured texts. While most existing work addresses a particular {IE} task, universally modeling various {IE} tasks with one model has achieved great success recently. Despite their success, they employ a one-stage learning strategy, i.e., directly learning to extract the target structure given the input text, which contradicts the human learning process. In this paper, we propose a unified easy-to-hard learning framework consisting of three stages, i.e., the easy stage, the hard stage, and the main stage, for {IE} by mimicking the human learning process. By breaking down the learning process into multiple stages, our framework facilitates the model to acquire general {IE} task knowledge and improve its generalization ability. Extensive experiments across four {IE} tasks demonstrate the effectiveness of our framework. We achieve new state-of-the-art results on 13 out of 17 datasets. Our code is available at {\textbackslash}url\{https://github.com/{DAMO}-{NLP}-{SG}/{IE}-E2H\}.},
  number = {{arXiv}:2305.09193},
  publisher = {{arXiv}},
  author = {Gao, Chang and Zhang, Wenxuan and Lam, Wai and Bing, Lidong},
  urldate = {2023-07-10},
  date = {2023-05-19},
  eprinttype = {arxiv},
  eprint = {2305.09193},
  eprintclass = {cs.CL},
  keywords = {Computer Science - Computation and Language},
  file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/5YBG5XYS/2305.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/D8LIDUE8/Gao et al. - 2023 - Easy-to-Hard Learning for Information Extraction.pdf:application/pdf},
}
|
||
|
||
% Journal article: Neural Computing and Applications 33(11).
@article{gaur_semi-supervised_2021,
  title = {Semi-supervised deep learning based named entity recognition model to parse education section of resumes},
  volume = {33},
  issn = {1433-3058},
  url = {https://doi.org/10.1007/s00521-020-05351-2},
  doi = {10.1007/s00521-020-05351-2},
  abstract = {A job seeker’s resume contains several sections, including educational qualifications. Educational qualifications capture the knowledge and skills relevant to the job. Machine processing of the education sections of resumes has been a difficult task. In this paper, we attempt to identify educational institutions’ names and degrees from a resume’s education section. Usually, a significant amount of annotated data is required for neural network-based named entity recognition techniques. A semi-supervised approach is used to overcome the lack of large annotated data. We trained a deep neural network model on an initial (seed) set of resume education sections. This model is used to predict entities of unlabeled education sections and is rectified using a correction module. The education sections containing the rectified entities are augmented to the seed set. The updated seed set is used for retraining, leading to better accuracy than the previously trained model. This way, it can provide a high overall accuracy without the need of large annotated data. Our model has achieved an accuracy of 92.06\% on the named entity recognition task.},
  pages = {5705--5718},
  number = {11},
  journaltitle = {Neural Computing and Applications},
  shortjournal = {Neural Comput \& Applic},
  author = {Gaur, Bodhvi and Saluja, Gurpreet Singh and Sivakumar, Hamsa Bharathi and Singh, Sanjay},
  urldate = {2023-07-10},
  date = {2021-06-01},
  langid = {english},
  keywords = {Deep learning models, Named entity recognition ({NER}), Natural language processing, Resume information extraction, Semi-supervised learning},
  file = {Full Text PDF:/Users/victormylle/Zotero/storage/4NK6IXHZ/Gaur et al. - 2021 - Semi-supervised deep learning based named entity r.pdf:application/pdf},
}
|
||
|
||
% Journal article (survey): Knowledge and Information Systems 65(2).
@article{landolsi_information_2023,
  title = {Information extraction from electronic medical documents: state of the art and future research directions},
  volume = {65},
  issn = {0219-3116},
  url = {https://doi.org/10.1007/s10115-022-01779-1},
  doi = {10.1007/s10115-022-01779-1},
  shorttitle = {Information extraction from electronic medical documents},
  abstract = {In the medical field, a doctor must have a comprehensive knowledge by reading and writing narrative documents, and he is responsible for every decision he takes for patients. Unfortunately, it is very tiring to read all necessary information about drugs, diseases and patients due to the large amount of documents that are increasing every day. Consequently, so many medical errors can happen and even kill people. Likewise, there is such an important field that can handle this problem, which is the information extraction. There are several important tasks in this field to extract the important and desired information from unstructured text written in natural language. The main principal tasks are named entity recognition and relation extraction since they can structure the text by extracting the relevant information. However, in order to treat the narrative text we should use natural language processing techniques to extract useful information and features. In our paper, we introduce and discuss the several techniques and solutions used in these tasks. Furthermore, we outline the challenges in information extraction from medical documents. In our knowledge, this is the most comprehensive survey in the literature with an experimental analysis and a suggestion for some uncovered directions.},
  pages = {463--516},
  number = {2},
  journaltitle = {Knowledge and Information Systems},
  shortjournal = {Knowl Inf Syst},
  author = {Landolsi, Mohamed Yassine and Hlaoua, Lobna and Ben Romdhane, Lotfi},
  urldate = {2023-07-10},
  date = {2023-02-01},
  langid = {english},
  keywords = {Electronic medical records, Information extraction, Medical named entities recognition, Medical relation extraction, Section detection},
  file = {Full Text PDF:/Users/victormylle/Zotero/storage/KRTKZW3M/Landolsi et al. - 2023 - Information extraction from electronic medical doc.pdf:application/pdf},
}
|
||
|
||
% Conference paper (ACL-IJCNLP 2021). "Online" describes where the event was
% held, so it is stored in venue; location is reserved for the publisher's city.
@inproceedings{fu_spanner_2021,
  venue = {Online},
  title = {{SpanNER}: Named Entity Re-/Recognition as Span Prediction},
  url = {https://aclanthology.org/2021.acl-long.558},
  doi = {10.18653/v1/2021.acl-long.558},
  shorttitle = {{SpanNER}},
  abstract = {Recent years have seen the paradigm shift of Named Entity Recognition ({NER}) systems from sequence labeling to span prediction. Despite its preliminary effectiveness, the span prediction model's architectural bias has not been fully understood. In this paper, we first investigate the strengths and weaknesses when the span prediction model is used for named entity recognition compared with the sequence labeling framework and how to further improve it, which motivates us to make complementary advantages of systems based on different paradigms. We then reveal that span prediction, simultaneously, can serve as a system combiner to re-recognize named entities from different systems' outputs. We experimentally implement 154 systems on 11 datasets, covering three languages, comprehensive results show the effectiveness of span prediction models that both serve as base {NER} systems and system combiners. We make all codes and datasets available: https://github.com/neulab/spanner, as well as an online system demo: http://spanner.sh. Our model also has been deployed into the {ExplainaBoard} platform, which allows users to flexibly perform a system combination of top-scoring systems in an interactive way: http://explainaboard.nlpedia.ai/leaderboard/task-ner/.},
  eventtitle = {{ACL}-{IJCNLP} 2021},
  pages = {7183--7195},
  booktitle = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
  publisher = {Association for Computational Linguistics},
  author = {Fu, Jinlan and Huang, Xuanjing and Liu, Pengfei},
  urldate = {2023-07-10},
  date = {2021-08},
  file = {Full Text PDF:/Users/victormylle/Zotero/storage/6JU4DR5Y/Fu et al. - 2021 - SpanNER Named Entity Re-Recognition as Span Pred.pdf:application/pdf},
}
|
||
|
||
% Conference paper (ACL 2020). "Online" describes where the event was held,
% so it is stored in venue; location is reserved for the publisher's city.
@inproceedings{li_unified_2020,
  venue = {Online},
  title = {A Unified {MRC} Framework for Named Entity Recognition},
  url = {https://aclanthology.org/2020.acl-main.519},
  doi = {10.18653/v1/2020.acl-main.519},
  abstract = {The task of named entity recognition ({NER}) is normally divided into nested {NER} and flat {NER} depending on whether named entities are nested or not.Models are usually separately developed for the two tasks, since sequence labeling models, the most widely used backbone for flat {NER}, are only able to assign a single label to a particular token, which is unsuitable for nested {NER} where a token may be assigned several labels. In this paper, we propose a unified framework that is capable of handling both flat and nested {NER} tasks. Instead of treating the task of {NER} as a sequence labeling problem, we propose to formulate it as a machine reading comprehension ({MRC}) task. For example, extracting entities with the per label is formalized as extracting answer spans to the question “which person is mentioned in the text”.This formulation naturally tackles the entity overlapping issue in nested {NER}: the extraction of two overlapping entities with different categories requires answering two independent questions. Additionally, since the query encodes informative prior knowledge, this strategy facilitates the process of entity extraction, leading to better performances for not only nested {NER}, but flat {NER}. We conduct experiments on both nested and flat {NER} datasets.Experiment results demonstrate the effectiveness of the proposed formulation. We are able to achieve a vast amount of performance boost over current {SOTA} models on nested {NER} datasets, i.e., +1.28, +2.55, +5.44, +6.37,respectively on {ACE}04, {ACE}05, {GENIA} and {KBP}17, along with {SOTA} results on flat {NER} datasets, i.e., +0.24, +1.95, +0.21, +1.49 respectively on English {CoNLL} 2003, English {OntoNotes} 5.0, Chinese {MSRA} and Chinese {OntoNotes} 4.0.},
  eventtitle = {{ACL} 2020},
  pages = {5849--5859},
  booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
  publisher = {Association for Computational Linguistics},
  author = {Li, Xiaoya and Feng, Jingrong and Meng, Yuxian and Han, Qinghong and Wu, Fei and Li, Jiwei},
  urldate = {2023-07-10},
  date = {2020-07},
  file = {Full Text PDF:/Users/victormylle/Zotero/storage/TIVIKNGN/Li et al. - 2020 - A Unified MRC Framework for Named Entity Recogniti.pdf:application/pdf},
}
|
||
|
||
% arXiv preprint. The eprint field holds the bare arXiv identifier so the
% generated arXiv link resolves; the subject class lives in eprintclass.
% NOTE(review): cs.CL is inferred from the keywords field - confirm on arXiv.
@misc{decorte_jobbert_2021,
  title = {{JobBERT}: Understanding Job Titles through Skills},
  url = {http://arxiv.org/abs/2109.09605},
  shorttitle = {{JobBERT}},
  abstract = {Job titles form a cornerstone of today's human resources ({HR}) processes. Within online recruitment, they allow candidates to understand the contents of a vacancy at a glance, while internal {HR} departments use them to organize and structure many of their processes. As job titles are a compact, convenient, and readily available data source, modeling them with high accuracy can greatly benefit many {HR} tech applications. In this paper, we propose a neural representation model for job titles, by augmenting a pre-trained language model with co-occurrence information from skill labels extracted from vacancies. Our {JobBERT} method leads to considerable improvements compared to using generic sentence encoders, for the task of job title normalization, for which we release a new evaluation benchmark.},
  number = {{arXiv}:2109.09605},
  publisher = {{arXiv}},
  author = {Decorte, Jens-Joris and Van Hautte, Jeroen and Demeester, Thomas and Develder, Chris},
  urldate = {2023-07-20},
  date = {2021-09-20},
  eprinttype = {arxiv},
  eprint = {2109.09605},
  eprintclass = {cs.CL},
  keywords = {Computer Science - Computation and Language},
}
|
||
|
||
% arXiv preprint. The eprint field holds the bare arXiv identifier so the
% generated arXiv link resolves; the subject class lives in eprintclass.
% NOTE(review): cs.CL is taken from the first keyword - confirm the primary
% class on arXiv.
@misc{sun_retentive_2023,
  title = {Retentive Network: A Successor to Transformer for Large Language Models},
  url = {http://arxiv.org/abs/2307.08621},
  shorttitle = {Retentive Network},
  abstract = {In this work, we propose Retentive Network ({RetNet}) as a foundation architecture for large language models, simultaneously achieving training parallelism, low-cost inference, and good performance. We theoretically derive the connection between recurrence and attention. Then we propose the retention mechanism for sequence modeling, which supports three computation paradigms, i.e., parallel, recurrent, and chunkwise recurrent. Specifically, the parallel representation allows for training parallelism. The recurrent representation enables low-cost \$O(1)\$ inference, which improves decoding throughput, latency, and {GPU} memory without sacrificing performance. The chunkwise recurrent representation facilitates efficient long-sequence modeling with linear complexity, where each chunk is encoded parallelly while recurrently summarizing the chunks. Experimental results on language modeling show that {RetNet} achieves favorable scaling results, parallel training, low-cost deployment, and efficient inference. The intriguing properties make {RetNet} a strong successor to Transformer for large language models. Code will be available at https://aka.ms/retnet.},
  number = {{arXiv}:2307.08621},
  publisher = {{arXiv}},
  author = {Sun, Yutao and Dong, Li and Huang, Shaohan and Ma, Shuming and Xia, Yuqing and Xue, Jilong and Wang, Jianyong and Wei, Furu},
  urldate = {2023-07-25},
  date = {2023-07-19},
  eprinttype = {arxiv},
  eprint = {2307.08621},
  eprintclass = {cs.CL},
  keywords = {Computer Science - Computation and Language, Computer Science - Machine Learning},
}
|
||
|
||
% arXiv preprint. The eprint field holds the bare arXiv identifier so the
% generated arXiv link resolves; the subject class lives in eprintclass.
% NOTE(review): cs.CL is taken from the first keyword - confirm the primary
% class on arXiv. The abstract's unexpanded author macro (ourmodelshort) was
% replaced with the model name GenCo, which the abstract itself uses later.
@misc{zhang_generation-driven_2023,
  title = {Generation-driven Contrastive Self-training for Zero-shot Text Classification with Instruction-tuned {GPT}},
  url = {http://arxiv.org/abs/2304.11872},
  abstract = {Moreover, {GPT}-based zero-shot classification models tend to make independent predictions over test instances, which can be sub-optimal as the instance correlations and the decision boundaries in the target space are ignored. To address these difficulties and limitations, we propose a new approach to zero-shot text classification, namely {GenCo}, which leverages the strong generative power of {GPT} to assist in training a smaller, more adaptable, and efficient sentence encoder classifier with contrastive self-training. Specifically, {GenCo} applies {GPT} in two ways: firstly, it generates multiple augmented texts for each input instance to enhance the semantic embedding of the instance and improve the mapping to relevant labels; secondly, it generates augmented texts conditioned on the predicted label during self-training, which makes the generative process tailored to the decision boundaries in the target space. In our experiments, {GenCo} outperforms previous state-of-the-art methods on multiple benchmark datasets, even when only limited in-domain text data is available.},
  number = {{arXiv}:2304.11872},
  publisher = {{arXiv}},
  author = {Zhang, Ruohong and Wang, Yau-Shian and Yang, Yiming},
  urldate = {2023-08-01},
  date = {2023-04-24},
  eprinttype = {arxiv},
  eprint = {2304.11872},
  eprintclass = {cs.CL},
  keywords = {Computer Science - Computation and Language, Computer Science - Artificial Intelligence, interesting},
  file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/2ULMRMN5/2304.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/D98MRNHP/Zhang et al. - 2023 - Generation-driven Contrastive Self-training for Ze.pdf:application/pdf},
}
|
||
|
||
% arXiv preprint. This work was stored three times under the keys
% zhang_clusterllm_2023, zhang_clusterllm_2023-1 and zhang_clusterllm_2023-2
% with identical metadata. The copies are merged into one entry; the ids field
% registers the two suffixed keys as aliases (Biber backend), so existing
% citations of any of the three keys resolve to a single bibliography item.
% The file attachment comes from the former -2 copy, the only one that had it.
% NOTE(review): cs.CL is inferred from the keywords field - confirm on arXiv.
@misc{zhang_clusterllm_2023,
  ids = {zhang_clusterllm_2023-1, zhang_clusterllm_2023-2},
  title = {{ClusterLLM}: Large Language Models as a Guide for Text Clustering},
  url = {http://arxiv.org/abs/2305.14871},
  shorttitle = {{ClusterLLM}},
  abstract = {We introduce {ClusterLLM}, a novel text clustering framework that leverages feedback from an instruction-tuned large language model, such as {ChatGPT}. Compared with traditional unsupervised methods that builds upon "small" embedders, {ClusterLLM} exhibits two intriguing advantages: (1) it enjoys the emergent capability of {LLM} even if its embeddings are inaccessible; and (2) it understands the user's preference on clustering through textual instruction and/or a few annotated data. First, we prompt {ChatGPT} for insights on clustering perspective by constructing hard triplet questions {\textless}does A better correspond to B than C{\textgreater}, where A, B and C are similar data points that belong to different clusters according to small embedder. We empirically show that this strategy is both effective for fine-tuning small embedder and cost-efficient to query {ChatGPT}. Second, we prompt {ChatGPT} for helps on clustering granularity by carefully designed pairwise questions {\textless}do A and B belong to the same category{\textgreater}, and tune the granularity from cluster hierarchies that is the most consistent with the {ChatGPT} answers. Extensive experiments on 14 datasets show that {ClusterLLM} consistently improves clustering quality, at an average cost of {\textasciitilde}\$0.6 per dataset.},
  number = {{arXiv}:2305.14871},
  publisher = {{arXiv}},
  author = {Zhang, Yuwei and Wang, Zihan and Shang, Jingbo},
  urldate = {2023-08-08},
  date = {2023-05-24},
  eprinttype = {arxiv},
  eprint = {2305.14871},
  eprintclass = {cs.CL},
  keywords = {Computer Science - Computation and Language},
  file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/68L6AESY/2305.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/VGWL9LRC/Zhang et al. - 2023 - ClusterLLM Large Language Models as a Guide for T.pdf:application/pdf},
}
|
||
|
||
% Journal article: Sensors 22(6), article number 2216.
% The exported note ("Number: 6 / Publisher: ...") would be printed verbatim in
% the bibliography. The issue number is already in the number field, and the
% publisher name is kept in the publisher field instead (standard article
% styles do not print it).
@article{vijeikis_efficient_2022,
  title = {Efficient Violence Detection in Surveillance},
  volume = {22},
  rights = {http://creativecommons.org/licenses/by/3.0/},
  issn = {1424-8220},
  url = {https://www.mdpi.com/1424-8220/22/6/2216},
  doi = {10.3390/s22062216},
  abstract = {Intelligent video surveillance systems are rapidly being introduced to public places. The adoption of computer vision and machine learning techniques enables various applications for collected video features; one of the major is safety monitoring. The efficacy of violent event detection is measured by the efficiency and accuracy of violent event detection. In this paper, we present a novel architecture for violence detection from video surveillance cameras. Our proposed model is a spatial feature extracting a U-Net-like network that uses {MobileNet} V2 as an encoder followed by {LSTM} for temporal feature extraction and classification. The proposed model is computationally light and still achieves good results—experiments showed that an average accuracy is 0.82 ± 2\% and average precision is 0.81 ± 3\% using a complex real-world security camera footage dataset based on {RWF}-2000.},
  pages = {2216},
  number = {6},
  journaltitle = {Sensors},
  publisher = {Multidisciplinary Digital Publishing Institute},
  author = {Vijeikis, Romas and Raudonis, Vidas and Dervinis, Gintaras},
  urldate = {2023-08-08},
  date = {2022-01},
  langid = {english},
  keywords = {computer vision, deep learning, intelligent video surveillance, {LSTM}, U-Net, violence detection, violent behavior},
  file = {Full Text PDF:/Users/victormylle/Zotero/storage/PSYA8YSJ/Vijeikis et al. - 2022 - Efficient Violence Detection in Surveillance.pdf:application/pdf},
}
|
||
|
||
% Journal article: IEEE Transactions on Sustainable Energy 13(2).
@article{toubeau_interpretable_2022,
  title = {Interpretable Probabilistic Forecasting of Imbalances in Renewable-Dominated Electricity Systems},
  volume = {13},
  issn = {1949-3029, 1949-3037},
  url = {https://ieeexplore.ieee.org/document/9464660/},
  doi = {10.1109/TSTE.2021.3092137},
  abstract = {High penetration of renewable energy such as wind power and photovoltaic ({PV}) requires large amounts of flexibility to balance their inherent variability. Making an accurate prediction of the future power system imbalance is an efficient approach to reduce these balancing costs. However, the imbalance is affected not only by renewables but also by complex market dynamics and technology constraints, for which the dependence structure is unknown. Therefore, this paper introduces a new architecture of sequence-to-sequence recurrent neural networks to efficiently process time-based information in an interpretable fashion. To that end, the selection of relevant variables is internalized into the model, which provides insights on the relative importance of individual inputs, while bypassing the cumbersome need for data preprocessing. Then, the model is further enriched with an attention mechanism that is tailored to focus on the relevant contextual information, which is useful to better understand the underlying dynamics such as seasonal patterns. Outcomes show that adding modules to generate explainable forecasts makes the model more efficient and robust, thus leading to enhanced performance.},
  pages = {1267--1277},
  number = {2},
  journaltitle = {{IEEE} Transactions on Sustainable Energy},
  shortjournal = {{IEEE} Trans. Sustain. Energy},
  author = {Toubeau, Jean-Francois and Bottieau, Jeremie and Wang, Yi and Vallee, Francois},
  urldate = {2023-09-28},
  date = {2022-04},
  langid = {english},
  file = {Toubeau et al. - 2022 - Interpretable Probabilistic Forecasting of Imbalan.pdf:/Users/victormylle/Zotero/storage/WA7DZBXX/Toubeau et al. - 2022 - Interpretable Probabilistic Forecasting of Imbalan.pdf:application/pdf},
}
|
||
|
||
% Web page: IEEE Xplore landing page for document 9555209.
% The scraped browser-tab suffix ("| IEEE Journals & Magazine | IEEE Xplore")
% was removed from the title so only the work's own title is printed.
% NOTE(review): this file also holds an article entry for the same document
% under the key bond-taylor_deep_2022; consider citing that one instead.
@online{noauthor_deep_nodate,
  title = {Deep Generative Modelling: A Comparative Review of {VAEs}, {GANs}, Normalizing Flows, Energy-Based and Autoregressive Models},
  url = {https://ieeexplore.ieee.org/document/9555209},
  urldate = {2023-10-11},
}
|
||
|
||
% Journal article (review): IEEE Transactions on Pattern Analysis and Machine
% Intelligence 44(11).
@article{bond-taylor_deep_2022,
  title = {Deep Generative Modelling: A Comparative Review of {VAEs}, {GANs}, Normalizing Flows, Energy-Based and Autoregressive Models},
  volume = {44},
  issn = {0162-8828, 2160-9292, 1939-3539},
  url = {https://ieeexplore.ieee.org/document/9555209/},
  doi = {10.1109/TPAMI.2021.3116668},
  shorttitle = {Deep Generative Modelling},
  abstract = {Deep generative models are a class of techniques that train deep neural networks to model the distribution of training samples. Research has fragmented into various interconnected approaches, each of which make trade-offs including run-time, diversity, and architectural restrictions. In particular, this compendium covers energy-based models, variational autoencoders, generative adversarial networks, autoregressive models, normalizing flows, in addition to numerous hybrid approaches. These techniques are compared and contrasted, explaining the premises behind each and how they are interrelated, while reviewing current state-of-the-art advances and implementations.},
  pages = {7327--7347},
  number = {11},
  journaltitle = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
  shortjournal = {{IEEE} Trans. Pattern Anal. Mach. Intell.},
  author = {Bond-Taylor, Sam and Leach, Adam and Long, Yang and Willcocks, Chris G.},
  urldate = {2023-10-11},
  date = {2022-11-01},
  langid = {english},
  file = {Bond-Taylor et al. - 2022 - Deep Generative Modelling A Comparative Review of.pdf:/Users/victormylle/Zotero/storage/UNAST9UC/Bond-Taylor et al. - 2022 - Deep Generative Modelling A Comparative Review of.pdf:application/pdf},
}
|
||
|
||
% Book chapter. The export typed this as an article with no journal and no
% date, which leaves required fields empty and prints no year.
% NOTE(review): booktitle, publisher and date were filled in from the commonly
% cited reference for this tutorial, not from the export - verify against the
% publisher record before relying on them. The citation key is left unchanged
% so existing citations keep working.
@incollection{lecun_tutorial_nodate,
  title = {A Tutorial on Energy-Based Learning},
  abstract = {Energy-Based Models ({EBMs}) capture dependencies between variables by associating a scalar energy to each configuration of the variables. Inference consists in clamping the value of observed variables and finding configurations of the remaining variables that minimize the energy. Learning consists in finding an energy function in which observed configurations of the variables are given lower energies than unobserved ones. The {EBM} approach provides a common theoretical framework for many learning models, including traditional discriminative and generative approaches, as well as graph-transformer networks, conditional random fields, maximum margin Markov networks, and several manifold learning methods.},
  booktitle = {Predicting Structured Data},
  publisher = {{MIT} Press},
  author = {{LeCun}, Yann and Chopra, Sumit and Hadsell, Raia and Ranzato, Marc’Aurelio and Huang, Fu Jie},
  date = {2006},
  langid = {english},
  file = {LeCun et al. - A Tutorial on Energy-Based Learning.pdf:/Users/victormylle/Zotero/storage/8932975Z/LeCun et al. - A Tutorial on Energy-Based Learning.pdf:application/pdf},
}
|
||
|
||
@article{gatta_neural_2022,
  title = {Neural networks generative models for time series},
  volume = {34},
  issn = {1319-1578},
  url = {https://www.sciencedirect.com/science/article/pii/S1319157822002361},
  doi = {10.1016/j.jksuci.2022.07.010},
  abstract = {Nowadays, time series are a widely-exploited methodology to describe phenomena belonging to different fields. In fact, electrical consumption can be explained, from a data analysis perspective, with a time series, as for healthcare, financial index, air pollution or parking occupancy rate. Applying time series to different areas of interest has contributed to the exponential rise in interest by both practitioners and academics. On the other side, especially regarding static data, a new trend is acquiring even more relevance in the data analysis community, namely neural network generative approaches. Generative approaches aim to generate new, fake samples given a dataset of real data by implicitly learning the probability distribution underlining data. In this way, several tasks can be addressed, such as data augmentation, class imbalance, anomaly detection or privacy. However, even if this topic is relatively well-established in the literature related to static data regarding time series, the debate is still open. This paper contributes to this debate by comparing four neural network-based generative approaches for time series belonging to the state-of-the-art methodologies in literature. The comparison has been carried out on five public and private datasets and on different time granularities, with a total number of 13 experimental scenario. Our work aims to provide a wide overview of the performances of the compared methodologies when working in different conditions like seasonality, strong autoregressive components and long or short sequences.},
  pages = {7920--7939},
  number = {10},
  journaltitle = {Journal of King Saud University - Computer and Information Sciences},
  shortjournal = {Journal of King Saud University - Computer and Information Sciences},
  author = {Gatta, Federico and Giampaolo, Fabio and Prezioso, Edoardo and Mei, Gang and Cuomo, Salvatore and Piccialli, Francesco},
  urldate = {2023-10-11},
  date = {2022-11-01},
  keywords = {Deep learning, Generative adversarial networks, Healthcare, Industry 4.0, Time series},
  file = {Full Text:/Users/victormylle/Zotero/storage/ZU6BCM28/Gatta et al. - 2022 - Neural networks generative models for time series.pdf:application/pdf;ScienceDirect Snapshot:/Users/victormylle/Zotero/storage/2HSHCJN7/S1319157822002361.html:text/html},
}
|
||
|
||
@article{dumas_deep_2022,
  title = {A deep generative model for probabilistic energy forecasting in power systems: normalizing flows},
  volume = {305},
  issn = {0306-2619},
  url = {https://linkinghub.elsevier.com/retrieve/pii/S0306261921011909},
  doi = {10.1016/j.apenergy.2021.117871},
  shorttitle = {A deep generative model for probabilistic energy forecasting in power systems},
  abstract = {Greater direct electrification of end-use sectors with a higher share of renewables is one of the pillars to power a carbon-neutral society by 2050. However, in contrast to conventional power plants, renewable energy is subject to uncertainty raising challenges for their interaction with power systems. Scenario-based probabilistic forecasting models have become a vital tool to equip decision-makers. This paper presents to the power systems forecasting practitioners a recent deep learning technique, the normalizing flows, to produce accurate scenario-based probabilistic forecasts that are crucial to face the new challenges in power systems applications. The strength of this technique is to directly learn the stochastic multivariate distribution of the underlying process by maximizing the likelihood. Through comprehensive empirical evaluations using the open data of the Global Energy Forecasting Competition 2014, we demonstrate that this methodology is competitive with other state-of-the-art deep learning generative models: generative adversarial networks and variational autoencoders. The models producing weather-based wind, solar power, and load scenarios are properly compared in terms of forecast value by considering the case study of an energy retailer and quality using several complementary metrics. The numerical experiments are simple and easily reproducible. Thus, we hope it will encourage other forecasting practitioners to test and use normalizing flows in power system applications such as bidding on electricity markets, scheduling power systems with high renewable energy sources penetration, energy management of virtual power plan or microgrids, and unit commitment.},
  pages = {117871},
  journaltitle = {Applied Energy},
  shortjournal = {Applied Energy},
  author = {Dumas, Jonathan and Wehenkel, Antoine and Lanaspeze, Damien and Cornélusse, Bertrand and Sutera, Antonio},
  urldate = {2023-10-11},
  date = {2022-01},
  langid = {english},
  file = {Dumas et al. - 2022 - A deep generative model for probabilistic energy f.pdf:/Users/victormylle/Zotero/storage/3CW249QI/Dumas et al. - 2022 - A deep generative model for probabilistic energy f.pdf:application/pdf},
}
|
||
|
||
@article{lu_scenarios_2022,
  title = {Scenarios modelling for forecasting day-ahead electricity prices: Case studies in {Australia}},
  volume = {308},
  issn = {0306-2619},
  url = {https://www.sciencedirect.com/science/article/pii/S0306261921015555},
  doi = {10.1016/j.apenergy.2021.118296},
  shorttitle = {Scenarios modelling for forecasting day-ahead electricity prices},
  abstract = {Electricity prices in spot markets are volatile and can be affected by various factors, such as generation and demand, system contingencies, local weather patterns, bidding strategies of market participants, and uncertain renewable energy outputs. Because of these factors, electricity price forecasting is challenging. This paper proposes a scenario modeling approach to improve forecasting accuracy, conditioning time series generative adversarial networks on external factors. After data pre-processing and condition selection, a conditional {TSGAN} or {CTSGAN} is designed to forecast electricity prices. Wasserstein Distance, weights limitation, and {RMSProp} optimizer are used to ensure that the {CTGAN} training process is stable. By changing the dimensionality of random noise input, the point forecasting model can be transformed into a probabilistic forecasting model. For electricity price point forecasting, the proposed {CTSGAN} model has better accuracy and has better generalization ability than the {TSGAN} and other deep learning methods. For probabilistic forecasting, the proposed {CTSGAN} model can significantly improve the continuously ranked probability score and Winkler score. The effectiveness and superiority of the proposed {CTSGAN} forecasting model are verified by case studies.},
  pages = {118296},
  journaltitle = {Applied Energy},
  shortjournal = {Applied Energy},
  author = {Lu, Xin and Qiu, Jing and Lei, Gang and Zhu, Jianguo},
  urldate = {2023-10-13},
  date = {2022-02-15},
  keywords = {Generative adversarial networks, Conditions, Electricity Price, Point forecasting, Probabilistic forecasting},
  file = {Lu et al. - 2022 - Scenarios modelling for forecasting day-ahead elec.pdf:/Users/victormylle/Zotero/storage/3XL3T253/Lu et al. - 2022 - Scenarios modelling for forecasting day-ahead elec.pdf:application/pdf;ScienceDirect Snapshot:/Users/victormylle/Zotero/storage/9K2RFGGU/S0306261921015555.html:text/html},
}
|
||
|
||
@article{gabrielli_data-driven_2022,
  title = {Data-driven modeling for long-term electricity price forecasting},
  volume = {244},
  issn = {0360-5442},
  url = {https://linkinghub.elsevier.com/retrieve/pii/S036054422200010X},
  doi = {10.1016/j.energy.2022.123107},
  abstract = {Estimating the financial viability of renewable energy investments requires the availability of long-term, finely-resolved electricity prices over the investment lifespan. This entails, however, two major challenges: (i) the combination of extensive time horizons and fine time resolutions, and (ii) the prediction of out-of-sample electricity prices in future energy and market scenarios, or shifts in pricing regime, that were not observed in the past. This paper tackles such challenges by proposing a data-driven model for the long-term prediction of electricity market prices that is based on Fourier analysis. The electricity price is decomposed into components leading to its base evolution, which are described through the amplitudes of the main frequencies of the Fourier series, and components leading to high price volatility, which are described by the residual frequencies. The former are predicted via a regression model that uses as input annual values of relevant energy and market quantities, such as electricity generation, prices and demands. The proposed method shows capable of (i) predicting the most relevant dynamics of the electricity price; (ii) generalization by capturing the market mechanisms of previously unseen electricity markets. These findings support the relevance and validity of data-driven, finely-resolved, long-term predictions and highlight the potential for hybrid data-driven and market-based models.},
  pages = {123107},
  journaltitle = {Energy},
  shortjournal = {Energy},
  author = {Gabrielli, Paolo and Wüthrich, Moritz and Blume, Steffen and Sansavini, Giovanni},
  urldate = {2023-10-15},
  date = {2022-04},
  langid = {english},
  file = {Gabrielli et al. - 2022 - Data-driven modeling for long-term electricity pri.pdf:/Users/victormylle/Zotero/storage/YHDVP399/Gabrielli et al. - 2022 - Data-driven modeling for long-term electricity pri.pdf:application/pdf},
}
|
||
|
||
@misc{kollovieh_predict_2023,
  title = {Predict, Refine, Synthesize: Self-Guiding Diffusion Models for Probabilistic Time Series Forecasting},
  url = {http://arxiv.org/abs/2307.11494},
  shorttitle = {Predict, Refine, Synthesize},
  abstract = {Diffusion models have achieved state-of-the-art performance in generative modeling tasks across various domains. Prior works on time series diffusion models have primarily focused on developing conditional models tailored to specific forecasting or imputation tasks. In this work, we explore the potential of task-agnostic, unconditional diffusion models for several time series applications. We propose {TSDiff}, an unconditionally trained diffusion model for time series. Our proposed self-guidance mechanism enables conditioning {TSDiff} for downstream tasks during inference, without requiring auxiliary networks or altering the training procedure. We demonstrate the effectiveness of our method on three different time series tasks: forecasting, refinement, and synthetic data generation. First, we show that {TSDiff} is competitive with several task-specific conditional forecasting methods (predict). Second, we leverage the learned implicit probability density of {TSDiff} to iteratively refine the predictions of base forecasters with reduced computational overhead over reverse diffusion (refine). Notably, the generative performance of the model remains intact -- downstream forecasters trained on synthetic samples from {TSDiff} outperform forecasters that are trained on samples from other state-of-the-art generative time series models, occasionally even outperforming models trained on real data (synthesize).},
  number = {{arXiv}:2307.11494},
  publisher = {{arXiv}},
  author = {Kollovieh, Marcel and Ansari, Abdul Fatir and Bohlke-Schneider, Michael and Zschiegner, Jasper and Wang, Hao and Wang, Yuyang},
  urldate = {2023-10-15},
  date = {2023-07-21},
  eprinttype = {arxiv},
  eprint = {2307.11494},
  keywords = {Computer Science - Machine Learning, Computer Science - Artificial Intelligence, Statistics - Machine Learning, {TODO}},
  file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/PBVHEPD9/2307.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/QIBWKG57/Kollovieh et al. - 2023 - Predict, Refine, Synthesize Self-Guiding Diffusio.pdf:application/pdf},
}
|
||
|
||
@misc{rasul_autoregressive_2021,
  title = {Autoregressive Denoising Diffusion Models for Multivariate Probabilistic Time Series Forecasting},
  url = {http://arxiv.org/abs/2101.12072},
  abstract = {In this work, we propose \texttt{{TimeGrad}}, an autoregressive model for multivariate probabilistic time series forecasting which samples from the data distribution at each time step by estimating its gradient. To this end, we use diffusion probabilistic models, a class of latent variable models closely connected to score matching and energy-based methods. Our model learns gradients by optimizing a variational bound on the data likelihood and at inference time converts white noise into a sample of the distribution of interest through a Markov chain using Langevin sampling. We demonstrate experimentally that the proposed autoregressive denoising diffusion model is the new state-of-the-art multivariate probabilistic forecasting method on real-world data sets with thousands of correlated dimensions. We hope that this method is a useful tool for practitioners and lays the foundation for future research in this area.},
  number = {{arXiv}:2101.12072},
  publisher = {{arXiv}},
  author = {Rasul, Kashif and Seward, Calvin and Schuster, Ingmar and Vollgraf, Roland},
  urldate = {2023-10-15},
  date = {2021-02-02},
  eprinttype = {arxiv},
  eprint = {2101.12072},
  keywords = {Computer Science - Machine Learning, Computer Science - Artificial Intelligence},
  file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/8LIRWZ4G/2101.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/QPPFJVR5/Rasul et al. - 2021 - Autoregressive Denoising Diffusion Models for Mult.pdf:application/pdf},
}
|
||
|
||
@online{noauthor_spacy_nodate,
  title = {{spaCy} · Industrial-strength Natural Language Processing in Python},
  url = {https://spacy.io/},
  abstract = {{spaCy} is a free open-source library for Natural Language Processing in Python. It features {NER}, {POS} tagging, dependency parsing, word vectors and more.},
  urldate = {2023-10-17},
  langid = {english},
  file = {Snapshot:/Users/victormylle/Zotero/storage/8WWDDEH4/spacy.io.html:text/html},
}
|
||
|
||
@online{noauthor_intfloatmultilingual-e5-base_nodate,
  title = {intfloat/multilingual-e5-base · Hugging Face},
  url = {https://huggingface.co/intfloat/multilingual-e5-base},
  abstract = {We’re on a journey to advance and democratize artificial intelligence through open source and open science.},
  urldate = {2023-10-17},
  file = {Snapshot:/Users/victormylle/Zotero/storage/LYSDP8CD/multilingual-e5-base.html:text/html},
}
|
||
|
||
@online{noauthor_googleflan-t5-base_nodate,
  title = {google/flan-t5-base · Hugging Face},
  url = {https://huggingface.co/google/flan-t5-base},
  urldate = {2023-10-17},
  file = {flan-t5-base · Hugging Face:/Users/victormylle/Zotero/storage/284DLNVT/flan-t5-base.html:text/html},
}
|
||
|
||
@online{noauthor_openai_nodate,
  title = {{OpenAI} Platform},
  url = {https://platform.openai.com},
  abstract = {Explore developer resources, tutorials, {API} docs, and dynamic examples to get the most out of {OpenAI}'s platform.},
  urldate = {2023-10-17},
  langid = {english},
  file = {Snapshot:/Users/victormylle/Zotero/storage/9NFW3FCP/gpt-3-5.html:text/html},
}
|
||
|
||
@article{cramer_normalizing_2022,
  title = {Normalizing flow-based day-ahead wind power scenario generation for profitable and reliable delivery commitments by wind farm operators},
  volume = {166},
  issn = {0098-1354},
  url = {https://www.sciencedirect.com/science/article/pii/S0098135422002617},
  doi = {10.1016/j.compchemeng.2022.107923},
  abstract = {We present a specialized scenario generation method that utilizes forecast information to generate scenarios for day-ahead scheduling problems. In particular, we use normalizing flows to generate wind power scenarios by sampling from a conditional distribution that uses wind speed forecasts to tailor the scenarios to a specific day. We apply the generated scenarios in a stochastic day-ahead bidding problem of a wind electricity producer and analyze whether the scenarios yield profitable decisions. Compared to Gaussian copulas and Wasserstein-generative adversarial networks, the normalizing flow successfully narrows the range of scenarios around the daily trends while maintaining a diverse variety of possible realizations. In the stochastic day-ahead bidding problem, the conditional scenarios from all methods lead to significantly more stable profitable results compared to an unconditional selection of historical scenarios. The normalizing flow consistently obtains the highest profits, even for small sets scenarios.},
  pages = {107923},
  journaltitle = {Computers \& Chemical Engineering},
  shortjournal = {Computers \& Chemical Engineering},
  author = {Cramer, Eike and Paeleke, Leonard and Mitsos, Alexander and Dahmen, Manuel},
  urldate = {2023-10-18},
  date = {2022-10-01},
  keywords = {Scenario generation, Stability, Stochastic programming, Wind power},
  file = {ScienceDirect Snapshot:/Users/victormylle/Zotero/storage/PT76E9DL/S0098135422002617.html:text/html;Submitted Version:/Users/victormylle/Zotero/storage/M9KFSG3M/Cramer et al. - 2022 - Normalizing flow-based day-ahead wind power scenar.pdf:application/pdf},
}
|
||
|
||
@inproceedings{zhang_diffusion_2021,
  title = {Diffusion Normalizing Flow},
  volume = {34},
  url = {https://proceedings.neurips.cc/paper/2021/hash/876f1f9954de0aa402d91bb988d12cd4-Abstract.html},
  abstract = {We present a novel generative modeling method called diffusion normalizing flow based on stochastic differential equations ({SDEs}). The algorithm consists of two neural {SDEs}: a forward {SDE} that gradually adds noise to the data to transform the data into Gaussian random noise, and a backward {SDE} that gradually removes the noise to sample from the data distribution. By jointly training the two neural {SDEs} to minimize a common cost function that quantifies the difference between the two, the backward {SDE} converges to a diffusion process the starts with a Gaussian distribution and ends with the desired data distribution. Our method is closely related to normalizing flow and diffusion probabilistic models, and can be viewed as a combination of the two. Compared with normalizing flow, diffusion normalizing flow is able to learn distributions with sharp boundaries. Compared with diffusion probabilistic models, diffusion normalizing flow requires fewer discretization steps and thus has better sampling efficiency. Our algorithm demonstrates competitive performance in both high-dimension data density estimation and image generation tasks.},
  pages = {16280--16291},
  booktitle = {Advances in Neural Information Processing Systems},
  publisher = {Curran Associates, Inc.},
  author = {Zhang, Qinsheng and Chen, Yongxin},
  urldate = {2023-10-18},
  date = {2021},
  keywords = {{TODO}},
  file = {Full Text PDF:/Users/victormylle/Zotero/storage/U45EUFZU/Zhang and Chen - 2021 - Diffusion Normalizing Flow.pdf:application/pdf},
}
|
||
|
||
@misc{rezende_variational_2016,
  title = {Variational Inference with Normalizing Flows},
  url = {http://arxiv.org/abs/1505.05770},
  abstract = {The choice of approximate posterior distribution is one of the core problems in variational inference. Most applications of variational inference employ simple families of posterior approximations in order to allow for efficient inference, focusing on mean-field or other simple structured approximations. This restriction has a significant impact on the quality of inferences made using variational methods. We introduce a new approach for specifying flexible, arbitrarily complex and scalable approximate posterior distributions. Our approximations are distributions constructed through a normalizing flow, whereby a simple initial density is transformed into a more complex one by applying a sequence of invertible transformations until a desired level of complexity is attained. We use this view of normalizing flows to develop categories of finite and infinitesimal flows and provide a unified view of approaches for constructing rich posterior approximations. We demonstrate that the theoretical advantages of having posteriors that better match the true posterior, combined with the scalability of amortized variational approaches, provides a clear improvement in performance and applicability of variational inference.},
  number = {{arXiv}:1505.05770},
  publisher = {{arXiv}},
  author = {Rezende, Danilo Jimenez and Mohamed, Shakir},
  urldate = {2023-10-18},
  date = {2016-06-14},
  eprinttype = {arxiv},
  eprint = {1505.05770},
  note = {version: 6},
  keywords = {Computer Science - Machine Learning, Computer Science - Artificial Intelligence, Statistics - Machine Learning, Statistics - Computation, Statistics - Methodology},
  file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/2J7MPVV5/1505.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/GQWIFAAN/Rezende and Mohamed - 2016 - Variational Inference with Normalizing Flows.pdf:application/pdf},
}
|
||
|
||
@misc{gruver_large_2023,
  title = {Large Language Models Are Zero-Shot Time Series Forecasters},
  url = {http://arxiv.org/abs/2310.07820},
  doi = {10.48550/arXiv.2310.07820},
  abstract = {By encoding time series as a string of numerical digits, we can frame time series forecasting as next-token prediction in text. Developing this approach, we find that large language models ({LLMs}) such as {GPT}-3 and {LLaMA}-2 can surprisingly zero-shot extrapolate time series at a level comparable to or exceeding the performance of purpose-built time series models trained on the downstream tasks. To facilitate this performance, we propose procedures for effectively tokenizing time series data and converting discrete distributions over tokens into highly flexible densities over continuous values. We argue the success of {LLMs} for time series stems from their ability to naturally represent multimodal distributions, in conjunction with biases for simplicity, and repetition, which align with the salient features in many time series, such as repeated seasonal trends. We also show how {LLMs} can naturally handle missing data without imputation through non-numerical text, accommodate textual side information, and answer questions to help explain predictions. While we find that increasing model size generally improves performance on time series, we show {GPT}-4 can perform worse than {GPT}-3 because of how it tokenizes numbers, and poor uncertainty calibration, which is likely the result of alignment interventions such as {RLHF}.},
  number = {{arXiv}:2310.07820},
  author = {Gruver, Nate and Finzi, Marc and Qiu, Shikai and Wilson, Andrew Gordon},
  urldate = {2023-10-13},
  date = {2023-10-11},
  eprinttype = {arxiv},
  eprint = {2310.07820},
  keywords = {Computer Science - Machine Learning},
  file = {Gruver et al. - 2023 - Large Language Models Are Zero-Shot Time Series Forecasters.pdf:/Users/victormylle/Zotero/storage/T5XZ227W/Gruver et al. - 2023 - Large Language Models Are Zero-Shot Time Series Forecasters.pdf:application/pdf},
}
|
||
|
||
% NOTE(review): this article entry has no journaltitle and no date, so most styles
% will warn and print an incomplete reference. Confirm the venue and year from the
% attached PDF and complete the record in the reference manager before citing.
@article{sweidan_probabilistic_nodate,
  title = {Probabilistic Prediction in scikit-learn},
  abstract = {Adding confidence measures to predictive models should increase the trustworthiness, but only if the models are well-calibrated. Historically, some algorithms like logistic regression, but also neural networks, have been considered to produce well-calibrated probability estimates off-the-shelf. Other techniques, like decision trees and Naive Bayes, on the other hand, are infamous for being significantly overconfident in their probabilistic predictions. In this paper, a large experimental study is conducted to investigate how well-calibrated models produced by a number of algorithms in the scikit-learn library are out-of-the-box, but also if either the built-in calibration techniques Platt scaling and isotonic regression, or Venn-Abers, can be used to improve the calibration. The results show that of the seven algorithms evaluated, the only one obtaining well-calibrated models without the external calibration is logistic regression. All other algorithms, i.e., decision trees, adaboost, gradient boosting, {kNN}, naive Bayes and random forest benefit from using any of the calibration techniques. In particular, decision trees, Naive Bayes and the boosted models are substantially improved using external calibration. From a practitioner’s perspective, the obvious recommendation becomes to incorporate calibration when using probabilistic prediction. Comparing the different calibration techniques, Platt scaling and {VennAbers} generally outperform isotonic regression, on these rather small datasets. Finally, the unique ability of Venn-Abers to output not only well-calibrated probability estimates, but also the confidence in these estimates is demonstrated.},
  author = {Sweidan, Dirar and Johansson, Ulf},
  langid = {english},
  file = {Sweidan and Johansson - Probabilistic Prediction in scikit-learn.pdf:/Users/victormylle/Zotero/storage/8LDMB83T/Sweidan and Johansson - Probabilistic Prediction in scikit-learn.pdf:application/pdf},
}
|
||
|
||
@article{baskan_scenario-based_2023,
  title = {A Scenario-Based Model Comparison for Short-Term Day-Ahead Electricity Prices in Times of Economic and Political Tension},
  volume = {16},
  issn = {1999-4893},
  url = {https://www.mdpi.com/1999-4893/16/4/177},
  doi = {10.3390/a16040177},
  abstract = {In recent years, energy prices have become increasingly volatile, making it more challenging to predict them accurately. This uncertain market trend behavior makes it harder for market participants, e.g., power plant dispatchers, to make reliable decisions. Machine learning ({ML}) has recently emerged as a powerful artificial intelligence ({AI}) technique to get reliable predictions in particularly volatile and unforeseeable situations. This development makes {ML} models an attractive complement to other approaches that require more extensive human modeling effort and assumptions about market mechanisms. This study investigates the application of machine and deep learning approaches to predict day-ahead electricity prices for a 7-day horizon on the German spot market to give power plants enough time to ramp up or down. A qualitative and quantitative analysis is conducted, assessing model performance concerning the forecast horizon and their robustness depending on the selected hyperparameters. For evaluation purposes, three test scenarios with different characteristics are manually chosen. Various models are trained, optimized, and compared with each other using common performance metrics. This study shows that deep learning models outperform tree-based and statistical models despite or because of the volatile energy prices.},
  pages = {177},
  number = {4},
  journaltitle = {Algorithms},
  shortjournal = {Algorithms},
  author = {Baskan, Denis E. and Meyer, Daniel and Mieck, Sebastian and Faubel, Leonhard and Klöpper, Benjamin and Strem, Nika and Wagner, Johannes A. and Koltermann, Jan J.},
  urldate = {2023-10-22},
  date = {2023-03-24},
  langid = {english},
  file = {Baskan et al. - 2023 - A Scenario-Based Model Comparison for Short-Term D.pdf:/Users/victormylle/Zotero/storage/TU5JX5D4/Baskan et al. - 2023 - A Scenario-Based Model Comparison for Short-Term D.pdf:application/pdf},
}
|
||
|
||
@online{tsaprounis_metrics_2023,
  title = {Metrics for Distributional Forecasts},
  url = {https://medium.com/trusted-data-science-haleon/metrics-for-distributional-forecasts-60e156c60177},
  abstract = {How to evaluate distributional/probabilistic time series forecasts in Python.},
  titleaddon = {Trusted Data Science @ Haleon},
  author = {Tsaprounis, Leonidas},
  urldate = {2023-10-24},
  date = {2023-02-27},
  langid = {english},
}
|
||
|
||
@misc{roy_recent_2021,
  title = {Recent Trends in Named Entity Recognition ({NER})},
  url = {http://arxiv.org/abs/2101.11420},
  doi = {10.48550/arXiv.2101.11420},
  abstract = {The availability of large amounts of computer-readable textual data and hardware that can process the data has shifted the focus of knowledge projects towards deep learning architecture. Natural Language Processing, particularly the task of Named Entity Recognition is no exception. The bulk of the learning methods that have produced state-of-the-art results have changed the deep learning model, the training method used, the training data itself or the encoding of the output of the {NER} system. In this paper, we review significant learning methods that have been employed for {NER} in the recent past and how they came about from the linear learning methods of the past. We also cover the progress of related tasks that are upstream or downstream to {NER}, e.g., sequence tagging, entity linking, etc., wherever the processes in question have also improved {NER} results.},
  number = {{arXiv}:2101.11420},
  publisher = {{arXiv}},
  author = {Roy, Arya},
  urldate = {2023-10-24},
  date = {2021-01-25},
  eprinttype = {arxiv},
  eprint = {2101.11420},
  keywords = {Computer Science - Computation and Language},
  file = {arXiv Fulltext PDF:/Users/victormylle/Zotero/storage/AAZ3I43G/Roy - 2021 - Recent Trends in Named Entity Recognition (NER).pdf:application/pdf;arXiv.org Snapshot:/Users/victormylle/Zotero/storage/DWNPFLCX/2101.html:text/html},
}
|
||
|
||
@online{noauthor_sentencetransformers_nodate,
  title = {{SentenceTransformers} Documentation — Sentence-Transformers documentation},
  url = {https://www.sbert.net/},
  urldate = {2023-10-29},
  file = {SentenceTransformers Documentation — Sentence-Transformers documentation:/Users/victormylle/Zotero/storage/7ZPK2DIZ/www.sbert.net.html:text/html},
}
|
||
|
||
@online{noauthor_hugging_2023,
  title = {Hugging Face – The {AI} community building the future.},
  url = {https://huggingface.co/},
  abstract = {We’re on a journey to advance and democratize artificial intelligence through open source and open science.},
  urldate = {2023-10-29},
  date = {2023-10-22},
  file = {Snapshot:/Users/victormylle/Zotero/storage/8U9I2BD9/huggingface.co.html:text/html},
}
|
||
|
||
@misc{narayan_regularization_2021,
|
||
title = {Regularization Strategies for Quantile Regression},
|
||
url = {http://arxiv.org/abs/2102.05135},
|
||
abstract = {We investigate different methods for regularizing quantile regression when predicting either a subset of quantiles or the full inverse {CDF}. We show that minimizing an expected pinball loss over a continuous distribution of quantiles is a good regularizer even when only predicting a specific quantile. For predicting multiple quantiles, we propose achieving the classic goal of non-crossing quantiles by using deep lattice networks that treat the quantile as a monotonic input feature, and we discuss why monotonicity on other features is an apt regularizer for quantile regression. We show that lattice models enable regularizing the predicted distribution to a location-scale family. Lastly, we propose applying rate constraints to improve the calibration of the quantile predictions on specific subsets of interest and improve fairness metrics. We demonstrate our contributions on simulations, benchmark datasets, and real quantile regression problems.},
|
||
number = {{arXiv}:2102.05135},
|
||
publisher = {{arXiv}},
|
||
author = {Narayan, Taman and Wang, Serena and Canini, Kevin and Gupta, Maya},
|
||
urldate = {2023-11-14},
|
||
date = {2021-02-09},
|
||
eprinttype = {arxiv},
|
||
eprint = {2102.05135},
eprintclass = {cs, stat},
|
||
version = {1},
|
||
keywords = {Computer Science - Machine Learning, Statistics - Machine Learning, Statistics - Methodology},
|
||
file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/DQZGHBIS/2102.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/W6WTUZQ3/Narayan et al. - 2021 - Regularization Strategies for Quantile Regression.pdf:application/pdf},
|
||
}
|
||
|
||
@misc{chung_beyond_2021,
|
||
title = {Beyond Pinball Loss: Quantile Methods for Calibrated Uncertainty Quantification},
|
||
url = {http://arxiv.org/abs/2011.09588},
|
||
shorttitle = {Beyond Pinball Loss},
|
||
abstract = {Among the many ways of quantifying uncertainty in a regression setting, specifying the full quantile function is attractive, as quantiles are amenable to interpretation and evaluation. A model that predicts the true conditional quantiles for each input, at all quantile levels, presents a correct and efficient representation of the underlying uncertainty. To achieve this, many current quantile-based methods focus on optimizing the so-called pinball loss. However, this loss restricts the scope of applicable regression models, limits the ability to target many desirable properties (e.g. calibration, sharpness, centered intervals), and may produce poor conditional quantiles. In this work, we develop new quantile methods that address these shortcomings. In particular, we propose methods that can apply to any class of regression model, allow for selecting a trade-off between calibration and sharpness, optimize for calibration of centered intervals, and produce more accurate conditional quantiles. We provide a thorough experimental evaluation of our methods, which includes a high dimensional uncertainty quantification task in nuclear fusion.},
|
||
number = {{arXiv}:2011.09588},
|
||
publisher = {{arXiv}},
|
||
author = {Chung, Youngseog and Neiswanger, Willie and Char, Ian and Schneider, Jeff},
|
||
urldate = {2023-12-14},
|
||
date = {2021-12-09},
|
||
eprinttype = {arxiv},
|
||
eprint = {2011.09588},
eprintclass = {cs, stat},
|
||
keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
|
||
file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/WWFHI3UN/2011.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/SHMRZ3Q7/Chung et al. - 2021 - Beyond Pinball Loss Quantile Methods for Calibrat.pdf:application/pdf},
|
||
}
|
||
|
||
@misc{van_hautte_bad_2019,
|
||
title = {Bad Form: Comparing Context-Based and Form-Based Few-Shot Learning in Distributional Semantic Models},
|
||
url = {http://arxiv.org/abs/1910.00275},
|
||
shorttitle = {Bad Form},
|
||
abstract = {Word embeddings are an essential component in a wide range of natural language processing applications. However, distributional semantic models are known to struggle when only a small number of context sentences are available. Several methods have been proposed to obtain higher-quality vectors for these words, leveraging both this context information and sometimes the word forms themselves through a hybrid approach. We show that the current tasks do not suffice to evaluate models that use word-form information, as such models can easily leverage word forms in the training data that are related to word forms in the test data. We introduce 3 new tasks, allowing for a more balanced comparison between models. Furthermore, we show that hyperparameters that have largely been ignored in previous work can consistently improve the performance of both baseline and advanced models, achieving a new state of the art on 4 out of 6 tasks.},
|
||
number = {{arXiv}:1910.00275},
|
||
publisher = {{arXiv}},
|
||
author = {Van Hautte, Jeroen and Emerson, Guy and Rei, Marek},
|
||
urldate = {2024-03-09},
|
||
date = {2019-10-01},
|
||
eprinttype = {arxiv},
|
||
eprint = {1910.00275},
eprintclass = {cs},
|
||
keywords = {Computer Science - Computation and Language, Computer Science - Machine Learning},
|
||
file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/PUBS4DRK/1910.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/VY5YGVXU/Van Hautte et al. - 2019 - Bad Form Comparing Context-Based and Form-Based F.pdf:application/pdf},
|
||
}
|
||
|
||
@misc{decorte_jobbert_2021-1,
|
||
title = {{JobBERT}: Understanding Job Titles through Skills},
|
||
url = {http://arxiv.org/abs/2109.09605},
|
||
shorttitle = {{JobBERT}},
|
||
abstract = {Job titles form a cornerstone of today's human resources ({HR}) processes. Within online recruitment, they allow candidates to understand the contents of a vacancy at a glance, while internal {HR} departments use them to organize and structure many of their processes. As job titles are a compact, convenient, and readily available data source, modeling them with high accuracy can greatly benefit many {HR} tech applications. In this paper, we propose a neural representation model for job titles, by augmenting a pre-trained language model with co-occurrence information from skill labels extracted from vacancies. Our {JobBERT} method leads to considerable improvements compared to using generic sentence encoders, for the task of job title normalization, for which we release a new evaluation benchmark.},
|
||
number = {{arXiv}:2109.09605},
|
||
publisher = {{arXiv}},
|
||
author = {Decorte, Jens-Joris and Van Hautte, Jeroen and Demeester, Thomas and Develder, Chris},
|
||
urldate = {2024-03-09},
|
||
date = {2021-09-20},
|
||
eprinttype = {arxiv},
|
||
eprint = {2109.09605},
eprintclass = {cs},
|
||
keywords = {Computer Science - Computation and Language},
|
||
file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/6FMYQ68Y/2109.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/2SD3P252/Decorte et al. - 2021 - JobBERT Understanding Job Titles through Skills.pdf:application/pdf},
|
||
}
|
||
|
||
@misc{van_hautte_leveraging_2020,
|
||
title = {Leveraging the Inherent Hierarchy of Vacancy Titles for Automated Job Ontology Expansion},
|
||
url = {http://arxiv.org/abs/2004.02814},
|
||
abstract = {Machine learning plays an ever-bigger part in online recruitment, powering intelligent matchmaking and job recommendations across many of the world's largest job platforms. However, the main text is rarely enough to fully understand a job posting: more often than not, much of the required information is condensed into the job title. Several organised efforts have been made to map job titles onto a hand-made knowledge base as to provide this information, but these only cover around 60\% of online vacancies. We introduce a novel, purely data-driven approach towards the detection of new job titles. Our method is conceptually simple, extremely efficient and competitive with traditional {NER}-based approaches. Although the standalone application of our method does not outperform a finetuned {BERT} model, it can be applied as a preprocessing step as well, substantially boosting accuracy across several architectures.},
|
||
number = {{arXiv}:2004.02814},
|
||
publisher = {{arXiv}},
|
||
author = {Van Hautte, Jeroen and Schelstraete, Vincent and Wornoo, Mikaël},
|
||
urldate = {2024-03-09},
|
||
date = {2020-04-06},
|
||
eprinttype = {arxiv},
|
||
eprint = {2004.02814},
eprintclass = {cs},
|
||
keywords = {Computer Science - Computation and Language, Computer Science - Machine Learning},
|
||
file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/6FAKZYDM/2004.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/EY3RNC6S/Van Hautte et al. - 2020 - Leveraging the Inherent Hierarchy of Vacancy Title.pdf:application/pdf},
|
||
}
|
||
|
||
@misc{decorte_design_2022,
|
||
title = {Design of Negative Sampling Strategies for Distantly Supervised Skill Extraction},
|
||
url = {http://arxiv.org/abs/2209.05987},
|
||
abstract = {Skills play a central role in the job market and many human resources ({HR}) processes. In the wake of other digital experiences, today's online job market has candidates expecting to see the right opportunities based on their skill set. Similarly, enterprises increasingly need to use data to guarantee that the skills within their workforce remain future-proof. However, structured information about skills is often missing, and processes building on self- or manager-assessment have shown to struggle with issues around adoption, completeness, and freshness of the resulting data. Extracting skills is a highly challenging task, given the many thousands of possible skill labels mentioned either explicitly or merely described implicitly and the lack of finely annotated training corpora. Previous work on skill extraction overly simplifies the task to an explicit entity detection task or builds on manually annotated training data that would be infeasible if applied to a complete vocabulary of skills. We propose an end-to-end system for skill extraction, based on distant supervision through literal matching. We propose and evaluate several negative sampling strategies, tuned on a small validation dataset, to improve the generalization of skill extraction towards implicitly mentioned skills, despite the lack of such implicit skills in the distantly supervised data. We observe that using the {ESCO} taxonomy to select negative examples from related skills yields the biggest improvements, and combining three different strategies in one model further increases the performance, up to 8 percentage points in {RP}@5. We introduce a manually annotated evaluation benchmark for skill extraction based on the {ESCO} taxonomy, on which we validate our models. We release the benchmark dataset for research purposes to stimulate further research on the task.},
|
||
number = {{arXiv}:2209.05987},
|
||
publisher = {{arXiv}},
|
||
author = {Decorte, Jens-Joris and Van Hautte, Jeroen and Deleu, Johannes and Develder, Chris and Demeester, Thomas},
|
||
urldate = {2024-03-09},
|
||
date = {2022-09-13},
|
||
eprinttype = {arxiv},
|
||
eprint = {2209.05987},
eprintclass = {cs},
|
||
keywords = {Computer Science - Computation and Language},
|
||
file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/E79F2EV8/2209.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/SBEAYV66/Decorte et al. - 2022 - Design of Negative Sampling Strategies for Distant.pdf:application/pdf},
|
||
}
|
||
|
||
@misc{decorte_extreme_2023,
|
||
title = {Extreme Multi-Label Skill Extraction Training using Large Language Models},
|
||
url = {http://arxiv.org/abs/2307.10778},
|
||
abstract = {Online job ads serve as a valuable source of information for skill requirements, playing a crucial role in labor market analysis and e-recruitment processes. Since such ads are typically formatted in free text, natural language processing ({NLP}) technologies are required to automatically process them. We specifically focus on the task of detecting skills (mentioned literally, or implicitly described) and linking them to a large skill ontology, making it a challenging case of extreme multi-label classification ({XMLC}). Given that there is no sizable labeled (training) dataset are available for this specific {XMLC} task, we propose techniques to leverage general Large Language Models ({LLMs}). We describe a cost-effective approach to generate an accurate, fully synthetic labeled dataset for skill extraction, and present a contrastive learning strategy that proves effective in the task. Our results across three skill extraction benchmarks show a consistent increase of between 15 to 25 percentage points in \textit{R-Precision@5} compared to previously published results that relied solely on distant supervision through literal matches.},
|
||
number = {{arXiv}:2307.10778},
|
||
publisher = {{arXiv}},
|
||
author = {Decorte, Jens-Joris and Verlinden, Severine and Van Hautte, Jeroen and Deleu, Johannes and Develder, Chris and Demeester, Thomas},
|
||
urldate = {2024-03-09},
|
||
date = {2023-07-20},
|
||
eprinttype = {arxiv},
|
||
eprint = {2307.10778},
eprintclass = {cs},
|
||
keywords = {Computer Science - Computation and Language},
|
||
file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/VVZZQW45/2307.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/8U7P43IE/Decorte et al. - 2023 - Extreme Multi-Label Skill Extraction Training usin.pdf:application/pdf},
|
||
}
|
||
|
||
@misc{decorte_career_2023,
|
||
title = {Career Path Prediction using Resume Representation Learning and Skill-based Matching},
|
||
url = {http://arxiv.org/abs/2310.15636},
|
||
abstract = {The impact of person-job fit on job satisfaction and performance is widely acknowledged, which highlights the importance of providing workers with next steps at the right time in their career. This task of predicting the next step in a career is known as career path prediction, and has diverse applications such as turnover prevention and internal job mobility. Existing methods to career path prediction rely on large amounts of private career history data to model the interactions between job titles and companies. We propose leveraging the unexplored textual descriptions that are part of work experience sections in resumes. We introduce a structured dataset of 2,164 anonymized career histories, annotated with {ESCO} occupation labels. Based on this dataset, we present a novel representation learning approach, {CareerBERT}, specifically designed for work history data. We develop a skill-based model and a text-based model for career path prediction, which achieve 35.24\% and 39.61\% recall@10 respectively on our dataset. Finally, we show that both approaches are complementary as a hybrid approach achieves the strongest result with 43.01\% recall@10.},
|
||
number = {{arXiv}:2310.15636},
|
||
publisher = {{arXiv}},
|
||
author = {Decorte, Jens-Joris and Van Hautte, Jeroen and Deleu, Johannes and Develder, Chris and Demeester, Thomas},
|
||
urldate = {2024-03-09},
|
||
date = {2023-10-24},
|
||
eprinttype = {arxiv},
|
||
eprint = {2310.15636},
eprintclass = {cs},
|
||
keywords = {Computer Science - Computation and Language, Computer Science - Artificial Intelligence},
|
||
file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/I6AMKGVA/2310.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/AKTKCWBR/Decorte et al. - 2023 - Career Path Prediction using Resume Representation.pdf:application/pdf},
|
||
}
|
||
|
||
@online{noauthor_liberalised_nodate,
|
||
title = {Players in the {Belgian} Power Market},
|
||
url = {https://www.next-kraftwerke.be/en/knowledge-hub/players-in-the-belgian-power-market/},
|
||
abstract = {The liberalised electricity market includes many parties who all have to work together and at the same time try to make a profit. An overview of the most...},
|
||
urldate = {2024-03-20},
|
||
file = {Snapshot:/Users/victormylle/Zotero/storage/M9XWVY6F/players-in-the-belgian-power-market.html:text/html},
|
||
}
|
||
|
||
@misc{ho_denoising_2020,
|
||
title = {Denoising Diffusion Probabilistic Models},
|
||
url = {http://arxiv.org/abs/2006.11239},
|
||
doi = {10.48550/arXiv.2006.11239},
|
||
abstract = {We present high quality image synthesis results using diffusion probabilistic models, a class of latent variable models inspired by considerations from nonequilibrium thermodynamics. Our best results are obtained by training on a weighted variational bound designed according to a novel connection between diffusion probabilistic models and denoising score matching with Langevin dynamics, and our models naturally admit a progressive lossy decompression scheme that can be interpreted as a generalization of autoregressive decoding. On the unconditional {CIFAR}10 dataset, we obtain an Inception score of 9.46 and a state-of-the-art {FID} score of 3.17. On 256x256 {LSUN}, we obtain sample quality similar to {ProgressiveGAN}. Our implementation is available at https://github.com/hojonathanho/diffusion},
|
||
number = {{arXiv}:2006.11239},
|
||
publisher = {{arXiv}},
|
||
author = {Ho, Jonathan and Jain, Ajay and Abbeel, Pieter},
|
||
urldate = {2024-04-02},
|
||
date = {2020-12-16},
|
||
eprinttype = {arxiv},
|
||
eprint = {2006.11239},
eprintclass = {cs, stat},
|
||
keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
|
||
file = {arXiv Fulltext PDF:/Users/victormylle/Zotero/storage/CYMHCMUT/Ho et al. - 2020 - Denoising Diffusion Probabilistic Models.pdf:application/pdf;arXiv.org Snapshot:/Users/victormylle/Zotero/storage/CE8R84V5/2006.html:text/html},
|
||
}
|
||
|
||
@inproceedings{dumas_probabilistic_2019,
|
||
title = {Probabilistic Forecasting of Imbalance Prices in the {Belgian} Context},
|
||
url = {http://arxiv.org/abs/2106.07361},
|
||
doi = {10.1109/EEM.2019.8916375},
|
||
abstract = {Forecasting imbalance prices is essential for strategic participation in the short-term energy markets. A novel two-step probabilistic approach is proposed, with a particular focus on the Belgian case. The first step consists of computing the net regulation volume state transition probabilities. It is modeled as a matrix computed using historical data. This matrix is then used to infer the imbalance prices since the net regulation volume can be related to the level of reserves activated and the corresponding marginal prices for each activation level are published by the Belgian Transmission System Operator one day before electricity delivery. This approach is compared to a deterministic model, a multi-layer perceptron, and a widely used probabilistic technique, Gaussian Processes.},
|
||
pages = {1--7},
|
||
booktitle = {2019 16th International Conference on the European Energy Market ({EEM})},
|
||
author = {Dumas, Jonathan and Boukas, Ioannis and {Manuel de Villena}, Miguel and Mathieu, Sébastien and Cornélusse, Bertrand},
|
||
urldate = {2024-04-17},
|
||
date = {2019-09},
|
||
eprinttype = {arxiv},
|
||
eprint = {2106.07361},
eprintclass = {cs, eess, q-fin},
|
||
keywords = {Computer Science - Machine Learning, Electrical Engineering and Systems Science - Signal Processing, Quantitative Finance - Statistical Finance},
|
||
file = {arXiv.org Snapshot:/Users/victormylle/Zotero/storage/3N56FPYP/2106.html:text/html;Full Text PDF:/Users/victormylle/Zotero/storage/958MBH5M/Dumas et al. - 2019 - Probabilistic Forecasting of Imbalance Prices in t.pdf:application/pdf},
|
||
}
|
||
|
||
@online{noauthor_ghent_nodate,
|
||
title = {Ghent University: Master of Science in Computer Science Engineering},
|
||
url = {https://studiekiezer.ugent.be/2024/master-of-science-in-computer-science-engineering-en},
|
||
urldate = {2024-04-17},
|
||
file = {master-of-science-in-computer-science-engineering-en:/Users/victormylle/Zotero/storage/JCELQ9VV/master-of-science-in-computer-science-engineering-en.html:text/html},
|
||
}
|
||
|
||
@article{gunduz_transfer_2023,
|
||
title = {Transfer learning for electricity price forecasting},
|
||
volume = {34},
|
||
issn = {2352-4677},
|
||
url = {https://www.sciencedirect.com/science/article/pii/S2352467723000048},
|
||
doi = {10.1016/j.segan.2023.100996},
|
||
abstract = {Electricity price forecasting is an essential task in all the deregulated markets of the world. The accurate prediction of day-ahead electricity prices is an active research field and available data from various markets can be used as input for forecasting. A collection of models have been proposed for this task, but the fundamental question on how to use the available big data is often neglected. In this paper, we propose to use transfer learning as a tool for utilizing information from other electricity price markets for forecasting. We pre-train a neural network model on source markets and finally do a fine-tuning for the target market. Moreover, we test different ways to use the rich input data from various electricity price markets to forecast 24 steps ahead in hourly frequency. Our experiments on four different day-ahead markets indicate that transfer learning improves the electricity price forecasting performance in a statistically significant manner. Furthermore, we compare our results with state-of-the-art methods in a rolling window scheme to demonstrate the performance of the transfer learning approach. Our method improves the performance of the state-of-the-art algorithms by 7\% for the French market and 3\% for the German market.},
|
||
pages = {100996},
|
||
journaltitle = {Sustainable Energy, Grids and Networks},
|
||
shortjournal = {Sustainable Energy, Grids and Networks},
|
||
author = {Gunduz, Salih and Ugurlu, Umut and Oksuz, Ilkay},
|
||
urldate = {2024-04-17},
|
||
date = {2023-06-01},
|
||
keywords = {Artificial neural networks, Electricity price forecasting, Market integration, Transfer learning},
|
||
file = {ScienceDirect Snapshot:/Users/victormylle/Zotero/storage/BWI5FHS4/S2352467723000048.html:text/html;Submitted Version:/Users/victormylle/Zotero/storage/62FHBWJ8/Gunduz et al. - 2023 - Transfer learning for electricity price forecastin.pdf:application/pdf},
|
||
}
|
||
|
||
@article{lago_forecasting_2018,
|
||
title = {Forecasting spot electricity prices: Deep learning approaches and empirical comparison of traditional algorithms},
|
||
volume = {221},
|
||
issn = {0306-2619},
|
||
url = {https://www.sciencedirect.com/science/article/pii/S030626191830196X},
|
||
doi = {10.1016/j.apenergy.2018.02.069},
|
||
shorttitle = {Forecasting spot electricity prices},
|
||
abstract = {In this paper, a novel modeling framework for forecasting electricity prices is proposed. While many predictive models have been already proposed to perform this task, the area of deep learning algorithms remains yet unexplored. To fill this scientific gap, we propose four different deep learning models for predicting electricity prices and we show how they lead to improvements in predictive accuracy. In addition, we also consider that, despite the large number of proposed methods for predicting electricity prices, an extensive benchmark is still missing. To tackle that, we compare and analyze the accuracy of 27 common approaches for electricity price forecasting. Based on the benchmark results, we show how the proposed deep learning models outperform the state-of-the-art methods and obtain results that are statistically significant. Finally, using the same results, we also show that: (i) machine learning methods yield, in general, a better accuracy than statistical models; (ii) moving average terms do not improve the predictive accuracy; (iii) hybrid models do not outperform their simpler counterparts.},
|
||
pages = {386--405},
|
||
journaltitle = {Applied Energy},
|
||
shortjournal = {Applied Energy},
|
||
author = {Lago, Jesus and De Ridder, Fjo and De Schutter, Bart},
|
||
urldate = {2024-04-17},
|
||
date = {2018-07-01},
|
||
keywords = {Deep learning, Electricity price forecasting, Benchmark study},
|
||
file = {Full Text:/Users/victormylle/Zotero/storage/SZAAF5RK/Lago et al. - 2018 - Forecasting spot electricity prices Deep learning.pdf:application/pdf;ScienceDirect Snapshot:/Users/victormylle/Zotero/storage/5JH9JLSM/S030626191830196X.html:text/html},
|
||
}
|
||
|
||
@article{weron_electricity_2014,
|
||
title = {Electricity price forecasting: A review of the state-of-the-art with a look into the future},
|
||
volume = {30},
|
||
issn = {0169-2070},
|
||
url = {https://www.sciencedirect.com/science/article/pii/S0169207014001083},
|
||
doi = {10.1016/j.ijforecast.2014.08.008},
|
||
shorttitle = {Electricity price forecasting},
|
||
abstract = {A variety of methods and ideas have been tried for electricity price forecasting ({EPF}) over the last 15 years, with varying degrees of success. This review article aims to explain the complexity of available solutions, their strengths and weaknesses, and the opportunities and threats that the forecasting tools offer or that may be encountered. The paper also looks ahead and speculates on the directions {EPF} will or should take in the next decade or so. In particular, it postulates the need for objective comparative {EPF} studies involving (i) the same datasets, (ii) the same robust error evaluation procedures, and (iii) statistical testing of the significance of one model’s outperformance of another.},
|
||
pages = {1030--1081},
|
||
number = {4},
|
||
journaltitle = {International Journal of Forecasting},
|
||
shortjournal = {International Journal of Forecasting},
|
||
author = {Weron, Rafał},
|
||
urldate = {2024-05-02},
|
||
date = {2014-10-01},
|
||
keywords = {Autoregression, Day-ahead market, Electricity price forecasting, Factor model, Forecast combination, Neural network, Probabilistic forecast, Seasonality},
|
||
file = {ScienceDirect Snapshot:/Users/victormylle/Zotero/storage/DDGF263F/S0169207014001083.html:text/html},
|
||
}
|
||
|
||
@article{poggi_electricity_2023,
|
||
title = {Electricity Price Forecasting via Statistical and Deep Learning Approaches: The {German} Case},
|
||
volume = {3},
|
||
rights = {http://creativecommons.org/licenses/by/3.0/},
|
||
issn = {2673-9909},
|
||
url = {https://www.mdpi.com/2673-9909/3/2/18},
|
||
doi = {10.3390/appliedmath3020018},
|
||
shorttitle = {Electricity Price Forecasting via Statistical and Deep Learning Approaches},
|
||
abstract = {Our research involves analyzing the latest models used for electricity price forecasting, which include both traditional inferential statistical methods and newer deep learning techniques. Through our analysis of historical data and the use of multiple weekday dummies, we have proposed an innovative solution for forecasting electricity spot prices. This solution involves breaking down the spot price series into two components: a seasonal trend component and a stochastic component. By utilizing this approach, we are able to provide highly accurate predictions for all considered time frames.},
|
||
pages = {316--342},
|
||
number = {2},
|
||
journaltitle = {{AppliedMath}},
|
||
author = {Poggi, Aurora and Di Persio, Luca and Ehrhardt, Matthias},
|
||
urldate = {2024-05-02},
|
||
date = {2023-06},
|
||
langid = {english},
|
||
publisher = {Multidisciplinary Digital Publishing Institute},
|
||
keywords = {autoregressive, deep learning, electricity price forecasting, machine learning, neural network, statistical method, univariate model},
|
||
file = {Full Text PDF:/Users/victormylle/Zotero/storage/3IR29RU3/Poggi et al. - 2023 - Electricity Price Forecasting via Statistical and .pdf:application/pdf},
|
||
}
|