Learning transferable representations from unlabeled time series is crucial for improving performance in data-scarce classification. Existing self-supervised methods often operate at the point level and rely on unidirectional encoding, leading to low semantic density and a mismatch between pre-training and downstream optimization. In this paper, we propose TimeMAE, a self-supervised framework that reformulates masked modeling for time series via semantic unit elevation and decoupled representation learning. Instead of modeling individual time steps, TimeMAE segments time series into non-overlapping sub-series to form semantically enriched units, enabling more informative masked reconstruction while reducing computational cost. To address the representation discrepancy introduced by masking, we design a decoupled masked autoencoder that separately encodes visible and masked regions, avoiding artificial masked tokens in the main encoder. To guide pre-training, we introduce two complementary objectives: masked codeword classification, which discretizes sub-series semantics via a learned tokenizer, and masked representation regression, which aligns continuous representations through a momentum-updated target encoder. Extensive experiments on five datasets demonstrate that TimeMAE outperforms competitive baselines, particularly in label-scarce and transfer-learning scenarios.
@inproceedings{cheng2026timemae,
  author        = {Cheng, Mingyue and Tao, Xiaoyu and Liu, Zhiding and
                   Liu, Qi and Zhang, Hao and Zhang, Rujiao and Chen, Enhong},
  title         = {{TimeMAE}: Self-Supervised Representations of Time Series
                   with Decoupled Masked Autoencoders},
  booktitle     = {Proceedings of the Nineteenth {ACM} International Conference
                   on Web Search and Data Mining},
  series        = {{WSDM} '26},
  year          = {2026},
  publisher     = {{ACM}},
  doi           = {10.1145/3773966.3778007},
  eprint        = {2303.00320},
  archiveprefix = {arXiv},
}