% ICASSP 2019 conference paper presenting the Factorial Deep Markov Model (FDMM).
@inproceedings{ab366d937c3742e2bb5d645ae7790a4a,
  author    = {Khurana, Sameer and Joty, Shafiq Rayhan and Ali, Ahmed and Glass, James},
  title     = {A Factorial Deep {Markov} Model for Unsupervised Disentangled Representation Learning from Speech},
  booktitle = {2019 {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2019 - Proceedings},
  series    = {{ICASSP}, {IEEE} International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2019},
  month     = may,
  pages     = {6540--6544},
  doi       = {10.1109/ICASSP.2019.8683131},
  language  = {English},
  keywords  = {Disentangled Representation Learning, Factorial Deep Markov Model, Variational Inference},
  abstract  = {We present the Factorial Deep Markov Model (FDMM) for representation learning of speech. The FDMM learns disentangled, interpretable and lower dimensional latent representations from speech without supervision. We use a static and dynamic latent variable to exploit the fact that information in a speech signal evolves at different time scales. Latent representations learned by the FDMM outperform a baseline i-vector system on speaker verification and dialect identification while also reducing the error rate of a phone recognition system in a domain mismatch scenario.},
  note      = {Publisher Copyright: {\textcopyright} 2019 IEEE.; 44th IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2019 ; Conference date: 12-05-2019 Through 17-05-2019},
}