Abstract

Kaldi has become a very popular toolkit for automatic speech recognition, showing considerable improvements through the combination of hidden Markov models (HMM) and deep neural networks (DNN). However, in spite of its great performance for some languages (e.g. English, Italian, Serbian, etc.), the resources for Brazilian Portuguese (BP) are still quite limited. This work describes what appears to be the first attempt to create Kaldi-based scripts and baseline acoustic models for BP using Kaldi tools. Experiments were carried out for dictation tasks and a comparison to CMU Sphinx toolkit in terms of word error rate (WER) was performed. Results seem promising, since Kaldi achieved the absolute lowest WER of 4.75% with HMM-DNN and outperformed CMU Sphinx even when using Gaussian mixture models only.

BibTeX Citation

@inproceedings{Batista18b,
    author    = {Batista, Cassio and Dias, Ana Larissa and Sampaio Neto, Nelson},
    title     = {Baseline Acoustic Models for {Brazilian} {Portuguese} Using {Kaldi} Tools},
    booktitle = {Proc. IberSPEECH 2018},
    pages     = {77--81},
    year      = {2018},
    doi       = {10.21437/IberSPEECH.2018-17},
    url       = {http://dx.doi.org/10.21437/IberSPEECH.2018-17},
    abstract  = {Kaldi has become a very popular toolkit for automatic speech recognition, showing considerable improvements through the combination of hidden Markov models (HMM) and deep neural networks (DNN). However, in spite of its great performance for some languages (e.g. English, Italian, Serbian, etc.), the resources for Brazilian Portuguese (BP) are still quite limited. This work describes what appears to be the first attempt to create Kaldi-based scripts and baseline acoustic models for BP using Kaldi tools. Experiments were carried out for dictation tasks and a comparison to CMU Sphinx toolkit in terms of word error rate (WER) was performed. Results seem promising, since Kaldi achieved the absolute lowest WER of 4.75\% with HMM-DNN and outperformed CMU Sphinx even when using Gaussian mixture models only.}
}

Abstract

This paper describes a cloud speech recognition service based on Julius decoder running in server mode. The system was set up to recognize speech in Brazilian Portuguese. The support to the language was developed by the authors with FalaBrasil research group tools, which are free and available on the group’s site. Julius uses the FalaBrasil cloud to provide online and distributed speech recognition via Internet. The client side was built on the Android 2.2 platform. The application can record and send audio, detect the end of the user speech and listen to the decoder result. To test the system efficiency, recognition time and accuracy rate were estimated by comparing it to SpeechRecognizer API provided by Google.

BibTeX Citation

@inproceedings{Batista14,
    title     = {{LaPS CSR}: A Free Distributed Cloud Speech Recognition System},
    author    = {Batista, Cassio and Coelho, Thiago and Haick, Bruno and Neto, Nelson and Klautau, Aldebaro},
    booktitle = {XIX International Scientific Conference for Young Engineers},
    location  = {Cluj-Napoca, Romania},
    year      = {2014},
    url       = {http://hdl.handle.net/10598/28297}
}

Abstract

Este trabalho compara dois sistemas de reconhecimento de fala que podem ser usados no desenvolvimento de aplicativos para Android: Julius em modo servidor e Google. Parte do suporte a Português Brasileiro para o Julius foi desenvolvido pelos autores no contexto do projeto FalaBrasil. O Julius também utilizou o servidor do FalaBrasil para prover reconhecimento distribuído via Internet, de maneira similar ao sistema da empresa Google. São apresentadas comparações entre os mesmos em termos de taxa de acerto (acurácia) e custo computacional.

BibTeX Citation

@inproceedings{Batista13,
    author    = {Batista, Cassio and Coelho, Thiago and Haick, Bruno and Neto, Nelson and Klautau, Aldebaro},
    title     = {Desenvolvimento e Compara{\c{c}}{\~a}o De Reconhecedores De Fala Embarcados e Distribu{\'i}dos Para {Android}},
    booktitle = {XXXI Brazilian Telecommunications Symposium},
    location  = {Fortaleza, Brazil},
    year      = {2013},
    doi       = {10.14209/sbrt.2013.219},
    url       = {http://gestao.sbrt.org.br/simposios/artigo/visualizar/a/325}
}

Abstract

Unavailable :(

BibTeX Citation

@inproceedings{rafael12,
    author    = {Oliveira, Rafael and Batista, Pedro and Neto, Nelson and Klautau, Aldebaro},
    title     = {Baseline Acoustic Models for {Brazilian} {Portuguese} Using {CMU} {Sphinx} Tools},
    booktitle = {12th International Conference on Computational Processing of the Portuguese Language},
    pages     = {375--380},
    year      = {2012}
}

Abstract

Unavailable :(

BibTeX Citation

@inproceedings{Monte11,
    author    = {Monte, A. and Ribeiro, D. and Neto, N. and Cruz, R. and Klautau, A.},
    title     = {A Rule-based Syllabification Algorithm with Stress Determination for {Brazilian} {Portuguese} Natural Language Processing},
    booktitle = {17th International Congress of Phonetic Sciences},
    pages     = {1418--1421},
    year      = {2011}
}

Abstract

Unavailable :(

BibTeX Citation

@article{Neto10,
    author    = {Neto, Nelson and Patrick, Carlos and Klautau, Aldebaro and Trancoso, Isabel},
    title     = {Free tools and resources for {Brazilian} {Portuguese} speech recognition},
    journal   = {Journal of the Brazilian Computer Society},
    volume    = {17},
    number    = {1},
    pages     = {53--68},
    year      = {2011},
    doi       = {10.1007/s13173-010-0023-1}
}

Abstract

An automatic speech recognition system has modules that depend on the language and, while there are many public resources for some languages (e.g., English and Japanese), the resources for Brazilian Portuguese (BP) are still limited. This work describes the development of resources and free tools for BP speech recognition, consisting of text and audio corpora, phonetic dictionary, grapheme-to-phone converter, language and acoustic models. All of them are publicly available and, together with a proposed application programming interface, have been used for the development of several new applications, including a speech module for the OpenOffice suite. Performance tests are presented, comparing the developed BP system with a commercial software. The paper also describes an application that uses synthesis and speech recognition together with a natural language processing module dedicated to statistical machine translation. This application allows the translation of spoken conversations from BP to English and vice versa. The resources make easier the adoption of BP speech technologies by other academic groups and industry.

BibTeX Citation

@article{Neto11,
    author    = {Neto, Nelson and Patrick, Carlos and Klautau, Aldebaro and Trancoso, Isabel},
    title     = {Free tools and resources for {Brazilian} {Portuguese} speech recognition},
    journal   = {Journal of the Brazilian Computer Society},
    volume    = {17},
    number    = {1},
    pages     = {53--68},
    year      = {2011},
    month     = mar,
    day       = {01},
    issn      = {1678-4804},
    doi       = {10.1007/s13173-010-0023-1},
    url       = {https://doi.org/10.1007/s13173-010-0023-1},
    abstract  = {An automatic speech recognition system has modules that depend on the language and, while there are many public resources for some languages (e.g., English and Japanese), the resources for Brazilian Portuguese (BP) are still limited. This work describes the development of resources and free tools for BP speech recognition, consisting of text and audio corpora, phonetic dictionary, grapheme-to-phone converter, language and acoustic models. All of them are publicly available and, together with a proposed application programming interface, have been used for the development of several new applications, including a speech module for the OpenOffice suite. Performance tests are presented, comparing the developed BP system with a commercial software. The paper also describes an application that uses synthesis and speech recognition together with a natural language processing module dedicated to statistical machine translation. This application allows the translation of spoken conversations from BP to English and vice versa. The resources make easier the adoption of BP speech technologies by other academic groups and industry.}
}

Abstract

Unavailable :(

BibTeX Citation

@inproceedings{rafael11,
    author    = {Oliveira, Rafael and Batista, Pedro and Neto, Nelson and Klautau, Aldebaro},
    title     = {Recursos para Desenvolvimento de Aplicativos com Suporte a Reconhecimento de Voz para Desktop e Sistemas Embarcados},
    booktitle = {XII Workshop de Software Livre},
    year      = {2011}
}

Abstract

Unavailable :(

BibTeX Citation

@inproceedings{Pedro10propor,
    author    = {Batista, Pedro and Silva, Patrick and Neto, Nelson and Klautau, Aldebaro},
    title     = {A non-Visual Web-Browsing System using Speech Recognition for {Brazilian} {Portuguese}},
    booktitle = {The International Conference on Computational Processing of Portuguese -- Demos Session},
    year      = {2010}
}

Abstract

Unavailable :(

BibTeX Citation

@inproceedings{Igor10,
    author    = {Couto, I. and Neto, N. and Tadaiesky, V. and Klautau, A. and Maia, R.},
    title     = {An Open Source {HMM}-based Text-to-Speech System for {Brazilian} {Portuguese}},
    booktitle = {7th International Telecommunications Symposium},
    year      = {2010}
}

Abstract

Unavailable :(

BibTeX Citation

@inproceedings{Patrick10,
    author    = {Silva, Patrick and Batista, Pedro and Neto, Nelson and Klautau, Aldebaro},
    title     = {An Open-Source Speech Recognizer for {Brazilian} {Portuguese} with a {Windows} Programming Interface},
    booktitle = {Computational Processing of the Portuguese Language},
    volume    = {6001},
    pages     = {128--131},
    year      = {2010}
}

Abstract

Unavailable :(

BibTeX Citation

@inproceedings{denise10,
    author    = {Alves, Denise and Moura, Renan and Klautau, Aldebaro},
    title     = {M{\'o}dulo de Adapta{\c{c}}{\~a}o de Locutor utilizando Regress{\~a}o Linear de M{\'a}xima Verossimilhan{\c{c}}a para Sistemas de Reconhecimento de Voz},
    booktitle = {XII Workshop de Software Livre},
    year      = {2010}
}

Abstract

Unavailable :(

BibTeX Citation

@inproceedings{Siravenha09,
    author    = {Siravenha, Ana and Neto, Nelson and Macedo, Valquiria and Klautau, Aldebaro},
    title     = {A Computer-assisted Learning Software Using Speech Synthesis and Recognition in {Brazilian} {Portuguese}},
    booktitle = {Interactive Computer Aided Blended Learning},
    year      = {2009}
}

Abstract

Unavailable :(

BibTeX Citation

@inproceedings{Patrick09,
    author    = {Silva, Patrick and Neto, Nelson and Klautau, Aldebaro},
    title     = {Novos Recursos e Utiliza{\c{c}}{\~a}o de Adapta{\c{c}}{\~a}o de Locutor no Desenvolvimento de um Sistema de Reconhecimento de Voz para o {Portugu{\^e}s} {Brasileiro}},
    booktitle = {XXVII Simp{\'o}sio Brasileiro de Telecomunica{\c{c}}{\~o}es},
    year      = {2009}
}

Abstract

Unavailable :(

BibTeX Citation

@inproceedings{Trancoso09,
    author    = {Abad, Alberto and Trancoso, Isabel and Neto, Nelson and Ribeiro, Maria},
    title     = {Porting an {European} {Portuguese} Broadcast News Recognition System to {Brazilian} {Portuguese}},
    booktitle = {Interspeech, Brighton, UK},
    year      = {2009}
}

Abstract

Unavailable :(

BibTeX Citation

@inproceedings{Morais09,
    author    = {Morais, Jefferson and Neto, Nelson and Klautau, Aldebaro},
    title     = {Tecnologias para o Desenvolvimento de Sistemas de Di{\'a}logo Falado em {Portugu{\^e}s} {Brasileiro}},
    booktitle = {7th Brazilian Symposium in Information and Human Language Technology},
    year      = {2009}
}

Abstract

Unavailable :(

BibTeX Citation

@inproceedings{NelsonCALL08,
    author    = {Neto, Nelson and Siravenha, Carolina and Macedo, Valqu{\'i}ria and Klautau, Aldebaro},
    title     = {A Computer-Assisted Learning Software to Help Teaching {English} to {Brazilians}},
    booktitle = {International Conference on Computational Processing of the Portuguese Language -- Special Session},
    year      = {2008}
}

Abstract

Unavailable :(

BibTeX Citation

@inproceedings{Patrick08,
    author    = {Silva, Patrick and Neto, Nelson and Klautau, Aldebaro and Adami, Andre and Trancoso, Isabel},
    title     = {Speech Recognition for {Brazilian} {Portuguese} using the {Spoltech} and {OGI}-22 Corpora},
    booktitle = {XXVI Simp{\'o}sio Brasileiro de Telecomunica{\c{c}}{\~o}es},
    year      = {2008}
}

Abstract

Unavailable :(

BibTeX Citation

@inproceedings{Nelson08,
    author    = {Neto, Nelson and Silva, Patrick and Klautau, Aldebaro and Adami, Andre},
    title     = {{Spoltech} and {OGI}-22 Baseline Systems for Speech Recognition in {Brazilian} {Portuguese}},
    booktitle = {International Conference on Computational Processing of Portuguese Language -- PROPOR},
    year      = {2008}
}

Abstract

Unavailable :(

BibTeX Citation

@inproceedings{Siravenha08,
    author    = {Siravenha, Ana and Neto, Nelson and Macedo, Valqu{\'i}ria and Klautau, Aldebaro},
    title     = {Uso de Regras Fonol{\'o}gicas com Determina{\c{c}}{\~a}o de Vogal T{\^o}nica para Convers{\~a}o Grafema-Fone em {Portugu{\^e}s} {Brasileiro}},
    booktitle = {7th International Information and Telecommunication Technologies Symposium},
    year      = {2008}
}

Abstract

Unavailable :(

BibTeX Citation

@inproceedings{Hosn06a,
    author    = {Hosn, Chadia and Baptista, Luiz and Imbiriba, Tales and Klautau, Aldebaro},
    title     = {New Resources for {Brazilian} {Portuguese}: Results for Grapheme-to-Phoneme and Phone Classification},
    booktitle = {VI International Telecommunications Symposium},
    pages     = {477--482},
    year      = {2006},
    doi       = {10.1109/ITS.2006.4433322}
}

Abstract

Speech processing is a data-driven technology that relies on public corpora and associated resources. In contrast to languages such as English, there are few resources for Brazilian Portuguese (BP). Consequently, there are no publicly available scripts to design baseline BP systems. This work discusses some efforts towards decreasing this gap and presents results for two speech processing tasks for BP: phone classification and grapheme to phoneme (G2P) conversion. The former task used hidden Markov models to classify phones from the Spoltech and TIMIT corpora. The G2P module adopted machine learning methods such as decision trees and was tested on a new BP pronunciation dictionary and the following languages: British English, American English and French.

BibTeX Citation

@inproceedings{Hosn06,
    author    = {Hosn, Chadia and Baptista, Luiz A. and Imbiriba, Tales and Klautau, Aldebaro},
    title     = {New resources for {Brazilian} {Portuguese}: Results for grapheme-to-phoneme and phone classification},
    booktitle = {2006 International Telecommunications Symposium},
    pages     = {477--482},
    year      = {2006},
    month     = sep,
    doi       = {10.1109/ITS.2006.4433322},
    keywords  = {hidden Markov models;learning (artificial intelligence);natural languages;speech processing;Brazilian Portuguese;grapheme-to-phoneme conversion;phone classification;speech processing;data-driven technology;baseline BP system;hidden Markov model;machine learning method;Natural languages;Speech processing;Hidden Markov models;Decision trees;Dictionaries;Speaker recognition;Learning systems;Testing;Speech recognition;Classification tree analysis;Grapheme-to-phoneme;letter-to-sound;decision trees;phone classification;hidden Markov models}
}

Abstract

Unavailable :(

BibTeX Citation

@inproceedings{Neto05,
    author    = {Neto, N. and Sousa, E. and Macedo, V. and Adami, A. and Klautau, A.},
    title     = {Desenvolvimento de Software Livre Usando Reconhecimento e S{\'i}ntese de Voz: O Estado da Arte para o {Portugu{\^e}s} {Brasileiro}},
    booktitle = {6th F{\'o}rum Internacional Software Livre},
    year      = {2005}
}

Abstract

Unavailable :(

BibTeX Citation

@inproceedings{Silva05,
    author    = {Silva, Enio and Baptista, Luiz and Fernandes, Helane and Klautau, Aldebaro},
    title     = {Desenvolvimento de um Sistema de Reconhecimento Autom{\'a}tico de Voz Cont{\'i}nua com Grande Vocabul{\'a}rio para o {Portugu{\^e}s} {Brasileiro}},
    booktitle = {XXV Congresso da Sociedade Brasileira de Computa{\c{c}}{\~a}o},
    year      = {2005}
}

Abstract

Unavailable :(

BibTeX Citation

@inproceedings{Imbiriba04,
    author    = {Imbiriba, T. and Klautau, A. and Parihar, N. and Raghavan, S. and Picone, J.},
    title     = {{GMM} And Kernel-Based Speaker Recognition with the {ISIP} Toolkit},
    booktitle = {Proceedings of the 2004 {IEEE} International Workshop on Machine Learning for Signal Processing},
    address   = {Sao Luis, Brazil},
    pages     = {371--380},
    year      = {2004},
    month     = sep
}

Abstract

Unavailable :(

BibTeX Citation

@inproceedings{Silva04,
    author    = {Silva, Enio and Pantoja, Marcus and Celid{\^o}nio, Jackline and Klautau, Aldebaro},
    title     = {Modelos de Linguagem N-grama para Reconhecimento de Voz com Grande Vocabul{\'a}rio},
    booktitle = {III Workshop em Tecnologia da Informa{\c{c}}{\~a}o e da Linguagem Humana},
    address   = {Salvador},
    year      = {2004}
}

Abstract

Unavailable :(

BibTeX Citation

@unpublished{Klautau03-ieee,
    author    = {Klautau, A. and Jevti{\'c}, N. and Orlitsky, A.},
    title     = {A gentle introduction to maximum mutual information estimation},
    note      = {Submitted to {IEEE} Transactions on Education},
    year      = {2003}
}

Abstract

Unavailable :(

BibTeX Citation

@inproceedings{Klautau03-rbf,
    title     = {A new algorithm for training {RBF} networks},
    author    = {Klautau, A.},
    year      = {2003},
    booktitle = {SBT, Rio de Janeiro, Brazil}
}

Abstract

Unavailable :(

BibTeX Citation

@inproceedings{Klautau03-icassp,
    title     = {Mining speech: {Automatic} selection of heterogeneous features using boosting},
    author    = {Klautau, A.},
    year      = {2003},
    booktitle = {ICASSP}
}

Abstract

Unavailable :(

BibTeX Citation

@article{Klautau03-jmlr,
    title     = {On nearest-neighbor {ECOC} with application to all-pairs multiclass {SVM}},
    author    = {Klautau, A. and Jevti{\'c}, N. and Orlitsky, A.},
    year      = {2003},
    journal   = {Journal of Machine Learning Research}
}

Abstract

Unavailable :(

BibTeX Citation

@inproceedings{Klautau03-sbt,
    title     = {Speech recognition based on discriminative classifiers},
    author    = {Klautau, A. and Jevti{\'c}, N. and Orlitsky, A.},
    year      = {2003},
    booktitle = {SBT, Rio de Janeiro, Brazil}
}

Abstract

Unavailable :(

BibTeX Citation

@techreport{Klautau02d,
    author      = {Klautau, A.},
    title       = {Classification of {Peterson} and {Barney}'s vowels using {Weka}},
    institution = {UFPA},
    year        = {2002},
    url         = {http://www.laps.ufpa.br/aldebaro/papers}
}

Abstract

Unavailable :(

BibTeX Citation

@inproceedings{Klautau02,
    title     = {Combined binary classifiers with applications to speech recognition},
    author    = {Klautau, A. and Jevti{\'c}, N. and Orlitsky, A.},
    year      = {2002},
    booktitle = {ICSLP}
}

Abstract

Unavailable :(

BibTeX Citation

@inproceedings{Jevtic01,
    title     = {Estimated rank pruning and {Java}-based speech recognition},
    author    = {Jevti{\'c}, N. and Klautau, A. and Orlitsky, A.},
    year      = {2001},
    booktitle = {Automatic Speech Recognition and Understanding Workshop}
}