YARP
Yet Another Robot Platform
 
Loading...
Searching...
No Matches
GoogleSpeechTranscription.cpp
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: 2023 Istituto Italiano di Tecnologia (IIT)
3 * SPDX-License-Identifier: BSD-3-Clause
4 */
5
6#ifndef _USE_MATH_DEFINES
7#define _USE_MATH_DEFINES
8#endif
9
11
12#include <yarp/sig/SoundFile.h>
13
15#include <yarp/os/LogStream.h>
16#include <fstream>
17
18#include <cmath>
19
20using namespace yarp::os;
21using namespace yarp::dev;
22
23
25
26
31
// NOTE(review): the signature line for this body is absent from this listing.
// The body reads `config`, so it is presumably
// GoogleSpeechTranscription::open(yarp::os::Searchable& config) - confirm
// against the original file.
//
// Reads the optional "__offline" flag, parses the device parameters, fills in
// the recognition configuration and creates the speech client.
// Returns false when parseParams() fails, true otherwise.
33{
// "__offline" is optional; m_offline becomes true only when its value is the integer 1.
34 if(config.check("__offline"))
35 {
36 m_offline = config.find("__offline").asInt32() == 1;
37 }
// Abort before touching the recognition config or the client if parsing fails.
38 if(!parseParams(config))
39 {
40 yCError(GOOGLESPEECHTRANSCR) << "Unable to correctly parse device params. Check previous errors for more info";
41 return false;
42 }
// Request configuration: language and sample rate come from member values
// (presumably set by parseParams - confirm); the encoding is fixed to LINEAR16.
43 m_audioConfig.set_language_code(m_language_code);
44 m_audioConfig.set_encoding(google::cloud::speech::v1::RecognitionConfig::LINEAR16);
45 m_audioConfig.set_sample_rate_hertz(m_sample_rate_hertz);
// The client is created unconditionally; m_offline is not consulted in this body.
46 m_client = std::make_shared<google::cloud::speech_v1::SpeechClient>(google::cloud::speech_v1::MakeSpeechConnection());
47
48 return true;
49}
50
// NOTE(review): the signature line for this body is absent from this listing;
// by position it is presumably GoogleSpeechTranscription::close() - confirm.
//
// Unconditionally reports success; no cleanup is performed in this body.
52{
53 return true;
54}
55
// NOTE(review): the signature line for this body is absent from this listing;
// it is presumably GoogleSpeechTranscription::setLanguage(const std::string& language)
// - confirm against the original file.
//
// Stores `language` as the language code of the recognition configuration.
// The value "auto" is rejected with an error log.
57{
58 if(language == "auto")
59 {
60 yCError(GOOGLESPEECHTRANSCR) << "The \"auto\" option is not supported by this device";
61
// NOTE(review): listing numbering jumps from 61 to 63 - the statement that
// leaves the function on this error path (presumably a return) is missing here.
63 }
64
65 m_audioConfig.set_language_code(language);
// NOTE(review): listing numbering jumps from 65 to 67 - the final return
// statement is presumably missing here.
67}
68
// NOTE(review): the signature line for this body is absent from this listing;
// it is presumably GoogleSpeechTranscription::getLanguage(std::string& language)
// - confirm against the original file.
//
// Copies the language code currently held by the recognition configuration
// into the output argument `language`.
70{
71 language = m_audioConfig.language_code();
72
// NOTE(review): listing numbering jumps from 72 to 74 - the return statement
// is presumably missing here.
74}
75
// Sends `sound` to the speech client and reports the best transcription.
//
// sound          input audio; rejected when it has zero samples or zero channels.
// transcription  output; reset to "" on entry, then set to the transcript of the
//                alternative with the highest confidence.
// score          output; reset to 0.0 on entry, then set to that highest confidence.
//
// NOTE(review): this listing has lost several lines (the numbering skips 85, 89,
// 98 and 119): the return statements and the definition of rawData_tmp are not
// visible. Confirm the actual return values against the original file.
76yarp::dev::ReturnValue GoogleSpeechTranscription::transcribe(const yarp::sig::Sound& sound, std::string& transcription, double& score)
77{
// Outputs are cleared first, so they hold defined values on every path.
78 transcription="";
79 score = 0.0;
80
81 if (sound.getSamples() == 0 ||
82 sound.getChannels() == 0)
83 {
84 yCError(GOOGLESPEECHTRANSCR) << "Invalid Sound sample received";
// NOTE(review): the statement leaving the function on this error path is missing from the listing.
86 }
87
88 google::cloud::speech::v1::RecognitionAudio audio;
// NOTE(review): rawData_tmp is not defined in the visible lines; presumably it is
// obtained from sound.getNonInterleavedAudioRawData() - confirm.
// The samples are copied into a vector of short and handed over as raw bytes;
// the byte length is computed as element count times 2.
90 auto rawData = std::vector<short>(rawData_tmp.begin(), rawData_tmp.end());
91 audio.set_content((char*)rawData.data(),rawData.size()*2);
92
93 auto response = m_client->Recognize(m_audioConfig,audio);
94
// A response that evaluates to false carries a status whose message is logged.
95 if(!response)
96 {
97 yCError(GOOGLESPEECHTRANSCR) << "Could not perform audio transcription:" << response.status().message();
// NOTE(review): the statement leaving the function on this error path is missing from the listing.
99 }
100
101 yCDebug(GOOGLESPEECHTRANSCR) << "Results size:" << response->results_size();
// Scan every alternative of every result and keep the one with the highest
// confidence. The comparison is strict and score starts at 0.0, so an
// alternative whose confidence is 0 is never selected.
102 for(int i=0; i<response->results_size(); i++)
103 {
104 auto result = response->results(i);
105 yCDebug(GOOGLESPEECHTRANSCR) << i << "Alternative size:" << result.alternatives_size();
106 for(int j=0; j<result.alternatives_size(); j++)
107 {
108 auto alternative = result.alternatives(j);
109 float tempConf = alternative.confidence();
110 yCDebug(GOOGLESPEECHTRANSCR) << "Alternative:" << alternative.SerializeAsString() << "Confidence:" << tempConf;
111 if(tempConf > score)
112 {
113 score = tempConf;
114 transcription = alternative.transcript();
115 }
116 }
117 }
118 yCDebug(GOOGLESPEECHTRANSCR) << "Transcription:" << transcription << "Score:" << score;
// NOTE(review): the final return statement is missing from the listing.
120}
const yarp::os::LogComponent & GOOGLESPEECHTRANSCR()
bool parseParams(const yarp::os::Searchable &config) override
Parse the DeviceDriver parameters.
yarp::dev::ReturnValue setLanguage(const std::string &language="auto") override
Sets the language for speech transcription.
yarp::dev::ReturnValue getLanguage(std::string &language) override
Gets the current language set for speech transcription.
bool close() override
Close the DeviceDriver.
yarp::dev::ReturnValue transcribe(const yarp::sig::Sound &sound, std::string &transcription, double &score) override
Performs the speech transcription.
bool open(yarp::os::Searchable &config) override
Open the DeviceDriver.
@ return_value_error_generic
Method was successfully executed.
A mini-server for performing network communication in the background.
@ TraceType
Definition Log.h:92
A base class for nested structures that can be searched.
Definition Searchable.h:31
virtual bool check(const std::string &key) const =0
Check if there exists a property of the given name.
virtual Value & find(const std::string &key) const =0
Gets a value corresponding to a given keyword.
Class for storing sounds. See Audio in YARP for additional documentation on YARP audio.
Definition Sound.h:25
std::vector< std::reference_wrapper< audio_sample > > getNonInterleavedAudioRawData() const
Returns a serialized version of the sound, in non-interleaved format, e.g.
Definition Sound.cpp:556
size_t getChannels() const
Get the number of channels of the sound.
Definition Sound.cpp:603
size_t getSamples() const
Get the number of samples contained in the sound.
Definition Sound.cpp:598
#define yCError(component,...)
#define yCDebug(component,...)
#define YARP_LOG_COMPONENT(name,...)
For streams capable of holding different kinds of content, check what they actually have.
An interface to the operating system, including Port based communication.