6#ifndef _USE_MATH_DEFINES
7#define _USE_MATH_DEFINES
86 std::cerr <<
"Failed to initialize cURL" << std::endl;
94 std::vector<uint8_t> wavHeader = _createWavHeader(sampleRate, sound.
getSamples());
95 std::vector<uint8_t>
audioData(wavHeader.begin(), wavHeader.end());
97 for (
size_t i = 0; i < sound.
getSamples(); ++i) {
100 audioData.push_back((sample >> 8) & 0xFF);
113 std::string response;
124 std::cout <<
"Transcription response: " << response << std::endl;
131 transcription =
jsonResponse[
"text"].get<std::string>();
133 std::cerr <<
"No 'text' field in the response" << std::endl;
134 return ReturnValue::return_code::return_value_error_generic;
136 }
catch (
const nlohmann::json::parse_error&
e) {
137 std::cerr <<
"JSON parse error: " <<
e.what() << std::endl;
138 return ReturnValue::return_code::return_value_error_generic;
145 return ReturnValue::return_code::return_value_ok;
148std::vector<uint8_t> WhisperDevice::_createWavHeader(
int sampleRate,
int numSamples)
155 std::vector<uint8_t> header(44);
156 std::memcpy(header.data(),
"RIFF", 4);
157 std::memcpy(header.data() + 4, &
chunkSize, 4);
158 std::memcpy(header.data() + 8,
"WAVE", 4);
159 std::memcpy(header.data() + 12,
"fmt ", 4);
162 short numChannels = 1;
165 std::memcpy(header.data() + 22, &numChannels, 2);
166 std::memcpy(header.data() + 24, &sampleRate, 4);
167 std::memcpy(header.data() + 28, &
byteRate, 4);
168 std::memcpy(header.data() + 32, &
blockAlign, 2);
171 std::memcpy(header.data() + 36,
"data", 4);
177size_t WhisperDevice::_writeCallback(
void *
contents,
size_t size,
size_t nmemb, std::string *output) {
#define YARP_METHOD_NOT_YET_IMPLEMENTED()
const yarp::os::LogComponent & WHISPERDEVICE()
std::string m_ENVS_end_point_name
std::string m_ENVS_api_key_name
bool parseParams(const yarp::os::Searchable &config) override
Parse the DeviceDriver parameters.
std::string m_ENVS_api_version_name
std::string m_ENVS_deployment_id_name
yarp::dev::ReturnValue getLanguage(std::string &language) override
Gets the current language set for speech transcription.
yarp::dev::ReturnValue transcribe(const yarp::sig::Sound &sound, std::string &transcription, double &score) override
Performs the speech transcription.
yarp::dev::ReturnValue setLanguage(const std::string &language="auto") override
Sets the language for speech transcription.
bool close() override
Close the DeviceDriver.
bool open(yarp::os::Searchable &config) override
Open the DeviceDriver.
@ return_value_error_generic
@ return_value_ok
Method was successfully executed.
A mini-server for performing network communication in the background.
A base class for nested structures that can be searched.
Class for storing sounds. See "Audio in YARP" for additional documentation on YARP audio.
int getFrequency() const
Get the frequency of the sound (i.e. how many samples per second).
audio_sample get(size_t sample, size_t channel=0) const
size_t getSamples() const
Get the number of samples contained in the sound.
#define yCInfo(component,...)
#define yCError(component,...)
#define yCWarning(component,...)
#define YARP_LOG_COMPONENT(name,...)
For streams capable of holding different kinds of content, check what they actually have.
An interface to the operating system, including Port based communication.