6#ifndef _USE_MATH_DEFINES
7#define _USE_MATH_DEFINES
95 std::vector<uint8_t> wavHeader = _createWavHeader(sampleRate, sound.
getSamples());
96 std::vector<uint8_t>
audioData(wavHeader.begin(), wavHeader.end());
98 for (
size_t i = 0; i < sound.
getSamples(); ++i) {
101 audioData.push_back((sample >> 8) & 0xFF);
118 std::string response;
136 transcription =
jsonResponse[
"text"].get<std::string>();
139 return ReturnValue::return_code::return_value_error_generic;
145 yCWarning(
WHISPERDEVICE) <<
"No 'avg_logprob' or 'no_speech_prob' field in the first segment, setting score to 0.0";
155 }
catch (
const nlohmann::json::parse_error&
e) {
157 return ReturnValue::return_code::return_value_error_generic;
164 return ReturnValue::return_code::return_value_ok;
167std::vector<uint8_t> WhisperDevice::_createWavHeader(
int sampleRate,
int numSamples)
174 std::vector<uint8_t> header(44);
175 std::memcpy(header.data(),
"RIFF", 4);
176 std::memcpy(header.data() + 4, &
chunkSize, 4);
177 std::memcpy(header.data() + 8,
"WAVE", 4);
178 std::memcpy(header.data() + 12,
"fmt ", 4);
181 short numChannels = 1;
184 std::memcpy(header.data() + 22, &numChannels, 2);
185 std::memcpy(header.data() + 24, &sampleRate, 4);
186 std::memcpy(header.data() + 28, &
byteRate, 4);
187 std::memcpy(header.data() + 32, &
blockAlign, 2);
190 std::memcpy(header.data() + 36,
"data", 4);
196size_t WhisperDevice::_writeCallback(
void *
contents,
size_t size,
size_t nmemb, std::string *output) {
#define YARP_METHOD_NOT_YET_IMPLEMENTED()
const yarp::os::LogComponent & WHISPERDEVICE()
std::string m_ENVS_end_point_name
std::string m_ENVS_api_key_name
bool parseParams(const yarp::os::Searchable &config) override
Parse the DeviceDriver parameters.
std::string m_ENVS_api_version_name
std::string m_ENVS_deployment_id_name
yarp::dev::ReturnValue getLanguage(std::string &language) override
Gets the current language set for speech transcription.
yarp::dev::ReturnValue transcribe(const yarp::sig::Sound &sound, std::string &transcription, double &score) override
Performs the speech transcription.
yarp::dev::ReturnValue setLanguage(const std::string &language="auto") override
Sets the language for speech transcription.
bool close() override
Close the DeviceDriver.
bool open(yarp::os::Searchable &config) override
Open the DeviceDriver.
@ return_value_error_generic
Method was successfully executed.
A mini-server for performing network communication in the background.
A base class for nested structures that can be searched.
Class for storing sounds. See Audio in YARP for additional documentation on YARP audio.
int getFrequency() const
Get the frequency of the sound (i.e. the sampling rate).
audio_sample get(size_t sample, size_t channel=0) const
size_t getSamples() const
Get the number of samples contained in the sound.
#define yCInfo(component,...)
#define yCError(component,...)
#define yCWarning(component,...)
#define yCDebug(component,...)
#define YARP_LOG_COMPONENT(name,...)
For streams capable of holding different kinds of content, check what they actually have.
An interface to the operating system, including Port based communication.