24 #if defined (YARP_HAS_FFMPEG)
27 #include <libavutil/opt.h>
28 #include <libavcodec/avcodec.h>
29 #include <libavcodec/version.h>
30 #include <libavutil/channel_layout.h>
31 #include <libavutil/common.h>
32 #include <libavutil/imgutils.h>
33 #include <libavutil/mathematics.h>
34 #include <libavutil/samplefmt.h>
48 #if defined (YARP_HAS_FFMPEG)
49 #define AUDIO_INBUF_SIZE 20480
50 #define AUDIO_REFILL_THRESH 4096
54 #if defined (YARP_HAS_FFMPEG)
// Feeds one packet to the decoder and appends every decoded frame to sound_data.
// dec_ctx:    opened decoder context.
// pkt:        packet submitted with avcodec_send_packet().
// frame:      scratch frame filled by avcodec_receive_frame().
// sound_data: output sound; each decoded frame is appended with operator+=.
// Returns a bool; the return statements are not visible in this listing.
55 bool decode(AVCodecContext* dec_ctx, AVPacket* pkt, AVFrame* frame,
Sound& sound_data)
// Submit the compressed packet to the decoder.
60 ret = avcodec_send_packet(dec_ctx, pkt);
63 yCError(SOUNDFILE_MP3,
"Error submitting the packet to the decoder");
// Pull one decoded frame; EAGAIN / EOF are tested separately from other error codes.
69 ret = avcodec_receive_frame(dec_ctx, frame);
70 if (
ret == AVERROR(EAGAIN) ||
ret == AVERROR_EOF)
76 yCError(SOUNDFILE_MP3,
"Error during decoding");
// Size in bytes of one sample for the decoder's sample format.
80 data_size = av_get_bytes_per_sample(dec_ctx->sample_fmt);
84 yCError(SOUNDFILE_MP3,
"Failed to calculate data size\n");
// Temporary sound sized to this frame: nb_samples x channels.
89 frame_sound.
resize(frame->nb_samples, dec_ctx->channels);
92 for (i = 0; i < frame->nb_samples; i++)
94 for (ch = 0; ch < dec_ctx->channels; ch++)
// NOTE(review): each channel plane is read as 'short int' irrespective of data_size /
// sample_fmt; this presumably assumes a planar 16-bit format - confirm against the decoder output.
96 short int val = *((
short int*)frame->data[ch] + i);
97 frame_sound.
set(val,i,ch);
// Append the samples of this frame to the accumulated output.
100 sound_data += frame_sound;
// Scans codec->sample_fmts (terminated by AV_SAMPLE_FMT_NONE) looking for sample_fmt.
// The call site uses the result as a boolean (non-zero presumably means "supported";
// the return statements are not visible in this listing).
105 int check_sample_fmt(
const AVCodec * codec,
enum AVSampleFormat sample_fmt)
107 const enum AVSampleFormat* p = codec->sample_fmts;
// Walk the list until the AV_SAMPLE_FMT_NONE sentinel.
109 while (*p != AV_SAMPLE_FMT_NONE)
111 if (*p == sample_fmt) {
// Picks, among codec->supported_samplerates, the rate closest to 44100 Hz.
// Returns the selected rate (best_samplerate).
119 int select_sample_rate(
const AVCodec * codec)
122 int best_samplerate = 0;
// The codec does not advertise a list of rates; the fallback taken here is not visible in this listing.
124 if (!codec->supported_samplerates) {
128 p = codec->supported_samplerates;
// Keep the candidate whose distance from 44100 is the smallest seen so far
// (the first candidate is always accepted because best_samplerate starts at 0).
131 if (!best_samplerate || abs(44100 - *p) < abs(44100 - best_samplerate)) {
132 best_samplerate = *p;
136 return best_samplerate;
// Sends one frame to the encoder and writes every packet it produces to the stream os.
// ctx:   opened encoder context.
// frame: frame to encode; write_mp3_file also calls this with NULL after the last frame.
// pkt:   scratch packet, unreferenced after each write.
// os:    output stream receiving the raw packet bytes.
// Returns a bool; the return statements are not visible in this listing.
139 bool encode(AVCodecContext* ctx, AVFrame* frame, AVPacket* pkt, std::fstream& os)
144 ret = avcodec_send_frame(ctx, frame);
147 yCError(SOUNDFILE_MP3,
"Error sending the frame to the encoder\n");
// Fetch an encoded packet; EAGAIN / EOF are tested separately from other error codes.
155 ret = avcodec_receive_packet(ctx, pkt);
156 if (
ret == AVERROR(EAGAIN) ||
ret == AVERROR_EOF)
162 yCError(SOUNDFILE_MP3,
"Error encoding audio frame\n");
// Write the packet payload, then release the packet's reference to its buffer.
// NOTE(review): no stream-state check is visible after the write - confirm whether I/O errors are detected.
165 os.write((
const char*)(pkt->data), pkt->size);
166 av_packet_unref(pkt);
// Selects, among codec->channel_layouts, a layout with the highest channel count.
// Returns AV_CH_LAYOUT_STEREO when the codec does not advertise any layout.
// NOTE(review): the return type is int while best_ch_layout is uint64_t, so the returned
// layout mask is narrowed on return - confirm this cannot truncate the layouts in use.
172 int select_channel_layout(
const AVCodec * codec)
175 uint64_t best_ch_layout = 0;
176 int best_nb_channels = 0;
178 if (!codec->channel_layouts) {
179 return AV_CH_LAYOUT_STEREO;
182 p = codec->channel_layouts;
// Number of channels in the candidate layout.
185 int nb_channels = av_get_channel_layout_nb_channels(*p);
// Strict '>' keeps the first layout found for a given channel count.
187 if (nb_channels > best_nb_channels)
190 best_nb_channels = nb_channels;
194 return best_ch_layout;
// Body of write_mp3_file() (its signature is not visible in this listing): encodes
// sound_data with libavcodec and writes the resulting packets to 'filename'.
// Without YARP_HAS_FFMPEG only the error below is logged.
201 #if !defined (YARP_HAS_FFMPEG)
203 yCError(SOUNDFILE_MP3) <<
"write_mp3_file() not supported: lib ffmpeg not found";
206 const AVCodec * codec =
nullptr;
207 AVCodecContext * c =
nullptr;
208 AVFrame * frame =
nullptr;
209 AVPacket * pkt =
nullptr;
// Codec registration is only issued for libavcodec major versions below 58.
214 #if LIBAVCODEC_VERSION_MAJOR < 58
216 avcodec_register_all();
// NOTE(review): the encoder requested is AV_CODEC_ID_MP2 although the function name says mp3 - confirm intent.
220 codec = avcodec_find_encoder(AV_CODEC_ID_MP2);
223 yCError(SOUNDFILE_MP3,
"Codec not found");
227 c = avcodec_alloc_context3(codec);
230 yCError(SOUNDFILE_MP3,
"Could not allocate audio codec context");
// Encoder configuration: caller-supplied bitrate, signed 16-bit sample format.
235 c->bit_rate = bitrate;
238 c->sample_fmt = AV_SAMPLE_FMT_S16;
239 if (!check_sample_fmt(codec, c->sample_fmt))
241 yCError(SOUNDFILE_MP3,
"Encoder does not support sample format %s",
242 av_get_sample_fmt_name(c->sample_fmt));
// NOTE(review): sample rate and channel layout are taken from the codec's capabilities;
// no use of the sound's own frequency / channel count is visible here - verify they match sound_data.
247 c->sample_rate = select_sample_rate(codec);
248 c->channel_layout = select_channel_layout(codec);
249 c->channels = av_get_channel_layout_nb_channels(c->channel_layout);
252 if (avcodec_open2(c, codec, NULL) < 0)
254 yCError(SOUNDFILE_MP3,
"Could not open codec");
// Open the destination file in binary mode.
258 fos.open(filename, std::fstream::out | std::fstream::binary);
259 if (fos.is_open()==
false)
261 yCError(SOUNDFILE_MP3,
"Cannot open %s for writing", filename);
// Packet that will hold the encoder output.
266 pkt = av_packet_alloc();
269 yCError(SOUNDFILE_MP3,
"could not allocate the packet");
// Frame that will hold the raw audio handed to the encoder.
275 frame = av_frame_alloc();
278 yCError(SOUNDFILE_MP3,
"Could not allocate audio frame");
283 frame->nb_samples = c->frame_size;
284 frame->format = c->sample_fmt;
285 frame->channel_layout = c->channel_layout;
// Allocate the frame's data buffers according to the fields set above.
288 ret = av_frame_get_buffer(frame, 0);
291 yCError(SOUNDFILE_MP3,
"Could not allocate audio data buffers");
// Number of complete encoder frames in the sound and the leftover sample count.
// NOTE(review): no use of rem_lastframe is visible in this listing; the trailing samples
// may not be encoded - confirm.
298 size_t nframes = soundsize / c->frame_size;
299 size_t rem_lastframe = soundsize % c->frame_size;
301 for (
size_t i = 0; i < nframes; i++)
// Ensure the frame buffer can be written before filling it.
303 ret = av_frame_make_writable(frame);
// NOTE(review): the buffer is accessed through uint16_t* while the format is AV_SAMPLE_FMT_S16
// and Sound::get returns audio_sample - verify the signedness conversion is intended.
308 samples = (uint16_t*)frame->data[0];
309 for (
int j = 0; j < c->frame_size; j++)
311 for (
int k = 0; k < c->channels; k++) {
// Interleaved layout: sample j of channel k goes to index j * channels + k.
312 samples[j * c->channels + k] = sound_data.
get(j + i * c->frame_size, k);
315 if (
encode(c, frame, pkt, fos) ==
false)
317 yCError(SOUNDFILE_MP3,
"Encode failed, memory could be corrupted, should I exit?");
// A NULL frame is passed after the last frame (presumably to flush the encoder - confirm).
322 if (
encode(c, NULL, pkt, fos) ==
false)
324 yCError(SOUNDFILE_MP3,
"Encode failed, memory could be corrupted, should I exit?");
// Release the libavcodec objects.
329 av_frame_free(&frame);
330 av_packet_free(&pkt);
331 avcodec_free_context(&c);
// Body of read_mp3_istream() (its signature is not visible in this listing): reads
// compressed audio from 'istream', parses and decodes it, and accumulates the result in sound_data.
// Without YARP_HAS_FFMPEG only the error below is logged.
339 #if !defined (YARP_HAS_FFMPEG)
340 yCError(SOUNDFILE_MP3) <<
"read_mp3_istream() not supported: lib ffmpeg not found";
343 const AVCodec* codec =
nullptr;
344 AVCodecContext* c =
nullptr;
345 AVCodecParserContext* parser =
nullptr;
// Input buffer of AUDIO_INBUF_SIZE bytes plus AV_INPUT_BUFFER_PADDING_SIZE extra bytes.
347 uint8_t inbuf[AUDIO_INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
348 uint8_t* data =
nullptr;
350 AVPacket* pkt =
nullptr;
351 AVFrame* decoded_frame =
nullptr;
// NOTE(review): no null check on the allocated packet is visible in this listing - confirm.
353 pkt = av_packet_alloc();
// Codec registration is only issued for libavcodec major versions below 58.
355 #if LIBAVCODEC_VERSION_MAJOR < 58
357 avcodec_register_all();
// NOTE(review): the decoder requested is AV_CODEC_ID_MP2 although the function name says mp3 - confirm intent.
361 codec = avcodec_find_decoder(AV_CODEC_ID_MP2);
364 yCError(SOUNDFILE_MP3,
"Codec not found");
367 parser = av_parser_init(codec->id);
370 yCError(SOUNDFILE_MP3,
"Parser not found");
373 c = avcodec_alloc_context3(codec);
376 yCError(SOUNDFILE_MP3,
"Could not allocate audio codec context");
380 if (avcodec_open2(c, codec, NULL) < 0)
382 yCError(SOUNDFILE_MP3,
"Could not open codec");
// Initial fill of the input buffer; gcount() gives the number of bytes actually read.
388 istream.read((
char*)(inbuf), AUDIO_INBUF_SIZE);
389 data_size = istream.gcount();
392 yCError(SOUNDFILE_MP3,
"Cannot process invalid (empty) stream");
// Main loop: runs while unconsumed bytes remain in the buffer.
395 while (data_size > 0)
// NOTE(review): a frame is allocated inside the loop; if no guard precedes this line
// (not visible in this listing) a new frame is allocated on every iteration - confirm.
399 if (!(decoded_frame = av_frame_alloc()))
401 yCError(SOUNDFILE_MP3,
"Could not allocate audio frame");
// Let the parser extract a packet from the buffered bytes into pkt->data / pkt->size.
405 ret = av_parser_parse2(parser, c, &pkt->data, &pkt->size, data, data_size, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
408 yCError(SOUNDFILE_MP3,
"Error while parsing");
414 decode(c, pkt, decoded_frame, sound_data);
// Refill: when fewer than AUDIO_REFILL_THRESH bytes remain, move them to the start of
// inbuf and read more data from the stream after them.
416 if (data_size < AUDIO_REFILL_THRESH)
418 memmove(inbuf, data, data_size);
420 istream.read((
char*)(data + data_size), AUDIO_INBUF_SIZE - data_size);
421 len = istream.gcount();
// Additional decode call after the loop (presumably flushing the decoder - confirm).
430 decode(c, pkt, decoded_frame, sound_data);
// Release the libavcodec objects.
436 avcodec_free_context(&c);
437 av_parser_close(parser);
438 av_frame_free(&decoded_frame);
439 av_packet_free(&pkt);
// Fragment of read_mp3_file(): opens 'filename' for binary reading and logs an error
// when the file cannot be opened.
447 fis.open(filename, std::fstream::in | std::fstream::binary);
448 if (fis.is_open() ==
false)
450 yCError(SOUNDFILE_MP3,
"Cannot open %s for reading", filename);
// Fragment of read_mp3_bytestream(): copies the 'streamsize' bytes at 'bytestream' into a
// std::string and wraps it in an input string stream.
461 std::istringstream iss(std::string(bytestream, streamsize));
bool read_mp3_istream(Sound &sound_data, std::istream &istream)
Class for storing sounds. See Audio in YARP for additional documentation on YARP audio.
void setFrequency(int freq)
Set the frequency of the sound (i.e.
size_t getChannels() const
Get the number of channels of the sound.
void resize(size_t samples, size_t channels=1)
Set the sound size.
audio_sample get(size_t sample, size_t channel=0) const
void set(audio_sample value, size_t sample, size_t channel=0)
size_t getSamples() const
Get the number of samples contained in the sound.
#define yCError(component,...)
#define YARP_LOG_COMPONENT(name,...)
NetInt32 encode(const std::string &str)
Convert a string into a vocabulary identifier.
std::string decode(NetInt32 code)
Convert a vocabulary identifier into a string.
An interface to the operating system, including Port based communication.
bool write_mp3_file(const Sound &data, const char *filename, size_t bitrate=64000)
Write a sound to an mp3 file.
bool read_mp3_file(Sound &data, const char *filename)
Read a sound from a .mp3 audio file.
bool read_mp3_bytestream(Sound &data, const char *bytestream, size_t streamsize)
Read a sound from a byte array.