YARP
Yet Another Robot Platform
 
Loading...
Searching...
No Matches
SoundFileMp3.cpp
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: 2006-2021 Istituto Italiano di Tecnologia (IIT)
3 * SPDX-FileCopyrightText: 2006-2010 RobotCub Consortium
4 * SPDX-License-Identifier: BSD-3-Clause
5 */
6
8
9#include <yarp/conf/system.h>
10
11#include <yarp/os/NetInt16.h>
12#include <yarp/os/NetInt32.h>
14#include <yarp/os/Vocab.h>
15
16#include <yarp/sig/Sound.h>
17#include <yarp/os/Log.h>
18#include <yarp/os/LogStream.h>
19
20#include <cstdio>
21#include <cstring>
22#include <fstream>
23
24#if defined (YARP_HAS_FFMPEG)
25extern "C"
26{
27 #include <libavutil/opt.h>
28 #include <libavcodec/avcodec.h>
29 #include <libavcodec/version.h>
30 #include <libavutil/channel_layout.h>
31 #include <libavutil/common.h>
32 #include <libavutil/imgutils.h>
33 #include <libavutil/mathematics.h>
34 #include <libavutil/samplefmt.h>
35}
36#endif
37
38using namespace yarp::os;
39using namespace yarp::sig;
40using namespace yarp::sig::file;
41
42namespace
43{
44 YARP_LOG_COMPONENT(SOUNDFILE_MP3, "yarp.sig.SoundFileMp3")
45}
46
47//#######################################################################################################
48#if defined (YARP_HAS_FFMPEG)
49#define AUDIO_INBUF_SIZE 20480
50#define AUDIO_REFILL_THRESH 4096
51#endif
52
53//#######################################################################################################
54#if defined (YARP_HAS_FFMPEG)
55bool decode(AVCodecContext* dec_ctx, AVPacket* pkt, AVFrame* frame, Sound& sound_data)
56{
57 int i, ch;
58 int ret, data_size;
59 /* send the packet with the compressed data to the decoder */
61 if (ret < 0)
62 {
63 yCError(SOUNDFILE_MP3, "Error submitting the packet to the decoder");
64 return false;
65 }
66 /* read all the output frames (in general there may be any number of them */
67 while (ret >= 0)
68 {
70 if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
71 {
72 return false;
73 }
74 else if (ret < 0)
75 {
76 yCError(SOUNDFILE_MP3, "Error during decoding");
77 return false;
78 }
79 //this seems to be 2: S16P
81 if (data_size < 0)
82 {
83 /* This should not occur, checking just for paranoia */
84 yCError(SOUNDFILE_MP3, "Failed to calculate data size\n");
85 return false;
86 }
87
89#if LIBAVCODEC_VERSION_MAJOR >= 61
90 int num_channels = dec_ctx->ch_layout.nb_channels;
91#else
92 int num_channels = dec_ctx->channels;
93#endif
94 frame_sound.resize(frame->nb_samples, num_channels);
95 if (sound_data.getChannels()==0) { sound_data.resize(0, num_channels);}
96
97 for (i = 0; i < frame->nb_samples; i++) //1152
98 {
99 for (ch = 0; ch < num_channels; ch++) //2
100 {
101 short int val = *((short int*)frame->data[ch] + i);
102 frame_sound.set(val,i,ch);
103 }
104 }
105 sound_data += frame_sound;
106 }
107 return true;
108}
109
110int check_sample_fmt(const AVCodec * codec, enum AVSampleFormat sample_fmt)
111{
112 const enum AVSampleFormat* p = codec->sample_fmts;
113
114 while (*p != AV_SAMPLE_FMT_NONE)
115 {
116 if (*p == sample_fmt) {
117 return 1;
118 }
119 p++;
120 }
121 return 0;
122}
123
124int select_sample_rate(const AVCodec * codec)
125{
126 const int* p;
127 int best_samplerate = 0;
128
129 if (!codec->supported_samplerates) {
130 return 44100;
131 }
132
133 p = codec->supported_samplerates;
134 while (*p)
135 {
136 if (!best_samplerate || abs(44100 - *p) < abs(44100 - best_samplerate)) {
137 best_samplerate = *p;
138 }
139 p++;
140 }
141 return best_samplerate;
142}
143
144bool encode(AVCodecContext* ctx, AVFrame* frame, AVPacket* pkt, std::fstream& os)
145{
146 int ret;
147
148 // send the frame for encoding
149 ret = avcodec_send_frame(ctx, frame);
150 if (ret < 0)
151 {
152 yCError(SOUNDFILE_MP3, "Error sending the frame to the encoder\n");
153 return false;
154 }
155
156 // read all the available output packets (in general there may be any
157 // number of them
158 while (ret >= 0)
159 {
161 if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
162 {
163 return true;
164 }
165 else if (ret < 0)
166 {
167 yCError(SOUNDFILE_MP3, "Error encoding audio frame\n");
168 return false;
169 }
170 os.write((const char*)(pkt->data), pkt->size);
172 }
173 return true;
174}
175
176#if LIBAVCODEC_VERSION_MAJOR >= 61
177// Taken from https://github.com/FFmpeg/FFmpeg/blob/f5ef91e02080316f50d606f5b0b03333bb627ed7/doc/examples/encode_audio.c#L72C1-L92C2
178/* select layout with the highest channel count */
179static int select_channel_layout(const AVCodec *codec, AVChannelLayout *dst)
180{
181 const AVChannelLayout *p, *best_ch_layout=nullptr;
182 int best_nb_channels = 0;
183
184 if (!codec->ch_layouts)
185 {
188 }
189
190 p = codec->ch_layouts;
191 while (p->nb_channels)
192 {
193 int nb_channels = p->nb_channels;
194
196 {
197 best_ch_layout = p;
199 }
200 p++;
201 }
202
203 if (!best_ch_layout)
204 {
205 return -1;
206 }
207
209}
210#else
211// Taken from https://github.com/FFmpeg/FFmpeg/blob/50e9e11316064ecdee889b18a0b6681a248edcf4/doc/examples/encode_audio.c#L72C1-L93C2
212/* select layout with the highest channel count */
213int select_channel_layout(const AVCodec * codec)
214{
215 const uint64_t * p;
217 int best_nb_channels = 0;
218
219 if (!codec->channel_layouts) {
220 return AV_CH_LAYOUT_STEREO;
221 }
222
223 p = codec->channel_layouts;
224 while (*p)
225 {
227
229 {
230 best_ch_layout = *p;
232 }
233 p++;
234 }
235 return best_ch_layout;
236}
237#endif /* LIBAVCODEC_VERSION_MAJOR >= 61 */
238#endif /* defined (YARP_HAS_FFMPEG) */
239
240//#######################################################################################################
241bool yarp::sig::file::write_mp3_file(const Sound& sound_data, const char* filename, size_t bitrate)
242{
243#if !defined (YARP_HAS_FFMPEG)
244
245 yCError(SOUNDFILE_MP3) << "write_mp3_file() not supported: lib ffmpeg not found";
246 return false;
247#else
248 const AVCodec * codec = nullptr;
249 AVCodecContext * c = nullptr;
250 AVFrame * frame = nullptr;
251 AVPacket * pkt = nullptr;
252 int ret;
253 std::fstream fos;
254 uint16_t * samples = nullptr;
255
256#if LIBAVCODEC_VERSION_MAJOR < 58
257 //register all the codecs, deprecated and useless in libffmpeg4.0
259#endif
260
261 // find the MP3 encoder
263 if (!codec)
264 {
265 yCError(SOUNDFILE_MP3, "Codec not found");
266 return false;
267 }
268
269 c = avcodec_alloc_context3(codec);
270 if (!c)
271 {
272 yCError(SOUNDFILE_MP3, "Could not allocate audio codec context");
273 return false;
274 }
275
276 // the compressed output bitrate
277 c->bit_rate = bitrate;
278
279 // check that the encoder supports s16 pcm input
280 c->sample_fmt = AV_SAMPLE_FMT_S16;
281 if (!check_sample_fmt(codec, c->sample_fmt))
282 {
283 yCError(SOUNDFILE_MP3, "Encoder does not support sample format %s",
284 av_get_sample_fmt_name(c->sample_fmt));
285 return false;
286 }
287
288 // select other audio parameters supported by the encoder
289 c->sample_rate = select_sample_rate(codec);
290#if LIBAVCODEC_VERSION_MAJOR >= 61
291 // from https://github.com/FFmpeg/FFmpeg/commit/f5ef91e02080316f50d606f5b0b03333bb627ed7#diff-85abeaf18e8c74a972fa1f5ab3c2fdfa7ddc818f9048196c6bd5f63a837b076aL167
292 ret = select_channel_layout(codec, &c->ch_layout);
293 if (ret < 0)
294 {
295 yCError(SOUNDFILE_MP3, "Could not select_channel_layout");
296 return false;
297 }
298#else
299 c->channel_layout = select_channel_layout(codec);
300 c->channels = av_get_channel_layout_nb_channels(c->channel_layout);
301#endif
302
303 // open it
304 if (avcodec_open2(c, codec, NULL) < 0)
305 {
306 yCError(SOUNDFILE_MP3, "Could not open codec");
307 return false;
308 }
309
310 fos.open(filename, std::fstream::out | std::fstream::binary);
311 if (fos.is_open()==false)
312 {
313 yCError(SOUNDFILE_MP3, "Cannot open %s for writing", filename);
314 return false;
315 }
316
317 // packet for holding encoded output
319 if (!pkt)
320 {
321 yCError(SOUNDFILE_MP3, "could not allocate the packet");
322 fos.close();
323 return false;
324 }
325
326 // frame containing input raw audio
327 frame = av_frame_alloc();
328 if (!frame)
329 {
330 yCError(SOUNDFILE_MP3, "Could not allocate audio frame");
331 fos.close();
332 return false;
333 }
334
335 frame->nb_samples = c->frame_size;
336 frame->format = c->sample_fmt;
337
338#if LIBAVCODEC_VERSION_MAJOR >= 61
339 // See https://github.com/FFmpeg/FFmpeg/commit/f5ef91e02080316f50d606f5b0b03333bb627ed7#diff-85abeaf18e8c74a972fa1f5ab3c2fdfa7ddc818f9048196c6bd5f63a837b076aL198
340 ret = av_channel_layout_copy(&frame->ch_layout, &c->ch_layout);
341 if (ret < 0)
342 {
343 yCError(SOUNDFILE_MP3, "Could not copy channel layout");
344 fos.close();
345 return false;
346 }
347#else
348 frame->channel_layout = c->channel_layout;
349#endif
350
351 // allocate the data buffers
352 ret = av_frame_get_buffer(frame, 0);
353 if (ret < 0)
354 {
355 yCError(SOUNDFILE_MP3, "Could not allocate audio data buffers");
356 fos.close();
357 return false;
358 }
359
360 // encode
361 size_t soundsize = sound_data.getSamples();
362 size_t nframes = soundsize / c->frame_size;
363 size_t rem_lastframe = soundsize % c->frame_size;
365 for (size_t i = 0; i < nframes; i++)
366 {
368 if (ret < 0) {
369 exit(1);
370 }
371
372#if LIBAVCODEC_VERSION_MAJOR >= 61
373 // See https://github.com/FFmpeg/FFmpeg/commit/f5ef91e02080316f50d606f5b0b03333bb627ed7#diff-85abeaf18e8c74a972fa1f5ab3c2fdfa7ddc818f9048196c6bd5f63a837b076aL221
374 int ch_layout_nb_channels = c->ch_layout.nb_channels;
375#else
376 int ch_layout_nb_channels = c->channels;
377#endif
378
379
380 samples = (uint16_t*)frame->data[0];
381 for (int j = 0; j < c->frame_size; j++)
382 {
383 for (int k = 0; k < ch_layout_nb_channels; k++) {
384 samples[j * ch_layout_nb_channels + k] = sound_data.get(j + i * c->frame_size, k);
385 }
386 }
387 if (encode(c, frame, pkt, fos) == false)
388 {
389 yCError(SOUNDFILE_MP3, "Encode failed, memory could be corrupted, should I exit?");
390 }
391 }
392
393 // flush the encoder
394 if (encode(c, NULL, pkt, fos) == false)
395 {
396 yCError(SOUNDFILE_MP3, "Encode failed, memory could be corrupted, should I exit?");
397 }
398
399 fos.close();
400
401 av_frame_free(&frame);
404
405 return true;
406#endif
407}
408
409bool read_mp3_istream(Sound& sound_data, std::istream& istream)
410{
411#if !defined (YARP_HAS_FFMPEG)
412 yCError(SOUNDFILE_MP3) << "read_mp3_istream() not supported: lib ffmpeg not found";
413 return false;
414#else
415 const AVCodec* codec = nullptr;
416 AVCodecContext* c = nullptr;
417 AVCodecParserContext* parser = nullptr;
418 int len, ret;
420 uint8_t* data = nullptr;
421 size_t data_size;
422 AVPacket* pkt = nullptr;
423 AVFrame* decoded_frame = nullptr;
424
426
427#if LIBAVCODEC_VERSION_MAJOR < 58
428 //register all the codecs, deprecated and useless in libffmpeg4.0
430#endif
431
432 // find the MPEG audio decoder
434 if (!codec)
435 {
436 yCError(SOUNDFILE_MP3, "Codec not found");
437 return false;
438 }
439 parser = av_parser_init(codec->id);
440 if (!parser)
441 {
442 yCError(SOUNDFILE_MP3, "Parser not found");
443 return false;
444 }
445 c = avcodec_alloc_context3(codec);
446 if (!c)
447 {
448 yCError(SOUNDFILE_MP3, "Could not allocate audio codec context");
449 return false;
450 }
451 //open the codec
452 if (avcodec_open2(c, codec, NULL) < 0)
453 {
454 yCError(SOUNDFILE_MP3, "Could not open codec");
455 return false;
456 }
457
458 // decode until eof
459 data = inbuf;
460 istream.read((char*)(inbuf), AUDIO_INBUF_SIZE);
461 data_size = istream.gcount();
462 if (data_size == 0)
463 {
464 yCError(SOUNDFILE_MP3, "Cannot process invalid (empty) stream");
465 return false;
466 }
467 while (data_size > 0)
468 {
469 if (!decoded_frame)
470 {
471 if (!(decoded_frame = av_frame_alloc()))
472 {
473 yCError(SOUNDFILE_MP3, "Could not allocate audio frame");
474 return false;
475 }
476 }
477 ret = av_parser_parse2(parser, c, &pkt->data, &pkt->size, data, data_size, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
478 if (ret < 0)
479 {
480 yCError(SOUNDFILE_MP3, "Error while parsing");
481 return false;
482 }
483 data += ret;
484 data_size -= ret;
485 if (pkt->size) {
486 decode(c, pkt, decoded_frame, sound_data);
487 }
489 {
490 memmove(inbuf, data, data_size);
491 data = inbuf;
492 istream.read((char*)(data + data_size), AUDIO_INBUF_SIZE - data_size);
493 len = istream.gcount();
494 if (len > 0) {
495 data_size += len;
496 }
497 }
498 }
499 // flush the decoder
500 pkt->data = NULL;
501 pkt->size = 0;
502 decode(c, pkt, decoded_frame, sound_data);
503
504 //set the sample rate (is it ok? maybe some codecs allow variable sample rate?)
505 sound_data.setFrequency(c->sample_rate);
506
507 //cleanup
509 av_parser_close(parser);
512 return true;
513#endif
514}
515
516bool yarp::sig::file::read_mp3_file(Sound& sound_data, const char* filename)
517{
518 std::fstream fis;
519 fis.open(filename, std::fstream::in | std::fstream::binary);
520 if (fis.is_open() == false)
521 {
522 yCError(SOUNDFILE_MP3, "Cannot open %s for reading", filename);
523 return false;
524 }
525
526 bool b = read_mp3_istream(sound_data, fis);
527 fis.close();
528 return b;
529}
530
532{
533 std::istringstream iss(std::string(bytestream, streamsize));
534 return read_mp3_istream(data, iss);
535}
bool ret
bool read_mp3_istream(Sound &sound_data, std::istream &istream)
A mini-server for performing network communication in the background.
void close() override
Stop port activity.
bool open(const std::string &name) override
Start port operation, with a specific name, with automatically-chosen network parameters.
T * read(bool shouldWait=true) override
Read an available object from the port.
Class for storing sounds See Audio in YARP for additional documentation on YARP audio.
Definition Sound.h:25
void setFrequency(int freq)
Set the frequency of the sound (i.e.
Definition Sound.cpp:361
size_t getChannels() const
Get the number of channels of the sound.
Definition Sound.cpp:603
void resize(size_t samples, size_t channels=1)
Set the sound size.
Definition Sound.cpp:270
audio_sample get(size_t sample, size_t channel=0) const
Definition Sound.cpp:294
size_t getSamples() const
Get the number of samples contained in the sound.
Definition Sound.cpp:598
#define yCError(component,...)
#define YARP_LOG_COMPONENT(name,...)
NetInt32 encode(const std::string &str)
Convert a string into a vocabulary identifier.
Definition Vocab.cpp:11
std::string decode(NetInt32 code)
Convert a vocabulary identifier into a string.
Definition Vocab.cpp:33
An interface to the operating system, including Port based communication.
bool write_mp3_file(const Sound &data, const char *filename, size_t bitrate=64000)
Write a sound to a mp3 file.
bool read_mp3_file(Sound &data, const char *filename)
Read a sound from a .mp3 audio file.
bool read_mp3_bytestream(Sound &data, const char *bytestream, size_t streamsize)
Read a sound from a byte array.
#define YARP_UNUSED(var)
Definition api.h:162