YARP
Yet Another Robot Platform
SoundFileMp3.cpp
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: 2006-2021 Istituto Italiano di Tecnologia (IIT)
3  * SPDX-FileCopyrightText: 2006-2010 RobotCub Consortium
4  * SPDX-License-Identifier: BSD-3-Clause
5  */
6 
8 
9 #include <yarp/conf/system.h>
10 
11 #include <yarp/os/NetInt16.h>
12 #include <yarp/os/NetInt32.h>
13 #include <yarp/os/ManagedBytes.h>
14 #include <yarp/os/Vocab.h>
15 
16 #include <yarp/sig/Sound.h>
17 #include <yarp/os/Log.h>
18 #include <yarp/os/LogStream.h>
19 
20 #include <cstdio>
21 #include <cstring>
22 #include <fstream>
23 
24 #if defined (YARP_HAS_FFMPEG)
25 extern "C"
26 {
27  #include <libavutil/opt.h>
28  #include <libavcodec/avcodec.h>
29  #include <libavcodec/version.h>
30  #include <libavutil/channel_layout.h>
31  #include <libavutil/common.h>
32  #include <libavutil/imgutils.h>
33  #include <libavutil/mathematics.h>
34  #include <libavutil/samplefmt.h>
35 }
36 #endif
37 
38 using namespace yarp::os;
39 using namespace yarp::sig;
40 using namespace yarp::sig::file;
41 
// File-local log component. Every yCError() call in this file passes
// SOUNDFILE_MP3 as its component; the anonymous namespace keeps the
// declaration private to this translation unit.
42 namespace
43 {
44  YARP_LOG_COMPONENT(SOUNDFILE_MP3, "yarp.sig.SoundFileMp3")
45 }
46 
47 //#######################################################################################################
#ifdef YARP_HAS_FFMPEG
// Number of payload bytes read_mp3_istream() keeps in its input buffer
// (the buffer itself is this size plus AV_INPUT_BUFFER_PADDING_SIZE).
#define AUDIO_INBUF_SIZE 20480
// read_mp3_istream() tops the input buffer up from the stream as soon as fewer
// than this many unparsed bytes are left in it.
#define AUDIO_REFILL_THRESH 4096
#endif
52 
53 //#######################################################################################################
54 #if defined (YARP_HAS_FFMPEG)
55 bool decode(AVCodecContext* dec_ctx, AVPacket* pkt, AVFrame* frame, Sound& sound_data)
56 {
57  int i, ch;
58  int ret, data_size;
59  /* send the packet with the compressed data to the decoder */
60  ret = avcodec_send_packet(dec_ctx, pkt);
61  if (ret < 0)
62  {
63  yCError(SOUNDFILE_MP3, "Error submitting the packet to the decoder");
64  return false;
65  }
66  /* read all the output frames (in general there may be any number of them */
67  while (ret >= 0)
68  {
69  ret = avcodec_receive_frame(dec_ctx, frame);
70  if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
71  {
72  return false;
73  }
74  else if (ret < 0)
75  {
76  yCError(SOUNDFILE_MP3, "Error during decoding");
77  return false;
78  }
79  //this seems to be 2: S16P
80  data_size = av_get_bytes_per_sample(dec_ctx->sample_fmt);
81  if (data_size < 0)
82  {
83  /* This should not occur, checking just for paranoia */
84  yCError(SOUNDFILE_MP3, "Failed to calculate data size\n");
85  return false;
86  }
87 
88  yarp::sig::Sound frame_sound;
89  frame_sound.resize(frame->nb_samples, dec_ctx->channels);
90  if (sound_data.getChannels()==0) { sound_data.resize(0, dec_ctx->channels);}
91 
92  for (i = 0; i < frame->nb_samples; i++) //1152
93  {
94  for (ch = 0; ch < dec_ctx->channels; ch++) //2
95  {
96  short int val = *((short int*)frame->data[ch] + i);
97  frame_sound.set(val,i,ch);
98  }
99  }
100  sound_data += frame_sound;
101  }
102  return true;
103 }
104 
105 int check_sample_fmt(const AVCodec * codec, enum AVSampleFormat sample_fmt)
106 {
107  const enum AVSampleFormat* p = codec->sample_fmts;
108 
109  while (*p != AV_SAMPLE_FMT_NONE)
110  {
111  if (*p == sample_fmt) {
112  return 1;
113  }
114  p++;
115  }
116  return 0;
117 }
118 
119 int select_sample_rate(const AVCodec * codec)
120 {
121  const int* p;
122  int best_samplerate = 0;
123 
124  if (!codec->supported_samplerates) {
125  return 44100;
126  }
127 
128  p = codec->supported_samplerates;
129  while (*p)
130  {
131  if (!best_samplerate || abs(44100 - *p) < abs(44100 - best_samplerate)) {
132  best_samplerate = *p;
133  }
134  p++;
135  }
136  return best_samplerate;
137 }
138 
139 bool encode(AVCodecContext* ctx, AVFrame* frame, AVPacket* pkt, std::fstream& os)
140 {
141  int ret;
142 
143  // send the frame for encoding
144  ret = avcodec_send_frame(ctx, frame);
145  if (ret < 0)
146  {
147  yCError(SOUNDFILE_MP3, "Error sending the frame to the encoder\n");
148  return false;
149  }
150 
151  // read all the available output packets (in general there may be any
152  // number of them
153  while (ret >= 0)
154  {
155  ret = avcodec_receive_packet(ctx, pkt);
156  if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
157  {
158  return true;
159  }
160  else if (ret < 0)
161  {
162  yCError(SOUNDFILE_MP3, "Error encoding audio frame\n");
163  return false;
164  }
165  os.write((const char*)(pkt->data), pkt->size);
166  av_packet_unref(pkt);
167  }
168  return true;
169 }
170 
171 /* select layout with the highest channel count */
172 int select_channel_layout(const AVCodec * codec)
173 {
174  const uint64_t * p;
175  uint64_t best_ch_layout = 0;
176  int best_nb_channels = 0;
177 
178  if (!codec->channel_layouts) {
179  return AV_CH_LAYOUT_STEREO;
180  }
181 
182  p = codec->channel_layouts;
183  while (*p)
184  {
185  int nb_channels = av_get_channel_layout_nb_channels(*p);
186 
187  if (nb_channels > best_nb_channels)
188  {
189  best_ch_layout = *p;
190  best_nb_channels = nb_channels;
191  }
192  p++;
193  }
194  return best_ch_layout;
195 }
196 #endif
197 
198 //#######################################################################################################
199 bool yarp::sig::file::write_mp3_file(const Sound& sound_data, const char* filename, size_t bitrate)
200 {
201 #if !defined (YARP_HAS_FFMPEG)
202 
203  yCError(SOUNDFILE_MP3) << "write_mp3_file() not supported: lib ffmpeg not found";
204  return false;
205 #else
206  const AVCodec * codec = nullptr;
207  AVCodecContext * c = nullptr;
208  AVFrame * frame = nullptr;
209  AVPacket * pkt = nullptr;
210  int ret;
211  std::fstream fos;
212  uint16_t * samples = nullptr;
213 
214 #if LIBAVCODEC_VERSION_MAJOR < 58
215  //register all the codecs, deprecated and useless in libffmpeg4.0
216  avcodec_register_all();
217 #endif
218 
219  // find the MP3 encoder
220  codec = avcodec_find_encoder(AV_CODEC_ID_MP2);
221  if (!codec)
222  {
223  yCError(SOUNDFILE_MP3, "Codec not found");
224  return false;
225  }
226 
227  c = avcodec_alloc_context3(codec);
228  if (!c)
229  {
230  yCError(SOUNDFILE_MP3, "Could not allocate audio codec context");
231  return false;
232  }
233 
234  // the compressed output bitrate
235  c->bit_rate = bitrate;
236 
237  // check that the encoder supports s16 pcm input
238  c->sample_fmt = AV_SAMPLE_FMT_S16;
239  if (!check_sample_fmt(codec, c->sample_fmt))
240  {
241  yCError(SOUNDFILE_MP3, "Encoder does not support sample format %s",
242  av_get_sample_fmt_name(c->sample_fmt));
243  return false;
244  }
245 
246  // select other audio parameters supported by the encoder
247  c->sample_rate = select_sample_rate(codec);
248  c->channel_layout = select_channel_layout(codec);
249  c->channels = av_get_channel_layout_nb_channels(c->channel_layout);
250 
251  // open it
252  if (avcodec_open2(c, codec, NULL) < 0)
253  {
254  yCError(SOUNDFILE_MP3, "Could not open codec");
255  return false;
256  }
257 
258  fos.open(filename, std::fstream::out | std::fstream::binary);
259  if (fos.is_open()==false)
260  {
261  yCError(SOUNDFILE_MP3, "Cannot open %s for writing", filename);
262  return false;
263  }
264 
265  // packet for holding encoded output
266  pkt = av_packet_alloc();
267  if (!pkt)
268  {
269  yCError(SOUNDFILE_MP3, "could not allocate the packet");
270  fos.close();
271  return false;
272  }
273 
274  // frame containing input raw audio
275  frame = av_frame_alloc();
276  if (!frame)
277  {
278  yCError(SOUNDFILE_MP3, "Could not allocate audio frame");
279  fos.close();
280  return false;
281  }
282 
283  frame->nb_samples = c->frame_size;
284  frame->format = c->sample_fmt;
285  frame->channel_layout = c->channel_layout;
286 
287  // allocate the data buffers
288  ret = av_frame_get_buffer(frame, 0);
289  if (ret < 0)
290  {
291  yCError(SOUNDFILE_MP3, "Could not allocate audio data buffers");
292  fos.close();
293  return false;
294  }
295 
296  // encode
297  size_t soundsize = sound_data.getSamples();
298  size_t nframes = soundsize / c->frame_size;
299  size_t rem_lastframe = soundsize % c->frame_size;
300  YARP_UNUSED(rem_lastframe);
301  for (size_t i = 0; i < nframes; i++)
302  {
303  ret = av_frame_make_writable(frame);
304  if (ret < 0) {
305  exit(1);
306  }
307 
308  samples = (uint16_t*)frame->data[0];
309  for (int j = 0; j < c->frame_size; j++)
310  {
311  for (int k = 0; k < c->channels; k++) {
312  samples[j * c->channels + k] = sound_data.get(j + i * c->frame_size, k);
313  }
314  }
315  if (encode(c, frame, pkt, fos) == false)
316  {
317  yCError(SOUNDFILE_MP3, "Encode failed, memory could be corrupted, should I exit?");
318  }
319  }
320 
321  // flush the encoder
322  if (encode(c, NULL, pkt, fos) == false)
323  {
324  yCError(SOUNDFILE_MP3, "Encode failed, memory could be corrupted, should I exit?");
325  }
326 
327  fos.close();
328 
329  av_frame_free(&frame);
330  av_packet_free(&pkt);
331  avcodec_free_context(&c);
332 
333  return true;
334 #endif
335 }
336 
337 bool read_mp3_istream(Sound& sound_data, std::istream& istream)
338 {
339 #if !defined (YARP_HAS_FFMPEG)
340  yCError(SOUNDFILE_MP3) << "read_mp3_istream() not supported: lib ffmpeg not found";
341  return false;
342 #else
343  const AVCodec* codec = nullptr;
344  AVCodecContext* c = nullptr;
345  AVCodecParserContext* parser = nullptr;
346  int len, ret;
347  uint8_t inbuf[AUDIO_INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
348  uint8_t* data = nullptr;
349  size_t data_size;
350  AVPacket* pkt = nullptr;
351  AVFrame* decoded_frame = nullptr;
352 
353  pkt = av_packet_alloc();
354 
355 #if LIBAVCODEC_VERSION_MAJOR < 58
356  //register all the codecs, deprecated and useless in libffmpeg4.0
357  avcodec_register_all();
358 #endif
359 
360  // find the MPEG audio decoder
361  codec = avcodec_find_decoder(AV_CODEC_ID_MP2);
362  if (!codec)
363  {
364  yCError(SOUNDFILE_MP3, "Codec not found");
365  return false;
366  }
367  parser = av_parser_init(codec->id);
368  if (!parser)
369  {
370  yCError(SOUNDFILE_MP3, "Parser not found");
371  return false;
372  }
373  c = avcodec_alloc_context3(codec);
374  if (!c)
375  {
376  yCError(SOUNDFILE_MP3, "Could not allocate audio codec context");
377  return false;
378  }
379  //open the codec
380  if (avcodec_open2(c, codec, NULL) < 0)
381  {
382  yCError(SOUNDFILE_MP3, "Could not open codec");
383  return false;
384  }
385 
386  // decode until eof
387  data = inbuf;
388  istream.read((char*)(inbuf), AUDIO_INBUF_SIZE);
389  data_size = istream.gcount();
390  if (data_size == 0)
391  {
392  yCError(SOUNDFILE_MP3, "Cannot process invalid (empty) stream");
393  return false;
394  }
395  while (data_size > 0)
396  {
397  if (!decoded_frame)
398  {
399  if (!(decoded_frame = av_frame_alloc()))
400  {
401  yCError(SOUNDFILE_MP3, "Could not allocate audio frame");
402  return false;
403  }
404  }
405  ret = av_parser_parse2(parser, c, &pkt->data, &pkt->size, data, data_size, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
406  if (ret < 0)
407  {
408  yCError(SOUNDFILE_MP3, "Error while parsing");
409  return false;
410  }
411  data += ret;
412  data_size -= ret;
413  if (pkt->size) {
414  decode(c, pkt, decoded_frame, sound_data);
415  }
416  if (data_size < AUDIO_REFILL_THRESH)
417  {
418  memmove(inbuf, data, data_size);
419  data = inbuf;
420  istream.read((char*)(data + data_size), AUDIO_INBUF_SIZE - data_size);
421  len = istream.gcount();
422  if (len > 0) {
423  data_size += len;
424  }
425  }
426  }
427  // flush the decoder
428  pkt->data = NULL;
429  pkt->size = 0;
430  decode(c, pkt, decoded_frame, sound_data);
431 
432  //set the sample rate (is it ok? maybe some codecs allow variable sample rate?)
433  sound_data.setFrequency(c->sample_rate);
434 
435  //cleanup
436  avcodec_free_context(&c);
437  av_parser_close(parser);
438  av_frame_free(&decoded_frame);
439  av_packet_free(&pkt);
440  return true;
441 #endif
442 }
443 
444 bool yarp::sig::file::read_mp3_file(Sound& sound_data, const char* filename)
445 {
446  std::fstream fis;
447  fis.open(filename, std::fstream::in | std::fstream::binary);
448  if (fis.is_open() == false)
449  {
450  yCError(SOUNDFILE_MP3, "Cannot open %s for reading", filename);
451  return false;
452  }
453 
454  bool b = read_mp3_istream(sound_data, fis);
455  fis.close();
456  return b;
457 }
458 
459 bool yarp::sig::file::read_mp3_bytestream(Sound& data, const char* bytestream, size_t streamsize)
460 {
461  std::istringstream iss(std::string(bytestream, streamsize));
462  return read_mp3_istream(data, iss);
463 }
int16_t * samples
bool ret
bool read_mp3_istream(Sound &sound_data, std::istream &istream)
Class for storing sounds See Audio in YARP for additional documentation on YARP audio.
Definition: Sound.h:26
void setFrequency(int freq)
Set the frequency of the sound (i.e.
Definition: Sound.cpp:229
size_t getChannels() const
Get the number of channels of the sound.
Definition: Sound.cpp:424
void resize(size_t samples, size_t channels=1)
Set the sound size.
Definition: Sound.cpp:168
audio_sample get(size_t sample, size_t channel=0) const
Definition: Sound.cpp:175
void set(audio_sample value, size_t sample, size_t channel=0)
Definition: Sound.cpp:209
size_t getSamples() const
Get the number of samples contained in the sound.
Definition: Sound.cpp:419
#define yCError(component,...)
Definition: LogComponent.h:154
#define YARP_LOG_COMPONENT(name,...)
Definition: LogComponent.h:77
NetInt32 encode(const std::string &str)
Convert a string into a vocabulary identifier.
Definition: Vocab.cpp:11
std::string decode(NetInt32 code)
Convert a vocabulary identifier into a string.
Definition: Vocab.cpp:33
An interface to the operating system, including Port based communication.
Image file operations.
Definition: ImageFile.h:21
bool write_mp3_file(const Sound &data, const char *filename, size_t bitrate=64000)
Write a sound to a mp3 file.
bool read_mp3_file(Sound &data, const char *filename)
Read a sound from a .mp3 audio file.
bool read_mp3_bytestream(Sound &data, const char *bytestream, size_t streamsize)
Read a sound from a byte array.
Signal processing.
Definition: Image.h:22
#define YARP_UNUSED(var)
Definition: api.h:162