diff --git a/subprojects/gst-plugins-good/sys/osxaudio/gstatenc.c b/subprojects/gst-plugins-good/sys/osxaudio/gstatenc.c new file mode 100644 index 0000000000..4a8d04c4b9 --- /dev/null +++ b/subprojects/gst-plugins-good/sys/osxaudio/gstatenc.c @@ -0,0 +1,943 @@ +/* + * Copyright (C) 2024 Piotr Brzeziński + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ +/** + * SECTION:element-atenc + * @title: atenc + * + * AudioToolbox based encoder. + * ## Example launch line + * |[ + * gst-launch-1.0 -v audiotestsrc ! atenc ! mp4mux ! filesink location=test.m4a + * ]| + * Encodes audio from audiotestsrc and writes it to a file. + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "gstatenc.h" + +enum +{ + PROP_0, + PROP_BITRATE, + PROP_RATE_CONTROL, + PROP_VBR_QUALITY, +}; + +#define DEFAULT_BITRATE 0 +#define DEFAULT_RATE_CONTROL GST_ATENC_RATE_CONTROL_CONSTANT +#define DEFAULT_VBR_QUALITY 65 + +#define ES_DESCRIPTOR_TAG 0x03 +#define DECODER_CONFIG_DESC_TAG 0x04 +#define DECODER_SPECIFIC_INFO_TAG 0x05 + +#define SAMPLE_RATES " 8000, " \ + "11025, " \ + "12000, " \ + "16000, " \ + "22050, " \ + "24000, " \ + "32000, " \ + "44100, " \ + "48000 " +/* Higher sample rates were failing when initializing the encoder. 
+ * Probably supported only in specific circumstances, hard to find documentation about that. */ + +/* *INDENT-OFF* */ +static const GstATEncLayout aac_layouts[] = { + { + 1, kAudioChannelLayoutTag_Mono, { GST_AUDIO_CHANNEL_POSITION_MONO }}, { + 2, kAudioChannelLayoutTag_Stereo, { + GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT, + GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT }}, { + /* C L R */ + 3, kAudioChannelLayoutTag_AAC_3_0, { + GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER, + GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT, + GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT }}, { + /* C L R Cs */ + 4, kAudioChannelLayoutTag_AAC_4_0, { + GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER, + GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT, + GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT, + GST_AUDIO_CHANNEL_POSITION_REAR_CENTER }}, { + /* C L R Ls Rs */ + 5, kAudioChannelLayoutTag_AAC_5_0, { + GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER, + GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT, + GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT, + GST_AUDIO_CHANNEL_POSITION_SURROUND_LEFT, + GST_AUDIO_CHANNEL_POSITION_SURROUND_RIGHT }}, { + /* C L R Ls Rs Lfe */ + 6, kAudioChannelLayoutTag_AAC_5_1, { + GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER, + GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT, + GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT, + GST_AUDIO_CHANNEL_POSITION_SURROUND_LEFT, + GST_AUDIO_CHANNEL_POSITION_SURROUND_RIGHT, + GST_AUDIO_CHANNEL_POSITION_LFE1 }}, { + /* C L R Ls Rs Cs */ + 6, kAudioChannelLayoutTag_AAC_6_0, { + GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER, + GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT, + GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT, + GST_AUDIO_CHANNEL_POSITION_SURROUND_LEFT, + GST_AUDIO_CHANNEL_POSITION_SURROUND_RIGHT, + GST_AUDIO_CHANNEL_POSITION_REAR_CENTER }}, { + /* C L R Ls Rs Cs Lfe */ + 7, kAudioChannelLayoutTag_AAC_6_1, { + GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER, + GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT, + GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT, + GST_AUDIO_CHANNEL_POSITION_SURROUND_LEFT, + GST_AUDIO_CHANNEL_POSITION_SURROUND_RIGHT, + 
GST_AUDIO_CHANNEL_POSITION_REAR_CENTER, + GST_AUDIO_CHANNEL_POSITION_LFE1 }}, { + /* C L R Ls Rs Rls Rrs */ + 7, kAudioChannelLayoutTag_AAC_7_0, { + GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER, + GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT, + GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT, + GST_AUDIO_CHANNEL_POSITION_SURROUND_LEFT, + GST_AUDIO_CHANNEL_POSITION_SURROUND_RIGHT, + GST_AUDIO_CHANNEL_POSITION_REAR_LEFT, + GST_AUDIO_CHANNEL_POSITION_REAR_RIGHT }}, { + /* C Lc Rc L R Ls Rs Lfe */ + 8, kAudioChannelLayoutTag_AAC_7_1, { + GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER, + GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT_OF_CENTER, + GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT_OF_CENTER, + GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT, + GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT, + GST_AUDIO_CHANNEL_POSITION_SURROUND_LEFT, + GST_AUDIO_CHANNEL_POSITION_SURROUND_RIGHT, + GST_AUDIO_CHANNEL_POSITION_LFE1 }}, { + /* C L R Ls Rs Rls Rrs LFE */ + 8, kAudioChannelLayoutTag_AAC_7_1_B, { + GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER, + GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT, + GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT, + GST_AUDIO_CHANNEL_POSITION_SURROUND_LEFT, + GST_AUDIO_CHANNEL_POSITION_SURROUND_RIGHT, + GST_AUDIO_CHANNEL_POSITION_REAR_LEFT, + GST_AUDIO_CHANNEL_POSITION_REAR_RIGHT, + GST_AUDIO_CHANNEL_POSITION_LFE1 }}, { + /* C L R Ls Rs LFE Vhl Vhr */ + 8, kAudioChannelLayoutTag_AAC_7_1_C, { + GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER, + GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT, + GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT, + GST_AUDIO_CHANNEL_POSITION_SURROUND_LEFT, + GST_AUDIO_CHANNEL_POSITION_SURROUND_RIGHT, + GST_AUDIO_CHANNEL_POSITION_LFE1, + GST_AUDIO_CHANNEL_POSITION_TOP_FRONT_LEFT, + GST_AUDIO_CHANNEL_POSITION_TOP_FRONT_RIGHT }}, { + /* Only used when iterating through all positions */ + 0, kAudioChannelLayoutTag_Unknown, { 0 } } +}; +/* *INDENT-ON* */ + +static GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE ("sink", + GST_PAD_SINK, + GST_PAD_ALWAYS, + GST_STATIC_CAPS ("audio/x-raw, " + "format = (string) " 
GST_AUDIO_NE (S16) ", " + "layout = (string) interleaved, " + "rate = (int) { " SAMPLE_RATES " }, channels = (int) [ 1, 8 ]") + ); + +static GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE ("src", + GST_PAD_SRC, + GST_PAD_ALWAYS, + GST_STATIC_CAPS ("audio/mpeg, " + "mpegversion = (int) 4, " + "rate = (int) { " SAMPLE_RATES " }, " + "channels = (int) [ 1, 8 ], " + "stream-format = (string) raw, " + "profile = (string) lc, framed = (boolean) true") + ); + +GST_DEBUG_CATEGORY_STATIC (gst_atenc_debug); +#define GST_CAT_DEFAULT gst_atenc_debug + +G_DEFINE_TYPE (GstATEnc, gst_atenc, GST_TYPE_AUDIO_ENCODER); +GST_ELEMENT_REGISTER_DEFINE (atenc, "atenc", GST_RANK_PRIMARY, GST_TYPE_ATENC); + +#define GST_ATENC_RATE_CONTROL (gst_atenc_rate_control_get_type ()) +static GType +gst_atenc_rate_control_get_type (void) +{ + static GType atenc_rate_control_type = 0; + static const GEnumValue types[] = { + {GST_ATENC_RATE_CONTROL_CONSTANT, "Constant bitrate", "cbr"}, + {GST_ATENC_RATE_CONTROL_LONG_TERM_AVERAGE, "Long-term-average bitrate", + "lta"}, + {GST_ATENC_RATE_CONTROL_VARIABLE_CONSTRAINED, + "Constrained variable bitrate", "cvbr"}, + {GST_ATENC_RATE_CONTROL_VARIABLE, "Variable bitrate", "vbr"}, + {0, NULL, NULL} + }; + + if (!atenc_rate_control_type) + atenc_rate_control_type = + g_enum_register_static ("GstATEncRateControl", types); + + return atenc_rate_control_type; +} + +static void +gst_atenc_set_property (GObject * object, guint prop_id, + const GValue * value, GParamSpec * pspec) +{ + GstATEnc *self = GST_ATENC (object); + + switch (prop_id) { + case PROP_BITRATE: + self->bitrate = g_value_get_uint (value); + break; + case PROP_RATE_CONTROL: + self->rate_control = g_value_get_enum (value); + break; + case PROP_VBR_QUALITY: + self->vbr_quality = g_value_get_uint (value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static void +gst_atenc_get_property (GObject * object, guint prop_id, + GValue * value, 
GParamSpec * pspec) +{ + GstATEnc *self = GST_ATENC (object); + + switch (prop_id) { + case PROP_BITRATE: + g_value_set_uint (value, self->bitrate); + break; + case PROP_RATE_CONTROL: + g_value_set_enum (value, self->rate_control); + break; + case PROP_VBR_QUALITY: + g_value_set_uint (value, self->vbr_quality); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec); + break; + } +} + +static gboolean +gst_atenc_start (GstAudioEncoder * enc) +{ + GstATEnc *self = GST_ATENC (enc); + + GST_DEBUG_OBJECT (self, "Starting encoder"); + + self->input_queue = gst_queue_array_new (0); + gst_queue_array_set_clear_func (self->input_queue, + (GDestroyNotify) gst_buffer_unref); + + return TRUE; +} + +static void +gst_atenc_flush (GstAudioEncoder * enc) +{ + GstATEnc *self = GST_ATENC (enc); + + GST_DEBUG_OBJECT (self, "Flushing encoder"); + AudioConverterReset (self->converter); + + gst_queue_array_clear (self->input_queue); +} + +static gboolean +gst_atenc_stop (GstAudioEncoder * enc) +{ + GstATEnc *self = GST_ATENC (enc); + + GST_DEBUG_OBJECT (self, "Stopping encoder"); + + gst_atenc_flush (enc); + + if (self->converter) { + AudioConverterDispose (self->converter); + self->converter = NULL; + } + + gst_queue_array_free (self->input_queue); + self->input_queue = NULL; + + if (self->used_buffer) { + gst_audio_buffer_unmap (self->used_buffer); + gst_buffer_unref (self->used_buffer->buffer); + g_free (self->used_buffer); + self->used_buffer = NULL; + } + + return TRUE; +} + +static GstCaps * +gst_atenc_get_caps (GstAudioEncoder * enc, GstCaps * filter) +{ + GstCaps *layout_caps, *ret, *caps = gst_caps_new_empty (); + const GstATEncLayout *layout; + guint64 channel_mask; + + for (layout = aac_layouts; layout->channels; layout++) { + layout_caps = + gst_caps_make_writable (gst_pad_get_pad_template_caps + (GST_AUDIO_ENCODER_SINK_PAD (enc))); + + if (layout->channels == 1) { + gst_caps_set_simple (layout_caps, "channels", G_TYPE_INT, + layout->channels, 
NULL); + } else { + gst_audio_channel_positions_to_mask (layout->positions, layout->channels, + FALSE, &channel_mask); + gst_caps_set_simple (layout_caps, "channels", G_TYPE_INT, + layout->channels, "channel-mask", GST_TYPE_BITMASK, channel_mask, + NULL); + } + + gst_caps_append (caps, layout_caps); + } + + ret = gst_audio_encoder_proxy_getcaps (enc, caps, filter); + gst_caps_unref (caps); + + return ret; +} + +static OSStatus +gst_atenc_fill_buffer (AudioConverterRef converter, UInt32 * packets_amount, + AudioBufferList * buffers, AudioStreamPacketDescription ** desc, + void *user_data) +{ + GstATEnc *self = GST_ATENC (user_data); + GstBuffer *buf; + GstAudioBuffer *audio_buf; + GstAudioInfo *audio_info; + UInt32 wanted_samples = *packets_amount; + + /* We can now safely clean up the buffer that was previously passed to AT */ + if (self->used_buffer) { + gst_audio_buffer_unmap (self->used_buffer); + gst_buffer_unref (self->used_buffer->buffer); + g_free (self->used_buffer); + self->used_buffer = NULL; + } + + /* See https://developer.apple.com/library/archive/qa/qa1317/_index.html + * packets_amount indicates how much data is expected to be filled in. + * + * The way this is set up, we tell the base class how many samples AT will expect, + * and it will provide us with that much. Only exception is at the end of stream, + * where there might not be enough data. Thankfully, if we signal EOS, AT will encode + * whatever it got as input, without needing to silence-pad to the expected amount. 
+ * + * In case of less data than packets_amount => set that to the actual value and return noErr + * No data currently available, but more is expected => packets_amount=0 and return 1 + * No data available and input got EOS => packets_amount=0 and return noErr + */ + buf = gst_queue_array_pop_head (self->input_queue); + if (!buf) { + *packets_amount = 0; + + if (self->input_eos) { + GST_DEBUG_OBJECT (self, "No more input data, returning noErr"); + return noErr; + } else { + GST_LOG_OBJECT (self, "No input buffer yet, waiting for more data"); + return 1; + } + } + + /* We can only unmap the audio_buffer in the next callback, but in the meantime + * the base class can invalidate the underlying buffer. Ref it manually to ensure + * it lives long enough. */ + gst_buffer_ref (buf); + audio_info = gst_audio_encoder_get_audio_info (GST_AUDIO_ENCODER (self)); + audio_buf = g_malloc0 (sizeof (GstAudioBuffer)); + gst_audio_buffer_map (audio_buf, audio_info, buf, GST_MAP_READ); + + /* Pushing this as a pointer instead of using the _struct() variants + * because GstAudioBuffer contains self-references, so we'd get dangling pointers otherwise. 
*/ + self->used_buffer = audio_buf; + + buffers->mNumberBuffers = 1; + buffers->mBuffers[0].mNumberChannels = GST_AUDIO_INFO_CHANNELS (audio_info); + buffers->mBuffers[0].mDataByteSize = GST_AUDIO_BUFFER_PLANE_SIZE (audio_buf); + buffers->mBuffers[0].mData = GST_AUDIO_BUFFER_PLANE_DATA (audio_buf, 0); + + *packets_amount = audio_buf->n_samples; + GST_LOG_OBJECT (self, "Wanted %d packets, filled %d", wanted_samples, + *packets_amount); + + return noErr; +} + +static GstFlowReturn +gst_atenc_handle_frame (GstAudioEncoder * enc, GstBuffer * buffer) +{ + GstATEnc *self = GST_ATENC (enc); + OSStatus status; + GstBuffer *outbuf; + GstFlowReturn ret; + GstMapInfo map_info; + GstAudioInfo *audio_info; + AudioBufferList out_bufs = { 0 }; + AudioStreamPacketDescription out_desc = { 0 }; + UInt32 out_packets; + + if (!buffer) { + self->input_eos = TRUE; + GST_DEBUG_OBJECT (self, "No input buffer, draining encoder"); + } else { + self->input_eos = FALSE; + gst_queue_array_push_tail (self->input_queue, buffer); + GST_LOG ("Pushed buffer to queue"); + } + + outbuf = + gst_audio_encoder_allocate_output_buffer (enc, + self->max_output_buffer_size); + if (!outbuf) { + GST_ERROR_OBJECT (self, "Failed to allocate output buffer"); + return GST_FLOW_ERROR; + } + + gst_buffer_map (outbuf, &map_info, GST_MAP_WRITE); + + audio_info = gst_audio_encoder_get_audio_info (enc); + out_bufs.mNumberBuffers = 1; + out_bufs.mBuffers[0].mNumberChannels = GST_AUDIO_INFO_CHANNELS (audio_info); + out_bufs.mBuffers[0].mDataByteSize = self->max_output_buffer_size; + out_bufs.mBuffers[0].mData = map_info.data; + out_packets = 1; + + status = + AudioConverterFillComplexBuffer (self->converter, gst_atenc_fill_buffer, + self, &out_packets, &out_bufs, &out_desc); + + /* gst_atenc_fill_buffer will return 1 when it doesn't have enough data yet */ + if (status != noErr && status != 1) { + GST_ERROR_OBJECT (self, "Failed to fill buffer: %d", status); + return GST_FLOW_ERROR; + } + + if (out_packets == 0) { + 
GST_LOG_OBJECT (self, "No packets produced, more data needed or input EOS"); + gst_buffer_unmap (outbuf, &map_info); + gst_buffer_unref (outbuf); + return GST_FLOW_OK; + } + + gst_buffer_unmap (outbuf, &map_info); + + /* On exit, mDataByteSize is set to the number of bytes written. */ + GST_LOG_OBJECT (self, "Output buffer size: %d", out_desc.mDataByteSize); + g_assert (out_desc.mDataByteSize <= self->max_output_buffer_size); + gst_buffer_set_size (outbuf, out_desc.mDataByteSize); + ret = gst_audio_encoder_finish_frame (enc, outbuf, self->n_output_samples); + + return ret; +} + +static void +gst_atenc_fill_input_layout (GstAudioInfo * info, AudioChannelLayout * layout) +{ + const GstAudioChannelPosition *input_positions = + &GST_AUDIO_INFO_POSITION (info, 0); + + layout->mChannelLayoutTag = kAudioChannelLayoutTag_UseChannelDescriptions; + layout->mNumberChannelDescriptions = GST_AUDIO_INFO_CHANNELS (info); + for (int i = 0; i < GST_AUDIO_INFO_CHANNELS (info); i++) { + layout->mChannelDescriptions[i].mChannelLabel = + gst_audio_channel_position_to_core_audio (input_positions[i], i); + } +} + +static AudioChannelLayoutTag +gst_atenc_get_output_layout_tag (GstATEnc * self, GstAudioInfo * info) +{ + const GstAudioChannelPosition *input_positions = + &GST_AUDIO_INFO_POSITION (info, 0); + const GstATEncLayout *layout; + gint input_channels = GST_AUDIO_INFO_CHANNELS (info); + guint64 input_ch_mask; + + gst_audio_channel_positions_to_mask (input_positions, input_channels, FALSE, + &input_ch_mask); + + /* Try to find a predefined output layout that matches the input channels. + * Order doesn't matter - we set channel descriptions on input, so AT will reorder internally. 
*/ + for (layout = aac_layouts; layout->channels; layout++) { + const GstAudioChannelPosition *output_positions = layout->positions; + guint64 layout_ch_mask; + + if (layout->channels != input_channels) + continue; + + gst_audio_channel_positions_to_mask (output_positions, layout->channels, + FALSE, &layout_ch_mask); + if (input_ch_mask != layout_ch_mask) + continue; + + return layout->aac_tag; + } + + return kAudioChannelLayoutTag_Unknown; +} + +static bool +_parse_descriptor (GstByteReader * br, guint8 * tag, gint * len) +{ + gint size_of_instance = 0; + guint8 size_byte; + gboolean has_next_byte; + + /* Descriptors are variable size, parse it according + * to the formula in sec. 14.3.3 of ISO/IEC 14496-1. + * First 8 bits is the tag. */ + if (!gst_byte_reader_get_uint8 (br, tag)) + return FALSE; + /* Following is one or more size_byte, in which bit 1 tells us if we should parse further, + * and the remaining 7 bits are the actual (portion of the) size */ + do { + if (!gst_byte_reader_get_uint8 (br, &size_byte)) + return FALSE; + has_next_byte = size_byte & 0x80; + size_of_instance = (size_of_instance << 7) | (size_byte & 0x7f); + g_assert (size_of_instance >= 0); + } while (has_next_byte && gst_byte_reader_get_remaining (br) > 0); + + if (len) + *len = size_of_instance; + + return TRUE; +} + +static void +gst_atenc_extract_audio_specific_config (guint8 * cookie_buf, guint cookie_size, + guint8 ** asc, guint * asc_size) +{ + GstByteReader *br = gst_byte_reader_new (cookie_buf, cookie_size); + gint len; + guint8 tag, flags, flag_skip; + + /* Cookie data is a MPEG descriptor structure, we need to extract the AudioSpecificConfig. 
+ * Structures parsed below are described in ISO/IEC 14496-1 */ + while (gst_byte_reader_get_remaining (br) > 0) { + if (!_parse_descriptor (br, &tag, NULL)) + break; + if (tag == ES_DESCRIPTOR_TAG) { + /* First, find the ES_Descriptor and parse flags that tell us how many bits to skip */ + if (!gst_byte_reader_skip (br, 2)) + break; + if (!gst_byte_reader_get_uint8 (br, &flags)) + break; + if (flags & 0x80) + if (!gst_byte_reader_skip (br, 2)) + break; + if (flags & 0x40) { + if (!gst_byte_reader_get_uint8 (br, &flag_skip)) + break; + if (!gst_byte_reader_skip (br, flag_skip)) + break; + } + if (flags & 0x20) + if (!gst_byte_reader_skip (br, 2)) + break; + } else if (tag == DECODER_CONFIG_DESC_TAG) { + /* Then we get the DecoderConfigDescriptor and skip its first 13 bytes to get to DecoderSpecificInfo */ + if (!gst_byte_reader_skip (br, 13)) + break; + if (!_parse_descriptor (br, &tag, &len)) + break; + /* DecoderSpecificInfo is the AudioSpecificConfig in our case */ + if (tag == DECODER_SPECIFIC_INFO_TAG) { + *asc_size = len; + *asc = g_malloc0 (*asc_size); + if (!gst_byte_reader_dup_data (br, *asc_size, asc)) { + g_free (*asc); + *asc = NULL; + } + break; + } + } + } +} + +static gboolean +gst_atenc_set_format (GstAudioEncoder * enc, GstAudioInfo * info) +{ + GstATEnc *self = GST_ATENC (enc); + AudioStreamBasicDescription input_desc = { 0 }; + AudioStreamBasicDescription output_desc = { 0 }; + AudioChannelLayout *layout = NULL; + AudioChannelLayoutTag output_layout_tag; + GstCaps *src_caps; + OSStatus status; + gboolean ret; + UInt32 prop_size, max_output_size; + guint8 *cookie_data = NULL; + guint8 *audio_config = NULL; + guint32 audio_config_size = 0; + GstBuffer *asc_buf; + + if (self->converter) { + /* Drain any leftover data from encoder */ + gst_atenc_handle_frame (enc, NULL); + AudioConverterDispose (self->converter); + self->converter = NULL; + } + + input_desc.mSampleRate = GST_AUDIO_INFO_RATE (info); + input_desc.mFormatID = kAudioFormatLinearPCM; + 
input_desc.mFormatFlags = + kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked; + input_desc.mFramesPerPacket = 1; + input_desc.mBytesPerFrame = input_desc.mBytesPerPacket = + GST_AUDIO_INFO_BPF (info); + input_desc.mChannelsPerFrame = GST_AUDIO_INFO_CHANNELS (info); + input_desc.mBitsPerChannel = GST_AUDIO_INFO_DEPTH (info); + + /* HE-AAC v1/v2 and LD to be added later. + * For LD, AudioSpecificConfig parsing fails completely, might be due to faulty MPEG descriptor parsing. + * For HE-AAC, channel configurations need testing (also sometimes fail to parse). */ + output_desc.mFormatID = kAudioFormatMPEG4AAC; + output_desc.mSampleRate = GST_AUDIO_INFO_RATE (info); + output_desc.mChannelsPerFrame = GST_AUDIO_INFO_CHANNELS (info); + + status = AudioConverterNew (&input_desc, &output_desc, &self->converter); + if (status != noErr) { + GST_ERROR_OBJECT (self, "Failed to create audio converter: %d", status); + return FALSE; + } + + /* Using the encoder-provided size results in kAudioCodecBadPropertySizeError, so let's calculate it manually... 
*/ + prop_size = + sizeof (AudioChannelLayout) + + sizeof (AudioChannelDescription) * GST_AUDIO_INFO_CHANNELS (info); + layout = g_malloc0 (prop_size); + + /* For input, AT expects per-channel descriptions to be used */ + gst_atenc_fill_input_layout (info, layout); + status = + AudioConverterSetProperty (self->converter, + kAudioConverterInputChannelLayout, prop_size, layout); + if (status != noErr) { + GST_ERROR_OBJECT (self, "Failed to set input channel layout: %d", status); + g_free (layout); + return FALSE; + } + + /* For output, instead of channel descriptions, we use an AAC tag indicating one of the predefined layouts */ + output_layout_tag = gst_atenc_get_output_layout_tag (self, info); + if (output_layout_tag == kAudioChannelLayoutTag_Unknown) { + GST_ERROR_OBJECT (self, + "Failed to find a matching output channel layout tag"); + g_free (layout); + return FALSE; + } + + layout->mChannelLayoutTag = output_layout_tag; + layout->mNumberChannelDescriptions = 0; + + status = + AudioConverterSetProperty (self->converter, + kAudioConverterOutputChannelLayout, prop_size, layout); + g_free (layout); + if (status != noErr) { + GST_ERROR_OBJECT (self, "Failed to set output channel layout: %d", status); + return FALSE; + } + + /* TODO: Check if this works on iOS */ + status = + AudioConverterSetProperty (self->converter, + kAudioCodecPropertyBitRateControlMode, sizeof (UInt32), + &self->rate_control); + if (status != noErr) { + GST_ERROR_OBJECT (self, "Failed to set bitrate control mode: %d", status); + return FALSE; + } + + if (self->rate_control == GST_ATENC_RATE_CONTROL_VARIABLE) { + status = + AudioConverterSetProperty (self->converter, + kAudioCodecPropertySoundQualityForVBR, sizeof (UInt32), + &self->vbr_quality); + if (status != noErr) { + GST_ERROR_OBJECT (self, "Failed to set VBR quality: %d", status); + return FALSE; + } + } + + if (self->bitrate > 0 + && (self->rate_control == GST_ATENC_RATE_CONTROL_CONSTANT + || self->rate_control == 
GST_ATENC_RATE_CONTROL_LONG_TERM_AVERAGE)) { + /* Query the encoder for possible bitrate values and adjust if needed */ + AudioValueRange *bitrate_ranges; + UInt32 actual_bitrate; + + status = + AudioConverterGetPropertyInfo (self->converter, + kAudioConverterApplicableEncodeBitRates, &prop_size, NULL); + if (status != noErr) { + GST_ERROR_OBJECT (self, "Failed to get possible bitrates size: %d", + status); + return FALSE; + } + + bitrate_ranges = g_malloc (prop_size); + status = + AudioConverterGetProperty (self->converter, + kAudioConverterApplicableEncodeBitRates, &prop_size, bitrate_ranges); + if (status != noErr) { + GST_ERROR_OBJECT (self, "Failed to get possible bitrates: %d", status); + g_free (bitrate_ranges); + return FALSE; + } + + GST_LOG_OBJECT (self, "Allowed bitrate ranges:"); + for (int i = 0; i < prop_size / sizeof (AudioValueRange); i++) { + AudioValueRange *range = &bitrate_ranges[i]; + GST_LOG_OBJECT (self, "%d: %f - %f", + i + 1, range->mMinimum, range->mMaximum); + } + + /* Returned ranges are ordered from lowest to highest values */ + for (int i = 0; i < prop_size / sizeof (AudioValueRange); i++) { + AudioValueRange *range = &bitrate_ranges[i]; + if (self->bitrate == range->mMinimum && self->bitrate == range->mMaximum) { + /* Often the min/max values are identical, so not that much of a range... 
*/ + actual_bitrate = self->bitrate; + break; + } else if (self->bitrate < range->mMinimum) { + actual_bitrate = range->mMinimum; + break; + } else if (self->bitrate > range->mMaximum) { + /* We might find higher values still, so no break */ + actual_bitrate = range->mMaximum; + } + } + + if (actual_bitrate != self->bitrate) { + GST_WARNING_OBJECT (self, + "Requested bitrate %d not in the allowed range, using %d", + self->bitrate, actual_bitrate); + self->bitrate = actual_bitrate; + } + + /* TODO: This could be changed at any time instead of just in set_format, + * but from initial testing, changing the bitrate when encoding introduces + * a very short pause in encoded sound. Needs investigation. */ + status = + AudioConverterSetProperty (self->converter, + kAudioConverterEncodeBitRate, sizeof (UInt32), &actual_bitrate); + if (status != noErr) { + GST_ERROR_OBJECT (self, "Failed to set bitrate: %d", status); + g_free (bitrate_ranges); + return FALSE; + } + } + + /* After creation, encoder fills input/output desc with more details */ + prop_size = sizeof (output_desc); + status = + AudioConverterGetProperty (self->converter, + kAudioConverterCurrentOutputStreamDescription, &prop_size, &output_desc); + if (status != noErr) { + GST_ERROR_OBJECT (self, "Failed to get output format: %d", status); + return FALSE; + } + self->n_output_samples = output_desc.mFramesPerPacket; + GST_DEBUG_OBJECT (self, "samples per output packet: %d", + self->n_output_samples); + + /* This isn't always set, so we might need to query manually */ + max_output_size = output_desc.mBytesPerPacket; + if (max_output_size == 0) { + prop_size = sizeof (max_output_size); + status = + AudioConverterGetProperty (self->converter, + kAudioConverterPropertyMaximumOutputPacketSize, &prop_size, + &max_output_size); + if (status != noErr) { + GST_ERROR_OBJECT (self, "Failed to get maximum output packet size: %d", + status); + return FALSE; + } + } + self->max_output_buffer_size = max_output_size; + 
GST_DEBUG_OBJECT (self, "maximum output buffer size: %d", + self->max_output_buffer_size); + + /* For AAC, AT usually asks for 1024 samples per packet, base class needs to know */ + gst_audio_encoder_set_frame_max (enc, 1); + gst_audio_encoder_set_frame_samples_min (enc, self->n_output_samples); + gst_audio_encoder_set_frame_samples_max (enc, self->n_output_samples); + gst_audio_encoder_set_drainable (enc, TRUE); + + /* FIXME: Handle lookahead according to kAudioConverterPrimeInfo.leadingFrames. + * When passed directly to gst_audio_encoder_set_lookahead, causes + * an audible skip in audio, and muxers such as mp4mux error out. + * To be investigated. */ + + status = + AudioConverterGetPropertyInfo (self->converter, + kAudioConverterCompressionMagicCookie, &prop_size, NULL); + if (status != noErr) { + GST_ERROR_OBJECT (self, "Failed to get magic cookie size: %d", status); + return FALSE; + } + + cookie_data = g_malloc (prop_size); + status = + AudioConverterGetProperty (self->converter, + kAudioConverterCompressionMagicCookie, &prop_size, cookie_data); + if (status != noErr) { + GST_ERROR_OBJECT (self, "Failed to get magic cookie: %d", status); + g_free (cookie_data); + return FALSE; + } + + /* Cookie contains a bunch of descriptors, gotta dig a bit to get the AudioSpecificConfig */ + gst_atenc_extract_audio_specific_config (cookie_data, prop_size, + &audio_config, &audio_config_size); + if (!audio_config) { + GST_ERROR_OBJECT (self, "Failed to extract AudioSpecificConfig"); + g_free (cookie_data); + return FALSE; + } + + asc_buf = gst_buffer_new_wrapped (audio_config, audio_config_size); + + src_caps = gst_caps_new_simple ("audio/mpeg", + "mpegversion", G_TYPE_INT, 4, + "rate", G_TYPE_INT, GST_AUDIO_INFO_RATE (info), + "channels", G_TYPE_INT, GST_AUDIO_INFO_CHANNELS (info), + "stream-format", G_TYPE_STRING, "raw", + "framed", G_TYPE_BOOLEAN, TRUE, + "codec_data", GST_TYPE_BUFFER, asc_buf, NULL); + + gst_codec_utils_aac_caps_set_level_and_profile (src_caps, 
audio_config, + audio_config_size); + gst_buffer_unref (asc_buf); + g_free (cookie_data); + + ret = gst_audio_encoder_set_output_format (enc, src_caps); + GST_DEBUG ("output caps: %" GST_PTR_FORMAT, src_caps); + gst_caps_unref (src_caps); + + return ret; +} + +static void +gst_atenc_init (GstATEnc * self) +{ + self->bitrate = DEFAULT_BITRATE; + self->rate_control = DEFAULT_RATE_CONTROL; + self->vbr_quality = DEFAULT_VBR_QUALITY; + self->input_eos = FALSE; + self->used_buffer = NULL; +} + +static void +gst_atenc_class_init (GstATEncClass * klass) +{ + GObjectClass *object_class = G_OBJECT_CLASS (klass); + GstElementClass *element_class = GST_ELEMENT_CLASS (klass); + GstAudioEncoderClass *base_class = GST_AUDIO_ENCODER_CLASS (klass); + + object_class->set_property = GST_DEBUG_FUNCPTR (gst_atenc_set_property); + object_class->get_property = GST_DEBUG_FUNCPTR (gst_atenc_get_property); + + base_class->start = GST_DEBUG_FUNCPTR (gst_atenc_start); + base_class->stop = GST_DEBUG_FUNCPTR (gst_atenc_stop); + base_class->getcaps = GST_DEBUG_FUNCPTR (gst_atenc_get_caps); + base_class->set_format = GST_DEBUG_FUNCPTR (gst_atenc_set_format); + base_class->handle_frame = GST_DEBUG_FUNCPTR (gst_atenc_handle_frame); + base_class->flush = GST_DEBUG_FUNCPTR (gst_atenc_flush); + + /** + * GstATEnc:bitrate: + * + * Target output bitrate in bps, for CBR and LTA rate control modes. + * + * Since: 1.26 + */ + g_object_class_install_property (object_class, PROP_BITRATE, + g_param_spec_uint ("bitrate", + "Bitrate", + "target output bitrate in bps (for rate-control=cbr/lta) (0 - auto)", + 0, G_MAXUINT32, DEFAULT_BITRATE, + G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)); + + /** + * GstATEnc:rate-control: + * + * Rate control mode to be applied by the encoder. + * CBR and LTA modes use the bitrate property, VBR uses the vbr-quality property. + * Constrained VBR determines the bitrate/quality automatically based on the input signal. 
+ * + * Since: 1.26 + */ + g_object_class_install_property (object_class, PROP_RATE_CONTROL, + g_param_spec_enum ("rate-control", + "Rate control", + "Mode of output bitrate control to be applied", + GST_ATENC_RATE_CONTROL, + DEFAULT_RATE_CONTROL, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)); + + /** + * GstATEnc:vbr-quality: + * + * Sound quality setting for VBR encoding. + * + * Since: 1.26 + */ + g_object_class_install_property (object_class, PROP_VBR_QUALITY, + g_param_spec_uint ("vbr-quality", + "VBR quality", + "Sound quality setting for VBR encoding (rate-control=vbr) (0-127)", + 0, 127, DEFAULT_VBR_QUALITY, + G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS)); + + gst_element_class_add_static_pad_template (element_class, &sink_template); + gst_element_class_add_static_pad_template (element_class, &src_template); + + gst_element_class_set_static_metadata (element_class, + "AudioToolbox audio encoder", "Coder/Encoder/Audio/Converter", + "AudioToolbox based audio encoder for macOS/iOS", + "Piotr Brzeziński "); + + GST_DEBUG_CATEGORY_INIT (gst_atenc_debug, "atenc", 0, + "AudioToolbox based audio encoder"); +} diff --git a/subprojects/gst-plugins-good/sys/osxaudio/gstatenc.h b/subprojects/gst-plugins-good/sys/osxaudio/gstatenc.h new file mode 100644 index 0000000000..6bfdd6fdc9 --- /dev/null +++ b/subprojects/gst-plugins-good/sys/osxaudio/gstatenc.h @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2024 Piotr Brzeziński + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. 
+ * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#ifndef _GST_ATENC_H_ +#define _GST_ATENC_H_ + +#include +#include +#include +#include +#include +#include +/* NOTE(review): the six #include directives above carry no header names; restore them from the upstream commit. */ +#include "gstosxcoreaudiocommon.h" + +G_BEGIN_DECLS +#define GST_TYPE_ATENC (gst_atenc_get_type()) +#define GST_ATENC(obj) (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_ATENC,GstATEnc)) +#define GST_ATENC_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_ATENC,GstATEncClass)) +#define GST_IS_ATENC(obj) (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_ATENC)) +#define GST_IS_ATENC_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_ATENC)) +typedef struct _GstATEnc GstATEnc; +typedef struct _GstATEncClass GstATEncClass; + +/** + * GstATEncRateControl: + * @GST_ATENC_RATE_CONTROL_CONSTANT: Constant bitrate + * @GST_ATENC_RATE_CONTROL_LONG_TERM_AVERAGE: Long-term-average bitrate + * @GST_ATENC_RATE_CONTROL_VARIABLE_CONSTRAINED: Variable constrained bitrate + * @GST_ATENC_RATE_CONTROL_VARIABLE: Variable bitrate + * + * Since: 1.26 + */ +typedef enum +{ + GST_ATENC_RATE_CONTROL_CONSTANT = 0, + GST_ATENC_RATE_CONTROL_LONG_TERM_AVERAGE = 1, + GST_ATENC_RATE_CONTROL_VARIABLE_CONSTRAINED = 2, + GST_ATENC_RATE_CONTROL_VARIABLE = 3, +} GstATEncRateControl; +/* One channel layout entry: channel count, AudioToolbox layout tag and the channel positions (at most 8). */ +typedef struct +{ + gint channels; + AudioChannelLayoutTag aac_tag; + GstAudioChannelPosition positions[8]; +} GstATEncLayout; +/* Instance structure of the atenc element. */ +struct _GstATEnc +{ + GstAudioEncoder encoder; + AudioConverterRef converter; + UInt32 max_output_buffer_size; + UInt32 n_output_samples; + GstQueueArray *input_queue; + GstAudioBuffer *used_buffer; + gboolean input_eos; + /* Encoder settings, initialised to the DEFAULT_* values in gst_atenc_init(). */ + GstATEncRateControl rate_control; + guint32 bitrate; + guint32 vbr_quality; +}; +/* Class structure; adds no members beyond GstAudioEncoderClass. */ +struct _GstATEncClass +{ + GstAudioEncoderClass encoder_class; +}; + +GType gst_atenc_get_type (void); + +GST_ELEMENT_REGISTER_DECLARE (atenc); + 
+G_END_DECLS +#endif diff --git a/subprojects/gst-plugins-good/sys/osxaudio/gstosxaudio.c b/subprojects/gst-plugins-good/sys/osxaudio/gstosxaudio.c index d47d3d1fc4..2b61ac1920 100644 --- a/subprojects/gst-plugins-good/sys/osxaudio/gstosxaudio.c +++ b/subprojects/gst-plugins-good/sys/osxaudio/gstosxaudio.c @@ -31,6 +31,7 @@ #include "gstosxaudiosink.h" #include "gstosxaudiosrc.h" #include "gstatdec.h" +#include "gstatenc.h" #ifndef HAVE_IOS #include "gstosxaudiodeviceprovider.h" #endif @@ -43,6 +44,7 @@ plugin_init (GstPlugin * plugin) ret |= GST_ELEMENT_REGISTER (osxaudiosrc, plugin); ret |= GST_ELEMENT_REGISTER (osxaudiosink, plugin); ret |= GST_ELEMENT_REGISTER (atdec, plugin); + ret |= GST_ELEMENT_REGISTER (atenc, plugin); #ifndef HAVE_IOS ret |= GST_DEVICE_PROVIDER_REGISTER (osxaudiodeviceprovider, plugin); #endif diff --git a/subprojects/gst-plugins-good/sys/osxaudio/gstosxcoreaudiocommon.h b/subprojects/gst-plugins-good/sys/osxaudio/gstosxcoreaudiocommon.h index c4602a6b30..eb4fda8ed1 100644 --- a/subprojects/gst-plugins-good/sys/osxaudio/gstosxcoreaudiocommon.h +++ b/subprojects/gst-plugins-good/sys/osxaudio/gstosxcoreaudiocommon.h @@ -21,6 +21,8 @@ * */ +#pragma once + #include "gstosxcoreaudio.h" #include diff --git a/subprojects/gst-plugins-good/sys/osxaudio/meson.build b/subprojects/gst-plugins-good/sys/osxaudio/meson.build index 0f91f0f80d..6e9a213a69 100644 --- a/subprojects/gst-plugins-good/sys/osxaudio/meson.build +++ b/subprojects/gst-plugins-good/sys/osxaudio/meson.build @@ -7,6 +7,7 @@ osxaudio_sources = [ 'gstosxcoreaudio.c', 'gstosxaudio.c', 'gstatdec.c', + 'gstatenc.c', ] have_osxaudio = false @@ -38,7 +39,7 @@ if have_osxaudio osxaudio_sources, c_args : gst_plugins_good_args, include_directories : [configinc, libsinc], - dependencies : [gstaudio_dep, osxaudio_dep], + dependencies : [gstaudio_dep, gstpbutils_dep, osxaudio_dep], install : true, install_dir : plugins_install_dir) plugins += [gstosxaudio]