Ree's Movements

Moving all the time, capture it.

iOS 中采集aac 格式的音频

通过AVFoundation 采集音频

在iOS 开发中,最简单的采集手机音频的方式是使用AVFoundation:

import AVFoundation

/// Configures the capture session for audio capture and starts it running.
///
/// Wraps `audioDevice` in a capture input, attaches an
/// `AVCaptureAudioDataOutput` whose sample buffers are delivered to `self` on
/// the main queue, and then starts `captureSession`. If the input cannot be
/// created, or the session refuses the input or the output, the function logs
/// the problem and returns without starting the session.
func start() {
    // Creating the input throws when the device cannot be opened; report the
    // error instead of crashing the process with `try!`.
    let input: AVCaptureDeviceInput
    do {
        input = try AVCaptureDeviceInput(device: audioDevice)
    } catch {
        print("Failed to create audio capture input: \(error)")
        return
    }
    audioInput = input

    // `addInput` raises an exception when the session cannot accept the
    // input, so ask first.
    guard captureSession.canAddInput(input) else {
        print("Capture session cannot accept the audio input")
        return
    }
    captureSession.addInput(input)

    let audioOutput = AVCaptureAudioDataOutput()
    audioOutput.setSampleBufferDelegate(self, queue: .main)
    // Same precondition as for the input: `addOutput` must only be called
    // when the session reports that it can take the output.
    guard captureSession.canAddOutput(audioOutput) else {
        print("Capture session cannot accept the audio data output")
        return
    }
    captureSession.addOutput(audioOutput)

    captureSession.startRunning()
}

/// Sample-buffer callback for the audio data output.
///
/// The body is intentionally empty in this snippet; it only marks the place
/// where each captured `sampleBuffer` arrives.
///
/// - Parameters:
///   - output: The capture output that produced the buffer.
///   - sampleBuffer: The captured sample buffer.
///   - connection: The connection the buffer was received on.
func captureOutput(
    _ output: AVCaptureOutput,
    didOutput sampleBuffer: CMSampleBuffer,
    from connection: AVCaptureConnection
) {
}

这样就拿到了 PCM 格式的音频数据,然后可以通过 Audio Converter Services 对 PCM 数据进行编码,进而得到 AAC 格式的音频。

通过AudioQueue 采集 AAC 格式的音频

通过上述方式似乎有些麻烦,幸好Core Audio 还提供了一个专门用来录制和播放音频的服务 Audio Queue Services。

Audio Queue Services的架构非常简单,见下图:

A recording audio queue

使用Audio Queue 不仅可以轻松地采集到PCM 格式的音频,还可以采集编码过的音频数据。其内部还对几种codec 进行了封装,其中就包括aac 格式。在实现过程中也只需要设置相应的AudioStreamBasicDescription 即可,Audio Queue Service 会自动去选择相应的codec 并将采集到的PCM 数据进行编码,真正的开箱即用!

Audio format conversion during recording

具体代码实现如下:

/// Audio queue input callback: forwards one filled buffer to the recorder's
/// delegate and hands the buffer back to the queue.
///
/// - Parameters:
///   - aqData: User-data pointer supplied when the queue was created; it is
///     interpreted as a pointer to an `AudioRecorder`.
///   - inAQ: The audio queue that invoked the callback.
///   - inBuffer: The buffer holding the newly captured audio data.
///   - inStartTime: Timestamp of the first sample in the buffer; its host time
///     is converted to a `CMTime` and passed on as the presentation timestamp.
///   - inNumPacket: Number of packets in the buffer (unused here).
///   - inPacketDesc: Packet descriptions for the buffer (unused here).
func handleInputBuffer(aqData: UnsafeMutableRawPointer?,
                       inAQ: AudioQueueRef,
                       inBuffer: AudioQueueBufferRef,
                       inStartTime: UnsafePointer<AudioTimeStamp>,
                       inNumPacket: UInt32,
                       inPacketDesc: UnsafePointer<AudioStreamPacketDescription>?)
    -> Void {
    // Once the data in inBuffer has been handled, put the buffer back into the
    // buffer queue. `defer` makes this happen on every exit path, including
    // the early return below, so the queue never runs out of buffers.
    defer {
        AudioQueueEnqueueBuffer(inAQ, inBuffer, 0, nil)
    }

    // Without a recorder there is nobody to deliver the data to, so skip the
    // copy entirely.
    guard let recorder = aqData?.assumingMemoryBound(to: AudioRecorder.self) else {
        return
    }

    let pts = CMClockMakeHostTimeFromSystemUnits(inStartTime.pointee.mHostTime)
    // Copy the bytes out of the queue-owned buffer before it is re-enqueued.
    let data = Data(bytes: inBuffer.pointee.mAudioData,
                    count: Int(inBuffer.pointee.mAudioDataByteSize))
    recorder.pointee.delegate?.handleAudioPacket(data: data, pts: pts)
}

/// Creates an AAC recording audio queue, primes it with buffers and starts it.
///
/// The steps are: describe the desired AAC output format, create the input
/// queue with `handleInputBuffer` as its callback, read back the completed
/// stream description, allocate and enqueue `kNumberBuffers` buffers sized to
/// the maximum output packet size, fetch the magic cookie, and start the queue.
///
/// - Returns: `true` when every Audio Queue call reported `noErr` and the
///   queue was started; `false` as soon as any step fails. On failure the
///   queue created by this call is disposed and the queue/buffer references
///   stored on `recorder` are cleared.
mutating func start() -> Bool {
    // 1. Build the AudioStreamBasicDescription for the desired output format.
    var inFormat = AudioStreamBasicDescription()
    inFormat.mFormatID = kAudioFormatMPEG4AAC
    inFormat.mFormatFlags = AudioFormatFlags(MPEG4ObjectID.AAC_LC.rawValue)
    inFormat.mSampleRate = 48000
    inFormat.mFramesPerPacket = 1024
    inFormat.mChannelsPerFrame = 2

    // 2. Create the recording audio queue.
    // NOTE(review): `&recorder` is an inout-to-pointer conversion; Swift only
    // guarantees such a pointer for the duration of the call, while the queue
    // keeps it as callback user data. Confirm that `recorder` has stable
    // storage, or pass an explicitly managed pointer instead.
    var newQueue: AudioQueueRef?
    var result: OSStatus = AudioQueueNewInput(&inFormat, handleInputBuffer, &recorder,
                                              nil, CFRunLoopMode.commonModes.rawValue, 0, &newQueue)
    guard result == noErr, let queue = newQueue else {
        return false
    }
    recorder.mQueue = queue

    // From here on the queue exists, so every failure path has to release it;
    // otherwise the queue and its buffers leak and `recorder` keeps pointing
    // at a half-configured queue.
    var didStart = false
    defer {
        if !didStart {
            // Disposing the queue also disposes the buffers allocated on it.
            AudioQueueDispose(queue, true)
            recorder.mBuffers.removeAll()
            recorder.mQueue = nil
        }
    }

    // 3. AudioQueueNewInput fills in some members of the description
    //    internally, so read the completed description back from the queue.
    var propertySize = UInt32(MemoryLayout.size(ofValue: inFormat))
    result = AudioQueueGetProperty(queue,
                                   kAudioQueueProperty_StreamDescription,
                                   &inFormat, &propertySize)
    guard result == noErr else {
        return false
    }
    recorder.mDataFormat = inFormat

    // 4. Create the audio queue buffers and add them to the buffer queue.
    var maxPacketSize: UInt32 = 0
    propertySize = UInt32(MemoryLayout.size(ofValue: maxPacketSize))
    // This call fails with -50 when run in an iOS playground.
    result = AudioQueueGetProperty(queue,
                                   kAudioQueueProperty_MaximumOutputPacketSize,
                                   &maxPacketSize, &propertySize)
    guard result == noErr else {
        return false
    }

    for _ in 0..<kNumberBuffers {
        var allocated: AudioQueueBufferRef?
        result = AudioQueueAllocateBuffer(queue, maxPacketSize, &allocated)
        // Bind the buffer instead of force-unwrapping it.
        guard result == noErr, let buffer = allocated else {
            return false
        }
        result = AudioQueueEnqueueBuffer(queue, buffer, 0, nil)
        guard result == noErr else {
            return false
        }
        recorder.mBuffers.append(buffer)
    }

    // 5. Fetch the magic cookie.
    // NOTE(review): the cookie is read here but not stored anywhere within
    // this function; keep it if later code needs the codec configuration.
    result = AudioQueueGetPropertySize(queue,
                                       kAudioQueueProperty_MagicCookie,
                                       &propertySize)
    guard result == noErr else {
        return false
    }

    var magicCookie = Data(count: Int(propertySize))
    result = magicCookie.withUnsafeMutableBytes { (magicCookiePtr: UnsafeMutablePointer<UInt8>) -> OSStatus in
        let rawPtr = UnsafeMutableRawPointer(magicCookiePtr)
        return AudioQueueGetProperty(queue,
                                     kAudioQueueProperty_MagicCookie,
                                     rawPtr, &propertySize)
    }
    guard result == noErr else {
        return false
    }

    // The delegate must be in place before the queue starts delivering buffers.
    recorder.delegate = self

    // 6. Start the audio queue.
    result = AudioQueueStart(queue, nil)
    guard result == noErr else {
        return false
    }

    didStart = true
    return true
}

在 iOS 中,需要先配置 AVAudioSession,否则在调用AudioQueueStart 的时候会返回 -50 错误。

// Configure the shared audio session for recording before starting the audio queue.
let session = AVAudioSession.sharedInstance()
do {
    try session.setCategory(AVAudioSessionCategoryRecord)
    try session.setActive(true)
} catch {
    // Configuring or activating the session can fail at runtime, so report
    // the error instead of crashing with `try!`. Recording should not be
    // started when this happens.
    print("Failed to configure AVAudioSession for recording: \(error)")
}

当通过 ffmpeg 将 AAC 数据写入 mp4 或者 rtmp 流的时候,需要在 AVCodecContext 的 extradata 变量中写入音频的 Audio Specific Config。

我们可以非常简单地通过其公式来计算出来:

5 bits: object type
if (object type == 31)
    6 bits + 32: object type
4 bits: frequency index
if (frequency index == 15)
    24 bits: frequency
4 bits: channel configuration
var bits: AOT Specific Config

然而在使用 AudioQueue 的时候,虽然 mDataFormat 的 mFormatFlags 一开始被初始化为 MPEG4ObjectID.AAC_LC,但是在通过 AudioQueueGetProperty 获取 kAudioQueueProperty_StreamDescription 之后,这个 flags 变量被置为 0,因此我们并不能确保通过计算得到的结果是正确的。翻了一下 ffmpeg 的源码,在 audiotoolboxenc.c 文件中,是通过如下方式解析 magicCookie 来得到 Audio Specific Config 的。

/*
 * Read one descriptor header from gb.
 *
 * The first byte is stored in *tag. The following bytes (at most four) each
 * contribute their low 7 bits to the length, most significant group first;
 * a byte whose top bit is clear ends the length field.
 *
 * Returns the decoded length.
 */
static int read_descr(GetByteContext *gb, int *tag)
{
    int length = 0;
    int i;

    *tag = bytestream2_get_byte(gb);
    for (i = 0; i < 4; i++) {
        int byte = bytestream2_get_byte(gb);

        length = (length << 7) | (byte & 0x7f);
        if ((byte & 0x80) == 0)
            break;
    }
    return length;
}

/*
 * Walk the descriptors stored in the magic cookie and extract the payload of
 * the decoder-specific descriptor into extradata / avctx->extradata_size.
 *
 * NOTE(review): extradata points at the cookie's own bytes and is written by
 * the memmove below; confirm that this storage is writable, or copy the
 * cookie into a mutable buffer first.
 */
extradata = magicCookie.bytes;
extradata_size = magicCookie.length;
GetByteContext gb;
int tag, len;
bytestream2_init(&gb, extradata, extradata_size);
do {
    /* Read the next descriptor header (tag + variable-length size). */
    len = read_descr(&gb, &tag);
    if (tag == MP4DecConfigDescrTag) {
        /* Skip 13 bytes; presumably the fixed-size fields of the decoder
         * config descriptor -- TODO confirm against the MPEG-4 Systems spec. */
        bytestream2_skip(&gb, 13);
        len = read_descr(&gb, &tag);
        if (tag == MP4DecSpecificDescrTag) {
            /* Clamp to the bytes actually left in the buffer, then move the
             * payload to the front of extradata and record its size. */
            len = FFMIN(gb.buffer_end - gb.buffer, len);
            memmove(extradata, gb.buffer, len);
            avctx->extradata_size = len;
            break;
        }
    } else if (tag == MP4ESDescrTag) {
        int flags;
        /* Skip 2 bytes, then read the flags byte that says which optional
         * fields follow. */
        bytestream2_skip(&gb, 2);
        flags = bytestream2_get_byte(&gb);
        if (flags & 0x80) //streamDependenceFlag
            bytestream2_skip(&gb, 2);
        if (flags & 0x40) //URL_Flag
            /* The next byte is a length; skip that many bytes. */
            bytestream2_skip(&gb, bytestream2_get_byte(&gb));
        if (flags & 0x20) //OCRstreamFlag
            bytestream2_skip(&gb, 2);
    }
    /* Any other tag: just continue with the next header while bytes remain. */
} while (bytestream2_get_bytes_left(&gb));

参见: