Question
I'm trying to record segments of audio and recombine them without producing a gap in audio.
The eventual goal is to also have video, but I've found that even audio on its own produces gaps when the segments are combined with ffmpeg -f concat -i list.txt -c copy out.mp4.
If I put the audio in an HLS playlist, there are also gaps, so I don't think this is unique to ffmpeg.
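For reference, list.txt is the standard concat-demuxer text file; the segment names below are just illustrative, not my actual file names:

    # list.txt (ffmpeg concat demuxer format)
    file 'segment-000.mp4'
    file 'segment-001.mp4'
    file 'segment-002.mp4'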
The idea is that samples come in continuously, and my controller routes samples to the proper AVAssetWriter. How do I eliminate gaps in audio?
import Foundation
import UIKit
import AVFoundation

class StreamController: UIViewController, AVCaptureAudioDataOutputSampleBufferDelegate, AVCaptureVideoDataOutputSampleBufferDelegate {
    var closingAudioInput: AVAssetWriterInput?
    var closingAssetWriter: AVAssetWriter?

    var currentAudioInput: AVAssetWriterInput?
    var currentAssetWriter: AVAssetWriter?

    var nextAudioInput: AVAssetWriterInput?
    var nextAssetWriter: AVAssetWriter?

    var videoHelper: VideoHelper?

    var startTime: NSTimeInterval = 0
    let closeAssetQueue: dispatch_queue_t = dispatch_queue_create("closeAssetQueue", nil)

    override func viewDidLoad() {
        super.viewDidLoad()
        startTime = NSDate().timeIntervalSince1970
        createSegmentWriter()
        videoHelper = VideoHelper()
        videoHelper!.delegate = self
        videoHelper!.startSession()
        NSTimer.scheduledTimerWithTimeInterval(1, target: self, selector: "createSegmentWriter", userInfo: nil, repeats: true)
    }

    func createSegmentWriter() {
        print("Creating segment writer at t=\(NSDate().timeIntervalSince1970 - self.startTime)")
        let outputPath = OutputFileNameHelper.instance.pathForOutput()
        OutputFileNameHelper.instance.incrementSegmentIndex()
        try? NSFileManager.defaultManager().removeItemAtPath(outputPath)

        nextAssetWriter = try! AVAssetWriter(URL: NSURL(fileURLWithPath: outputPath), fileType: AVFileTypeMPEG4)
        nextAssetWriter!.shouldOptimizeForNetworkUse = true

        let audioSettings: [String: AnyObject] = EncodingSettings.AUDIO
        nextAudioInput = AVAssetWriterInput(mediaType: AVMediaTypeAudio, outputSettings: audioSettings)
        nextAudioInput!.expectsMediaDataInRealTime = true
        nextAssetWriter?.addInput(nextAudioInput!)

        nextAssetWriter!.startWriting()
    }

    func closeWriterIfNecessary() {
        if closing && audioFinished {
            closing = false
            audioFinished = false
            let outputFile = closingAssetWriter?.outputURL.pathComponents?.last
            closingAssetWriter?.finishWritingWithCompletionHandler() {
                let delta = NSDate().timeIntervalSince1970 - self.startTime
                print("segment \(outputFile!) finished at t=\(delta)")
            }
            self.closingAudioInput = nil
            self.closingAssetWriter = nil
        }
    }

    var audioFinished = false
    var closing = false

    func captureOutput(captureOutput: AVCaptureOutput!, didOutputSampleBuffer sampleBuffer: CMSampleBufferRef, fromConnection connection: AVCaptureConnection!) {
        if let nextWriter = nextAssetWriter {
            if nextWriter.status.rawValue != 0 {
                if (currentAssetWriter != nil) {
                    closing = true
                }

                var sampleTiming: CMSampleTimingInfo = kCMTimingInfoInvalid
                CMSampleBufferGetSampleTimingInfo(sampleBuffer, 0, &sampleTiming)

                print("Switching asset writers at t=\(NSDate().timeIntervalSince1970 - self.startTime)")
                closingAssetWriter = currentAssetWriter
                closingAudioInput = currentAudioInput

                currentAssetWriter = nextAssetWriter
                currentAudioInput = nextAudioInput

                nextAssetWriter = nil
                nextAudioInput = nil

                currentAssetWriter?.startSessionAtSourceTime(sampleTiming.presentationTimeStamp)
            }
        }

        if let _ = captureOutput as? AVCaptureVideoDataOutput {
        } else if let _ = captureOutput as? AVCaptureAudioDataOutput {
            captureAudioSample(sampleBuffer)
        }

        dispatch_async(closeAssetQueue) {
            self.closeWriterIfNecessary()
        }
    }

    func printTimingInfo(sampleBuffer: CMSampleBufferRef, prefix: String) {
        var sampleTiming: CMSampleTimingInfo = kCMTimingInfoInvalid
        CMSampleBufferGetSampleTimingInfo(sampleBuffer, 0, &sampleTiming)
        let presentationTime = Double(sampleTiming.presentationTimeStamp.value) / Double(sampleTiming.presentationTimeStamp.timescale)
        print("\(prefix):\(presentationTime)")
    }

    func captureAudioSample(sampleBuffer: CMSampleBufferRef) {
        printTimingInfo(sampleBuffer, prefix: "A")

        if (closing && !audioFinished) {
            if closingAudioInput?.readyForMoreMediaData == true {
                closingAudioInput?.appendSampleBuffer(sampleBuffer)
            }
            closingAudioInput?.markAsFinished()
            audioFinished = true
        } else {
            if currentAudioInput?.readyForMoreMediaData == true {
                currentAudioInput?.appendSampleBuffer(sampleBuffer)
            }
        }
    }
}
Answer 1:
With packet formats like AAC you have silent priming frames (a.k.a. encoder delay) at the beginning and remainder frames at the end (when your audio length is not a multiple of the packet size). In your case there are 2112 priming frames at the beginning of every file. Priming and remainder frames break the possibility of concatenating the files without transcoding them, so you can't really blame ffmpeg -c copy for not producing seamless output.
I'm not sure where this leaves you with video, since the audio obviously has to stay in sync with the video even in the presence of priming frames.
It all depends on how you intend to concatenate the final audio (and eventually video). If you're doing it yourself using AVFoundation, then you can detect and account for priming/remainder frames using

    CMGetAttachment(buffer, kCMSampleBufferAttachmentKey_TrimDurationAtStart, NULL)
    CMGetAttachment(audioBuffer, kCMSampleBufferAttachmentKey_TrimDurationAtEnd, NULL)
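For example, a minimal sketch of reading those attachments from an audio sample buffer, in the Swift 2 style of the question's code (the helper name and the CFDictionary cast are my assumptions, not something from the original answer):

    // Sketch: read the encoder trim attachments, if present, from a sample buffer.
    func trimDurations(buffer: CMSampleBufferRef) -> (start: CMTime, end: CMTime) {
        // When present, each attachment is a CFDictionary-encoded CMTime.
        var start = kCMTimeZero
        var end = kCMTimeZero
        if let dict = CMGetAttachment(buffer, kCMSampleBufferAttachmentKey_TrimDurationAtStart, nil) {
            start = CMTimeMakeFromDictionary(dict as! CFDictionary)
        }
        if let dict = CMGetAttachment(buffer, kCMSampleBufferAttachmentKey_TrimDurationAtEnd, nil) {
            end = CMTimeMakeFromDictionary(dict as! CFDictionary)
        }
        return (start, end)
    }

You could then trim those durations off the corresponding ends of each segment when stitching them back together.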
As a short term solution, you can switch to a non-"packetised" format to get gapless files that are concatenatable (with ffmpeg), e.g.

    AVFormatIDKey: kAudioFormatAppleIMA4, fileType: AVFileTypeAIFC, suffix ".aifc"

or

    AVFormatIDKey: kAudioFormatLinearPCM, fileType: AVFileTypeWAVE, suffix ".wav"
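For illustration, a linear PCM settings dictionary of the kind the question's EncodingSettings.AUDIO could be swapped to, paired with AVFileTypeWAVE on the writer. The exact keys and values below are an assumption, not taken from the question:

    // Hypothetical stand-in for EncodingSettings.AUDIO when writing ".wav" segments.
    let pcmSettings: [String: AnyObject] = [
        AVFormatIDKey: NSNumber(unsignedInt: kAudioFormatLinearPCM),
        AVSampleRateKey: 44100,
        AVNumberOfChannelsKey: 2,
        AVLinearPCMBitDepthKey: 16,
        AVLinearPCMIsFloatKey: false,
        AVLinearPCMIsBigEndianKey: false,
        AVLinearPCMIsNonInterleaved: false
    ]

    // The matching writer would then use the WAVE container instead of MPEG-4.
    let outputPath = NSTemporaryDirectory() + "segment-0.wav"
    let writer = try! AVAssetWriter(URL: NSURL(fileURLWithPath: outputPath), fileType: AVFileTypeWAVE)
    let input = AVAssetWriterInput(mediaType: AVMediaTypeAudio, outputSettings: pcmSettings)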
p.s. you can see priming & remainder frames and packet sizes using the ubiquitous afinfo tool:
    afinfo chunk.mp4
    Data format: 2 ch, 44100 Hz, 'aac ' (0x00000000) 0 bits/channel, 0 bytes/packet, 1024 frames/packet, 0 bytes/frame
    ...
    audio 39596 valid frames + 2112 priming + 276 remainder = 41984
    ...
Answer 2:
Not sure if this helps you, but if you have a bunch of MP4s you can use this code to combine them:
func mergeAudioFiles(audioFileUrls: NSArray, callback: (url: NSURL?, error: NSError?) -> ()) {
    // Create the audio composition
    let composition = AVMutableComposition()

    // Merge: append each file's audio track at the current end of the composition
    for i in 0 ..< audioFileUrls.count {
        let compositionAudioTrack: AVMutableCompositionTrack = composition.addMutableTrackWithMediaType(AVMediaTypeAudio, preferredTrackID: CMPersistentTrackID())
        let asset = AVURLAsset(URL: audioFileUrls[i] as! NSURL)
        let track = asset.tracksWithMediaType(AVMediaTypeAudio)[0]
        let timeRange = CMTimeRange(start: CMTimeMake(0, 600), duration: track.timeRange.duration)
        try! compositionAudioTrack.insertTimeRange(timeRange, ofTrack: track, atTime: composition.duration)
    }

    // Create output url
    let format = NSDateFormatter()
    format.dateFormat = "yyyy-MM-dd-HH-mm-ss"
    let currentFileName = "recording-\(format.stringFromDate(NSDate()))-merge.m4a"
    print(currentFileName)
    let documentsDirectory = NSFileManager.defaultManager().URLsForDirectory(.DocumentDirectory, inDomains: .UserDomainMask)[0]
    let outputUrl = documentsDirectory.URLByAppendingPathComponent(currentFileName)
    print(outputUrl.absoluteString)

    // Export it
    let assetExport = AVAssetExportSession(asset: composition, presetName: AVAssetExportPresetAppleM4A)
    assetExport?.outputFileType = AVFileTypeAppleM4A
    assetExport?.outputURL = outputUrl
    assetExport?.exportAsynchronouslyWithCompletionHandler({ () -> Void in
        switch assetExport!.status {
        case AVAssetExportSessionStatus.Failed:
            callback(url: nil, error: assetExport?.error)
        default:
            callback(url: assetExport?.outputURL, error: nil)
        }
    })
}
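A possible call site, assuming the segment files already exist on disk (the URLs here are placeholders, not from the original answer):

    let segmentUrls: NSArray = [
        NSURL(fileURLWithPath: NSTemporaryDirectory() + "seg-000.m4a"),
        NSURL(fileURLWithPath: NSTemporaryDirectory() + "seg-001.m4a")
    ]
    mergeAudioFiles(segmentUrls) { url, error in
        if let error = error {
            print("Merge failed: \(error)")
        } else if let url = url {
            print("Merged file written to \(url)")
        }
    }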
Source: https://stackoverflow.com/questions/33903737/recording-gapless-audio-with-avassetwriter