Question
I'm trying to record segments of audio and recombine them without producing a gap in audio.
The eventual goal is to also have video, but I've found that even audio on its own produces gaps when the segments are combined with ffmpeg -f concat -i list.txt -c copy out.mp4.
If I put the audio in an HLS playlist, there are also gaps, so I don't think this is unique to ffmpeg.
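For reference, list.txt is the standard concat-demuxer text file; the segment names below are just illustrative, not my actual file names:

    # list.txt (ffmpeg concat demuxer format)
    file 'segment-000.mp4'
    file 'segment-001.mp4'
    file 'segment-002.mp4'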
The idea is that samples come in continuously, and my controller routes samples to the proper AVAssetWriter. How do I eliminate gaps in audio?
import Foundation
import UIKit
import AVFoundation

class StreamController: UIViewController, AVCaptureAudioDataOutputSampleBufferDelegate, AVCaptureVideoDataOutputSampleBufferDelegate {
    var closingAudioInput: AVAssetWriterInput?
    var closingAssetWriter: AVAssetWriter?

    var currentAudioInput: AVAssetWriterInput?
    var currentAssetWriter: AVAssetWriter?

    var nextAudioInput: AVAssetWriterInput?
    var nextAssetWriter: AVAssetWriter?

    var videoHelper: VideoHelper?

    var startTime: NSTimeInterval = 0
    let closeAssetQueue: dispatch_queue_t = dispatch_queue_create("closeAssetQueue", nil)

    override func viewDidLoad() {
        super.viewDidLoad()
        startTime = NSDate().timeIntervalSince1970
        createSegmentWriter()
        videoHelper = VideoHelper()
        videoHelper!.delegate = self
        videoHelper!.startSession()
        NSTimer.scheduledTimerWithTimeInterval(1, target: self, selector: "createSegmentWriter", userInfo: nil, repeats: true)
    }

    func createSegmentWriter() {
        print("Creating segment writer at t=\(NSDate().timeIntervalSince1970 - self.startTime)")
        let outputPath = OutputFileNameHelper.instance.pathForOutput()
        OutputFileNameHelper.instance.incrementSegmentIndex()
        try? NSFileManager.defaultManager().removeItemAtPath(outputPath)

        nextAssetWriter = try! AVAssetWriter(URL: NSURL(fileURLWithPath: outputPath), fileType: AVFileTypeMPEG4)
        nextAssetWriter!.shouldOptimizeForNetworkUse = true

        let audioSettings: [String: AnyObject] = EncodingSettings.AUDIO
        nextAudioInput = AVAssetWriterInput(mediaType: AVMediaTypeAudio, outputSettings: audioSettings)
        nextAudioInput!.expectsMediaDataInRealTime = true
        nextAssetWriter?.addInput(nextAudioInput!)

        nextAssetWriter!.startWriting()
    }

    func closeWriterIfNecessary() {
        if closing && audioFinished {
            closing = false
            audioFinished = false
            let outputFile = closingAssetWriter?.outputURL.pathComponents?.last
            closingAssetWriter?.finishWritingWithCompletionHandler() {
                let delta = NSDate().timeIntervalSince1970 - self.startTime
                print("segment \(outputFile!) finished at t=\(delta)")
            }
            self.closingAudioInput = nil
            self.closingAssetWriter = nil
        }
    }

    var audioFinished = false
    var closing = false

    func captureOutput(captureOutput: AVCaptureOutput!, didOutputSampleBuffer sampleBuffer: CMSampleBufferRef, fromConnection connection: AVCaptureConnection!) {
        if let nextWriter = nextAssetWriter {
            if nextWriter.status.rawValue != 0 {
                if (currentAssetWriter != nil) {
                    closing = true
                }

                var sampleTiming: CMSampleTimingInfo = kCMTimingInfoInvalid
                CMSampleBufferGetSampleTimingInfo(sampleBuffer, 0, &sampleTiming)

                print("Switching asset writers at t=\(NSDate().timeIntervalSince1970 - self.startTime)")
                closingAssetWriter = currentAssetWriter
                closingAudioInput = currentAudioInput

                currentAssetWriter = nextAssetWriter
                currentAudioInput = nextAudioInput

                nextAssetWriter = nil
                nextAudioInput = nil

                currentAssetWriter?.startSessionAtSourceTime(sampleTiming.presentationTimeStamp)
            }
        }

        if let _ = captureOutput as? AVCaptureVideoDataOutput {
        } else if let _ = captureOutput as? AVCaptureAudioDataOutput {
            captureAudioSample(sampleBuffer)
        }

        dispatch_async(closeAssetQueue) {
            self.closeWriterIfNecessary()
        }
    }

    func printTimingInfo(sampleBuffer: CMSampleBufferRef, prefix: String) {
        var sampleTiming: CMSampleTimingInfo = kCMTimingInfoInvalid
        CMSampleBufferGetSampleTimingInfo(sampleBuffer, 0, &sampleTiming)
        let presentationTime = Double(sampleTiming.presentationTimeStamp.value) / Double(sampleTiming.presentationTimeStamp.timescale)
        print("\(prefix):\(presentationTime)")
    }

    func captureAudioSample(sampleBuffer: CMSampleBufferRef) {
        printTimingInfo(sampleBuffer, prefix: "A")

        if (closing && !audioFinished) {
            if closingAudioInput?.readyForMoreMediaData == true {
                closingAudioInput?.appendSampleBuffer(sampleBuffer)
            }
            closingAudioInput?.markAsFinished()
            audioFinished = true
        } else {
            if currentAudioInput?.readyForMoreMediaData == true {
                currentAudioInput?.appendSampleBuffer(sampleBuffer)
            }
        }
    }
}
Answer 1:
With packet formats like AAC you have silent priming frames (a.k.a. encoder delay) at the beginning and remainder frames at the end (when your audio length is not a multiple of the packet size). In your case there are 2112 priming frames at the beginning of every file. Priming and remainder frames break the possibility of concatenating the files without transcoding them, so you can't really blame ffmpeg -c copy for not producing seamless output.
I'm not sure where this leaves you with video, since the audio obviously has to stay in sync with the video even in the presence of priming frames.
It all depends on how you intend to concatenate the final audio (and eventually video). If you're doing it yourself using AVFoundation, then you can detect and account for priming/remainder frames using

    CMGetAttachment(buffer, kCMSampleBufferAttachmentKey_TrimDurationAtStart, NULL)
    CMGetAttachment(audioBuffer, kCMSampleBufferAttachmentKey_TrimDurationAtEnd, NULL)
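For example, a minimal sketch of reading those attachments from an audio sample buffer, in the Swift 2 style of the question's code (the helper name and the CFDictionary cast are my assumptions, not something from the original answer):

    // Sketch: read the encoder trim attachments, if present, from a sample buffer.
    func trimDurations(buffer: CMSampleBufferRef) -> (start: CMTime, end: CMTime) {
        // When present, each attachment is a CFDictionary-encoded CMTime.
        var start = kCMTimeZero
        var end = kCMTimeZero
        if let dict = CMGetAttachment(buffer, kCMSampleBufferAttachmentKey_TrimDurationAtStart, nil) {
            start = CMTimeMakeFromDictionary(dict as! CFDictionary)
        }
        if let dict = CMGetAttachment(buffer, kCMSampleBufferAttachmentKey_TrimDurationAtEnd, nil) {
            end = CMTimeMakeFromDictionary(dict as! CFDictionary)
        }
        return (start, end)
    }

You could then trim those durations off the corresponding ends of each segment when stitching them back together.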
As a short term solution, you can switch to a non-"packetised" format to get gapless files that are concatenatable (with ffmpeg), e.g.

    AVFormatIDKey: kAudioFormatAppleIMA4, fileType: AVFileTypeAIFC, suffix ".aifc"

or

    AVFormatIDKey: kAudioFormatLinearPCM, fileType: AVFileTypeWAVE, suffix ".wav"
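For illustration, a linear PCM settings dictionary of the kind the question's EncodingSettings.AUDIO could be swapped to, paired with AVFileTypeWAVE on the writer. The exact keys and values below are an assumption, not taken from the question:

    // Hypothetical stand-in for EncodingSettings.AUDIO when writing ".wav" segments.
    let pcmSettings: [String: AnyObject] = [
        AVFormatIDKey: NSNumber(unsignedInt: kAudioFormatLinearPCM),
        AVSampleRateKey: 44100,
        AVNumberOfChannelsKey: 2,
        AVLinearPCMBitDepthKey: 16,
        AVLinearPCMIsFloatKey: false,
        AVLinearPCMIsBigEndianKey: false,
        AVLinearPCMIsNonInterleaved: false
    ]

    // The matching writer would then use the WAVE container instead of MPEG-4.
    let outputPath = NSTemporaryDirectory() + "segment-0.wav"
    let writer = try! AVAssetWriter(URL: NSURL(fileURLWithPath: outputPath), fileType: AVFileTypeWAVE)
    let input = AVAssetWriterInput(mediaType: AVMediaTypeAudio, outputSettings: pcmSettings)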
p.s. you can see priming & remainder frames and packet sizes using the ubiquitous afinfo tool:
    afinfo chunk.mp4
    Data format: 2 ch, 44100 Hz, 'aac ' (0x00000000) 0 bits/channel, 0 bytes/packet, 1024 frames/packet, 0 bytes/frame
    ...
    audio 39596 valid frames + 2112 priming + 276 remainder = 41984
    ...
Answer 2:
Not sure if this helps you, but if you have a bunch of MP4s you can use this code to combine them:
func mergeAudioFiles(audioFileUrls: NSArray, callback: (url: NSURL?, error: NSError?) -> ()) {
    // Create the audio composition
    let composition = AVMutableComposition()

    // Merge: append each file's audio track at the current end of the composition
    for i in 0 ..< audioFileUrls.count {
        let compositionAudioTrack: AVMutableCompositionTrack = composition.addMutableTrackWithMediaType(AVMediaTypeAudio, preferredTrackID: CMPersistentTrackID())
        let asset = AVURLAsset(URL: audioFileUrls[i] as! NSURL)
        let track = asset.tracksWithMediaType(AVMediaTypeAudio)[0]
        let timeRange = CMTimeRange(start: CMTimeMake(0, 600), duration: track.timeRange.duration)
        try! compositionAudioTrack.insertTimeRange(timeRange, ofTrack: track, atTime: composition.duration)
    }

    // Create output url
    let format = NSDateFormatter()
    format.dateFormat = "yyyy-MM-dd-HH-mm-ss"
    let currentFileName = "recording-\(format.stringFromDate(NSDate()))-merge.m4a"
    print(currentFileName)
    let documentsDirectory = NSFileManager.defaultManager().URLsForDirectory(.DocumentDirectory, inDomains: .UserDomainMask)[0]
    let outputUrl = documentsDirectory.URLByAppendingPathComponent(currentFileName)
    print(outputUrl.absoluteString)

    // Export it
    let assetExport = AVAssetExportSession(asset: composition, presetName: AVAssetExportPresetAppleM4A)
    assetExport?.outputFileType = AVFileTypeAppleM4A
    assetExport?.outputURL = outputUrl
    assetExport?.exportAsynchronouslyWithCompletionHandler({ () -> Void in
        switch assetExport!.status {
        case AVAssetExportSessionStatus.Failed:
            callback(url: nil, error: assetExport?.error)
        default:
            callback(url: assetExport?.outputURL, error: nil)
        }
    })
}
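A possible call site, assuming the segment files already exist on disk (the URLs here are placeholders, not from the original answer):

    let segmentUrls: NSArray = [
        NSURL(fileURLWithPath: NSTemporaryDirectory() + "seg-000.m4a"),
        NSURL(fileURLWithPath: NSTemporaryDirectory() + "seg-001.m4a")
    ]
    mergeAudioFiles(segmentUrls) { url, error in
        if let error = error {
            print("Merge failed: \(error)")
        } else if let url = url {
            print("Merged file written to \(url)")
        }
    }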
Source: https://stackoverflow.com/questions/33903737/recording-gapless-audio-with-avassetwriter