问题
I am working on a app which has following requirements :
- Record real time audio from iOS device (iPhone)
- Encode this audio data to Opus data and send it to server over WebSocket
- Decode received data to pcm again
- Play received audio from WebSocket server on iOS device(iPhone)
I've used AVAudioEngine
for this.
var engine = AVAudioEngine()
var input: AVAudioInputNode = engine.inputNode
var format: AVAudioFormat = input.outputFormat(forBus: AVAudioNodeBus(0))
input.installTap(onBus: AVAudioNodeBus(0), bufferSize: AVAudioFrameCount(8192), format: format, block: { buf, when in
// ‘buf' contains audio captured from input node at time 'when'
})
// start engine
And I converted AVAudioPCMBuffer to Data using this function
func toData(PCMBuffer: AVAudioPCMBuffer) -> Data {
let channelCount = 1
let channels = UnsafeBufferPointer(start: PCMBuffer.floatChannelData, count: channelCount)
let ch0Data = NSData(bytes: channels[0], length:Int(PCMBuffer.frameLength * PCMBuffer.format.streamDescription.pointee.mBytesPerFrame))
return ch0Data as Data
}
I've found Opus Library from CocoaPod libopus
libopus
I have searched a lot on how to use OpusCodec in IOS but haven't got the solution.
How to encode and decode this data using OpusCodec ? And Am I need jitterBuffer ? If I need How to use it in IOS
This Code for Opus Codec but voice doesn't clear
#import "OpusManager.h"
#import <opus/opus.h>
#define SAMPLE_RATE 16000
#define CHANNELS 1
#define BITRATE SAMPLE_RATE * CHANNELS
/**
* Audio frame size
* It is divided by time. When calling, you must use the audio data of
exactly one frame (multiple of 2.5ms: 2.5, 5, 10, 20, 40, 60ms).
* Fs/ms 2.5 5 10 20 40 60
* 8kHz 20 40 80 160 320 480
* 16kHz 40 80 160 320 640 960
* 24KHz 60 120 240 480 960 1440
* 48kHz 120 240 480 960 1920 2880
*/
#define FRAME_SIZE 320
#define APPLICATION OPUS_APPLICATION_VOIP
#define MAX_PACKET_BYTES (FRAME_SIZE * CHANNELS * sizeof(float))
#define MAX_FRAME_SIZE (FRAME_SIZE * CHANNELS * sizeof(float))
typedef opus_int16 OPUS_DATA_SIZE_T;
@implementation OpusManager {
OpusEncoder *_encoder;
OpusDecoder *_decoder;
}
int size;
int error;
unsigned char encodedPacket[MAX_PACKET_BYTES];
- (instancetype)init {
self = [super init];
if (self) {
size = opus_encoder_get_size(CHANNELS);
_encoder = malloc(size);
error = opus_encoder_init(_encoder, SAMPLE_RATE, CHANNELS, APPLICATION);
_encoder = opus_encoder_create(SAMPLE_RATE, CHANNELS, APPLICATION, &error);
_decoder = opus_decoder_create(SAMPLE_RATE, CHANNELS, &error);
opus_encoder_ctl(_encoder, OPUS_SET_BITRATE(BITRATE));
opus_encoder_ctl(_encoder, OPUS_SET_COMPLEXITY(10));
opus_encoder_ctl(_encoder, OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE));
opus_encoder_ctl(_encoder, OPUS_SET_VBR(0));
opus_encoder_ctl(_encoder, OPUS_SET_APPLICATION(APPLICATION));
opus_encoder_ctl(_encoder, OPUS_SET_DTX(1));
opus_encoder_ctl(_encoder, OPUS_SET_INBAND_FEC(0));
opus_encoder_ctl(_encoder, OPUS_SET_BANDWIDTH(12000));
opus_encoder_ctl(_encoder, OPUS_SET_PACKET_LOSS_PERC(1));
opus_encoder_ctl(_encoder, OPUS_SET_INBAND_FEC(1));
opus_encoder_ctl(_encoder, OPUS_SET_FORCE_CHANNELS(CHANNELS));
opus_encoder_ctl(_encoder, OPUS_SET_PACKET_LOSS_PERC(1));
}
return self;
}
- (NSData *)encode:(NSData *)PCM {
opus_int16 *PCMPtr = (opus_int16 *)PCM.bytes;
int PCMSize = (int)PCM.length / sizeof(opus_int16);
opus_int16 *PCMEnd = PCMPtr + PCMSize;
NSMutableData *mutData = [NSMutableData data];
unsigned char encodedPacket[MAX_PACKET_BYTES];
// Record opus block size
OPUS_DATA_SIZE_T encodedBytes = 0;
while (PCMPtr + FRAME_SIZE < PCMEnd) {
encodedBytes = opus_encode_float(_encoder, (const float *) PCMPtr, FRAME_SIZE, encodedPacket, MAX_PACKET_BYTES);
if (encodedBytes <= 0) {
NSLog(@"ERROR: encodedBytes<=0");
return nil;
}
NSLog(@"encodedBytes: %d", encodedBytes);
// Save the opus block size
[mutData appendBytes:&encodedBytes length:sizeof(encodedBytes)];
// Save opus data
[mutData appendBytes:encodedPacket length:encodedBytes];
PCMPtr += FRAME_SIZE;
}
NSLog(@"mutData: %lu", (unsigned long)mutData.length);
NSLog(@"encodedPacket: %s", encodedPacket);
return mutData.length > 0 ? mutData : nil;
}
- (NSData *)decode:(NSData *)opus {
unsigned char *opusPtr = (unsigned char *)opus.bytes;
int opusSize = (int)opus.length;
unsigned char *opusEnd = opusPtr + opusSize;
NSMutableData *mutData = [NSMutableData data];
float decodedPacket[MAX_FRAME_SIZE];
int decodedSamples = 0;
// Save data for opus block size
OPUS_DATA_SIZE_T nBytes = 0;
while (opusPtr < opusEnd) {
// Take out the opus block size data
nBytes = *(OPUS_DATA_SIZE_T *)opusPtr;
opusPtr += sizeof(nBytes);
decodedSamples = opus_decode_float(_decoder, opusPtr, nBytes,decodedPacket, MAX_FRAME_SIZE, 0);
if (decodedSamples <= 0) {
NSLog(@"ERROR: decodedSamples<=0");
return nil;
}
NSLog(@"decodedSamples:%d", decodedSamples);
[mutData appendBytes:decodedPacket length:decodedSamples *sizeof(opus_int16)];
opusPtr += nBytes;
}
NSLog(@"mutData: %lu", (unsigned long)mutData.length);
return mutData.length > 0 ? mutData : nil;
}
@end
回答1:
Try to lower the bandwidth or set an higher bitrate. I think 16kbit for a 12kHz bandwidth mono audio is probably too low. Think it will be better to leave bandwidth to auto with application VOIP setted. There could be other problems around but the "doesn't sound good" is not enough to analyze.
来源:https://stackoverflow.com/questions/55692517/how-to-encode-and-decode-real-time-audio-using-opuscodec-in-ios