In a project I worked on over the past six months, I ran into and solved a series of problems around handling audio files. Material on this is hard to find online, so I'm writing it up here to make it easier to fill in the gaps later.
1. If the audio file you need to analyze lives on a server, you first have to download it locally, so let's start by implementing a function that downloads an audio file from a URL.
import AVFoundation

typealias AudioDownloadedCallback = (URL?) -> Void

extension AVAsset {
    static func downloadAudioToLocal(audioURL: URL, downloadCallback: @escaping AudioDownloadedCallback) {
        // Create the destination file URL in the Documents directory.
        let destinationUrl = FileManager.getDocumentPathWithFileName(sFileName: audioURL.lastPathComponent)
        // Check whether the file already exists before downloading it.
        if FileManager.default.fileExists(atPath: destinationUrl.path) {
            // The file already exists at this path.
            downloadCallback(destinationUrl)
        } else {
            // Download the audio file.
            URLSession.shared.downloadTask(with: audioURL, completionHandler: { (location, response, error) -> Void in
                guard let location = location, error == nil else {
                    downloadCallback(nil)
                    return
                }
                do {
                    // Move the temporary file into the Documents folder.
                    try FileManager.default.moveItem(at: location, to: destinationUrl)
                    downloadCallback(destinationUrl)
                } catch let error as NSError {
                    print(error.localizedDescription)
                    downloadCallback(nil)
                }
            }).resume()
        }
    }
}
extension FileManager {
    static func getDocumentPathWithFileName(sFileName: String) -> URL {
        let fileMgr = FileManager.default
        let dirPaths = fileMgr.urls(for: .documentDirectory,
                                    in: .userDomainMask).first!
        let filePath = dirPaths.appendingPathComponent(sFileName)
        return filePath
    }
}
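To show how this is called, here is a minimal usage sketch; the remote URL is just a placeholder, and note that the URLSession completion handler runs on a background queue, so hop back to the main queue before touching any UI.
import AVFoundation
// Minimal usage sketch; the URL below is a placeholder.
let remoteURL = URL(string: "https://example.com/remote.mp3")!
AVAsset.downloadAudioToLocal(audioURL: remoteURL) { localURL in
    guard let localURL = localURL else {
        print("Download failed")
        return
    }
    // The completion handler runs on a background queue; switch to the
    // main queue before updating any UI with the downloaded file.
    DispatchQueue.main.async {
        print("Audio saved to \(localURL.path)")
    }
}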
Once the file is in place we can start reading it. There are two ways to do this:
1. Read with AVAssetReader and record the output into an NSMutableData.
2. Create an AVAudioFile instance and read it chunk by chunk with an AVAudioPCMBuffer.
The difference between the two: the first approach reads the file as a whole; once the AVAssetReader starts reading, you have no control over the chunks it returns, and each chunk's length is whatever CMBlockBufferGetDataLength() reports. The second approach lets you define the start position and step size of each read yourself, and you can even re-read a range selectively. Which one to use depends on your own use case.
Reading the whole file: AVAssetReader + NSMutableData
import UIKit
import AVFoundation

extension AVURLAsset {
    func readAudioBuffer() -> NSMutableData {
        var reader: AVAssetReader?
        do {
            reader = try AVAssetReader(asset: self)
        } catch let error {
            print("Create asset reader failed. \(error.localizedDescription)")
        }
        let sampleData = NSMutableData()
        if reader != nil {
            // We only read the first track of the audio file. If your file is not mono
            // and you need to read all tracks, repeat this operation for each track.
            let songTrack: AVAssetTrack = self.tracks[0]
            // Ask for interleaved 16-bit signed integer PCM.
            let readerOutputSettings: [String: Int] = [AVFormatIDKey: Int(kAudioFormatLinearPCM),
                                                       AVLinearPCMBitDepthKey: 16,
                                                       AVLinearPCMIsBigEndianKey: 0,
                                                       AVLinearPCMIsFloatKey: 0,
                                                       AVLinearPCMIsNonInterleaved: 0]
            let output = AVAssetReaderTrackOutput(track: songTrack, outputSettings: readerOutputSettings)
            reader?.add(output)
            reader?.startReading()
            while reader?.status == AVAssetReader.Status.reading {
                if let sampleBufferRef = output.copyNextSampleBuffer() {
                    if let blockBufferRef = CMSampleBufferGetDataBuffer(sampleBufferRef) {
                        // Copy the raw bytes of this sample buffer and append them to the result.
                        let bufferLength = CMBlockBufferGetDataLength(blockBufferRef)
                        let data = NSMutableData(length: bufferLength)
                        CMBlockBufferCopyDataBytes(blockBufferRef, atOffset: 0, dataLength: bufferLength, destination: (data?.mutableBytes)!)
                        let samples = UnsafeMutablePointer<Int16>(OpaquePointer(UnsafeMutableRawPointer(data!.mutableBytes)))
                        sampleData.append(samples, length: bufferLength)
                        CMSampleBufferInvalidate(sampleBufferRef)
                    }
                }
            }
        }
        return sampleData
    }
}
PS: as the comment in the code points out, this only reads track 0. If the file you need to analyze is stereo (i.e. it has two or more audio tracks), you will have to adjust the code for the extra tracks; a sketch of that adjustment follows.
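One possible adjustment (a fragment sketch only; addOutputs is a hypothetical helper, not part of the project code) is to iterate over tracks(withMediaType: .audio) and add one AVAssetReaderTrackOutput per track instead of hard-coding tracks[0]:
import AVFoundation
// Hypothetical helper sketching the multi-track adjustment: one output per audio
// track, all attached to the same reader; each output then needs its own
// copyNextSampleBuffer() loop and its own NSMutableData.
func addOutputs(for asset: AVURLAsset, to reader: AVAssetReader, settings: [String: Int]) -> [AVAssetReaderTrackOutput] {
    let audioTracks = asset.tracks(withMediaType: .audio)
    let outputs = audioTracks.map { AVAssetReaderTrackOutput(track: $0, outputSettings: settings) }
    outputs.forEach { reader.add($0) }
    return outputs
}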
The result:
Once you have the result you can post-process it however you need. For example, I wrote a transform that maps the audio data to the width and height the UI asks for.
import UIKit

extension NSMutableData {
    func toInt16Sequence(size: CGSize) -> [Float] {
        var filteredSamplesMA: [Float] = [Float]()
        // Total number of Int16 samples in the buffer.
        let sampleCount = self.length / MemoryLayout<Int16>.size
        // Keep roughly one sample for every 2pt of the target width.
        let binSize = max(1, Int(Float(sampleCount) / (Float(size.width) * 0.5)))
        var i = 0
        while i < sampleCount {
            // Read one Int16 sample at the current sample index (byte offset = index * 2).
            let rangeData = self.subdata(with: NSRange(location: i * MemoryLayout<Int16>.size,
                                                       length: MemoryLayout<Int16>.size))
            let item = rangeData.withUnsafeBytes { $0.load(as: Int16.self) }
            filteredSamplesMA.append(Float(item))
            i += binSize
        }
        let result = NSMutableData.trackScale(size: size, source: filteredSamplesMA)
        return result
    }

    private static func trackScale(size: CGSize, source: [Float]) -> [Float] {
        // Scale all samples so the loudest one maps to the requested height.
        if let max = source.max() {
            let k = Float(size.height) / max
            return source.map { $0 * k }
        }
        return source
    }
}
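Putting the pieces together, a minimal sketch (the file name remote.mp3 and the 320×100 size are placeholder assumptions):
import UIKit
import AVFoundation
// Assumes the file was downloaded earlier; the name and size are placeholders.
let localURL = FileManager.getDocumentPathWithFileName(sFileName: "remote.mp3")
let asset = AVURLAsset(url: localURL)
let rawSamples = asset.readAudioBuffer()
let scaledSamples = rawSamples.toInt16Sequence(size: CGSize(width: 320, height: 100))
print("Got \(scaledSamples.count) scaled samples")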
Final result:
With this data in hand you can draw the corresponding waveform at the UI layer (a rough sketch follows), or save it for some other kind of analysis.
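For illustration only (this is not the project's UI code), here is one way to draw the scaled samples as vertical bars with UIBezierPath and CAShapeLayer; the 2pt bar spacing matches the width * 0.5 factor used in toInt16Sequence.
import UIKit
// Illustration only: render the scaled samples as vertical bars centered on the view.
func drawWaveform(samples: [Float], in view: UIView) {
    let path = UIBezierPath()
    let midY = view.bounds.midY
    for (index, sample) in samples.enumerated() {
        let x = CGFloat(index) * 2.0                // one bar every 2pt
        let halfHeight = CGFloat(abs(sample)) / 2.0 // samples are already scaled to the view height
        path.move(to: CGPoint(x: x, y: midY - halfHeight))
        path.addLine(to: CGPoint(x: x, y: midY + halfHeight))
    }
    let layer = CAShapeLayer()
    layer.path = path.cgPath
    layer.strokeColor = UIColor.systemBlue.cgColor
    layer.lineWidth = 1.0
    view.layer.addSublayer(layer)
}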
Next up is the second approach: AVAudioFile + AVAudioPCMBuffer. Unlike the first approach, which reads the whole file in one pass, this one lets you specify the start position and length of every read yourself, which makes it somewhat more flexible.
import AVFoundation

extension AVAudioFile {
    static func readAmplitude(audioURL: URL) -> NSMutableData {
        let asset = AVURLAsset(url: audioURL)
        let amplitudes: NSMutableData = NSMutableData()
        var file: AVAudioFile!
        do {
            file = try AVAudioFile(forReading: audioURL)
        } catch let error {
            print("AVAudioFile create failed \(error.localizedDescription)")
            return amplitudes
        }
        // FPS: how many data points we want per second of audio.
        let frameCountPerSecond = 30.0
        // Calculate the total number of frames for the audio file.
        let frameCountTotal = asset.duration.seconds * frameCountPerSecond
        // Number of samples merged into each frame.
        let sampleForEachFrame = Double(file.length.magnitude) / frameCountTotal
        var sampleReadIndex: Double = 0.0
        while sampleReadIndex < Double(file.length) {
            let audioBuffer: AVAudioPCMBuffer = AVAudioPCMBuffer(pcmFormat: file.processingFormat, frameCapacity: AVAudioFrameCount(sampleForEachFrame))!
            audioBuffer.frameLength = UInt32(sampleForEachFrame)
            // Adjust the frame position before each read.
            file.framePosition = AVAudioFramePosition(sampleReadIndex)
            do {
                // Read a buffer of the same size each time.
                try file.read(into: audioBuffer, frameCount: UInt32(sampleForEachFrame))
                // Append the first channel's float samples; the length argument is in bytes.
                amplitudes.append((audioBuffer.floatChannelData?.pointee)!, length: Int(audioBuffer.frameLength) * MemoryLayout<Float>.size)
            } catch let error {
                print("read to buffer failed \(error.localizedDescription)")
            }
            // Advance the read position.
            sampleReadIndex = Double(sampleReadIndex + sampleForEachFrame)
        }
        return amplitudes
    }
}
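A minimal usage sketch (the file name is a placeholder) that reads the amplitudes and reinterprets the raw bytes as Float values for printing:
import AVFoundation
// Placeholder file name; in practice use the file downloaded earlier.
let fileURL = FileManager.getDocumentPathWithFileName(sFileName: "remote.mp3")
let amplitudeData = AVAudioFile.readAmplitude(audioURL: fileURL)
// Reinterpret the byte buffer as Float samples so they can be printed.
let floatCount = amplitudeData.length / MemoryLayout<Float>.size
var amplitudeValues = [Float](repeating: 0, count: floatCount)
amplitudeData.getBytes(&amplitudeValues, length: amplitudeData.length)
print(amplitudeValues.prefix(30))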
Printed result:
At this point all the data has been extracted; from here you can pick whichever third-party framework fits your scenario and render the corresponding chart.