Version History

Version | Date |
---|---|
V1.0 | 2019.12.04 (Wednesday) |
Preface

iOS 11+ and macOS 10.13+ introduced the Vision framework, which provides technologies such as face detection, object detection, and object tracking, and it builds on Core ML. You could call it one piece of the AI toolkit. Over the next several posts we will analyze the Vision framework in detail; if you are interested, have a look at the articles below.
1. Vision Framework Detailed Analysis (1) - Basic Overview (1)
2. Vision Framework Detailed Analysis (2) - Vision-Based Face Detection (1)
3. Vision Framework Detailed Analysis (3) - Vision-Based Face Detection (2)
4. Vision Framework Detailed Analysis (4) - Photo Stacking in iOS with Vision and Metal (1)
5. Vision Framework Detailed Analysis (5) - Photo Stacking in iOS with Vision and Metal (2)
6. Vision Framework Detailed Analysis (6) - Vision-Based Saliency Analysis (1)
Source Code

1. Swift

First, here is the project's organization structure.

Below is what the storyboard contains.

Finally, the source code itself.
1. AppDelegate.swift
```swift
import UIKit

@UIApplicationMain
class AppDelegate: UIResponder, UIApplicationDelegate {
  var window: UIWindow?

  func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]? = nil) -> Bool {
    // Keep the screen awake while the camera preview is running
    UIApplication.shared.isIdleTimerDisabled = true
    return true
  }
}
```
2. CameraViewController.swift
```swift
import AVFoundation
import UIKit
import VideoToolbox
import Vision

// The three display modes the app cycles through
enum ViewMode: String {
  case original = "Original"
  case heatMap = "Heat Map"
  case flashlight = "Spotlight"
}

class CameraViewController: UIViewController {
  var sequenceHandler = VNSequenceRequestHandler()

  @IBOutlet var modeLabel: UILabel!
  @IBOutlet var saliencyControl: UISegmentedControl!
  @IBOutlet var frameView: UIImageView!

  var mode = ViewMode.original
  let session = AVCaptureSession()
  var currentFrame: CIImage?

  // Serial queue on which camera frames are delivered and processed
  let dataOutputQueue = DispatchQueue(
    label: "video data queue",
    qos: .userInitiated,
    attributes: [],
    autoreleaseFrequency: .workItem)

  override func viewDidLoad() {
    super.viewDidLoad()
    saliencyControl.isHidden = true
    configureCaptureSession()
    session.startRunning()
  }
}
```
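Note that `viewDidLoad()` configures and starts the capture session right away, which assumes camera access has already been granted (the app also needs an `NSCameraUsageDescription` entry in its Info.plist). Below is a minimal sketch, not part of the original project, of how you might gate startup on the authorization status; the helper name `startSessionIfAuthorized()` is purely illustrative.

```swift
import AVFoundation

extension CameraViewController {
  // Hypothetical helper: only configure and start the session once the user
  // has granted camera access.
  func startSessionIfAuthorized() {
    switch AVCaptureDevice.authorizationStatus(for: .video) {
    case .authorized:
      configureCaptureSession()
      session.startRunning()
    case .notDetermined:
      AVCaptureDevice.requestAccess(for: .video) { granted in
        guard granted else { return }
        DispatchQueue.main.async {
          self.configureCaptureSession()
          self.session.startRunning()
        }
      }
    default:
      // .denied / .restricted: leave the session stopped
      break
    }
  }
}
```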
```swift
// MARK: - Gesture methods
extension CameraViewController {
  @IBAction func handleTap(_ sender: UITapGestureRecognizer) {
    // Each tap cycles original -> heat map -> spotlight -> original;
    // the saliency-type selector is only shown for the two saliency modes
    saliencyControl.isHidden = false

    switch mode {
    case .original:
      mode = .heatMap
    case .heatMap:
      mode = .flashlight
    case .flashlight:
      mode = .original
      saliencyControl.isHidden = true
    }

    modeLabel.text = mode.rawValue
  }
}
```
```swift
// MARK: - Video Processing methods
extension CameraViewController {
  func configureCaptureSession() {
    // Define the capture device we want to use
    guard let camera = AVCaptureDevice.default(.builtInWideAngleCamera,
                                               for: .video,
                                               position: .back) else {
      fatalError("No back video camera available")
    }

    // Connect the camera to the capture session input
    do {
      let cameraInput = try AVCaptureDeviceInput(device: camera)
      session.addInput(cameraInput)
    } catch {
      fatalError(error.localizedDescription)
    }

    // Create the video data output
    let videoOutput = AVCaptureVideoDataOutput()
    videoOutput.setSampleBufferDelegate(self, queue: dataOutputQueue)
    videoOutput.videoSettings = [kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_32BGRA]

    // Add the video output to the capture session
    session.addOutput(videoOutput)

    let videoConnection = videoOutput.connection(with: .video)
    videoConnection?.videoOrientation = .portrait
  }

  func display(frame: CIImage?) {
    guard let frame = frame else {
      return
    }

    DispatchQueue.main.async {
      self.frameView.image = UIImage(ciImage: frame)
    }
  }
}
```
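`display(frame:)` wraps the `CIImage` in a `UIImage` directly, so Core Image renders it lazily whenever UIKit draws the image view. A minimal sketch of an alternative, not in the original project, that renders each frame eagerly through a reusable `CIContext`:

```swift
import CoreImage
import UIKit

// Hypothetical alternative to display(frame:): render the CIImage into a
// CGImage once, using a shared CIContext, before handing it to UIKit.
let sharedContext = CIContext()

extension CameraViewController {
  func displayRendered(frame: CIImage?) {
    guard let frame = frame,
          let cgImage = sharedContext.createCGImage(frame, from: frame.extent) else {
      return
    }
    DispatchQueue.main.async {
      self.frameView.image = UIImage(cgImage: cgImage)
    }
  }
}
```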
```swift
// MARK: - AVCaptureVideoDataOutputSampleBufferDelegate methods
extension CameraViewController: AVCaptureVideoDataOutputSampleBufferDelegate {
  func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
    // Grab the pixel buffer for the current frame
    guard let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
      return
    }

    currentFrame = CIImage(cvImageBuffer: imageBuffer)

    // In the original mode, just show the camera frame with no Vision work
    if mode == .original {
      display(frame: currentFrame)
      return
    }

    // Choose the saliency request based on the segmented control
    let req: VNImageBasedRequest
    var selectedSegmentIndex = 0

    DispatchQueue.main.sync {
      selectedSegmentIndex = saliencyControl.selectedSegmentIndex
    }

    switch selectedSegmentIndex {
    case 0:
      req = VNGenerateAttentionBasedSaliencyImageRequest(completionHandler: handleSaliency)
    case 1:
      req = VNGenerateObjectnessBasedSaliencyImageRequest(completionHandler: handleSaliency)
    default:
      fatalError("Unhandled segment index!")
    }

    // Run the request on the current frame
    do {
      try sequenceHandler.perform(
        [req],
        on: imageBuffer,
        orientation: .up)
    } catch {
      print(error.localizedDescription)
    }
  }
}
```
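The delegate method above creates a fresh saliency request for every camera frame and runs it through the shared `VNSequenceRequestHandler`. The same requests also work on a single still image via `VNImageRequestHandler`; here is a minimal sketch, with the helper name `attentionHeatMap(for:)` being purely illustrative:

```swift
import Vision
import CoreImage

// Hypothetical helper: run attention-based saliency once on a still image
// and return the heat map as a CIImage.
func attentionHeatMap(for cgImage: CGImage) -> CIImage? {
  let request = VNGenerateAttentionBasedSaliencyImageRequest()
  let handler = VNImageRequestHandler(cgImage: cgImage, orientation: .up, options: [:])

  do {
    try handler.perform([request])
  } catch {
    print(error.localizedDescription)
    return nil
  }

  guard let observation = request.results?.first as? VNSaliencyImageObservation else {
    return nil
  }
  return CIImage(cvImageBuffer: observation.pixelBuffer)
}
```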
```swift
// MARK: - Vision methods
extension CameraViewController {
  func showHeatMap(with heatMap: CIImage) {
    guard let frame = currentFrame else {
      return
    }

    // Zero out the blue channel and reduce the alpha so the heat map
    // composites over the frame as a semi-transparent yellow overlay
    let yellowHeatMap = heatMap
      .applyingFilter("CIColorMatrix", parameters:
        ["inputBVector": CIVector(x: 0, y: 0, z: 0, w: 0),
         "inputAVector": CIVector(x: 0, y: 0, z: 0, w: 0.7)])
      .composited(over: frame)

    display(frame: yellowHeatMap)
  }

  func showFlashlight(with heatMap: CIImage) {
    guard let frame = currentFrame else {
      return
    }

    // Boost the heat map's alpha and use it as a blend mask,
    // so only the salient areas of the frame stay visible
    let mask = heatMap
      .applyingFilter("CIColorMatrix", parameters:
        ["inputAVector": CIVector(x: 0, y: 0, z: 0, w: 2)])

    let spotlight = frame.applyingFilter("CIBlendWithMask", parameters: ["inputMaskImage": mask])

    display(frame: spotlight)
  }

  func handleSaliency(request: VNRequest, error: Error?) {
    guard
      let results = request.results as? [VNSaliencyImageObservation],
      let result = results.first
      else {
        return
    }

    guard let targetExtent = currentFrame?.extent else {
      return
    }

    // Stretch the observation's values to the full 0...1 range
    result.pixelBuffer.normalize()

    // Scale the (small) heat map up to the camera frame's size,
    // soften it, and crop it back to the frame's extent
    var ciImage = CIImage(cvImageBuffer: result.pixelBuffer)
    let heatmapExtent = ciImage.extent
    let scaleX = targetExtent.width / heatmapExtent.width
    let scaleY = targetExtent.height / heatmapExtent.height

    ciImage = ciImage
      .transformed(by: CGAffineTransform(scaleX: scaleX, y: scaleY))
      .applyingGaussianBlur(sigma: 20.0)
      .cropped(to: targetExtent)

    switch mode {
    case .heatMap:
      showHeatMap(with: ciImage)
    case .flashlight:
      showFlashlight(with: ciImage)
    default:
      break
    }
  }
}
```
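Besides the `pixelBuffer` heat map used above, a `VNSaliencyImageObservation` also exposes `salientObjects`, an array of bounding boxes in normalized coordinates. A minimal sketch, not part of the original project, of converting them to image coordinates:

```swift
import Vision

// Hypothetical helper: convert the observation's normalized salient-object
// bounding boxes into rectangles in the pixel space of an image of the
// given size.
func salientRects(in observation: VNSaliencyImageObservation,
                  imageWidth: Int,
                  imageHeight: Int) -> [CGRect] {
  guard let objects = observation.salientObjects else {
    return []
  }
  return objects.map { object in
    // VNImageRectForNormalizedRect maps a 0...1 rect to pixel coordinates
    VNImageRectForNormalizedRect(object.boundingBox, imageWidth, imageHeight)
  }
}
```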
3. CVPixelBufferExtension.swift
```swift
import CoreVideo

extension CVPixelBuffer {
  // Rescales the buffer's values to the 0...1 range in place.
  // Assumes a one-component, 32-bit float pixel buffer, which is what the
  // saliency observations return.
  func normalize() {
    let bytesPerRow = CVPixelBufferGetBytesPerRow(self)
    let totalBytes = CVPixelBufferGetDataSize(self)

    let width = bytesPerRow / MemoryLayout<Float>.size
    let height = totalBytes / bytesPerRow

    CVPixelBufferLockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0))

    let floatBuffer = unsafeBitCast(CVPixelBufferGetBaseAddress(self), to: UnsafeMutablePointer<Float>.self)

    // First pass: find the minimum and maximum values
    var minPixel: Float = 1.0
    var maxPixel: Float = 0.0

    for i in 0 ..< width * height {
      let pixel = floatBuffer[i]
      minPixel = min(pixel, minPixel)
      maxPixel = max(pixel, maxPixel)
    }

    // Second pass: rescale every value to 0...1
    // (skip the division when the buffer is uniform, to avoid NaNs)
    let range = maxPixel - minPixel
    if range > 0 {
      for i in 0 ..< width * height {
        let pixel = floatBuffer[i]
        floatBuffer[i] = (pixel - minPixel) / range
      }
    }

    CVPixelBufferUnlockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0))
  }

  func printDebugInfo() {
    let width = CVPixelBufferGetWidth(self)
    let height = CVPixelBufferGetHeight(self)
    let bytesPerRow = CVPixelBufferGetBytesPerRow(self)
    let totalBytes = CVPixelBufferGetDataSize(self)

    print("Heat Map Info: \(width)x\(height)")
    print("Bytes per Row: \(bytesPerRow)")
    print(" Total Bytes: \(totalBytes)")
  }
}
```
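`normalize()` walks the buffer twice with plain Swift loops. For larger buffers, the same min/max rescaling can be done with Accelerate. A minimal sketch, assuming the same one-component Float32 layout as above; `normalizeUsingAccelerate()` is a hypothetical name, not part of the original project:

```swift
import Accelerate
import CoreVideo

extension CVPixelBuffer {
  // Hypothetical vDSP-based variant of normalize(): rescale all values to 0...1.
  func normalizeUsingAccelerate() {
    CVPixelBufferLockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0))
    defer { CVPixelBufferUnlockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0)) }

    guard let baseAddress = CVPixelBufferGetBaseAddress(self) else { return }

    let count = CVPixelBufferGetDataSize(self) / MemoryLayout<Float>.size
    let pointer = baseAddress.assumingMemoryBound(to: Float.self)

    // Find the minimum and maximum values with one vDSP call each
    var minValue: Float = 0
    var maxValue: Float = 0
    vDSP_minv(pointer, 1, &minValue, vDSP_Length(count))
    vDSP_maxv(pointer, 1, &maxValue, vDSP_Length(count))

    let range = maxValue - minValue
    guard range > 0 else { return }

    // value = value * (1 / range) + (-min / range), computed in place
    var scale = 1 / range
    var offset = -minValue / range
    vDSP_vsmsa(pointer, 1, &scale, &offset, pointer, 1, vDSP_Length(count))
  }
}
```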
Postscript

This post mainly covered Vision-based saliency analysis. If you found it interesting, please give it a like or a follow~~~