39

How can I do face detection in real time just as "Camera" does?


I noticed that AVCaptureStillImageOutput is deprecated in iOS 10.0, so I use AVCapturePhotoOutput instead. However, face detection on the image I save is not satisfying. Any ideas?


UPDATE

After giving @Shravya Boggarapu's suggestion a try, I now use AVCaptureMetadataOutput to detect faces without CIFaceDetector. It works as expected. However, when I try to draw the bounds of the face, they appear mislocated. Any ideas?


let metaDataOutput = AVCaptureMetadataOutput()

    captureSession.sessionPreset = AVCaptureSessionPresetPhoto
    let backCamera = AVCaptureDevice.defaultDevice(withDeviceType: .builtInWideAngleCamera, mediaType: AVMediaTypeVideo, position: .back)
    do {
        let input = try AVCaptureDeviceInput(device: backCamera)

        if (captureSession.canAddInput(input)) {
            captureSession.addInput(input)

            // MetadataOutput instead
            if(captureSession.canAddOutput(metaDataOutput)) {
                captureSession.addOutput(metaDataOutput)

                metaDataOutput.setMetadataObjectsDelegate(self, queue: DispatchQueue.main)
                metaDataOutput.metadataObjectTypes = [AVMetadataObjectTypeFace]

                previewLayer = AVCaptureVideoPreviewLayer(session: captureSession)
                previewLayer?.frame = cameraView.bounds
                previewLayer?.videoGravity = AVLayerVideoGravityResizeAspectFill

                cameraView.layer.addSublayer(previewLayer!)
                captureSession.startRunning()
            }

        }

    } catch {
        print(error.localizedDescription)
    }

and

extension CameraViewController: AVCaptureMetadataOutputObjectsDelegate {
    func captureOutput(_ captureOutput: AVCaptureOutput!, didOutputMetadataObjects metadataObjects: [Any]!, from connection: AVCaptureConnection!) {
        if findFaceControl {
            findFaceControl = false
            for metadataObject in metadataObjects {
                if (metadataObject as AnyObject).type == AVMetadataObjectTypeFace {
                    print("")
                    print(metadataObject)
                    let bounds = (metadataObject as! AVMetadataFaceObject).bounds
                    print("origin x: \(bounds.origin.x)")
                    print("origin y: \(bounds.origin.y)")
                    print("size width: \(bounds.size.width)")
                    print("size height: \(bounds.size.height)")
                    print("cameraView width: \(self.cameraView.frame.width)")
                    print("cameraView height: \(self.cameraView.frame.height)")
                    var face = CGRect()
                    face.origin.x = bounds.origin.x * self.cameraView.frame.width
                    face.origin.y = bounds.origin.y * self.cameraView.frame.height
                    face.size.width = bounds.size.width * self.cameraView.frame.width
                    face.size.height = bounds.size.height * self.cameraView.frame.height
                    print(face)

                    showBounds(at: face)
                }
            }
        }
    }
}

Original

See the full code on GitHub.

var captureSession = AVCaptureSession()
var photoOutput = AVCapturePhotoOutput()
var previewLayer: AVCaptureVideoPreviewLayer?    

override func viewWillAppear(_ animated: Bool) {
    super.viewWillAppear(true)

    captureSession.sessionPreset = AVCaptureSessionPresetHigh

    let backCamera = AVCaptureDevice.defaultDevice(withMediaType: AVMediaTypeVideo)
    do {
        let input = try AVCaptureDeviceInput(device: backCamera)

        if (captureSession.canAddInput(input)) {
            captureSession.addInput(input)

            if(captureSession.canAddOutput(photoOutput)){
                captureSession.addOutput(photoOutput)
                captureSession.startRunning()

                previewLayer = AVCaptureVideoPreviewLayer(session: captureSession)
                previewLayer?.videoGravity = AVLayerVideoGravityResizeAspectFill
                previewLayer?.frame = cameraView.bounds

                cameraView.layer.addSublayer(previewLayer!)
            }
        }

    } catch {
        print(error.localizedDescription)
    }

}

func captureImage() {
    let settings = AVCapturePhotoSettings()
    let previewPixelType = settings.availablePreviewPhotoPixelFormatTypes.first!
    let previewFormat = [kCVPixelBufferPixelFormatTypeKey as String: previewPixelType
                         ]
    settings.previewPhotoFormat = previewFormat
    photoOutput.capturePhoto(with: settings, delegate: self)

}



func capture(_ captureOutput: AVCapturePhotoOutput, didFinishProcessingPhotoSampleBuffer photoSampleBuffer: CMSampleBuffer?, previewPhotoSampleBuffer: CMSampleBuffer?, resolvedSettings: AVCaptureResolvedPhotoSettings, bracketSettings: AVCaptureBracketedStillImageSettings?, error: Error?) {
    if let error = error {
        print(error.localizedDescription)
    }
    // previewPhotoSampleBuffer is intentionally not included (nil is passed)
    if let sampleBuffer = photoSampleBuffer,
        let dataImage = AVCapturePhotoOutput.jpegPhotoDataRepresentation(forJPEGSampleBuffer: sampleBuffer, previewPhotoSampleBuffer: nil) {
            self.imageView.image = UIImage(data: dataImage)
            self.imageView.isHidden = false
            self.previewLayer?.isHidden = true
            self.findFace(img: self.imageView.image!)
        }
}

findFace works with a normal image. However, on an image captured via the camera it either fails or sometimes recognizes only one face.

Normal Image

[screenshot: faces detected correctly]

Capture Image

[screenshot: detection fails or finds only one face]

func findFace(img: UIImage) {
    guard let faceImage = CIImage(image: img) else { return }
    let accuracy = [CIDetectorAccuracy: CIDetectorAccuracyHigh]
    let faceDetector = CIDetector(ofType: CIDetectorTypeFace, context: nil, options: accuracy)


    // For converting the Core Image Coordinates to UIView Coordinates
    let detectedImageSize = faceImage.extent.size
    var transform = CGAffineTransform(scaleX: 1, y: -1)
    transform = transform.translatedBy(x: 0, y: -detectedImageSize.height)


    if let faces = faceDetector?.features(in: faceImage, options: [CIDetectorSmile: true, CIDetectorEyeBlink: true]) {
        for face in faces as! [CIFaceFeature] {

            // Apply the transform to convert the coordinates
            var faceViewBounds =  face.bounds.applying(transform)
            // Calculate the actual position and size of the rectangle in the image view
            let viewSize = imageView.bounds.size
            let scale = min(viewSize.width / detectedImageSize.width,
                            viewSize.height / detectedImageSize.height)
            let offsetX = (viewSize.width - detectedImageSize.width * scale) / 2
            let offsetY = (viewSize.height - detectedImageSize.height * scale) / 2

            faceViewBounds = faceViewBounds.applying(CGAffineTransform(scaleX: scale, y: scale))
            print("faceBounds = \(faceViewBounds)")
            faceViewBounds.origin.x += offsetX
            faceViewBounds.origin.y += offsetY

            showBounds(at: faceViewBounds)
        }

        if faces.count != 0 {
            print("Number of faces: \(faces.count)")
        } else {
            print("No faces ")
        }
    }


}

func showBounds(at bounds: CGRect) {
    let indicator = UIView(frame: bounds)
    indicator.layer.borderWidth = 3
    indicator.layer.borderColor = UIColor.red.cgColor
    indicator.backgroundColor = .clear

    self.imageView.addSubview(indicator)
    faceBoxes.append(indicator)

}
Willjay
  • you should use `CIDetector` to detect the face. – aircraft Dec 28 '16 at 03:02
  • Here's a link that has an example using Core Image face detection from a live video feed. It's from iOS 5 days, so it's obviously both dated and in Objective-C, but if you've worked with CI before, you could probably translate it. http://www.icapps.com/face-detection-with-core-image-on-live-video/. Sorry, hit return not realizing it equates to an edit. Here's a second link to help with using Swift 2 and applying CI filters to a camera feed: http://flexmonkey.blogspot.com/2015/07/applying-cifilters-to-live-camera-feed.html?q=camera – dfd Dec 28 '16 at 03:03
  • Use this example from [here](https://github.com/shinobicontrols/iOS8-day-by-day/blob/master/13-coreimage-detectors/13-coreimage-detectors.md). This example has live detection for rectangles/squares and QR codes, but you can easily tweak it to detect faces. You can use this example to change overlays and all sorts of other stuff too, it's very customizable. Hope this helps :D – Munib Jan 04 '17 at 04:19
  • You're forcing a smile and an eye blink face by filtering the results using: `options: [CIDetectorSmile: true, CIDetectorEyeBlink: true]`. Is that what you want? This could lead to poor results while detecting faces. – ricardopereira Jan 11 '17 at 09:19
  • I've set the `options: nil` you mentioned, but it still does not work as expected – Willjay Jan 11 '17 at 09:22
  • See my answer. I tested the code and it's working. – ricardopereira Jan 11 '17 at 12:40
  • would you answer this featured question http://stackoverflow.com/questions/41238781/real-time-face-detection-in-iphone-does-not-work – Xcodian Solangi Jan 18 '17 at 08:00
  • I've updated a [solution](https://github.com/Weijay/AppleFaceDetection) using the `Vision` framework in iOS 11. It just works :D – Willjay Jun 09 '17 at 06:31

6 Answers

12

There are two ways to detect faces: CIFaceDetector and AVCaptureMetadataOutput. Depending on your requirements, choose what is relevant for you.

CIFaceDetector has more features: it gives you the location of the eyes and mouth, a smile detector, etc.
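
For illustration, here is a small sketch of reading those extra features from CIFaceFeature (the logFaceFeatures name is just for this example):

import CoreImage

// Sketch: reading the extra per-face information CIFaceDetector exposes.
func logFaceFeatures(in image: CIImage) {
    let detector = CIDetector(ofType: CIDetectorTypeFace,
                              context: nil,
                              options: [CIDetectorAccuracy: CIDetectorAccuracyHigh])
    let features = detector?.features(in: image,
                                      options: [CIDetectorSmile: true, CIDetectorEyeBlink: true]) ?? []
    for case let face as CIFaceFeature in features {
        print("face bounds: \(face.bounds)")
        if face.hasLeftEyePosition  { print("left eye: \(face.leftEyePosition)") }
        if face.hasRightEyePosition { print("right eye: \(face.rightEyePosition)") }
        if face.hasMouthPosition    { print("mouth: \(face.mouthPosition)") }
        print("smiling: \(face.hasSmile), blinking: \(face.leftEyeClosed || face.rightEyeClosed)")
    }
}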

On the other hand, AVCaptureMetadataOutput is computed on the frames, the detected faces are tracked, and there is no extra code for us to add. I find that, because of the tracking, faces are detected more reliably with this approach. The downside is that you simply detect faces, not the position of the eyes or mouth. Another advantage of this method is that orientation issues are smaller: you can set videoOrientation whenever the device orientation changes, and the orientation of the faces will be relative to that orientation.
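
For example, you might keep the orientation in sync along these lines (a rough sketch; only the common device orientations are handled):

import AVFoundation
import UIKit

// Sketch: keep the capture connection's orientation in sync with the device.
func updateVideoOrientation(of connection: AVCaptureConnection) {
    guard connection.isVideoOrientationSupported else { return }
    switch UIDevice.current.orientation {
    case .portrait:           connection.videoOrientation = .portrait
    case .portraitUpsideDown: connection.videoOrientation = .portraitUpsideDown
    case .landscapeLeft:      connection.videoOrientation = .landscapeRight // device and video are flipped in landscape
    case .landscapeRight:     connection.videoOrientation = .landscapeLeft
    default:                  break
    }
}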

In my case, my application uses YUV420 as the required format, so using CIDetector (which works with RGB) in real time was not viable. Using AVCaptureMetadataOutput saved a lot of effort and performed more reliably thanks to continuous tracking.

Once I had the bounding box for the faces, I coded extra features, such as skin detection, and applied them to the still image.

Note: When you capture a still image, the face box information is added along with the metadata so there are no sync issues.

You can also use a combination of the two to get better results.

Explore and evaluate the pros and cons as per your application.


The face rectangle is relative to the image origin, so for the screen it may be different. Use:

for (AVMetadataFaceObject *faceFeatures in metadataObjects) {
    CGRect face = faceFeatures.bounds;
    CGRect facePreviewBounds = CGRectMake(face.origin.y * previewLayerRect.size.width,
                               face.origin.x * previewLayerRect.size.height,
                               face.size.width * previewLayerRect.size.height,
                               face.size.height * previewLayerRect.size.width);

    /* Draw rectangle facePreviewBounds on screen */
}
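
If it helps, the same conversion in Swift might look roughly like this (a sketch; previewLayerRect is assumed to be the preview layer's bounds, as above):

for case let faceObject as AVMetadataFaceObject in metadataObjects {
    let face = faceObject.bounds
    // The metadata bounds are normalized and rotated, hence the x/y and width/height swap
    let facePreviewBounds = CGRect(x: face.origin.y * previewLayerRect.size.width,
                                   y: face.origin.x * previewLayerRect.size.height,
                                   width: face.size.width * previewLayerRect.size.height,
                                   height: face.size.height * previewLayerRect.size.width)

    // Draw rectangle facePreviewBounds on screen
}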
the Tin Man
Shravya Boggarapu
  • I set the `metadataObjectTypes` to `[AVMetadataObjectTypeFace]`. Also, `didOutputMetadataObjects` will be called after faces are found. However, how can I draw a rectangle onto the screen? – Willjay Jan 25 '17 at 06:58
  • In [iOS7-day-by-day](https://github.com/shinobicontrols/iOS7-day-by-day/blob/master/18-coreimage-features/SimleyFace/SimleyFace/SCViewController.m), it detects faces using `AVCaptureMetadataOutput` and uses `CIFaceDetector` afterwards with `AVCaptureStillImageOutput` – Willjay Jan 25 '17 at 07:01
  • My question is that even though I detect the faces through `AVCaptureMetadataOutput`, when I capture the image with `AVCapturePhotoOutput` and then want to draw the face locations with rectangles using `CIFaceDetector`, the CIFaceDetector does not work as expected. – Willjay Jan 25 '17 at 07:04
  • Use the delegate function in the protocol AVCaptureMetadataOutputObjectsDelegate to get continuous output. Use this to show faces continuously – Shravya Boggarapu Jan 25 '17 at 11:09
  • When you finally capture an image, print out the metadata in the CMSampleBuffer (use CMSampleBufferGetSampleAttachmentsArray). My workspace uses an older API, but I believe the metadata would not change due to the change in version. In any case, I will try updating it to use the iOS 10 API and then let you know – Shravya Boggarapu Jan 25 '17 at 11:17
  • I updated my question above. I currently try to draw the bounds using `AVMetadataFaceObject`, but the bounds are mislocated, any ideas? BTW, I also tried using `CMSampleBufferGetSampleAttachmentsArray` in `AVCaptureVideoDataOutputSampleBufferDelegate`, but it seems like an empty array. – Willjay Jan 26 '17 at 03:18
  • Orientation? Make sure you feed the videoOrientation when setting up camera. Also, the coordinates would be relative to the image and not the screen. Are you taking care of that? – Shravya Boggarapu Jan 27 '17 at 05:23
  • Looking at the image, it clearly looks like the orientation is wrong (It's sideways) – Shravya Boggarapu Jan 27 '17 at 05:25
  • Many thanks, it works. Can you explain why you do this trick? I'm still confused. – Willjay Jan 27 '17 at 15:41
  • The reference/origin of the screen is different from that of the video frame, that's all – Shravya Boggarapu Jan 30 '17 at 07:56
  • btw, a clearer explanation for the rectangle: y refers to columns => width, and x refers to rows => height. Size is not really the main thing; it should just be proportional to the face size – Shravya Boggarapu Mar 06 '17 at 09:47
7

To perform face detection on iOS, you can use either the CIDetector (Apple) or the Mobile Vision (Google) API.

IMO, Google Mobile Vision provides better performance.

If you are interested, here is the project you can play with. (iOS 10.2, Swift 3)


After WWDC 2017, Apple introduced Core ML in iOS 11. The Vision framework makes face detection more accurate :)

I've made a demo project comparing Vision vs. CIDetector. It also contains face landmark detection in real time.
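
For reference, a minimal Vision-based detection on a still image might look something like this (a sketch, assuming iOS 11+ and a UIImage backed by a cgImage):

import UIKit
import Vision

// Sketch: Vision-based face rectangle detection on a still image (iOS 11+).
func detectFaces(in image: UIImage, completion: @escaping ([VNFaceObservation]) -> Void) {
    guard let cgImage = image.cgImage else {
        completion([])
        return
    }
    let request = VNDetectFaceRectanglesRequest { request, _ in
        // boundingBox values are normalized (0...1) with the origin at the bottom-left.
        completion(request.results as? [VNFaceObservation] ?? [])
    }
    let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
    DispatchQueue.global(qos: .userInitiated).async {
        try? handler.perform([request])
    }
}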

Willjay
4

A bit late, but here is the solution for the coordinates problem. There is a method you can call on the preview layer to transform a metadata object to your coordinate system: transformedMetadataObject(for: metadataObject).

guard let transformedObject = previewLayer.transformedMetadataObject(for: metadataObject) else {
     continue
}
let bounds = transformedObject.bounds
showBounds(at: bounds)

Source: https://developer.apple.com/documentation/avfoundation/avcapturevideopreviewlayer/1623501-transformedmetadataobjectformeta

By the way, in case you are using (or upgrade your project to) Swift 4, the delegate method of AVCaptureMetadataOutputObjectsDelegate has changed to:

func metadataOutput(_ output: AVCaptureMetadataOutput, didOutput metadataObjects: [AVMetadataObject], from connection: AVCaptureConnection)
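
Putting it together, a minimal Swift 4 version of the delegate might look like this (a sketch; previewLayer and a showBounds(at:) helper are assumed, as in the question):

// Sketch: Swift 4 delegate converting each detected face into preview-layer coordinates.
extension CameraViewController: AVCaptureMetadataOutputObjectsDelegate {
    func metadataOutput(_ output: AVCaptureMetadataOutput,
                        didOutput metadataObjects: [AVMetadataObject],
                        from connection: AVCaptureConnection) {
        for metadataObject in metadataObjects where metadataObject.type == .face {
            // transformedMetadataObject(for:) does the coordinate conversion for us
            guard let transformed = previewLayer?.transformedMetadataObject(for: metadataObject) else { continue }
            showBounds(at: transformed.bounds)
        }
    }
}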

Kind regards

SHR
Elena
2
extension CameraViewController: AVCaptureMetadataOutputObjectsDelegate {
  func captureOutput(_ captureOutput: AVCaptureOutput!, didOutputMetadataObjects metadataObjects: [Any]!, from connection: AVCaptureConnection!) {
    if findFaceControl {
      findFaceControl = false
      // Convert each detected face into preview-layer coordinates
      let faces = metadataObjects
        .flatMap { $0 as? AVMetadataFaceObject }
        .flatMap { (face) -> CGRect? in
          guard let localizedFace = previewLayer?.transformedMetadataObject(for: face) else { return nil }
          return localizedFace.bounds
        }
      for face in faces {
        let temp = UIView(frame: face)
        temp.layer.borderColor = UIColor.white.cgColor
        temp.layer.borderWidth = 2.0
        view.addSubview(temp)
      }
    }
  }
}

Be sure to remove the views created by didOutputMetadataObjects.

Keeping track of the active face IDs is the best way to do this.

Also, when you're trying to find the location of faces for your preview layer, it is much easier to use the face metadata and transform it. I also think CIDetector is junk; the metadata output uses the hardware for face detection, making it really fast.
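
One way to clear the previous boxes each time the delegate fires is to keep them in an array (a sketch; faceViews is an assumed property on the view controller):

// Sketch: remember each face box so it can be removed on the next callback.
var faceViews = [UIView]()

func clearFaceViews() {
    faceViews.forEach { $0.removeFromSuperview() }
    faceViews.removeAll()
}

// In the delegate: call clearFaceViews() first, then for every face:
//     let temp = UIView(frame: face)
//     view.addSubview(temp)
//     faceViews.append(temp)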

jnblanchard
1
  1. Create a capture session (AVCaptureSession).
  2. For AVCaptureVideoDataOutput, create the following settings (a minimal setup for these two steps is sketched after the code below):

    output.videoSettings = [ kCVPixelBufferPixelFormatTypeKey as AnyHashable: Int(kCMPixelFormat_32BGRA) ]

3. When you receive a CMSampleBuffer, create an image:

DispatchQueue.main.async {
    let sampleImg = self.imageFromSampleBuffer(sampleBuffer: sampleBuffer)
    self.imageView.image = sampleImg
}
func imageFromSampleBuffer(sampleBuffer : CMSampleBuffer) -> UIImage
    {
        // Get a CMSampleBuffer's Core Video image buffer for the media data
        let  imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
        // Lock the base address of the pixel buffer
        CVPixelBufferLockBaseAddress(imageBuffer!, CVPixelBufferLockFlags.readOnly);


        // Get the base address of the pixel buffer
        let baseAddress = CVPixelBufferGetBaseAddress(imageBuffer!);

        // Get the number of bytes per row for the pixel buffer
        let bytesPerRow = CVPixelBufferGetBytesPerRow(imageBuffer!);
        // Get the pixel buffer width and height
        let width = CVPixelBufferGetWidth(imageBuffer!);
        let height = CVPixelBufferGetHeight(imageBuffer!);

        // Create a device-dependent RGB color space
        let colorSpace = CGColorSpaceCreateDeviceRGB();

        // Create a bitmap graphics context with the sample buffer data
        var bitmapInfo: UInt32 = CGBitmapInfo.byteOrder32Little.rawValue
        bitmapInfo |= CGImageAlphaInfo.premultipliedFirst.rawValue & CGBitmapInfo.alphaInfoMask.rawValue
        //let bitmapInfo: UInt32 = CGBitmapInfo.alphaInfoMask.rawValue
        let context = CGContext.init(data: baseAddress, width: width, height: height, bitsPerComponent: 8, bytesPerRow: bytesPerRow, space: colorSpace, bitmapInfo: bitmapInfo)
        // Create a Quartz image from the pixel data in the bitmap graphics context
        let quartzImage = context?.makeImage();
        // Unlock the pixel buffer
        CVPixelBufferUnlockBaseAddress(imageBuffer!, CVPixelBufferLockFlags.readOnly);

        // Create an image object from the Quartz image
        let image = UIImage.init(cgImage: quartzImage!);

        return (image);
    }
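
For steps 1 and 2, a minimal session setup could look like this (a sketch; the class name, queue label, and delegate conformance are assumptions for this example):

import AVFoundation
import UIKit

// Sketch: a view controller that wires up steps 1 and 2.
class FrameCaptureViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {
    let captureSession = AVCaptureSession()
    let videoOutput = AVCaptureVideoDataOutput()

    override func viewDidLoad() {
        super.viewDidLoad()
        guard let camera = AVCaptureDevice.defaultDevice(withMediaType: AVMediaTypeVideo),
            let input = try? AVCaptureDeviceInput(device: camera),
            captureSession.canAddInput(input) else { return }
        captureSession.addInput(input)

        // Step 2: request 32BGRA frames, matching the settings above
        videoOutput.videoSettings = [kCVPixelBufferPixelFormatTypeKey as AnyHashable: Int(kCMPixelFormat_32BGRA)]
        videoOutput.setSampleBufferDelegate(self, queue: DispatchQueue(label: "video.frames"))
        if captureSession.canAddOutput(videoOutput) {
            captureSession.addOutput(videoOutput)
        }
        captureSession.startRunning()
    }

    // Step 3 is then implemented in captureOutput(_:didOutputSampleBuffer:from:),
    // which receives each CMSampleBuffer.
}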
duzvik
0

By looking at your code I detected 2 things that could lead to wrong/poor face detection.

  1. One of them is the face detector feature options, where you are filtering the results with [CIDetectorSmile: true, CIDetectorEyeBlink: true]. Try setting it to nil: faceDetector?.features(in: faceImage, options: nil)
  2. Another guess I have is the resulting image orientation. I noticed you use the AVCapturePhotoOutput.jpegPhotoDataRepresentation method to generate the source image for the detection, and by default the system generates that image with a specific orientation, of type Left/LandscapeLeft, I think. So you can basically tell the face detector to take that into account by using the CIDetectorImageOrientation key.

CIDetectorImageOrientation: the value for this key is an integer NSNumber from 1..8 such as that found in kCGImagePropertyOrientation. If present, the detection will be done based on that orientation but the coordinates in the returned features will still be based on those of the image.

Try to set it like faceDetector?.features(in: faceImage, options: [CIDetectorImageOrientation: 8 /*Left, bottom*/]).
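
If you prefer to derive that number from the captured UIImage instead of hard-coding 8, a small helper with the usual UIImageOrientation-to-EXIF mapping should do (the exifOrientation name is just for this sketch):

import UIKit
import CoreImage

// Sketch: the usual UIImageOrientation -> EXIF (kCGImagePropertyOrientation) mapping.
func exifOrientation(for orientation: UIImageOrientation) -> Int {
    switch orientation {
    case .up:            return 1
    case .upMirrored:    return 2
    case .down:          return 3
    case .downMirrored:  return 4
    case .leftMirrored:  return 5
    case .right:         return 6
    case .rightMirrored: return 7
    case .left:          return 8
    }
}

// Usage:
// let options = [CIDetectorImageOrientation: exifOrientation(for: img.imageOrientation)]
// let faces = faceDetector?.features(in: faceImage, options: options)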

ricardopereira
  • I don't think that `[CIDetectorSmile: true, CIDetectorEyeBlink: true]` is a filter. It tells the detector to spend more time so that it's able to return the specified info, so it actually expands the results – Oleksii Nezhyborets Aug 03 '17 at 17:43