【Unity】カメラから人間の肌(肌色)を検出する

はじめに

カメラからUnityに最適化されたジェスチャー検出はできないかと色々と考えていたのですが、その前哨戦としてWebカメラから人間の肌の検出を行ってみました。(肌色って確か今は使っちゃダメなんですよね・・・)

f:id:hanaaaaaachiru:20200327023042g:plain

多少ノイズがはいってしまっているっぽいですが、結構なFPSを出せている気がします。

貼ってある画像はGIFで実際の画面とは結構異なるので、実際に試していただくのが一番体感できるのではないでしょうか。

といっても中身を見るとかなり簡単にできたので、今回はその手法について紹介をしていきたいと思います。

はじめに
肌色の検出方法
Unityでの実装方針
実装
- HandTracker.cs
- ExtractSkinColor.compute
さいごに

肌色の検出方法

色を表すためによくRGBを用いると思います。

RGBは赤・緑・青で色を表現しますが、色相・彩度・明度で色を表現するHSVを用いることで、ある色が肌色かどうかを簡単に判定することができます。

HSVを用いて以下の式にあてはまれば良いっぽいです。

$0 <= h <= 20$
$30 <= s <= 150$
$60 <= v <= 255$

Python,OpenCVでHSV色空間を用いて肌色領域を抽出 | YAJIROBE BLOG

ここの箇所はネットを見る限り色々な条件にしていらっしゃる方がいましたので、自分でも色々変えてみるのもありだと思います。

Unityでの実装方針

まず画像処理においてCPUのみで処理していては重すぎてFPSは出ません。

そこでGPUを計算に用いることができるComputeShaderを用います。

Webカメラから取得できるのはWebCamTexture，GPUで処理・UnityのRawImageに貼るためにTexture2D・RenderTextureといくつか種類があるところだけはやや要注意です。

f:id:hanaaaaaachiru:20200327025237p:plain

実装

HandTracker.cs

using UnityEngine;
using UnityEngine.UI;

/// <summary>
/// Captures frames from the first available webcam, runs the
/// "ExtractSkinColor" compute kernel over each new frame and shows the
/// result on a <see cref="RawImage"/>.
/// </summary>
/// <remarks>
/// All GPU/CPU buffers are allocated once (and re-allocated only when the
/// webcam resolution changes) instead of once per frame, and are released
/// in <see cref="OnDestroy"/>.
/// </remarks>
public class HandTracker : MonoBehaviour
{
    private const string KernelName = "ExtractSkinColor";

    [SerializeField] private ComputeShader _computeShader;
    [SerializeField] private RawImage _renderer;

    // Requested capture settings; the device may deliver something else.
    private int _width = 1920;
    private int _height = 1080;
    private int _fps = 30;
    private WebCamTexture _webcamTexture;

    // Reused across frames so that no Texture2D / RenderTexture / pixel
    // array is leaked on every Update.
    private Texture2D _sourceTexture;
    private RenderTexture _resultTexture;
    private Color32[] _pixelBuffer;

    // Looked up once in Start; they do not change at runtime.
    private int _kernelIndex;
    private ThreadSize _threadSize;

    /// <summary>
    /// Validates the environment, caches the kernel information and starts
    /// the webcam. Disables this component when something required is missing.
    /// </summary>
    private void Start()
    {
        if (!SystemInfo.supportsComputeShaders)
        {
            Debug.LogError("Comppute Shader is not support.");
            enabled = false;
            return;
        }

        // UnityEngine.Object overloads ==, so "== null" also catches destroyed objects.
        if (_computeShader == null || _renderer == null)
        {
            Debug.LogError("HandTracker: ComputeShader and RawImage must be assigned in the inspector.");
            enabled = false;
            return;
        }

        WebCamDevice[] devices = WebCamTexture.devices;
        if (devices.Length == 0)
        {
            Debug.LogError("HandTracker: no webcam device was found.");
            enabled = false;
            return;
        }

        _kernelIndex = _computeShader.FindKernel(KernelName);
        _computeShader.GetKernelThreadGroupSizes(_kernelIndex, out _threadSize.x, out _threadSize.y, out _threadSize.z);

        _webcamTexture = new WebCamTexture(devices[0].name, _width, _height, _fps);
        _webcamTexture.Play();
    }

    /// <summary>
    /// Processes the webcam image, but only on frames where the camera
    /// actually delivered new data.
    /// </summary>
    private void Update()
    {
        if (_webcamTexture == null || !_webcamTexture.didUpdateThisFrame)
        {
            return;
        }

        ExtractSkinColor(ConvertTexture2D(_webcamTexture));
    }

    /// <summary>
    /// Stops the webcam and frees every texture this component created.
    /// </summary>
    private void OnDestroy()
    {
        if (_webcamTexture != null)
        {
            _webcamTexture.Stop();
            Destroy(_webcamTexture);
            _webcamTexture = null;
        }

        if (_sourceTexture != null)
        {
            Destroy(_sourceTexture);
            _sourceTexture = null;
        }

        ReleaseResultTexture();
    }

    /// <summary>
    /// Copies the current webcam frame into a reusable <see cref="Texture2D"/>.
    /// </summary>
    /// <param name="webcamTexture">Playing webcam texture to read from.</param>
    /// <returns>
    /// The shared texture holding the frame. It is owned by this component
    /// and overwritten on the next call.
    /// </returns>
    private Texture2D ConvertTexture2D(WebCamTexture webcamTexture)
    {
        int width = webcamTexture.width;
        int height = webcamTexture.height;

        // (Re)allocate only when the size changes, e.g. once the camera
        // reports its real resolution.
        if (_sourceTexture == null || _sourceTexture.width != width || _sourceTexture.height != height)
        {
            if (_sourceTexture != null)
            {
                Destroy(_sourceTexture);
            }

            // The kernel only reads mip 0, so no mip chain is needed.
            _sourceTexture = new Texture2D(width, height, TextureFormat.RGBA32, false);
            _pixelBuffer = new Color32[width * height];
        }

        // Passing the buffer avoids allocating a new pixel array every frame.
        webcamTexture.GetPixels32(_pixelBuffer);
        _sourceTexture.SetPixels32(_pixelBuffer);
        _sourceTexture.Apply(false);
        return _sourceTexture;
    }

    /// <summary>
    /// Runs the skin-colour kernel on <paramref name="tex"/> and displays
    /// the output on the configured <see cref="RawImage"/>.
    /// </summary>
    /// <param name="tex">Input frame; the output has the same size.</param>
    private void ExtractSkinColor(Texture2D tex)
    {
        if (_resultTexture == null || _resultTexture.width != tex.width || _resultTexture.height != tex.height)
        {
            ReleaseResultTexture();

            _resultTexture = new RenderTexture(tex.width, tex.height, 0, RenderTextureFormat.ARGB32);
            // Required so the compute kernel can write to it as a RWTexture2D.
            _resultTexture.enableRandomWrite = true;
            _resultTexture.Create();

            _renderer.texture = _resultTexture;
        }

        _computeShader.SetTexture(_kernelIndex, "Texture", tex);
        _computeShader.SetTexture(_kernelIndex, "Result", _resultTexture);

        // Round up so a partial group covers the remainder when the size is
        // not a multiple of the group size (1080 / 16 would otherwise drop
        // the last 8 rows).
        // NOTE(review): threads past the texture edge then run too; confirm
        // the kernel or the target graphics API ignores out-of-range writes.
        int groupSizeX = (int)_threadSize.x;
        int groupSizeY = (int)_threadSize.y;
        int groupsX = (tex.width + groupSizeX - 1) / groupSizeX;
        int groupsY = (tex.height + groupSizeY - 1) / groupSizeY;

        // The image is 2D, so a single group along Z is enough. (The group
        // count is unrelated to the kernel's per-group Z thread count.)
        _computeShader.Dispatch(_kernelIndex, groupsX, groupsY, 1);
    }

    /// <summary>
    /// Releases the GPU memory of the output texture, if one exists.
    /// </summary>
    private void ReleaseResultTexture()
    {
        if (_resultTexture == null)
        {
            return;
        }

        if (_renderer != null && _renderer.texture == _resultTexture)
        {
            _renderer.texture = null;
        }

        _resultTexture.Release();
        Destroy(_resultTexture);
        _resultTexture = null;
    }

    /// <summary>
    /// Number of threads per thread group of a compute kernel, per axis.
    /// </summary>
    struct ThreadSize
    {
        public uint x;
        public uint y;
        public uint z;

        public ThreadSize(uint x, uint y, uint z)
        {
            this.x = x;
            this.y = y;
            this.z = z;
        }
    }
}

ExtractSkinColor.compute

#pragma kernel ExtractSkinColor

RWTexture2D<float4> Result;
Texture2D<float4> Texture;

// Skin-colour mask kernel.
// For every texel of Texture: convert RGB to a hue / chroma / value triple
// and copy the texel to Result when all three lie inside the thresholds
// below; otherwise write opaque black.
[numthreads(16,16,1)]
void ExtractSkinColor (uint3 id : SV_DispatchThreadID)
{
    // Threads of a partially filled group can fall outside the image;
    // they must not touch the textures.
    uint width, height;
    Result.GetDimensions(width, height);
    if(id.x >= width || id.y >= height){
        return;
    }

    // Fetch the texel once instead of once per channel.
    float4 pixel = Texture[id.xy];

    //----------------
    //-  RGB -> HSV  -
    //----------------
    // Channels are scaled from [0,1] to [0,255] to match the thresholds.
    float r = pixel.x * 255;
    float g = pixel.y * 255;
    float b = pixel.z * 255;
    float maxValue = max(r, max(g, b));
    float minValue = min(r, min(g, b));
    float delta = maxValue - minValue;

    // Initialised up front so h is defined on every path (the chain below
    // has no final else, which the compiler reports as potentially
    // uninitialised). 0 is also the value used for grey pixels.
    float h = 0;

    if(maxValue == minValue){
        h = 0;
    } else if(minValue == b){
        h = 60 * (g - r) / delta + 60;
    } else if(minValue == r){
        h = 60 * (b - g) / delta + 180;
    } else if(minValue == g){
        h = 60 * (r - b) / delta + 300;
    }

    // NOTE(review): h is in degrees (0..360) and s is the raw max-min
    // difference, not that difference divided by v. Thresholds taken from
    // tools that scale HSV differently (e.g. OpenCV's 8-bit HSV) may need
    // adjusting - confirm.
    float s = delta;
    float v = maxValue;

    //------------------------
    //-  Extract Skin Color  -
    //------------------------
    float3 hsvMin = float3(0, 30, 60);
    float3 hsvMax = float3(20, 150, 255);

    // Reject the pixel when any component is outside [hsvMin, hsvMax].
    if(hsvMin.x > h || hsvMin.y > s || hsvMin.z > v || hsvMax.x < h || hsvMax.y < s || hsvMax.z < v){
        Result[id.xy] = float4(0, 0, 0, 1);
        return;
    }

    // Keep the original colour, forcing full opacity.
    Result[id.xy] = float4(pixel.x, pixel.y, pixel.z, 1);
}