openVinoを使った話 | クラウド・分散システム研究室

# やったこと

- WebCamをUnityで表示
- コントローラーをUnityで認識
- Pythonで顔認識から感情推定を行う
- Python for Unityの導入

## WebCamから映像をUnityで表示

Scriptsを作成する．URL(https://note.com/npaka/n/nbaa0e466b0de)を参考にした．

Scriptsを作成するには，C#Scriptを押してそこにコードを追加する．

Hierarchyウィンドウに「RawImage」を追加する．RawImageに，つくったScriptsをドラッグ&ドロップすることで，WebCamの映像がUnity上に表示される．

## コントローラーがUnityで認識しているか確認

Unityがデバイスを認識しているか，あるいはデバイスのボタンの変数名を確認したい時は，Package ManagerからInput SystemをInstallする．AnalysisのInput Debuggerを選択することでUnityが認識しているデバイスの一覧が表示される．

確認したいデバイスをクリックすることで，そのデバイスの詳細が表示される．

HORI Racing wheelというデバイスのペダルの変数名をUnityで確認したときは以下のようになった．値 x は 0 < x < 1 の範囲をとる．

|デバイス|変数名|
|-|-|
|右ペダル|right trigger|
|左ペダル|left trigger|

## Python for Unityを導入

公式ドキュメントを参照すること．

Unity 2021.2~　の場合

Package Manager →　+　→　Add package by name　→　com.unity.scripting.python

Unity 2022.1~　の場合

https://docs.unity3d.com/Packages/com.unity.scripting.python@6.0/manual/installation.html

## openVINOを使った感情推定

### 環境構築と学習済みモデルインストール

モデルをインストールするためにopenVINOをインストールする．https://docs.openvino.ai/nightly/omz_tools_downloader.html

`python -m pip install openvino-dev`

次に学習済みモデルをインストールする．

- 顔認識

`omz_downloader --name face-detection-0202`

https://docs.openvino.ai/nightly/omz_models_model_face_detection_0202.html#doxid-omz-models-model-face-detection-0202

- 感情推定

`omz_downloader --name emotions-recognition-retail-0003`

https://docs.openvino.ai/nightly/omz_models_model_emotions_recognition_retail_0003.html#doxid-omz-models-model-emotions-recognition-retail-0003

### プログラムの作成

公式ドキュメントのチュートリアルを参考に作成した．

https://docs.openvino.ai/latest/notebooks/002-openvino-api-with-output.html

- 使うモジュール
  - cv2: `pip install opencv-python`
  - numpy
  - openvino: `pip install openvino-dev`

```python:facecheck.py

import sys

import cv2 as cv

import numpy as np

from openvino.runtime import Core

class face_detect:
    """Find the most confident face in a frame and classify its emotion.

    Two OpenVINO models are loaded and compiled for the CPU device:
    ``face-detection-0202`` locates faces and
    ``emotions-recognition-retail-0003`` classifies the cropped face.
    """

    # (width, height) each network's input image is resized to.
    FACE_INPUT_SIZE = (384, 384)
    EMOTION_INPUT_SIZE = (64, 64)
    # A detection must score strictly above this to be treated as a face.
    CONFIDENCE_THRESHOLD = 0.7

    def __init__(self, path):
        """Load and compile both models.

        Args:
            path: Directory that contains the ``intel/`` model folder.
        """
        ie = Core()

        # Load the models.
        classfication_model_xml = (
            f"{path}/intel/face-detection-0202/FP16/face-detection-0202.xml"
        )
        emotion_classfication_model_xml = (
            f"{path}/intel/emotions-recognition-retail-0003/FP16/"
            "emotions-recognition-retail-0003.xml"
        )

        model = ie.read_model(model=classfication_model_xml)
        self.compiled_model = ie.compile_model(model=model, device_name="CPU")
        self.output_layer = self.compiled_model.output(0)

        emotion_model = ie.read_model(model=emotion_classfication_model_xml)
        self.emotion_compiled_model = ie.compile_model(
            model=emotion_model, device_name="CPU"
        )
        self.emotion_output_layer = self.emotion_compiled_model.output(0)

        # Label for each index returned by face_detect().
        self.list_emotion = ['neutral', 'happy', 'sad', 'surprise', 'anger']

    def face_detect(self, frame):
        """Return the emotion index of the most confident face in ``frame``.

        Args:
            frame: Image array indexed as (height, width, channel), e.g. a
                frame read from ``cv.VideoCapture``. May be ``None``.

        Returns:
            An ``int`` index into ``self.list_emotion``, or ``-1`` when the
            frame is missing/empty, no detection exceeds
            ``CONFIDENCE_THRESHOLD``, or the face crop cannot be used.
        """
        # A failed capture yields no frame; report "no face" instead of
        # crashing inside cv.resize.
        if frame is None or frame.size == 0:
            return -1

        frame_height, frame_width = frame.shape[:2]

        # Resize to the detector's input size and reorder HWC -> NCHW.
        img = cv.resize(frame, self.FACE_INPUT_SIZE)
        img = np.expand_dims(img.transpose((2, 0, 1)), axis=0)

        # Run face detection; keep one row per candidate detection.
        result = self.compiled_model([img])[self.output_layer]
        result = result.reshape(-1, result.shape[-1])

        # Column 2 holds the confidence; pick the best-scoring row.
        detection = result[np.argmax(result[:, 2])]
        if float(detection[2]) <= self.CONFIDENCE_THRESHOLD:
            return -1

        # Columns 3..6 are box corners scaled by the frame size below.
        # Clamp them to the frame: a negative start index would otherwise
        # make the slice wrap around from the far edge.
        xmin = max(0, int(detection[3] * frame_width))
        ymin = max(0, int(detection[4] * frame_height))
        xmax = min(frame_width, int(detection[5] * frame_width))
        ymax = min(frame_height, int(detection[6] * frame_height))

        # Crop the detected face out of the original frame.
        frame_face = frame[ymin:ymax, xmin:xmax]
        if frame_face.size == 0:
            return -1

        # Resize the crop to the emotion model's input size.
        try:
            img = cv.resize(frame_face, self.EMOTION_INPUT_SIZE)
        except cv.error as e:
            print(e)
            # -1 rather than 0: index 0 means 'neutral'.
            return -1
        img = np.expand_dims(img.transpose((2, 0, 1)), axis=0)

        # Classify the emotion and return the best-scoring class index.
        emotion_result = self.emotion_compiled_model([img])[
            self.emotion_output_layer
        ]
        emotion_result = np.squeeze(emotion_result)
        return int(np.argmax(emotion_result))

def main():
    """Print the emotion index for each webcam frame.

    The loop ends when the camera stops delivering frames or when
    ``cv.waitKey`` reports a key press.
    """
    # Model root comes from the first CLI argument.
    # NOTE(review): the "." fallback assumes the models were downloaded
    # into the current directory (./intel/...) - confirm.
    model_root = sys.argv[1] if len(sys.argv) > 1 else "."
    detector = face_detect(model_root)

    cap = cv.VideoCapture(0)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered; stop instead of passing an
                # invalid frame to the detector.
                break

            print(detector.face_detect(frame))

            if cv.waitKey(1) != -1:
                break
    finally:
        # Release the camera even if detection raised.
        cap.release()
        cv.destroyAllWindows()


if __name__ == "__main__":
    main()

```

## WEBCamを使ったface_detect

```python:face_detect.py

import cv2 as cv

import numpy as np

import sys

from openvino.runtime import Core

# Webcam demo: detect the most confident face in each frame, classify its
# emotion and draw the label on the displayed image.
#
# Usage: python face_detect.py FACE_MODEL_DIR EMOTION_MODEL_DIR
# Each argument is the directory containing that model's ``intel/`` folder.
if len(sys.argv) < 3:
    sys.exit("usage: python face_detect.py FACE_MODEL_DIR EMOTION_MODEL_DIR")

ie = Core()

# Load the models.
classfication_model_xml = (
    f"{sys.argv[1]}/intel/face-detection-0202/FP16/face-detection-0202.xml"
)
emotion_classfication_model_xml = (
    f"{sys.argv[2]}/intel/emotions-recognition-retail-0003/FP16/"
    "emotions-recognition-retail-0003.xml"
)

model = ie.read_model(model=classfication_model_xml)
compiled_model = ie.compile_model(model=model, device_name="CPU")
output_layer = compiled_model.output(0)

emotion_model = ie.read_model(model=emotion_classfication_model_xml)
emotion_compiled_model = ie.compile_model(model=emotion_model, device_name="CPU")
emotion_output_layer = emotion_compiled_model.output(0)

# Label for each class index of the emotion model's output.
list_emotion = ['neutral', 'happy', 'sad', 'surprise', 'anger']

# Select the webcam.
cap = cv.VideoCapture(0)

try:
    while cap.isOpened():
        # Read one frame; stop when the camera delivers nothing.
        ret, frame = cap.read()
        if not ret:
            break

        frame_height, frame_width = frame.shape[:2]

        # Resize to the detector's input size and reorder HWC -> NCHW.
        img = cv.resize(frame, (384, 384))
        img = np.expand_dims(img.transpose((2, 0, 1)), axis=0)

        # Run face detection; keep one row per candidate detection.
        result = compiled_model([img])[output_layer]
        result = result.reshape(-1, result.shape[-1])

        # Column 2 holds the confidence; use the best-scoring row rather
        # than assuming the first row is the best one.
        detection = result[np.argmax(result[:, 2])]
        confidence = float(detection[2])

        if confidence > 0.5:
            # Columns 3..6 are box corners scaled by the frame size.
            # Clamp to the frame so the slice below cannot wrap around.
            xmin = max(0, int(detection[3] * frame_width))
            ymin = max(0, int(detection[4] * frame_height))
            xmax = min(frame_width, int(detection[5] * frame_width))
            ymax = min(frame_height, int(detection[6] * frame_height))

            # Crop the detected face out of the frame.
            frame_face = frame[ymin:ymax, xmin:xmax]

            # An empty crop cannot be resized; skip labelling this frame.
            if frame_face.size > 0:
                # Resize the crop to the emotion model's input size.
                img = cv.resize(frame_face, (64, 64))
                img = np.expand_dims(img.transpose((2, 0, 1)), axis=0)

                # Classify the emotion.
                emotion_result = emotion_compiled_model([img])[emotion_output_layer]
                emotion_result = np.squeeze(emotion_result)

                # Show the result.
                index_max = int(np.argmax(emotion_result))
                print(emotion_result)
                cv.putText(
                    frame,
                    list_emotion[index_max],
                    (xmin, ymin - 10),
                    cv.FONT_HERSHEY_SIMPLEX,
                    1,
                    (xmin, ymin, 0),
                    2,
                )

        cv.imshow('image', frame)

        # Any key press ends the loop.
        if cv.waitKey(1) != -1:
            break
finally:
    # Release the camera and close the window even if a frame failed.
    cap.release()
    cv.destroyAllWindows()

```

コメントを残す コメントをキャンセル

コメントを残すコメントをキャンセル