markcarlo25
Journeyman
- Joined
- Aug 7, 2022
- Posts
- 7
- Reaction
- 0
- Points
- 22
Please help me make a GUI for this code with two buttons: one to start the recording and one to exit. The GUI must replace the following key-handling code:
# Read the pending key press; masking with 0xFF keeps only the low byte of waitKey's result.
pressedKey = cv2.waitKey(1) & 0xFF
if pressedKey == 32: # Space bar (key code 32, not "r"): start a new recording
recording = True
frame_count = 0
frames = []
print("Start signing")
color = green_color
predicted_word = "Start signing"
accuracy_text = ""
elif pressedKey == ord("q"): # "q": break out of the enclosing loop
break
HERE IS THE FULL CODE
import pyttsx3
import time
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import os
from matplotlib import pyplot as plt
import time
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import mediapipe as mp
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard
# Labels the classifier can output; the main loop indexes this array with
# the argmax of the model's softmax output.
actions = np.array(['kumusta', 'salamat', 'mahal kita'])

# Stacked-LSTM classifier; the declared input is 30 time steps of 1662 values.
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(30,1662)),
    LSTM(128, return_sequences=True, activation='relu'),
    LSTM(64, return_sequences=False, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(len(actions), activation='softmax'),
])
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])
# Load the stored weights into the freshly built network.
model.load_weights('test.h5')

mp_holistic = mp.solutions.holistic  # Holistic model
mp_drawing = mp.solutions.drawing_utils  # Drawing utilities

# One bar colour per entry in `actions` (consumed by prob_viz).
colors = [(245,117,16), (117,245,16), (16,117,245)]
def prob_viz(res, actions, input_frame, colors):
    """Draw one horizontal probability bar per action on a copy of the frame.

    Args:
        res: Iterable of per-action probabilities; each value is multiplied
            by 100 to obtain the bar width in pixels.
        actions: Sequence of labels, indexed in step with ``res``.
        input_frame: Image array to annotate. It is copied, never modified.
        colors: Sequence of colour tuples handed to ``cv2.rectangle``. The
            palette is reused cyclically when ``res`` is longer than it.

    Returns:
        The annotated copy of ``input_frame``.
    """
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        top = 60 + num * 40
        # Cycle through the palette so extra actions do not raise IndexError.
        bar_color = colors[num % len(colors)]
        cv2.rectangle(output_frame, (0, top), (int(prob * 100), top + 30), bar_color, -1)
        cv2.putText(output_frame, actions[num], (0, top + 25),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
    return output_frame
def extract_keypoints(results):
    """Flatten the landmarks in ``results`` into a single 1-D feature vector.

    The vector is the concatenation, in this order, of:
    pose (33 x [x, y, z, visibility]), face (468 x [x, y, z]),
    left hand (21 x [x, y, z]) and right hand (21 x [x, y, z]),
    i.e. 1662 values. Any landmark group that is missing (falsy) on
    ``results`` is replaced by zeros of the same length, so the output
    length does not depend on what was detected.

    Args:
        results: Object exposing ``pose_landmarks``, ``face_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``; each is
            either falsy or has a ``landmark`` iterable of points.

    Returns:
        A 1-D ``numpy.ndarray`` of keypoint values.
    """
    def _flatten(landmark_list, count, fields):
        # Zero-fill when the group was not detected so the layout stays fixed.
        if not landmark_list:
            return np.zeros(count * len(fields))
        return np.array(
            [[getattr(point, field) for field in fields]
             for point in landmark_list.landmark]
        ).flatten()

    xyz = ('x', 'y', 'z')
    pose = _flatten(results.pose_landmarks, 33, xyz + ('visibility',))
    face = _flatten(results.face_landmarks, 468, xyz)
    lh = _flatten(results.left_hand_landmarks, 21, xyz)
    rh = _flatten(results.right_hand_landmarks, 21, xyz)
    return np.concatenate([pose, face, lh, rh])
def mediapipe_detection(image, model):
    """Run ``model.process`` on an RGB version of ``image``.

    Args:
        image: Frame in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2RGB`` before processing).
        model: Object exposing ``process(image)``; the capture loop passes
            a MediaPipe Holistic instance.

    Returns:
        A ``(image, results)`` tuple: the frame converted back to BGR, and
        whatever ``model.process`` returned.
    """
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # BGR -> RGB
    # Mark the frame read-only while the model processes it.
    # NOTE(review): presumably a by-reference performance hint for MediaPipe — confirm.
    rgb.flags.writeable = False
    results = model.process(rgb)
    rgb.flags.writeable = True
    bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)  # RGB -> BGR for OpenCV drawing/display
    return bgr, results
def draw_landmarks(image, results):
    """Draw face, pose and both hand landmark sets onto ``image`` in place.

    Uses the default drawing style of ``mp_drawing.draw_landmarks``.

    Args:
        image: Frame to draw on.
        results: Object exposing ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.
    """
    landmark_groups = (
        (results.face_landmarks, mp_holistic.FACEMESH_CONTOURS),      # face
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS),       # pose
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS),  # left hand
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS), # right hand
    )
    for landmarks, connections in landmark_groups:
        mp_drawing.draw_landmarks(image, landmarks, connections)
def draw_styled_landmarks(image, results):
    """Draw face, pose and hand landmarks onto ``image`` with custom colours.

    Each landmark group gets its own pair of ``DrawingSpec`` objects: the
    first styles the landmark points, the second styles the connections.

    Args:
        image: Frame to draw on (modified in place).
        results: Object exposing ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.
    """
    spec = mp_drawing.DrawingSpec
    styled_groups = (
        # Face connections
        (results.face_landmarks, mp_holistic.FACEMESH_CONTOURS,
         spec(color=(80, 110, 10), thickness=1, circle_radius=1),
         # Was 256, which is outside the 0-255 colour channel range.
         spec(color=(80, 255, 121), thickness=1, circle_radius=1)),
        # Pose connections
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         spec(color=(80, 22, 10), thickness=2, circle_radius=4),
         spec(color=(80, 44, 121), thickness=2, circle_radius=2)),
        # Left hand connections
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(121, 22, 76), thickness=2, circle_radius=4),
         spec(color=(121, 44, 250), thickness=2, circle_radius=2)),
        # Right hand connections
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(245, 117, 66), thickness=2, circle_radius=4),
         spec(color=(245, 66, 230), thickness=2, circle_radius=2)),
    )
    for landmarks, connections, landmark_spec, connection_spec in styled_groups:
        mp_drawing.draw_landmarks(image, landmarks, connections,
                                  landmark_spec, connection_spec)
# Initialize TTS engine
engine = pyttsx3.init()
import time
cap = cv2.VideoCapture(0)

# Number of frames collected per prediction; matches the model's
# input_shape of (30, 1662).
SEQUENCE_LENGTH = 30

frame_count = 0
recording = False
frames = []  # keypoint vectors of the sequence currently being recorded
predicted_word = ""
accuracy_text = ""
green_color = (21, 209, 0)
color = green_color
word_color = (255, 179, 0)
prev_frame_time = 0
new_frame_time = 0

try:
    with mp_holistic.Holistic(
        min_detection_confidence=0.5, min_tracking_confidence=0.5
    ) as holistic:
        while cap.isOpened():
            # Read feed
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered; stop instead of passing None to OpenCV.
                break

            # Make detections
            image, results = mediapipe_detection(frame, holistic)

            # Draw landmarks
            draw_styled_landmarks(image, results)

            # Prediction logic
            keypoints = extract_keypoints(results)

            pressedKey = cv2.waitKey(1) & 0xFF
            if pressedKey == 32:  # Space bar (key code 32): start recording
                recording = True
                frame_count = 0
                frames = []
                print("Start signing")
                color = green_color
                predicted_word = "Start signing"
                accuracy_text = ""
            elif pressedKey == ord("q"):  # "q": quit
                break

            if frame_count >= SEQUENCE_LENGTH:
                # A full sequence has been collected: classify it.
                frame_count = 0
                recording = False
                res = model.predict(np.expand_dims(frames, axis=0))[0]
                best = int(np.argmax(res))
                predicted_word = str(actions[best])
                accuracy = res[best]
                print(predicted_word, accuracy)
                color = word_color
                accuracy_text = "{:.0%}".format(accuracy)
                # Speak the predicted word. runAndWait() blocks, so the
                # video feed pauses until speech has finished.
                engine.say(predicted_word)
                engine.runAndWait()
                frames = []

            if recording:
                frame_count += 1
                frames.append(keypoints)

            # FPS from the time elapsed since the previous frame; guard
            # against a zero interval on coarse clocks.
            new_frame_time = time.time()
            elapsed = new_frame_time - prev_frame_time
            fps = str(int(1 / elapsed)) if elapsed > 0 else "0"
            prev_frame_time = new_frame_time
            fps_text = "FPS: " + fps

            # Semi-transparent black panel behind the status text.
            overlay = image.copy()
            cv2.rectangle(overlay, (0, 0), (225, 80), (0, 0, 0), -1)
            image = cv2.addWeighted(overlay, 0.5, image, 0.5, 1.0)
            cv2.putText(image, fps_text, (5, 15),
                        cv2.FONT_HERSHEY_DUPLEX, 0.5, green_color)
            cv2.putText(image, predicted_word, (5, 50),
                        cv2.FONT_HERSHEY_DUPLEX, 1, color)
            cv2.putText(image, accuracy_text, (5, 70),
                        cv2.FONT_HERSHEY_DUPLEX, 0.5, color)
            cv2.imshow('Dynamic FSL', image)
finally:
    # Always free the camera and close the window, even after an error.
    cap.release()
    cv2.destroyAllWindows()
# Read the pending key press; masking with 0xFF keeps only the low byte of waitKey's result.
pressedKey = cv2.waitKey(1) & 0xFF
if pressedKey == 32: # Space bar (key code 32, not "r"): start a new recording
recording = True
frame_count = 0
frames = []
print("Start signing")
color = green_color
predicted_word = "Start signing"
accuracy_text = ""
elif pressedKey == ord("q"): # "q": break out of the enclosing loop
break
HERE IS THE FULL CODE
import pyttsx3
import time
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import os
from matplotlib import pyplot as plt
import time
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import mediapipe as mp
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard
# Labels the classifier can output; the main loop indexes this array with
# the argmax of the model's softmax output.
actions = np.array(['kumusta', 'salamat', 'mahal kita'])

# Stacked-LSTM classifier; the declared input is 30 time steps of 1662 values.
model = Sequential([
    LSTM(64, return_sequences=True, activation='relu', input_shape=(30,1662)),
    LSTM(128, return_sequences=True, activation='relu'),
    LSTM(64, return_sequences=False, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(len(actions), activation='softmax'),
])
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])
# Load the stored weights into the freshly built network.
model.load_weights('test.h5')

mp_holistic = mp.solutions.holistic  # Holistic model
mp_drawing = mp.solutions.drawing_utils  # Drawing utilities

# One bar colour per entry in `actions` (consumed by prob_viz).
colors = [(245,117,16), (117,245,16), (16,117,245)]
def prob_viz(res, actions, input_frame, colors):
    """Draw one horizontal probability bar per action on a copy of the frame.

    Args:
        res: Iterable of per-action probabilities; each value is multiplied
            by 100 to obtain the bar width in pixels.
        actions: Sequence of labels, indexed in step with ``res``.
        input_frame: Image array to annotate. It is copied, never modified.
        colors: Sequence of colour tuples handed to ``cv2.rectangle``. The
            palette is reused cyclically when ``res`` is longer than it.

    Returns:
        The annotated copy of ``input_frame``.
    """
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        top = 60 + num * 40
        # Cycle through the palette so extra actions do not raise IndexError.
        bar_color = colors[num % len(colors)]
        cv2.rectangle(output_frame, (0, top), (int(prob * 100), top + 30), bar_color, -1)
        cv2.putText(output_frame, actions[num], (0, top + 25),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
    return output_frame
def extract_keypoints(results):
    """Flatten the landmarks in ``results`` into a single 1-D feature vector.

    The vector is the concatenation, in this order, of:
    pose (33 x [x, y, z, visibility]), face (468 x [x, y, z]),
    left hand (21 x [x, y, z]) and right hand (21 x [x, y, z]),
    i.e. 1662 values. Any landmark group that is missing (falsy) on
    ``results`` is replaced by zeros of the same length, so the output
    length does not depend on what was detected.

    Args:
        results: Object exposing ``pose_landmarks``, ``face_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``; each is
            either falsy or has a ``landmark`` iterable of points.

    Returns:
        A 1-D ``numpy.ndarray`` of keypoint values.
    """
    def _flatten(landmark_list, count, fields):
        # Zero-fill when the group was not detected so the layout stays fixed.
        if not landmark_list:
            return np.zeros(count * len(fields))
        return np.array(
            [[getattr(point, field) for field in fields]
             for point in landmark_list.landmark]
        ).flatten()

    xyz = ('x', 'y', 'z')
    pose = _flatten(results.pose_landmarks, 33, xyz + ('visibility',))
    face = _flatten(results.face_landmarks, 468, xyz)
    lh = _flatten(results.left_hand_landmarks, 21, xyz)
    rh = _flatten(results.right_hand_landmarks, 21, xyz)
    return np.concatenate([pose, face, lh, rh])
def mediapipe_detection(image, model):
    """Run ``model.process`` on an RGB version of ``image``.

    Args:
        image: Frame in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2RGB`` before processing).
        model: Object exposing ``process(image)``; the capture loop passes
            a MediaPipe Holistic instance.

    Returns:
        A ``(image, results)`` tuple: the frame converted back to BGR, and
        whatever ``model.process`` returned.
    """
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # BGR -> RGB
    # Mark the frame read-only while the model processes it.
    # NOTE(review): presumably a by-reference performance hint for MediaPipe — confirm.
    rgb.flags.writeable = False
    results = model.process(rgb)
    rgb.flags.writeable = True
    bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)  # RGB -> BGR for OpenCV drawing/display
    return bgr, results
def draw_landmarks(image, results):
    """Draw face, pose and both hand landmark sets onto ``image`` in place.

    Uses the default drawing style of ``mp_drawing.draw_landmarks``.

    Args:
        image: Frame to draw on.
        results: Object exposing ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.
    """
    landmark_groups = (
        (results.face_landmarks, mp_holistic.FACEMESH_CONTOURS),      # face
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS),       # pose
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS),  # left hand
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS), # right hand
    )
    for landmarks, connections in landmark_groups:
        mp_drawing.draw_landmarks(image, landmarks, connections)
def draw_styled_landmarks(image, results):
    """Draw face, pose and hand landmarks onto ``image`` with custom colours.

    Each landmark group gets its own pair of ``DrawingSpec`` objects: the
    first styles the landmark points, the second styles the connections.

    Args:
        image: Frame to draw on (modified in place).
        results: Object exposing ``face_landmarks``, ``pose_landmarks``,
            ``left_hand_landmarks`` and ``right_hand_landmarks``.
    """
    spec = mp_drawing.DrawingSpec
    styled_groups = (
        # Face connections
        (results.face_landmarks, mp_holistic.FACEMESH_CONTOURS,
         spec(color=(80, 110, 10), thickness=1, circle_radius=1),
         # Was 256, which is outside the 0-255 colour channel range.
         spec(color=(80, 255, 121), thickness=1, circle_radius=1)),
        # Pose connections
        (results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
         spec(color=(80, 22, 10), thickness=2, circle_radius=4),
         spec(color=(80, 44, 121), thickness=2, circle_radius=2)),
        # Left hand connections
        (results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(121, 22, 76), thickness=2, circle_radius=4),
         spec(color=(121, 44, 250), thickness=2, circle_radius=2)),
        # Right hand connections
        (results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
         spec(color=(245, 117, 66), thickness=2, circle_radius=4),
         spec(color=(245, 66, 230), thickness=2, circle_radius=2)),
    )
    for landmarks, connections, landmark_spec, connection_spec in styled_groups:
        mp_drawing.draw_landmarks(image, landmarks, connections,
                                  landmark_spec, connection_spec)
# Initialize TTS engine
engine = pyttsx3.init()
import time
cap = cv2.VideoCapture(0)

# Number of frames collected per prediction; matches the model's
# input_shape of (30, 1662).
SEQUENCE_LENGTH = 30

frame_count = 0
recording = False
frames = []  # keypoint vectors of the sequence currently being recorded
predicted_word = ""
accuracy_text = ""
green_color = (21, 209, 0)
color = green_color
word_color = (255, 179, 0)
prev_frame_time = 0
new_frame_time = 0

try:
    with mp_holistic.Holistic(
        min_detection_confidence=0.5, min_tracking_confidence=0.5
    ) as holistic:
        while cap.isOpened():
            # Read feed
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered; stop instead of passing None to OpenCV.
                break

            # Make detections
            image, results = mediapipe_detection(frame, holistic)

            # Draw landmarks
            draw_styled_landmarks(image, results)

            # Prediction logic
            keypoints = extract_keypoints(results)

            pressedKey = cv2.waitKey(1) & 0xFF
            if pressedKey == 32:  # Space bar (key code 32): start recording
                recording = True
                frame_count = 0
                frames = []
                print("Start signing")
                color = green_color
                predicted_word = "Start signing"
                accuracy_text = ""
            elif pressedKey == ord("q"):  # "q": quit
                break

            if frame_count >= SEQUENCE_LENGTH:
                # A full sequence has been collected: classify it.
                frame_count = 0
                recording = False
                res = model.predict(np.expand_dims(frames, axis=0))[0]
                best = int(np.argmax(res))
                predicted_word = str(actions[best])
                accuracy = res[best]
                print(predicted_word, accuracy)
                color = word_color
                accuracy_text = "{:.0%}".format(accuracy)
                # Speak the predicted word. runAndWait() blocks, so the
                # video feed pauses until speech has finished.
                engine.say(predicted_word)
                engine.runAndWait()
                frames = []

            if recording:
                frame_count += 1
                frames.append(keypoints)

            # FPS from the time elapsed since the previous frame; guard
            # against a zero interval on coarse clocks.
            new_frame_time = time.time()
            elapsed = new_frame_time - prev_frame_time
            fps = str(int(1 / elapsed)) if elapsed > 0 else "0"
            prev_frame_time = new_frame_time
            fps_text = "FPS: " + fps

            # Semi-transparent black panel behind the status text.
            overlay = image.copy()
            cv2.rectangle(overlay, (0, 0), (225, 80), (0, 0, 0), -1)
            image = cv2.addWeighted(overlay, 0.5, image, 0.5, 1.0)
            cv2.putText(image, fps_text, (5, 15),
                        cv2.FONT_HERSHEY_DUPLEX, 0.5, green_color)
            cv2.putText(image, predicted_word, (5, 50),
                        cv2.FONT_HERSHEY_DUPLEX, 1, color)
            cv2.putText(image, accuracy_text, (5, 70),
                        cv2.FONT_HERSHEY_DUPLEX, 0.5, color)
            cv2.imshow('Dynamic FSL', image)
finally:
    # Always free the camera and close the window, even after an error.
    cap.release()
    cv2.destroyAllWindows()