
Help make a GUI for this code

markcarlo25

Please help me make a GUI for this code with two buttons: one to start recording and one to exit. The GUI must replace this keyboard-handling code:
```
pressedKey = cv2.waitKey(1) & 0xFF
if pressedKey == 32:  # Start recording when the spacebar is pressed
    recording = True

    frame_count = 0
    frames = []

    print("Start signing")
    color = green_color

    predicted_word = "Start signing"
    accuracy_text = ""

elif pressedKey == ord("q"):  # Quit when q is pressed
    break
```

Here is the full code:

```
import pyttsx3
import time
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import mediapipe as mp
from sklearn.model_selection import train_test_split
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard


# Labelled actions
actions = np.array([
    'kumusta',
    'salamat',
    'mahal kita',
])
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(30,1662)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))
# Load weights
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])
model.load_weights('test.h5')
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities
colors = [(245,117,16), (117,245,16), (16,117,245)]
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)

    return output_frame
def extract_keypoints(results):
    # 33*4 + 468*3 + 21*3 + 21*3 = 1662 values per frame, matching the model's input_shape
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, face, lh, rh])
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Color conversion BGR to RGB
    image.flags.writeable = False                   # Image is no longer writeable
    results = model.process(image)                  # Make prediction
    image.flags.writeable = True                    # Image is now writeable
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # Color conversion RGB to BGR
    return image, results
def draw_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS)       # Draw face connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)        # Draw pose connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)   # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)  # Draw right hand connections
def draw_styled_landmarks(image, results):
    # Draw face connections
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS,
                              mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),
                              mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1))
    # Draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2))
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2))
    # Draw right hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2))

# Initialize TTS engine
engine = pyttsx3.init()
cap = cv2.VideoCapture(0)

frame_count = 0
recording = False

predicted_word = ""
accuracy_text = ""
color = (21, 209, 0)
green_color = (21, 209, 0)
word_color = (255, 179, 0)

prev_frame_time = 0
new_frame_time = 0

with mp_holistic.Holistic(
    min_detection_confidence=0.5, min_tracking_confidence=0.5
) as holistic:
    while cap.isOpened():
        # Read feed
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        # print(results)

        # Draw landmarks
        draw_styled_landmarks(image, results)

        # 2. Prediction logic
        keypoints = extract_keypoints(results)

        pressedKey = cv2.waitKey(1) & 0xFF
        if pressedKey == 32:  # Start recording when the spacebar is pressed
            recording = True

            frame_count = 0
            frames = []

            print("Start signing")
            color = green_color

            predicted_word = "Start signing"
            accuracy_text = ""

        elif pressedKey == ord("q"):  # Quit when q is pressed
            break

        if frame_count >= 30:
            frame_count = 0
            recording = False

            res = model.predict(np.expand_dims(frames, axis=0))[0]
            print(actions[np.argmax(res)], res[np.argmax(res)])
            color = word_color
            predicted_word = actions[np.argmax(res)]
            accuracy = res[np.argmax(res)]
            accuracy_text = "{:.0%}".format(accuracy)

            # Speak the predicted word
            engine.say(predicted_word)
            engine.runAndWait()

            frames = []

        if recording:
            frame_count += 1
            frames.append(keypoints)

        new_frame_time = time.time()
        fps = 1/(new_frame_time-prev_frame_time)
        prev_frame_time = new_frame_time

        fps = int(fps)
        fps = str(fps)
        fps_text = "FPS: " + fps

        overlay = image.copy()
        cv2.rectangle(overlay, (0, 0), (225, 80), (0, 0, 0), -1)
        image = cv2.addWeighted(overlay, 0.5, image, 0.5, 1.0)

        cv2.putText(image, fps_text, (5, 15),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, green_color)

        cv2.putText(image, predicted_word, (5, 50),
                    cv2.FONT_HERSHEY_DUPLEX, 1, color)

        cv2.putText(image, accuracy_text, (5, 70),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, color)

        cv2.imshow('Dynamic FSL', image)

cap.release()
cv2.destroyAllWindows()
```
To create a GUI for this code, you can use Python's built-in tkinter library, which provides a set of tools for creating graphical user interfaces. Here's an outline of how you could modify the code to include a GUI:

1. Import the tkinter library at the top of your script:

Code:
   import tkinter as tk

2. Create a new class that inherits from tk.Tk. This will be the main window of your GUI:

Code:
   class MainWindow(tk.Tk):
       def __init__(self):
           super().__init__()
           self.title("Dynamic FSL")  # Set the title of the window
           self.geometry("640x480")   # Set the initial size of the window
           self.recording = False     # Flag the buttons toggle; must exist before any click
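
If you want to sanity-check the window on its own before wiring it into the rest of the script, a throwaway test would be (this blocks, so it is not part of the final program, which avoids mainloop() as explained after step 5):

Code:
   app = MainWindow()
   app.mainloop()  # Blocks until the window is closed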

3. Inside the MainWindow class, define two methods for the "Start" and "Stop" buttons. Each method just sets a flag; the recording logic in the while loop will check that flag instead of waiting for key presses:

Code:
   def start_recording(self):
       self.recording = True

   def stop_recording(self):
       self.recording = False
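
A plain attribute works here because the buttons and the capture loop run in the same thread (see the note after step 5). If you prefer tkinter's own state holders, a tk.BooleanVar is an optional variant of the same idea:

Code:
   def __init__(self):
       super().__init__()
       self.recording = tk.BooleanVar(value=False)

   def start_recording(self):
       self.recording.set(True)

   def stop_recording(self):
       self.recording.set(False)

The loop would then test app.recording.get() instead of app.recording.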

4. Still inside __init__, add two buttons to the main window that call these methods when clicked:

Code:
   self.start_button = tk.Button(self, text="Start", command=self.start_recording)
   self.start_button.pack()

   self.stop_button = tk.Button(self, text="Stop", command=self.stop_recording)
   self.stop_button.pack()

5. Modify the while loop to use the flag set by the buttons. Keep cap.isOpened() as the loop condition (putting the flag in the condition would end the program the first time recording stops) and check app.recording inside the loop in place of the key-press if statements:

Code:
   while cap.isOpened():
       # Read feed
       ret, frame = cap.read()

       # Make detections
       image, results = mediapipe_detection(frame, holistic)
       # print(results)

       # Draw landmarks
       draw_styled_landmarks(image, results)

       # 2. Prediction logic
       keypoints = extract_keypoints(results)

       # ... rest of the per-frame logic, gated on app.recording ...
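
One detail the steps above gloss over: tkinter normally processes button clicks inside mainloop(), but mainloop() blocks, which would freeze the OpenCV capture loop. A common workaround, sketched here with the app instance created before the loop, is to pump the event loop manually once per frame with update():

Code:
   app = MainWindow()

   with mp_holistic.Holistic(
       min_detection_confidence=0.5, min_tracking_confidence=0.5
   ) as holistic:
       while cap.isOpened():
           app.update()  # Handle pending tkinter events (button clicks, redraws)
           ret, frame = cap.read()
           # ... per-frame processing as above ...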

Here's the full modified code:

```
import pyttsx3
import time
import cv2
import numpy as np
import os
from matplotlib import pyplot as plt
import mediapipe as mp
from sklearn.model_selection import train_test_split
from sklearn.metrics import multilabel_confusion_matrix, accuracy_score
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import TensorBoard
import tkinter as tk

# Labelled actions
actions = np.array([
    'kumusta',
    'salamat',
    'mahal kita',
])
model = Sequential()
model.add(LSTM(64, return_sequences=True, activation='relu', input_shape=(30,1662)))
model.add(LSTM(128, return_sequences=True, activation='relu'))
model.add(LSTM(64, return_sequences=False, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(actions.shape[0], activation='softmax'))
# Load weights
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['categorical_accuracy'])
model.load_weights('test.h5')
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities
colors = [(245,117,16), (117,245,16), (16,117,245)]
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)

    return output_frame
def extract_keypoints(results):
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, face, lh, rh])
def mediapipe_detection(image, model):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Color conversion BGR to RGB
    image.flags.writeable = False                   # Image is no longer writeable
    results = model.process(image)                  # Make prediction
    image.flags.writeable = True                    # Image is now writeable
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # Color conversion RGB to BGR
    return image, results
def draw_landmarks(image, results):
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS)       # Draw face connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS)        # Draw pose connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS)   # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS)  # Draw right hand connections
def draw_styled_landmarks(image, results):
    # Draw face connections
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_CONTOURS,
                              mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1),
                              mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1))
    # Draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2))
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2))
    # Draw right hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS,
                              mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4),
                              mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2))

# Initialize TTS engine
engine = pyttsx3.init()
cap = cv2.VideoCapture(0)

frame_count = 0
frames = []  # Must exist before the loop appends to it

predicted_word = ""
accuracy_text = ""
color = (21, 209, 0)
green_color = (21, 209, 0)
word_color = (255, 179, 0)

prev_frame_time = 0
new_frame_time = 0

class MainWindow(tk.Tk):
    def __init__(self):
        super().__init__()
        self.title("Dynamic FSL")  # Set the title of the window
        self.geometry("640x480")   # Set the initial size of the window
        self.recording = False     # Flag toggled by the buttons below

        self.start_button = tk.Button(self, text="Start", command=self.start_recording)
        self.start_button.pack()

        self.stop_button = tk.Button(self, text="Stop", command=self.stop_recording)
        self.stop_button.pack()

    def start_recording(self):
        self.recording = True

    def stop_recording(self):
        self.recording = False

app = MainWindow()

with mp_holistic.Holistic(
    min_detection_confidence=0.5, min_tracking_confidence=0.5
) as holistic:
    while cap.isOpened():
        app.update()  # Pump tkinter events so the buttons stay responsive

        # Read feed
        ret, frame = cap.read()

        # Make detections
        image, results = mediapipe_detection(frame, holistic)
        # print(results)

        # Draw landmarks
        draw_styled_landmarks(image, results)

        # 2. Prediction logic
        keypoints = extract_keypoints(results)

        if app.recording:
            frame_count += 1
            frames.append(keypoints)

        if frame_count >= 30:
            frame_count = 0
            app.recording = False

            res = model.predict(np.expand_dims(frames, axis=0))[0]
            print(actions[np.argmax(res)], res[np.argmax(res)])
            color = word_color
            predicted_word = actions[np.argmax(res)]
            accuracy = res[np.argmax(res)]
            accuracy_text = "{:.0%}".format(accuracy)

            # Speak the predicted word
            engine.say(predicted_word)
            engine.runAndWait()

            frames = []

        # FPS counter and on-screen overlay, unchanged from the original code
        new_frame_time = time.time()
        fps = 1/(new_frame_time-prev_frame_time)
        prev_frame_time = new_frame_time
        fps_text = "FPS: " + str(int(fps))

        overlay = image.copy()
        cv2.rectangle(overlay, (0, 0), (225, 80), (0, 0, 0), -1)
        image = cv2.addWeighted(overlay, 0.5, image, 0.5, 1.0)

        cv2.putText(image, fps_text, (5, 15),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, green_color)
        cv2.putText(image, predicted_word, (5, 50),
                    cv2.FONT_HERSHEY_DUPLEX, 1, color)
        cv2.putText(image, accuracy_text, (5, 70),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, color)

        cv2.imshow('Dynamic FSL', image)
        cv2.waitKey(1)  # Needed for imshow to refresh the frame

cap.release()
cv2.destroyAllWindows()
```

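The question also asked for an exit button rather than just "Stop". One minimal way to add it, assuming the same MainWindow class (the running attribute and on_exit method are names introduced here for illustration), is a second flag that the capture loop checks:

Code:
   class MainWindow(tk.Tk):
       def __init__(self):
           super().__init__()
           self.recording = False
           self.running = True  # Cleared by the Exit button, checked by the loop

           self.exit_button = tk.Button(self, text="Exit", command=self.on_exit)
           self.exit_button.pack()

       def on_exit(self):
           self.running = False  # The capture loop ends on its next iteration

With that in place, change the loop condition to while cap.isOpened() and app.running: so clicking Exit falls through to the cap.release() and cv2.destroyAllWindows() cleanup at the end.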
 