Initial codebase (#1)

* Add project code

* Logger improvements

* Improvements to web demo code

* added create_wlasl_landmarks_dataset.py and xtract_mediapipe_landmarks.py

* Fix rotation augmentation

* fixed error in docstring, and removed unnecessary replace -1 -> 0

* Readme updates

* Share base notebooks

* Add notebooks and unify for different datasets

* requirements update

* fixes

* Make evaluate more deterministic

* Allow training with clearml

* refactor preprocessing and apply linter

* Minor fixes

* Minor notebook tweaks

* Readme updates

* Fix PR comments

* Remove unneeded code

* Add banner to Readme

---------

Co-authored-by: Gabriel Lema <gabriel.lema@xmartlabs.com>
This commit is contained in:
Mathias Claassen
2023-03-03 10:07:54 -03:00
committed by GitHub
parent 661e4bbc03
commit 81bbf66aab
49 changed files with 4205 additions and 0 deletions

View File

@@ -0,0 +1,154 @@
import os
import os.path as op
from itertools import chain
from collections import namedtuple
import glob
import cv2
import numpy as np
import mediapipe as mp
from tqdm.auto import tqdm
# Import drawing_utils and drawing_styles.
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_holistic = mp.solutions.holistic
mp_pose = mp.solutions.pose
LEN_LANDMARKS_POSE = len(mp_holistic.PoseLandmark)
LEN_LANDMARKS_HAND = len(mp_holistic.HandLandmark)
TOTAL_LANDMARKS = LEN_LANDMARKS_POSE + 2 * LEN_LANDMARKS_HAND
Landmark = namedtuple("Landmark", ["x", "y"])
class LandmarksResults:
"""
Wrapper for landmarks results. When not available it fills with 0
"""
def __init__(
self,
results,
num_landmarks_pose=LEN_LANDMARKS_POSE,
num_landmarks_hand=LEN_LANDMARKS_HAND,
):
self.results = results
self.num_landmarks_pose = num_landmarks_pose
self.num_landmarks_hand = num_landmarks_hand
@property
def pose_landmarks(self):
if self.results.pose_landmarks is None:
return [Landmark(0, 0)] * self.num_landmarks_pose
else:
return self.results.pose_landmarks.landmark
@property
def left_hand_landmarks(self):
if self.results.left_hand_landmarks is None:
return [Landmark(0, 0)] * self.num_landmarks_hand
else:
return self.results.left_hand_landmarks.landmark
@property
def right_hand_landmarks(self):
if self.results.right_hand_landmarks is None:
return [Landmark(0, 0)] * self.num_landmarks_hand
else:
return self.results.right_hand_landmarks.landmark
def get_landmarks(image_orig, holistic, debug=False):
"""
Runs landmarks detection for single image
Returns: list of landmarks
"""
# Convert the BGR image to RGB before processing.
image = cv2.cvtColor(image_orig, cv2.COLOR_BGR2RGB)
results = LandmarksResults(holistic.process(image))
if debug:
lmks_pose = []
for lmk in results.pose_landmarks:
lmks_pose.append(lmk.x)
lmks_pose.append(lmk.y)
assert len(lmks_pose) == LEN_LANDMARKS_POSE
lmks_left_hand = []
for lmk in results.left_hand_landmarks:
lmks_left_hand.append(lmk.x)
lmks_left_hand.append(lmk.y)
assert (
len(lmks_left_hand) == 2 * LEN_LANDMARKS_HAND
), f"{len(lmks_left_hand)} != {2 * LEN_LANDMARKS_HAND}"
lmks_right_hand = []
for lmk in results.right_hand_landmarks:
lmks_right_hand.append(lmk.x)
lmks_right_hand.append(lmk.y),
assert (
len(lmks_right_hand) == 2 * LEN_LANDMARKS_HAND
), f"{len(lmks_right_hand)} != {2 * LEN_LANDMARKS_HAND}"
landmarks = []
for lmk in chain(
results.pose_landmarks,
results.left_hand_landmarks,
results.right_hand_landmarks,
):
landmarks.append(lmk.x)
landmarks.append(lmk.y)
assert (
len(landmarks) == TOTAL_LANDMARKS * 2
), f"{len(landmarks)} != {TOTAL_LANDMARKS * 2}"
return landmarks
def parse_extract_args(parser):
parser.add_argument(
"--videos-folder",
"-videos",
help="Path of folder with videos to extract landmarks from",
required=True,
)
parser.add_argument(
"--output-landmarks",
"-lmks",
help="Path of output folder where landmarks npy files will be saved",
required=True,
)
# python3 preprocessing.py -videos=data/wlasl/videos_25fps/ -lmks=data/landmarks
def extract(args):
landmarks_output = args.output_landmarks
videos_folder = args.videos_folder
os.makedirs(landmarks_output, exist_ok=True)
for fn_video in tqdm(sorted(glob.glob(op.join(videos_folder, "*mp4")))):
cap = cv2.VideoCapture(fn_video)
ret, image_orig = cap.read()
height, width = image_orig.shape[:2]
landmarks_video = []
with tqdm(total=int(cap.get(cv2.CAP_PROP_FRAME_COUNT))) as pbar:
with mp_holistic.Holistic(
static_image_mode=False,
min_detection_confidence=0.5,
model_complexity=2,
) as holistic:
while ret:
try:
landmarks = get_landmarks(image_orig, holistic)
except Exception as e:
print(e)
landmarks = get_landmarks(image_orig, holistic, debug=True)
ret, image_orig = cap.read()
landmarks_video.append(landmarks)
pbar.update(1)
landmarks_video = np.vstack(landmarks_video)
np.save(
op.join(landmarks_output, op.basename(fn_video).split(".")[0]),
landmarks_video,
)