Initial codebase (#1)
* Add project code * Logger improvements * Improvements to web demo code * added create_wlasl_landmarks_dataset.py and xtract_mediapipe_landmarks.py * Fix rotation augmentation * fixed error in docstring, and removed unnecessary replace -1 -> 0 * Readme updates * Share base notebooks * Add notebooks and unify for different datasets * requirements update * fixes * Make evaluate more deterministic * Allow training with clearml * refactor preprocessing and apply linter * Minor fixes * Minor notebook tweaks * Readme updates * Fix PR comments * Remove unneeded code * Add banner to Readme --------- Co-authored-by: Gabriel Lema <gabriel.lema@xmartlabs.com>
This commit is contained in:
1
augmentations/__init__.py
Normal file
1
augmentations/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
from .augment import augment_arm_joint_rotate, augment_rotate, augment_shear
|
||||
228
augmentations/augment.py
Normal file
228
augmentations/augment.py
Normal file
@@ -0,0 +1,228 @@
|
||||
|
||||
import math
|
||||
import logging
|
||||
import cv2
|
||||
import random
|
||||
|
||||
import numpy as np
|
||||
|
||||
from normalization.body_normalization import BODY_IDENTIFIERS
|
||||
from normalization.hand_normalization import HAND_IDENTIFIERS
|
||||
|
||||
|
||||
# Expand every imported hand landmark name into two variants suffixed "_0" and "_1" (all "_0" names come first,
# followed by all "_1" names). NOTE(review): presumably one suffix per tracked hand -- confirm against the hand
# normalization module.
HAND_IDENTIFIERS = [name + suffix for suffix in ("_0", "_1") for name in HAND_IDENTIFIERS]

# Arm joints listed from the neck towards the wrist; "$side$" is a placeholder that is replaced by "left" or "right"
# when the arm joint rotation augmentation walks over both arms.
ARM_IDENTIFIERS_ORDER = ["neck", "$side$Shoulder", "$side$Elbow", "$side$Wrist"]
|
||||
|
||||
|
||||
def __random_pass(prob):
    """
    Draws one random sample and reports whether it falls below the given threshold.

    :param prob: Threshold compared against a sample drawn by random.random() from the [0; 1) interval

    :return: True if the drawn sample is strictly smaller than `prob`, False otherwise
    """

    sample = random.random()

    return sample < prob
|
||||
|
||||
|
||||
def __numpy_to_dictionary(data_array: np.ndarray) -> dict:
    """
    Supplementary method turning a NumPy array of body landmark data into a dictionary keyed by landmark identifier.

    :param data_array: Array whose second axis is indexed in the same order as the BODY_IDENTIFIERS list

    :return: Dictionary mapping every body identifier to the (nested) list taken from its slice of the array
    """

    return {
        identifier: data_array[:, position].tolist()
        for position, identifier in enumerate(BODY_IDENTIFIERS)
    }
|
||||
|
||||
|
||||
def __dictionary_to_numpy(landmarks_dict: dict) -> np.ndarray:
    """
    Supplementary method packing a dictionary of body landmark data into a single NumPy array.

    :param landmarks_dict: Dictionary mapping every body identifier to its per-frame sequence of (X, Y) values; the
                           number of frames is read from the "leftEar" entry

    :return: Array of shape (frames, len(BODY_IDENTIFIERS), 2) whose second axis follows the BODY_IDENTIFIERS order
    """

    frame_count = len(landmarks_dict["leftEar"])
    packed = np.empty(shape=(frame_count, len(BODY_IDENTIFIERS), 2))

    for position, identifier in enumerate(BODY_IDENTIFIERS):
        coordinates = np.array(landmarks_dict[identifier])

        # Only the first two columns (X and Y) of every landmark are copied over
        for axis in (0, 1):
            packed[:, position, axis] = coordinates[:, axis]

    return packed
|
||||
|
||||
|
||||
def __rotate(origin: tuple, point: tuple, angle: float):
    """
    Turns a point around an origin by the given angle (counterclockwise in a conventional coordinate system with the
    Y axis pointing up).

    :param origin: Landmark in the (X, Y) format serving as the center of the rotation
    :param point: Landmark in the (X, Y) format to be rotated
    :param angle: Rotation angle in radians (it is handed directly to math.cos and math.sin)

    :return: Tuple with the rotated (X, Y) coordinates
    """

    center_x, center_y = origin
    point_x, point_y = point

    cosine, sine = math.cos(angle), math.sin(angle)

    # Position of the point relative to the center of rotation
    offset_x = point_x - center_x
    offset_y = point_y - center_y

    rotated_x = center_x + cosine * offset_x - sine * offset_y
    rotated_y = center_y + sine * offset_x + cosine * offset_y

    return rotated_x, rotated_y
|
||||
|
||||
|
||||
def __preprocess_row_sign(sign: dict) -> (dict, dict):
    """
    Supplementary method separating the single-dictionary skeletal data into body landmarks and hand landmarks.

    Two layouts are recognized. If the "nose_X" key is present, every identifier is read from separate
    "<identifier>_X" and "<identifier>_Y" entries, which are zipped into per-frame (X, Y) tuples. Otherwise, the
    values stored directly under the identifier keys are taken over as they are.

    :param sign: Dictionary with sequential skeletal data of the signing person

    :return: Tuple of two dictionaries (body landmarks, hand landmarks) keyed by the BODY_IDENTIFIERS and the
             HAND_IDENTIFIERS respectively
    """

    split_columns = "nose_X" in sign

    def collect(identifiers):
        if split_columns:
            return {identifier: list(zip(sign[identifier + "_X"], sign[identifier + "_Y"]))
                    for identifier in identifiers}

        return {identifier: sign[identifier] for identifier in identifiers}

    return collect(BODY_IDENTIFIERS), collect(HAND_IDENTIFIERS)
|
||||
|
||||
|
||||
def __wrap_sign_into_row(body_identifiers: dict, hand_identifiers: dict) -> dict:
    """
    Supplementary method combining the body and hand data into one new dictionary.

    :param body_identifiers: Dictionary with the body landmarks
    :param hand_identifiers: Dictionary with the hand landmarks; on a key collision, its value is the one kept

    :return: New dictionary holding the entries of both inputs (the inputs themselves are left untouched)
    """

    merged_row = dict(body_identifiers)
    merged_row.update(hand_identifiers)

    return merged_row
|
||||
|
||||
|
||||
def augment_rotate(sign: dict, angle_range: tuple) -> dict:
    """
    AUGMENTATION TECHNIQUE. Every joint coordinate (body and hands) in every frame is rotated by one shared random
    angle around the point [0.5; 0.5], i.e. the center of the frame when the coordinates are normalized to [0; 1].

    :param sign: Dictionary with sequential skeletal data of the signing person
    :param angle_range: Tuple containing the angle range (minimal and maximal angle in degrees) from which the
                        rotation angle is drawn uniformly

    :return: Dictionary with augmented (by rotation) sequential skeletal data of the signing person
    """

    body_landmarks, hand_landmarks = __preprocess_row_sign(sign)

    # A single angle is drawn per call, so the whole sequence is rotated consistently
    angle = math.radians(random.uniform(*angle_range))
    center = (0.5, 0.5)

    def rotate_all(landmarks):
        rotated = {}
        for identifier, frames in landmarks.items():
            rotated[identifier] = [__rotate(center, frame, angle) for frame in frames]
        return rotated

    return __wrap_sign_into_row(rotate_all(body_landmarks), rotate_all(hand_landmarks))
|
||||
|
||||
|
||||
def augment_shear(sign: dict, type: str, squeeze_ratio: tuple) -> dict:
    """
    AUGMENTATION TECHNIQUE.

        - Squeeze. All the frames are squeezed from both horizontal sides. Two independent random proportions of the
        original frame's width (one for the left and one for the right side) are drawn from `squeeze_ratio` and cut.

        - Perspective transformation. The joint coordinates are projected onto a new plane, which simulates recording
        the sign video with a slight tilt. Each time, the right or left side is chosen randomly (with equal chance),
        and one proportion is drawn from `squeeze_ratio`. The new plane is delineated by moving the chosen vertical
        edge inwards by this proportion and shortening it by the same proportion at both of its corners.

    Body landmarks whose X and Y are both exactly zero (presumably the marker of an undetected landmark) are kept at
    [0; 0] instead of being moved by the transformation. The check is done per landmark, so a landmark with only one
    zero coordinate is transformed like any other.

    NOTE(review): only the body landmarks are transformed; the hand landmarks are passed through unchanged -- confirm
    that this is intended.

    :param sign: Dictionary with sequential skeletal data of the signing person
    :param type: Type of shear augmentation to perform (either 'squeeze' or 'perspective')
    :param squeeze_ratio: Tuple containing the relative range from what the proportion of the original width will be
                          randomly chosen. These proportions will either be cut from both sides or used to construct
                          the new projection

    :return: Dictionary with augmented (by squeezing or perspective transformation) sequential skeletal data of the
             signing person; an empty dictionary (after logging an error) if `type` is not supported
    """

    body_landmarks, hand_landmarks = __preprocess_row_sign(sign)

    # Corners of the unit square, shared by both transformation types as the source quadrilateral
    src = np.array(((0, 1), (1, 1), (0, 0), (1, 0)), dtype=np.float32)

    if type == "squeeze":
        move_left = random.uniform(*squeeze_ratio)
        move_right = random.uniform(*squeeze_ratio)

        dest = np.array(((0 + move_left, 1), (1 - move_right, 1), (0 + move_left, 0), (1 - move_right, 0)),
                        dtype=np.float32)

    elif type == "perspective":
        move_ratio = random.uniform(*squeeze_ratio)

        if __random_pass(0.5):
            # Tilt on the left side: the left edge moves inwards and gets shorter
            dest = np.array(((0 + move_ratio, 1 - move_ratio), (1, 1), (0 + move_ratio, 0 + move_ratio), (1, 0)),
                            dtype=np.float32)
        else:
            # Tilt on the right side: the right edge moves inwards and gets shorter
            dest = np.array(((0, 1), (1 - move_ratio, 1 - move_ratio), (0, 0), (1 - move_ratio, 0 + move_ratio)),
                            dtype=np.float32)

    else:
        logging.error("Unsupported shear type provided.")
        return {}

    mtx = cv2.getPerspectiveTransform(src, dest)

    landmarks_array = np.array(__dictionary_to_numpy(body_landmarks), dtype=np.float32)

    # Remember which landmarks are [0; 0] as whole points BEFORE transforming. Comparing the transformed coordinates
    # element-wise against the transformed origin would also reset a single coordinate of any landmark that merely
    # shares its X or its Y with the origin.
    zero_landmarks = np.all(landmarks_array == 0, axis=-1)

    augmented_landmarks = cv2.perspectiveTransform(landmarks_array, mtx)
    augmented_landmarks = np.where(zero_landmarks[..., np.newaxis], 0.0, augmented_landmarks)

    body_landmarks = __numpy_to_dictionary(augmented_landmarks)

    return __wrap_sign_into_row(body_landmarks, hand_landmarks)
|
||||
|
||||
|
||||
def augment_arm_joint_rotate(sign: dict, probability: float, angle_range: tuple) -> dict:
    """
    AUGMENTATION TECHNIQUE. The joints of both arms are visited successively in the ARM_IDENTIFIERS_ORDER order. With
    the given probability, a visited joint becomes a pivot: every following joint of the same arm is rotated around
    it, frame by frame, by a freshly drawn random angle. This simulates slight variances in each execution of a sign,
    which do not change its semantic meaning.

    :param sign: Dictionary with sequential skeletal data of the signing person
    :param probability: Probability of each joint to be used as a pivot (float from the range [0, 1])
    :param angle_range: Tuple containing the angle range (minimal and maximal angle in degrees) from which the
                        rotation angle is drawn uniformly

    :return: Dictionary with augmented (by arm joint rotation) sequential skeletal data of the signing person
    """

    body_landmarks, hand_landmarks = __preprocess_row_sign(sign)

    for side in ("left", "right"):
        # Concrete joint names of this arm, e.g. "$side$Elbow" becomes "leftElbow"
        arm_chain = [template.replace("$side$", side) for template in ARM_IDENTIFIERS_ORDER]

        for position, pivot_name in enumerate(arm_chain):
            # Stop processing this arm as soon as one joint of the chain is not available
            if pivot_name not in body_landmarks:
                break

            # The joint is only used as a pivot with the provided probability
            if not __random_pass(probability):
                continue

            angle = math.radians(random.uniform(*angle_range))

            for target_name in arm_chain[position + 1:]:
                # Joints that are not available are simply left out
                if target_name in body_landmarks:
                    pivot_frames = body_landmarks[pivot_name]
                    body_landmarks[target_name] = [
                        __rotate(pivot_frames[frame_index], frame, angle)
                        for frame_index, frame in enumerate(body_landmarks[target_name])
                    ]

    return __wrap_sign_into_row(body_landmarks, hand_landmarks)
|
||||
Reference in New Issue
Block a user