Initial codebase (#1)
* Add project code * Logger improvements * Improvements to web demo code * added create_wlasl_landmarks_dataset.py and xtract_mediapipe_landmarks.py * Fix rotation augmentation * fixed error in docstring, and removed unnecessary replace -1 -> 0 * Readme updates * Share base notebooks * Add notebooks and unify for different datasets * requirements update * fixes * Make evaluate more deterministic * Allow training with clearml * refactor preprocessing and apply linter * Minor fixes * Minor notebook tweaks * Readme updates * Fix PR comments * Remove unneeded code * Add banner to Readme --------- Co-authored-by: Gabriel Lema <gabriel.lema@xmartlabs.com>
This commit is contained in:
92
normalization/blazepose_mapping.py
Normal file
92
normalization/blazepose_mapping.py
Normal file
@@ -0,0 +1,92 @@
|
||||
|
||||
_BODY_KEYPOINT_MAPPING = {
|
||||
"nose": "nose",
|
||||
"left_eye": "leftEye",
|
||||
"right_eye": "rightEye",
|
||||
"left_ear": "leftEar",
|
||||
"right_ear": "rightEar",
|
||||
"left_shoulder": "leftShoulder",
|
||||
"right_shoulder": "rightShoulder",
|
||||
"left_elbow": "leftElbow",
|
||||
"right_elbow": "rightElbow",
|
||||
"left_wrist": "leftWrist",
|
||||
"right_wrist": "rightWrist"
|
||||
}
|
||||
|
||||
_HAND_KEYPOINT_MAPPING = {
|
||||
"wrist": "wrist",
|
||||
"index_finger_tip": "indexTip",
|
||||
"index_finger_dip": "indexDIP",
|
||||
"index_finger_pip": "indexPIP",
|
||||
"index_finger_mcp": "indexMCP",
|
||||
"middle_finger_tip": "middleTip",
|
||||
"middle_finger_dip": "middleDIP",
|
||||
"middle_finger_pip": "middlePIP",
|
||||
"middle_finger_mcp": "middleMCP",
|
||||
"ring_finger_tip": "ringTip",
|
||||
"ring_finger_dip": "ringDIP",
|
||||
"ring_finger_pip": "ringPIP",
|
||||
"ring_finger_mcp": "ringMCP",
|
||||
"pinky_tip": "littleTip",
|
||||
"pinky_dip": "littleDIP",
|
||||
"pinky_pip": "littlePIP",
|
||||
"pinky_mcp": "littleMCP",
|
||||
"thumb_tip": "thumbTip",
|
||||
"thumb_ip": "thumbIP",
|
||||
"thumb_mcp": "thumbMP",
|
||||
"thumb_cmc": "thumbCMC"
|
||||
}
|
||||
|
||||
|
||||
def map_blazepose_keypoint(column):
    """
    Translates a single BlazePose column name (e.g. "left_hand_thumb_tip_x")
    into the pipeline's camelCase naming (e.g. "thumbTip_left_X").

    :param column: Raw column name ending in "_x" or "_y"
    :return: Mapped column name, or None when the landmark has no mapping
    """
    # Split off the trailing axis marker ("_x" / "_y") and upper-case it
    base, suffix = column[:-2], column[-2:].upper()

    # Hand landmarks carry a "left_hand_" / "right_hand_" prefix
    for prefix, hand in (("left_hand_", "left"), ("right_hand_", "right")):
        if base.startswith(prefix):
            mapped = _HAND_KEYPOINT_MAPPING.get(base[len(prefix):])
            if mapped is None:
                return None
            return f"{mapped}_{hand}{suffix}"

    # Anything else must be a body landmark
    mapped = _BODY_KEYPOINT_MAPPING.get(base)
    if mapped is None:
        return None
    return mapped + suffix
|
||||
|
||||
|
||||
def map_blazepose_df(df):
    """
    Renames BlazePose landmark columns of *df* to the pipeline's camelCase
    naming, synthesizes "neck_X"/"neck_Y" columns as the per-frame midpoint of
    the two shoulders, and drops columns that have no mapping.

    :param df: pd.DataFrame whose landmark cells hold per-frame coordinate
        sequences (list-like; elements may be numeric strings — they are cast
        via float() below)
    :return: pd.DataFrame with renamed columns, added neck columns, and
        unmapped columns removed
    """
    to_drop = []
    renamings = {}
    for column in df.columns:
        mapped_column = map_blazepose_keypoint(column)
        if mapped_column:
            renamings[column] = mapped_column
        else:
            # No mapping exists. Unmapped columns keep their original names
            # through rename(), so dropping by original name at the end is safe.
            to_drop.append(column)
    df = df.rename(columns=renamings)

    for index, row in df.iterrows():

        # Number of analyzed frames in this video instance
        sequence_size = len(row["leftEar_Y"])
        lsx = row["leftShoulder_X"]
        rsx = row["rightShoulder_X"]
        lsy = row["leftShoulder_Y"]
        rsy = row["rightShoulder_Y"]
        neck_x = []
        neck_y = []
        # Treat each element of the sequence (analyzed frame) individually:
        # the neck is approximated as the midpoint between the shoulders.
        for sequence_index in range(sequence_size):
            neck_x.append((float(lsx[sequence_index]) + float(rsx[sequence_index])) / 2)
            neck_y.append((float(lsy[sequence_index]) + float(rsy[sequence_index])) / 2)
        # Stored via str() — presumably to match the other columns' schema of
        # stringified coordinate lists in the CSV-backed datasets; TODO confirm
        df.loc[index, "neck_X"] = str(neck_x)
        df.loc[index, "neck_Y"] = str(neck_y)

    df.drop(columns=to_drop, inplace=True)
    return df
|
||||
241
normalization/body_normalization.py
Normal file
241
normalization/body_normalization.py
Normal file
@@ -0,0 +1,241 @@
|
||||
|
||||
from typing import Tuple
|
||||
import pandas as pd
|
||||
from utils import get_logger
|
||||
|
||||
|
||||
# Canonical ordering of the body joints handled by the normalization routines.
BODY_IDENTIFIERS = (
    "nose neck rightEye leftEye rightEar leftEar "
    "rightShoulder leftShoulder rightElbow leftElbow rightWrist leftWrist"
).split()
|
||||
|
||||
|
||||
def normalize_body_full(df: pd.DataFrame) -> Tuple[pd.DataFrame, list]:
    """
    Normalizes the body position data using the Bohacek-normalization algorithm.

    Each "<identifier>_X"/"<identifier>_Y" cell is expected to hold a sequence
    (list) of per-frame coordinates for one video instance.

    :param df: pd.DataFrame to be normalized
    :return: Tuple of the pd.DataFrame with normalized values for body pose and
             a list of indexes of rows that could not be normalized (those rows
             are kept in the output unchanged)
    """
    import copy

    logger = get_logger(__name__)

    # TODO: Fix division by zero

    normalized_rows = []
    invalid_row_indexes = []

    # Iterate over all of the records in the dataset
    for index, original_row in df.iterrows():

        # Work on a deep copy: the cells contain lists, which iterrows() does
        # not copy, so normalizing in place would otherwise corrupt both the
        # input DataFrame and the pristine row we fall back to on failure.
        row = copy.deepcopy(original_row)

        sequence_size = len(row["leftEar_Y"])
        valid_sequence = True

        last_starting_point, last_ending_point = None, None

        # Treat each element of the sequence (analyzed frame) individually
        for sequence_index in range(sequence_size):

            # Prevent from even starting the analysis if some necessary elements are not present
            if (row["leftShoulder_X"][sequence_index] == 0 or row["rightShoulder_X"][sequence_index] == 0) and \
                    (row["neck_X"][sequence_index] == 0 or row["nose_X"][sequence_index] == 0):
                if not last_starting_point:
                    valid_sequence = False
                    continue
                else:
                    # Reuse the bounding box of the last analyzable frame
                    starting_point, ending_point = last_starting_point, last_ending_point

            else:

                # NOTE:
                #
                # While in the paper, it is written that the head metric is calculated by halving the shoulder
                # distance, this is meant for the distance between the very ends of one's shoulder, as literature
                # studying body metrics and ratios generally states. The Vision Pose Estimation API, however,
                # seems to be predicting rather the center of one's shoulder. Based on our experiments and manual
                # reviews of the data, employing this as just the plain shoulder distance seems to be more
                # corresponding to the desired metric.
                #
                # Please, review this if using other third-party pose estimation libraries.

                if row["leftShoulder_X"][sequence_index] != 0 and row["rightShoulder_X"][sequence_index] != 0:
                    left_shoulder = (row["leftShoulder_X"][sequence_index], row["leftShoulder_Y"][sequence_index])
                    right_shoulder = (row["rightShoulder_X"][sequence_index], row["rightShoulder_Y"][sequence_index])
                    shoulder_distance = ((((left_shoulder[0] - right_shoulder[0]) ** 2) + (
                        (left_shoulder[1] - right_shoulder[1]) ** 2)) ** 0.5)
                    head_metric = shoulder_distance
                else:
                    # Fall back to the neck-to-nose distance when a shoulder is missing
                    neck = (row["neck_X"][sequence_index], row["neck_Y"][sequence_index])
                    nose = (row["nose_X"][sequence_index], row["nose_Y"][sequence_index])
                    neck_nose_distance = ((((neck[0] - nose[0]) ** 2) + ((neck[1] - nose[1]) ** 2)) ** 0.5)
                    head_metric = neck_nose_distance

                # Set the starting and ending point of the normalization bounding box
                starting_point = [row["neck_X"][sequence_index] - 3 * head_metric,
                                  row["leftEye_Y"][sequence_index] + (head_metric / 2)]
                ending_point = [row["neck_X"][sequence_index] + 3 * head_metric, starting_point[1] - 6 * head_metric]

                last_starting_point, last_ending_point = starting_point, ending_point

            # Ensure that all of the bounding-box-defining coordinates are not out of the picture
            if starting_point[0] < 0:
                starting_point[0] = 0
            if starting_point[1] < 0:
                starting_point[1] = 0
            if ending_point[0] < 0:
                ending_point[0] = 0
            if ending_point[1] < 0:
                ending_point[1] = 0

            # Normalize individual landmarks and save the results
            for identifier in BODY_IDENTIFIERS:
                key = identifier + "_"

                # Prevent from trying to normalize incorrectly captured points
                if row[key + "X"][sequence_index] == 0:
                    continue

                normalized_x = (row[key + "X"][sequence_index] - starting_point[0]) / (ending_point[0] -
                                                                                       starting_point[0])
                normalized_y = (row[key + "Y"][sequence_index] - ending_point[1]) / (starting_point[1] -
                                                                                     ending_point[1])

                row[key + "X"][sequence_index] = normalized_x
                row[key + "Y"][sequence_index] = normalized_y

        if valid_sequence:
            normalized_rows.append(row)
        else:
            logger.warning(" BODY LANDMARKS: One video instance could not be normalized.")
            # Keep the untouched original so the output row count matches the input
            normalized_rows.append(original_row)
            invalid_row_indexes.append(index)

    # pd.DataFrame.append() was removed in pandas 2.0 -- assemble the result in one go instead.
    if normalized_rows:
        normalized_df = pd.DataFrame(normalized_rows, columns=df.columns).reset_index(drop=True)
    else:
        normalized_df = pd.DataFrame(columns=df.columns)

    logger.info("The normalization of body is finished.")
    # Use lazy %-style arguments: passing bare extra positional args to a
    # message without placeholders raises a formatting error inside logging.
    logger.info("\t-> Original size: %d", df.shape[0])
    logger.info("\t-> Normalized size: %d", normalized_df.shape[0])
    logger.info("\t-> Problematic videos: %d", len(invalid_row_indexes))

    return normalized_df, invalid_row_indexes
|
||||
|
||||
|
||||
def normalize_single_dict(row: dict):
    """
    Normalizes the skeletal data for a given sequence of frames with signer's body pose data. The normalization
    follows the definition from our paper.

    :param row: Dictionary containing key-value pairs with joint identifiers and corresponding lists (sequences) of
                that particular joints coordinates
    :return: Dictionary with normalized skeletal data (following the same schema as input data); when the sequence
             cannot be normalized, an untouched copy of the input data is returned instead
    """
    import copy

    sequence_size = len(row["leftEar"])
    valid_sequence = True
    # Keep a pristine deep copy to return on failure: the lists inside `row`
    # are mutated in place below, so a plain reference would alias the
    # already-mutated data instead of the original.
    original_row = copy.deepcopy(row)
    logger = get_logger(__name__)

    last_starting_point, last_ending_point = None, None

    # Treat each element of the sequence (analyzed frame) individually
    for sequence_index in range(sequence_size):
        left_shoulder = (row["leftShoulder"][sequence_index][0], row["leftShoulder"][sequence_index][1])
        right_shoulder = (row["rightShoulder"][sequence_index][0], row["rightShoulder"][sequence_index][1])
        neck = (row["neck"][sequence_index][0], row["neck"][sequence_index][1])
        nose = (row["nose"][sequence_index][0], row["nose"][sequence_index][1])
        # Prevent from even starting the analysis if some necessary elements are not present
        # (zero coordinates or degenerate, coincident points)
        if (left_shoulder[0] == 0 or right_shoulder[0] == 0
                or (left_shoulder[0] == right_shoulder[0] and left_shoulder[1] == right_shoulder[1])) and (
                neck[0] == 0 or nose[0] == 0 or (neck[0] == nose[0] and neck[1] == nose[1])):
            if not last_starting_point:
                valid_sequence = False
                continue

            else:
                # Reuse the bounding box of the last analyzable frame
                starting_point, ending_point = last_starting_point, last_ending_point

        else:

            # NOTE:
            #
            # While in the paper, it is written that the head metric is calculated by halving the shoulder distance,
            # this is meant for the distance between the very ends of one's shoulder, as literature studying body
            # metrics and ratios generally states. The Vision Pose Estimation API, however, seems to be predicting
            # rather the center of one's shoulder. Based on our experiments and manual reviews of the data, employing
            # this as just the plain shoulder distance seems to be more corresponding to the desired metric.
            #
            # Please, review this if using other third-party pose estimation libraries.

            if left_shoulder[0] != 0 and right_shoulder[0] != 0 and \
                    (left_shoulder[0] != right_shoulder[0] or left_shoulder[1] != right_shoulder[1]):
                shoulder_distance = ((((left_shoulder[0] - right_shoulder[0]) ** 2) + (
                    (left_shoulder[1] - right_shoulder[1]) ** 2)) ** 0.5)
                head_metric = shoulder_distance
            else:
                neck_nose_distance = ((((neck[0] - nose[0]) ** 2) + ((neck[1] - nose[1]) ** 2)) ** 0.5)
                head_metric = neck_nose_distance

            # Set the starting and ending point of the normalization bounding box.
            # An earlier variant offset the top by head_metric / 2 (as in normalize_body_full);
            # the full-head offset used here was chosen for this inference path.
            starting_point = [row["neck"][sequence_index][0] - 3 * head_metric,
                              row["leftEye"][sequence_index][1] + head_metric]
            ending_point = [row["neck"][sequence_index][0] + 3 * head_metric, starting_point[1] - 6 * head_metric]

            last_starting_point, last_ending_point = starting_point, ending_point

        # Ensure that all of the bounding-box-defining coordinates are not out of the picture
        if starting_point[0] < 0:
            starting_point[0] = 0
        if starting_point[1] < 0:
            starting_point[1] = 0
        if ending_point[0] < 0:
            ending_point[0] = 0
        if ending_point[1] < 0:
            ending_point[1] = 0

        # Normalize individual landmarks and save the results
        for identifier in BODY_IDENTIFIERS:
            key = identifier

            # Prevent from trying to normalize incorrectly captured points
            if row[key][sequence_index][0] == 0:
                continue

            # Guard against a degenerate (zero-area) bounding box
            if (ending_point[0] - starting_point[0]) == 0 or (starting_point[1] - ending_point[1]) == 0:
                logger.warning("Problematic normalization")
                valid_sequence = False
                break

            normalized_x = (row[key][sequence_index][0] - starting_point[0]) / (ending_point[0] - starting_point[0])
            normalized_y = (row[key][sequence_index][1] - ending_point[1]) / (starting_point[1] - ending_point[1])

            # Coordinates may arrive as tuples; convert so they can be assigned
            row[key][sequence_index] = list(row[key][sequence_index])

            row[key][sequence_index][0] = normalized_x
            row[key][sequence_index][1] = normalized_y

    if valid_sequence:
        return row

    else:
        return original_row
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # No standalone behavior -- this module is meant to be imported
    # (see normalization/main.py for the end-to-end script).
    pass
|
||||
195
normalization/hand_normalization.py
Normal file
195
normalization/hand_normalization.py
Normal file
@@ -0,0 +1,195 @@
|
||||
|
||||
import pandas as pd
|
||||
from utils import get_logger
|
||||
|
||||
|
||||
# Canonical ordering of the hand joints handled by the normalization routines:
# the wrist, then Tip/DIP/PIP/MCP for each regular finger, then the thumb's
# own joint scheme (Tip/IP/MP/CMC).
HAND_IDENTIFIERS = ["wrist"] + [
    finger + joint
    for finger in ("index", "middle", "ring", "little")
    for joint in ("Tip", "DIP", "PIP", "MCP")
] + ["thumbTip", "thumbIP", "thumbMP", "thumbCMC"]
|
||||
|
||||
|
||||
def normalize_hands_full(df: pd.DataFrame) -> pd.DataFrame:
    """
    Normalizes the hands position data using the Bohacek-normalization algorithm.

    Each "<identifier>_<hand>_X"/"_Y" cell is expected to hold a sequence (list)
    of per-frame coordinates for one video instance.

    :param df: pd.DataFrame to be normalized
    :return: pd.DataFrame with normalized values for hand pose
    """
    import copy

    logger = get_logger(__name__)
    # TODO: Fix division by zero

    # Unify hand naming to numeric indexes: "_left_" -> "_0_", "_right_" -> "_1_".
    # NOTE: this intentionally renames the caller's DataFrame columns in place.
    df.columns = [item.replace("_left_", "_0_").replace("_right_", "_1_") for item in list(df.columns)]

    hand_landmarks = {"X": {0: [], 1: []}, "Y": {0: [], 1: []}}

    # Determine how many hands are present in the dataset
    range_hand_size = 1
    if "wrist_1_X" in df.columns:
        range_hand_size = 2

    # Construct the relevant identifiers
    for identifier in HAND_IDENTIFIERS:
        for hand_index in range(range_hand_size):
            hand_landmarks["X"][hand_index].append(identifier + "_" + str(hand_index) + "_X")
            hand_landmarks["Y"][hand_index].append(identifier + "_" + str(hand_index) + "_Y")

    normalized_rows = []

    # Iterate over all of the records in the dataset
    for index, source_row in df.iterrows():

        # Work on a deep copy: the cells contain lists, which iterrows() does
        # not copy, so normalizing in place would otherwise also mutate the
        # input DataFrame.
        row = copy.deepcopy(source_row)

        # Treat each hand individually
        for hand_index in range(range_hand_size):

            sequence_size = len(row["wrist_" + str(hand_index) + "_X"])

            # Treat each element of the sequence (analyzed frame) individually
            for sequence_index in range(sequence_size):

                # Retrieve all of the X and Y values of the current frame
                landmarks_x_values = [row[key][sequence_index]
                                      for key in hand_landmarks["X"][hand_index] if row[key][sequence_index] != 0]
                landmarks_y_values = [row[key][sequence_index]
                                      for key in hand_landmarks["Y"][hand_index] if row[key][sequence_index] != 0]

                # Prevent from even starting the analysis if some necessary elements are not present
                if not landmarks_x_values or not landmarks_y_values:
                    # Lazy %-style args keep the formatting cost off the happy path
                    logger.warning(
                        " HAND LANDMARKS: One frame could not be normalized as there is no data present."
                        " Record: %s, Frame: %s", index, sequence_index)
                    continue

                # Calculate the deltas
                width, height = max(landmarks_x_values) - min(landmarks_x_values), max(landmarks_y_values) - min(
                    landmarks_y_values)
                if width > height:
                    delta_x = 0.1 * width
                    delta_y = delta_x + ((width - height) / 2)
                else:
                    delta_y = 0.1 * height
                    delta_x = delta_y + ((height - width) / 2)

                # Set the starting and ending point of the normalization bounding box
                starting_point = (min(landmarks_x_values) - delta_x, min(landmarks_y_values) - delta_y)
                ending_point = (max(landmarks_x_values) + delta_x, max(landmarks_y_values) + delta_y)

                # Normalize individual landmarks and save the results
                for identifier in HAND_IDENTIFIERS:
                    key = identifier + "_" + str(hand_index) + "_"

                    # Prevent from trying to normalize incorrectly captured points
                    # or dividing by a degenerate (zero-area) bounding box
                    if row[key + "X"][sequence_index] == 0 or (ending_point[0] - starting_point[0]) == 0 or \
                            (starting_point[1] - ending_point[1]) == 0:
                        continue

                    normalized_x = (row[key + "X"][sequence_index] - starting_point[0]) / (ending_point[0] -
                                                                                           starting_point[0])
                    normalized_y = (row[key + "Y"][sequence_index] - ending_point[1]) / (starting_point[1] -
                                                                                        ending_point[1])

                    row[key + "X"][sequence_index] = normalized_x
                    row[key + "Y"][sequence_index] = normalized_y

        normalized_rows.append(row)

    # pd.DataFrame.append() was removed in pandas 2.0 -- assemble the result in one go instead.
    if normalized_rows:
        normalized_df = pd.DataFrame(normalized_rows, columns=df.columns).reset_index(drop=True)
    else:
        normalized_df = pd.DataFrame(columns=df.columns)

    return normalized_df
|
||||
|
||||
|
||||
def normalize_single_dict(row: dict):
    """
    Normalizes the skeletal data for a given sequence of frames with signer's hand pose data. The normalization
    follows the definition from our paper.

    :param row: Dictionary containing key-value pairs with joint identifiers and corresponding lists (sequences) of
                that particular joints coordinates
    :return: Dictionary with normalized skeletal data (following the same schema as input data)
    """

    # Per-hand lists of joint keys ("<identifier>_0" / "<identifier>_1")
    hand_landmarks = {0: [], 1: []}

    # Determine how many hands are present in the dataset
    range_hand_size = 1
    if "wrist_1" in row.keys():
        range_hand_size = 2

    # Construct the relevant identifiers
    for identifier in HAND_IDENTIFIERS:
        for hand_index in range(range_hand_size):
            hand_landmarks[hand_index].append(identifier + "_" + str(hand_index))

    # Treat each hand individually
    for hand_index in range(range_hand_size):

        sequence_size = len(row["wrist_" + str(hand_index)])

        # Treat each element of the sequence (analyzed frame) individually
        for sequence_index in range(sequence_size):

            # Retrieve all of the X and Y values of the current frame
            landmarks_x_values = [row[key][sequence_index][0] for key in hand_landmarks[hand_index] if
                                  row[key][sequence_index][0] != 0]
            landmarks_y_values = [row[key][sequence_index][1] for key in hand_landmarks[hand_index] if
                                  row[key][sequence_index][1] != 0]

            # Prevent from even starting the analysis if some necessary elements are not present
            if not landmarks_x_values or not landmarks_y_values:
                continue

            # Calculate the deltas
            width, height = max(landmarks_x_values) - min(landmarks_x_values), max(landmarks_y_values) - min(
                landmarks_y_values)
            if width > height:
                delta_x = 0.1 * width
                delta_y = delta_x + ((width - height) / 2)
            else:
                delta_y = 0.1 * height
                delta_x = delta_y + ((height - width) / 2)

            # Set the starting and ending point of the normalization bounding box
            starting_point = (min(landmarks_x_values) - delta_x, min(landmarks_y_values) - delta_y)
            ending_point = (max(landmarks_x_values) + delta_x, max(landmarks_y_values) + delta_y)

            # Normalize individual landmarks and save the results
            for identifier in HAND_IDENTIFIERS:
                key = identifier + "_" + str(hand_index)

                # Prevent from trying to normalize incorrectly captured points
                # (the two subtractions also guard against a zero-area box)
                if row[key][sequence_index][0] == 0 or (ending_point[0] - starting_point[0]) == 0 or (
                        starting_point[1] - ending_point[1]) == 0:
                    continue

                # NOTE(review): the Y formula below normalizes relative to
                # starting_point (no vertical flip), whereas normalize_hands_full
                # in this file normalizes Y relative to ending_point (flipped).
                # Confirm whether this training/inference mismatch is intended.
                normalized_x = (row[key][sequence_index][0] - starting_point[0]) / (ending_point[0] -
                                                                                    starting_point[0])
                normalized_y = (row[key][sequence_index][1] - starting_point[1]) / (ending_point[1] -
                                                                                    starting_point[1])

                # Coordinates may arrive as tuples; convert so they can be assigned
                row[key][sequence_index] = list(row[key][sequence_index])

                row[key][sequence_index][0] = normalized_x
                row[key][sequence_index][1] = normalized_y

    return row
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # No standalone behavior -- this module is meant to be imported
    # (see normalization/main.py for the end-to-end script).
    pass
|
||||
47
normalization/main.py
Normal file
47
normalization/main.py
Normal file
@@ -0,0 +1,47 @@
|
||||
import os
|
||||
import ast
|
||||
import pandas as pd
|
||||
|
||||
from normalization.hand_normalization import normalize_hands_full
|
||||
from normalization.body_normalization import normalize_body_full
|
||||
|
||||
DATASET_PATH = './data'

# Load the dataset
df = pd.read_csv(os.path.join(DATASET_PATH, "WLASL_test_15fps.csv"), encoding="utf-8")

# Retrieve metadata
video_size_heights = df["video_size_height"].to_list()
video_size_widths = df["video_size_width"].to_list()

# Delete redundant (non-related) properties
df = df.drop(columns=["video_size_height", "video_size_width"])

# Temporarily remove other relevant metadata (restored after normalization)
labels = df.pop("labels").to_list()
video_fps = df.pop("video_fps").to_list()


# Convert the strings into lists
def convert(cell):
    """Parse a stringified Python literal (e.g. a coordinate list) back into an object."""
    return ast.literal_eval(str(cell))


for column in df.columns:
    df[column] = df[column].map(convert)

# Perform the normalizations
df = normalize_hands_full(df)
df, invalid_row_indexes = normalize_body_full(df)

# Clear lists of items from deleted rows
# labels = [t for i, t in enumerate(labels) if i not in invalid_row_indexes]
# video_fps = [t for i, t in enumerate(video_fps) if i not in invalid_row_indexes]

# Return the metadata back to the dataset
df["labels"] = labels
df["video_fps"] = video_fps

df.to_csv(os.path.join(DATASET_PATH, "WLASL_test_15fps_normalized.csv"), encoding="utf-8", index=False)
|
||||
Reference in New Issue
Block a user