Initial codebase (#1)
* Add project code * Logger improvements * Improvements to web demo code * added create_wlasl_landmarks_dataset.py and xtract_mediapipe_landmarks.py * Fix rotation augmentation * fixed error in docstring, and removed unnecessary replace -1 -> 0 * Readme updates * Share base notebooks * Add notebooks and unify for different datasets * requirements update * fixes * Make evaluate more deterministic * Allow training with clearml * refactor preprocessing and apply linter * Minor fixes * Minor notebook tweaks * Readme updates * Fix PR comments * Remove unneeded code * Add banner to Readme --------- Co-authored-by: Gabriel Lema <gabriel.lema@xmartlabs.com>
This commit is contained in:
3
datasets/__init__.py
Normal file
3
datasets/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from .czech_slr_dataset import CzechSLRDataset
|
||||
from .embedding_dataset import SLREmbeddingDataset
|
||||
from .datasets_utils import collate_fn_triplet_padd, collate_fn_padd
|
||||
8
datasets/clearml_dataset_loader.py
Normal file
8
datasets/clearml_dataset_loader.py
Normal file
@@ -0,0 +1,8 @@
|
||||
from clearml import Dataset
|
||||
from .dataset_loader import DatasetLoader
|
||||
|
||||
|
||||
class ClearMLDatasetLoader(DatasetLoader):
    """DatasetLoader implementation that fetches datasets tracked in ClearML."""

    def get_dataset_folder(self, dataset_project, dataset_name):
        """Return the path to a local copy of the requested ClearML dataset.

        :param dataset_project: ClearML project the dataset belongs to
        :param dataset_name: Name of the dataset within the project
        :return: Local filesystem path holding the dataset contents
        """
        remote_dataset = Dataset.get(dataset_project=dataset_project, dataset_name=dataset_name)
        return remote_dataset.get_local_copy()
|
||||
72
datasets/czech_slr_dataset.py
Normal file
72
datasets/czech_slr_dataset.py
Normal file
@@ -0,0 +1,72 @@
|
||||
import torch
|
||||
import numpy as np
|
||||
import torch.utils.data as torch_data
|
||||
|
||||
from datasets.datasets_utils import load_dataset, tensor_to_dictionary, dictionary_to_tensor, \
|
||||
random_augmentation
|
||||
from normalization.body_normalization import normalize_single_dict as normalize_single_body_dict
|
||||
from normalization.hand_normalization import normalize_single_dict as normalize_single_hand_dict
|
||||
|
||||
|
||||
class CzechSLRDataset(torch_data.Dataset):
    """Advanced object representation of the HPOES dataset for loading hand joints landmarks utilizing the Torch's
    built-in Dataset properties"""

    data: list  # one (frames, landmarks, 2) np.ndarray per sample
    labels: list  # one class label per sample

    def __init__(self, dataset_filename: str, num_labels=5, transform=None, augmentations=False,
                 augmentations_prob=0.5, normalize=True):
        """
        Initiates the HPOESDataset with the pre-loaded data from the dataset file.

        :param dataset_filename: Path to the dataset CSV file (parsed by load_dataset, which uses pd.read_csv)
        :param num_labels: Number of distinct labels in the dataset
        :param transform: Any data transformation to be applied (default: None)
        :param augmentations: Whether to randomly augment returned samples
        :param augmentations_prob: Probability that a random augmentation is applied
        :param normalize: Whether to apply body and hand landmark normalization
        """
        loaded_data = load_dataset(dataset_filename)
        data, labels = loaded_data[0], loaded_data[1]

        self.data = data
        self.labels = labels
        self.targets = list(labels)
        self.num_labels = num_labels
        self.transform = transform

        self.augmentations = augmentations
        self.augmentations_prob = augmentations_prob
        self.normalize = normalize

    def __getitem__(self, idx):
        """
        Allocates, potentially transforms and returns the item at the desired index.

        :param idx: Index of the item
        :return: Tuple containing both the landmarks tensor and the label
        """
        # Copy so augmentations/normalization never mutate the stored sample.
        depth_map = torch.from_numpy(np.copy(self.data[idx]))
        label = torch.Tensor([self.labels[idx]])

        depth_map = tensor_to_dictionary(depth_map)

        # Apply potential augmentations
        depth_map = random_augmentation(self.augmentations, self.augmentations_prob, depth_map)

        if self.normalize:
            depth_map = normalize_single_body_dict(depth_map)
            depth_map = normalize_single_hand_dict(depth_map)

        depth_map = dictionary_to_tensor(depth_map)

        # Move the landmark position interval to improve performance
        depth_map = depth_map - 0.5

        if self.transform:
            depth_map = self.transform(depth_map)

        return depth_map, label

    def __len__(self):
        """Return the number of samples in the dataset."""
        return len(self.labels)
|
||||
17
datasets/dataset_loader.py
Normal file
17
datasets/dataset_loader.py
Normal file
@@ -0,0 +1,17 @@
|
||||
|
||||
import os
|
||||
|
||||
|
||||
class DatasetLoader():
    """Abstract class that serves to load datasets from different sources (local, ClearML, other tracker)
    """

    def get_dataset_folder(self, dataset_project, dataset_name):
        """Return the local folder holding the requested dataset.

        :param dataset_project: Project/namespace the dataset belongs to
        :param dataset_name: Name of the dataset
        :raises NotImplementedError: always; subclasses must override
        """
        # Bug fix: the original returned the exception object instead of raising it,
        # so a missing override silently handed callers a NotImplementedError instance.
        raise NotImplementedError()


class LocalDatasetLoader(DatasetLoader):
    """Loads datasets stored on the local filesystem under BASE_DATA_FOLDER."""

    def get_dataset_folder(self, dataset_project, dataset_name):
        """Return <BASE_DATA_FOLDER or 'data'>/<dataset_name>.

        :param dataset_project: Unused for local loading (kept for interface parity)
        :param dataset_name: Name of the dataset folder
        :return: Path to the local dataset folder
        """
        base_folder = os.environ.get("BASE_DATA_FOLDER", "data")
        return os.path.join(base_folder, dataset_name)
|
||||
133
datasets/datasets_utils.py
Normal file
133
datasets/datasets_utils.py
Normal file
@@ -0,0 +1,133 @@
|
||||
import pandas as pd
|
||||
import ast
|
||||
import torch
|
||||
import random
|
||||
import numpy as np
|
||||
from torch.nn.utils.rnn import pad_sequence
|
||||
from random import randrange
|
||||
|
||||
from augmentations import augment_arm_joint_rotate, augment_rotate, augment_shear
|
||||
from normalization.body_normalization import BODY_IDENTIFIERS
|
||||
from augmentations.augment import HAND_IDENTIFIERS
|
||||
|
||||
|
||||
def load_dataset(file_location: str):
    """Load a landmarks CSV file and return (data, labels).

    Column names using "_left_"/"_right_" are remapped to "_0_"/"_1_" before
    extraction. Each sample becomes a (frames, landmarks, 2) array where the
    last axis holds the X and Y coordinate series parsed from the CSV cells.

    :param file_location: Path to the dataset CSV file
    :return: Tuple of (list of per-sample arrays, list of labels)
    """
    df = pd.read_csv(file_location, encoding="utf-8")
    df.columns = [column.replace("_left_", "_0_").replace("_right_", "_1_")
                  for column in list(df.columns)]

    labels = df["labels"].to_list()
    identifiers = BODY_IDENTIFIERS + HAND_IDENTIFIERS

    data = []
    for _, row in df.iterrows():
        # Frame count is taken from one reference landmark column.
        frame_count = len(ast.literal_eval(row["leftEar_X"]))
        sample = np.empty(shape=(frame_count, len(identifiers), 2))
        for landmark_index, identifier in enumerate(identifiers):
            sample[:, landmark_index, 0] = ast.literal_eval(row[identifier + "_X"])
            sample[:, landmark_index, 1] = ast.literal_eval(row[identifier + "_Y"])
        data.append(sample)

    return data, labels
|
||||
|
||||
|
||||
def tensor_to_dictionary(landmarks_tensor: torch.Tensor) -> dict:
    """Convert a (frames, landmarks, 2) tensor into {identifier: per-frame array}.

    :param landmarks_tensor: Landmark tensor ordered as BODY_IDENTIFIERS + HAND_IDENTIFIERS
    :return: Dict mapping each landmark identifier to its (frames, 2) array
    """
    frames_array = landmarks_tensor.numpy()
    return {
        identifier: frames_array[:, landmark_index]
        for landmark_index, identifier in enumerate(BODY_IDENTIFIERS + HAND_IDENTIFIERS)
    }
|
||||
|
||||
|
||||
def dictionary_to_tensor(landmarks_dict: dict) -> torch.Tensor:
    """Convert {identifier: per-frame (x, y) sequence} back into a tensor.

    Inverse of tensor_to_dictionary: stacks the landmarks in the canonical
    BODY_IDENTIFIERS + HAND_IDENTIFIERS order into a (frames, landmarks, 2) tensor.

    :param landmarks_dict: Dict mapping landmark identifiers to per-frame coordinates
    :return: Tensor of shape (frames, landmarks, 2)
    """
    identifiers = BODY_IDENTIFIERS + HAND_IDENTIFIERS
    # Frame count is read from one reference landmark entry.
    result = np.empty(shape=(len(landmarks_dict["leftEar"]), len(identifiers), 2))

    for landmark_index, identifier in enumerate(identifiers):
        frames = landmarks_dict[identifier]
        result[:, landmark_index, 0] = [frame[0] for frame in frames]
        result[:, landmark_index, 1] = [frame[1] for frame in frames]

    return torch.from_numpy(result)
|
||||
|
||||
|
||||
def random_augmentation(augmentations, augmentations_prob, depth_map):
    """Possibly apply one randomly chosen augmentation to a landmarks dict.

    With probability `augmentations_prob` (and only when `augmentations` is
    truthy), one of four augmentations is picked uniformly at random and
    applied; otherwise the input is returned unchanged.

    :param augmentations: Master switch enabling augmentation
    :param augmentations_prob: Probability that an augmentation is applied
    :param depth_map: Landmarks dictionary to augment
    :return: The (possibly augmented) landmarks dictionary
    """
    if not augmentations or random.random() >= augmentations_prob:
        return depth_map

    choice = randrange(4)
    if choice == 0:
        return augment_arm_joint_rotate(depth_map, 0.3, (-4, 4))
    if choice == 1:
        return augment_shear(depth_map, "perspective", (0, 0.1))
    if choice == 2:
        return augment_shear(depth_map, "squeeze", (0, 0.15))
    return augment_rotate(depth_map, (-13, 13))
|
||||
|
||||
|
||||
def collate_fn_triplet_padd(batch):
    '''
    Pads a batch of variable-length (anchor, positive, negative) triplets.

    Each of the three groups is padded to its own maximum sequence length,
    and a padding mask is built per group (False for real frames, True for
    padding; returned as float tensors).

    note: it converts things ToTensor manually here since the ToTensor transform
    assume it takes in images rather than arbitrary tensors.
    '''
    # batch: list of length batch_size, each element contains ouput of dataset
    def padding_mask(lengths):
        longest = max(lengths)
        return [[False] * length + [True] * (longest - length) for length in lengths]

    anchors = [element[0] for element in batch]
    positives = [element[1] for element in batch]
    negatives = [element[2] for element in batch]

    # MASKING
    anchor_mask = padding_mask([seq.shape[0] for seq in anchors])
    positive_mask = padding_mask([seq.shape[0] for seq in positives])
    negative_mask = padding_mask([seq.shape[0] for seq in negatives])

    # PADDING
    padded_anchors = pad_sequence(anchors, batch_first=True)
    padded_positives = pad_sequence(positives, batch_first=True)
    padded_negatives = pad_sequence(negatives, batch_first=True)

    return (padded_anchors, padded_positives, padded_negatives,
            torch.Tensor(anchor_mask), torch.Tensor(positive_mask), torch.Tensor(negative_mask))
|
||||
|
||||
|
||||
def collate_fn_padd(batch):
    '''
    Pads a batch of variable-length (sequence, label) pairs.

    Sequences are padded to the batch maximum length; a padding mask is
    built alongside (False for real frames, True for padding; returned as
    a float tensor).

    note: it converts things ToTensor manually here since the ToTensor transform
    assume it takes in images rather than arbitrary tensors.
    '''
    # batch: list of length batch_size, each element contains ouput of dataset
    sequences = [element[0] for element in batch]
    lengths = [seq.shape[0] for seq in sequences]
    longest = max(lengths)

    # MASKING
    mask = [[False] * length + [True] * (longest - length) for length in lengths]

    # PADDING
    padded_sequences = pad_sequence(sequences, batch_first=True)

    labels = torch.Tensor([element[1] for element in batch])

    return padded_sequences, labels, torch.Tensor(mask)
|
||||
103
datasets/embedding_dataset.py
Normal file
103
datasets/embedding_dataset.py
Normal file
@@ -0,0 +1,103 @@
|
||||
import torch
|
||||
import torch.utils.data as torch_data
|
||||
from random import sample
|
||||
from typing import List
|
||||
import numpy as np
|
||||
|
||||
from datasets.datasets_utils import load_dataset, tensor_to_dictionary, dictionary_to_tensor, \
|
||||
random_augmentation
|
||||
from normalization.body_normalization import normalize_single_dict as normalize_single_body_dict
|
||||
from normalization.hand_normalization import normalize_single_dict as normalize_single_hand_dict
|
||||
|
||||
|
||||
class SLREmbeddingDataset(torch_data.Dataset):
    """Advanced object representation of the WLASL dataset for loading triplet used in triplet loss utilizing the
    Torch's built-in Dataset properties"""

    data: List[np.ndarray]
    labels: List[np.ndarray]

    def __init__(self, dataset_filename: str, triplet=True, transform=None, augmentations=False,
                 augmentations_prob=0.5, normalize=True):
        """
        Initiates the HPOESDataset with the pre-loaded data from the dataset file.

        :param dataset_filename: Path to the dataset CSV file (parsed by load_dataset, which uses pd.read_csv)
        :param triplet: If True, __getitem__ yields (anchor, positive, negative); otherwise (sample, label)
        :param transform: Any data transformation to be applied (default: None)
        :param augmentations: Whether to randomly augment the anchor sample
        :param augmentations_prob: Probability that a random augmentation is applied
        :param normalize: Whether to apply body and hand landmark normalization
        """
        loaded_data = load_dataset(dataset_filename)
        data, labels = loaded_data[0], loaded_data[1]

        self.data = data
        self.labels = labels
        self.targets = list(labels)
        self.transform = transform
        self.triplet = triplet
        self.augmentations = augmentations
        self.augmentations_prob = augmentations_prob
        self.normalize = normalize

    def __getitem__(self, idx):
        """
        Allocates, potentially transforms and returns the item at the desired index.

        :param idx: Index of the item
        :return: (anchor, positive, negative) tensors in triplet mode, otherwise (depth_map, label)
        """
        depth_map_a = torch.from_numpy(np.copy(self.data[idx]))
        label = torch.Tensor([self.labels[idx]])

        depth_map_a = tensor_to_dictionary(depth_map_a)

        if self.triplet:
            # Hoisted: build the labels array once instead of twice per item.
            labels_array = np.array(self.labels)
            positive_indexes = list(np.where(labels_array == self.labels[idx])[0])
            if len(positive_indexes) > 1:
                positive_index_sample = sample(positive_indexes, 2)
                positive_index = positive_index_sample[0] if positive_index_sample[0] != idx \
                    else positive_index_sample[1]
            else:
                # Robustness fix: with a single sample for this class,
                # sample(positive_indexes, 2) would raise ValueError;
                # fall back to using the anchor itself as the positive.
                positive_index = idx
            negative_indexes = list(np.where(labels_array != self.labels[idx])[0])
            negative_index = sample(negative_indexes, 1)[0]
            # TODO: implement hard triplets

            depth_map_p = torch.from_numpy(np.copy(self.data[positive_index]))
            depth_map_n = torch.from_numpy(np.copy(self.data[negative_index]))

            depth_map_p = tensor_to_dictionary(depth_map_p)
            depth_map_n = tensor_to_dictionary(depth_map_n)

        # TODO: Add Data augmentation to positive and negative ?

        # Apply potential augmentations (anchor only)
        depth_map_a = random_augmentation(self.augmentations, self.augmentations_prob, depth_map_a)

        if self.normalize:
            depth_map_a = normalize_single_body_dict(depth_map_a)
            depth_map_a = normalize_single_hand_dict(depth_map_a)
            if self.triplet:
                depth_map_p = normalize_single_body_dict(depth_map_p)
                depth_map_p = normalize_single_hand_dict(depth_map_p)
                depth_map_n = normalize_single_body_dict(depth_map_n)
                depth_map_n = normalize_single_hand_dict(depth_map_n)

        depth_map_a = dictionary_to_tensor(depth_map_a)
        # Move the landmark position interval to improve performance
        depth_map_a = depth_map_a - 0.5

        if self.triplet:
            depth_map_p = dictionary_to_tensor(depth_map_p)
            depth_map_p = depth_map_p - 0.5
            depth_map_n = dictionary_to_tensor(depth_map_n)
            depth_map_n = depth_map_n - 0.5

        if self.transform:
            depth_map_a = self.transform(depth_map_a)
            if self.triplet:
                depth_map_p = self.transform(depth_map_p)
                depth_map_n = self.transform(depth_map_n)

        if self.triplet:
            return depth_map_a, depth_map_p, depth_map_n

        return depth_map_a, label

    def __len__(self):
        """Return the number of samples in the dataset."""
        return len(self.labels)
|
||||
Reference in New Issue
Block a user