Initial codebase (#1)
* Add project code * Logger improvements * Improvements to web demo code * added create_wlasl_landmarks_dataset.py and xtract_mediapipe_landmarks.py * Fix rotation augmentation * fixed error in docstring, and removed unnecessary replace -1 -> 0 * Readme updates * Share base notebooks * Add notebooks and unify for different datasets * requirements update * fixes * Make evaluate more deterministic * Allow training with clearml * refactor preprocessing and apply linter * Minor fixes * Minor notebook tweaks * Readme updates * Fix PR comments * Remove unneeded code * Add banner to Readme --------- Co-authored-by: Gabriel Lema <gabriel.lema@xmartlabs.com>
This commit is contained in:
3
datasets/__init__.py
Normal file
3
datasets/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from .czech_slr_dataset import CzechSLRDataset
|
||||
from .embedding_dataset import SLREmbeddingDataset
|
||||
from .datasets_utils import collate_fn_triplet_padd, collate_fn_padd
|
||||
8
datasets/clearml_dataset_loader.py
Normal file
8
datasets/clearml_dataset_loader.py
Normal file
@@ -0,0 +1,8 @@
|
||||
from clearml import Dataset
|
||||
from .dataset_loader import DatasetLoader
|
||||
|
||||
|
||||
class ClearMLDatasetLoader(DatasetLoader):
    """DatasetLoader implementation that fetches datasets tracked in ClearML."""

    def get_dataset_folder(self, dataset_project, dataset_name):
        """Return the path to a local copy of the requested ClearML dataset.

        :param dataset_project: ClearML project the dataset belongs to
        :param dataset_name: Name of the dataset within the project
        :return: Local filesystem path holding the dataset contents
        """
        remote_dataset = Dataset.get(dataset_project=dataset_project, dataset_name=dataset_name)
        return remote_dataset.get_local_copy()
|
||||
72
datasets/czech_slr_dataset.py
Normal file
72
datasets/czech_slr_dataset.py
Normal file
@@ -0,0 +1,72 @@
|
||||
import torch
|
||||
import numpy as np
|
||||
import torch.utils.data as torch_data
|
||||
|
||||
from datasets.datasets_utils import load_dataset, tensor_to_dictionary, dictionary_to_tensor, \
|
||||
random_augmentation
|
||||
from normalization.body_normalization import normalize_single_dict as normalize_single_body_dict
|
||||
from normalization.hand_normalization import normalize_single_dict as normalize_single_hand_dict
|
||||
|
||||
|
||||
class CzechSLRDataset(torch_data.Dataset):
    """Advanced object representation of the HPOES dataset for loading hand joints landmarks utilizing the Torch's
    built-in Dataset properties"""

    data: list  # one (frames, landmarks, 2) np.ndarray per sample
    labels: list  # one class label per sample

    def __init__(self, dataset_filename: str, num_labels=5, transform=None, augmentations=False,
                 augmentations_prob=0.5, normalize=True):
        """
        Initiates the HPOESDataset with the pre-loaded data from the dataset file.

        :param dataset_filename: Path to the dataset CSV file (parsed by load_dataset, which uses pd.read_csv)
        :param num_labels: Number of distinct labels in the dataset
        :param transform: Any data transformation to be applied (default: None)
        :param augmentations: Whether to randomly augment returned samples
        :param augmentations_prob: Probability that a random augmentation is applied
        :param normalize: Whether to apply body and hand landmark normalization
        """
        loaded_data = load_dataset(dataset_filename)
        data, labels = loaded_data[0], loaded_data[1]

        self.data = data
        self.labels = labels
        self.targets = list(labels)
        self.num_labels = num_labels
        self.transform = transform

        self.augmentations = augmentations
        self.augmentations_prob = augmentations_prob
        self.normalize = normalize

    def __getitem__(self, idx):
        """
        Allocates, potentially transforms and returns the item at the desired index.

        :param idx: Index of the item
        :return: Tuple containing both the landmarks tensor and the label
        """
        # Copy so augmentations/normalization never mutate the stored sample.
        depth_map = torch.from_numpy(np.copy(self.data[idx]))
        label = torch.Tensor([self.labels[idx]])

        depth_map = tensor_to_dictionary(depth_map)

        # Apply potential augmentations
        depth_map = random_augmentation(self.augmentations, self.augmentations_prob, depth_map)

        if self.normalize:
            depth_map = normalize_single_body_dict(depth_map)
            depth_map = normalize_single_hand_dict(depth_map)

        depth_map = dictionary_to_tensor(depth_map)

        # Move the landmark position interval to improve performance
        depth_map = depth_map - 0.5

        if self.transform:
            depth_map = self.transform(depth_map)

        return depth_map, label

    def __len__(self):
        """Return the number of samples in the dataset."""
        return len(self.labels)
|
||||
17
datasets/dataset_loader.py
Normal file
17
datasets/dataset_loader.py
Normal file
@@ -0,0 +1,17 @@
|
||||
|
||||
import os
|
||||
|
||||
|
||||
class DatasetLoader():
    """Abstract class that serves to load datasets from different sources (local, ClearML, other tracker)
    """

    def get_dataset_folder(self, dataset_project, dataset_name):
        """Return the local folder holding the requested dataset.

        :param dataset_project: Project/namespace the dataset belongs to
        :param dataset_name: Name of the dataset
        :raises NotImplementedError: always; subclasses must override
        """
        # Bug fix: the original returned the exception object instead of raising it,
        # so a missing override silently handed callers a NotImplementedError instance.
        raise NotImplementedError()


class LocalDatasetLoader(DatasetLoader):
    """Loads datasets stored on the local filesystem under BASE_DATA_FOLDER."""

    def get_dataset_folder(self, dataset_project, dataset_name):
        """Return <BASE_DATA_FOLDER or 'data'>/<dataset_name>.

        :param dataset_project: Unused for local loading (kept for interface parity)
        :param dataset_name: Name of the dataset folder
        :return: Path to the local dataset folder
        """
        base_folder = os.environ.get("BASE_DATA_FOLDER", "data")
        return os.path.join(base_folder, dataset_name)
|
||||
133
datasets/datasets_utils.py
Normal file
133
datasets/datasets_utils.py
Normal file
@@ -0,0 +1,133 @@
|
||||
import pandas as pd
|
||||
import ast
|
||||
import torch
|
||||
import random
|
||||
import numpy as np
|
||||
from torch.nn.utils.rnn import pad_sequence
|
||||
from random import randrange
|
||||
|
||||
from augmentations import augment_arm_joint_rotate, augment_rotate, augment_shear
|
||||
from normalization.body_normalization import BODY_IDENTIFIERS
|
||||
from augmentations.augment import HAND_IDENTIFIERS
|
||||
|
||||
|
||||
def load_dataset(file_location: str):
    """Load a landmarks CSV file and return (data, labels).

    Column names using "_left_"/"_right_" are remapped to "_0_"/"_1_" before
    extraction. Each sample becomes a (frames, landmarks, 2) array where the
    last axis holds the X and Y coordinate series parsed from the CSV cells.

    :param file_location: Path to the dataset CSV file
    :return: Tuple of (list of per-sample arrays, list of labels)
    """
    df = pd.read_csv(file_location, encoding="utf-8")
    df.columns = [column.replace("_left_", "_0_").replace("_right_", "_1_")
                  for column in list(df.columns)]

    labels = df["labels"].to_list()
    identifiers = BODY_IDENTIFIERS + HAND_IDENTIFIERS

    data = []
    for _, row in df.iterrows():
        # Frame count is taken from one reference landmark column.
        frame_count = len(ast.literal_eval(row["leftEar_X"]))
        sample = np.empty(shape=(frame_count, len(identifiers), 2))
        for landmark_index, identifier in enumerate(identifiers):
            sample[:, landmark_index, 0] = ast.literal_eval(row[identifier + "_X"])
            sample[:, landmark_index, 1] = ast.literal_eval(row[identifier + "_Y"])
        data.append(sample)

    return data, labels
|
||||
|
||||
|
||||
def tensor_to_dictionary(landmarks_tensor: torch.Tensor) -> dict:
    """Convert a (frames, landmarks, 2) tensor into {identifier: per-frame array}.

    :param landmarks_tensor: Landmark tensor ordered as BODY_IDENTIFIERS + HAND_IDENTIFIERS
    :return: Dict mapping each landmark identifier to its (frames, 2) array
    """
    frames_array = landmarks_tensor.numpy()
    return {
        identifier: frames_array[:, landmark_index]
        for landmark_index, identifier in enumerate(BODY_IDENTIFIERS + HAND_IDENTIFIERS)
    }
|
||||
|
||||
|
||||
def dictionary_to_tensor(landmarks_dict: dict) -> torch.Tensor:
    """Convert {identifier: per-frame (x, y) sequence} back into a tensor.

    Inverse of tensor_to_dictionary: stacks the landmarks in the canonical
    BODY_IDENTIFIERS + HAND_IDENTIFIERS order into a (frames, landmarks, 2) tensor.

    :param landmarks_dict: Dict mapping landmark identifiers to per-frame coordinates
    :return: Tensor of shape (frames, landmarks, 2)
    """
    identifiers = BODY_IDENTIFIERS + HAND_IDENTIFIERS
    # Frame count is read from one reference landmark entry.
    result = np.empty(shape=(len(landmarks_dict["leftEar"]), len(identifiers), 2))

    for landmark_index, identifier in enumerate(identifiers):
        frames = landmarks_dict[identifier]
        result[:, landmark_index, 0] = [frame[0] for frame in frames]
        result[:, landmark_index, 1] = [frame[1] for frame in frames]

    return torch.from_numpy(result)
|
||||
|
||||
|
||||
def random_augmentation(augmentations, augmentations_prob, depth_map):
    """Possibly apply one randomly chosen augmentation to a landmarks dict.

    With probability `augmentations_prob` (and only when `augmentations` is
    truthy), one of four augmentations is picked uniformly at random and
    applied; otherwise the input is returned unchanged.

    :param augmentations: Master switch enabling augmentation
    :param augmentations_prob: Probability that an augmentation is applied
    :param depth_map: Landmarks dictionary to augment
    :return: The (possibly augmented) landmarks dictionary
    """
    if not augmentations or random.random() >= augmentations_prob:
        return depth_map

    choice = randrange(4)
    if choice == 0:
        return augment_arm_joint_rotate(depth_map, 0.3, (-4, 4))
    if choice == 1:
        return augment_shear(depth_map, "perspective", (0, 0.1))
    if choice == 2:
        return augment_shear(depth_map, "squeeze", (0, 0.15))
    return augment_rotate(depth_map, (-13, 13))
|
||||
|
||||
|
||||
def collate_fn_triplet_padd(batch):
    '''
    Pads a batch of variable-length (anchor, positive, negative) triplets.

    Each of the three groups is padded to its own maximum sequence length,
    and a padding mask is built per group (False for real frames, True for
    padding; returned as float tensors).

    note: it converts things ToTensor manually here since the ToTensor transform
    assume it takes in images rather than arbitrary tensors.
    '''
    # batch: list of length batch_size, each element contains ouput of dataset
    def padding_mask(lengths):
        longest = max(lengths)
        return [[False] * length + [True] * (longest - length) for length in lengths]

    anchors = [element[0] for element in batch]
    positives = [element[1] for element in batch]
    negatives = [element[2] for element in batch]

    # MASKING
    anchor_mask = padding_mask([seq.shape[0] for seq in anchors])
    positive_mask = padding_mask([seq.shape[0] for seq in positives])
    negative_mask = padding_mask([seq.shape[0] for seq in negatives])

    # PADDING
    padded_anchors = pad_sequence(anchors, batch_first=True)
    padded_positives = pad_sequence(positives, batch_first=True)
    padded_negatives = pad_sequence(negatives, batch_first=True)

    return (padded_anchors, padded_positives, padded_negatives,
            torch.Tensor(anchor_mask), torch.Tensor(positive_mask), torch.Tensor(negative_mask))
|
||||
|
||||
|
||||
def collate_fn_padd(batch):
    '''
    Pads a batch of variable-length (sequence, label) pairs.

    Sequences are padded to the batch maximum length; a padding mask is
    built alongside (False for real frames, True for padding; returned as
    a float tensor).

    note: it converts things ToTensor manually here since the ToTensor transform
    assume it takes in images rather than arbitrary tensors.
    '''
    # batch: list of length batch_size, each element contains ouput of dataset
    sequences = [element[0] for element in batch]
    lengths = [seq.shape[0] for seq in sequences]
    longest = max(lengths)

    # MASKING
    mask = [[False] * length + [True] * (longest - length) for length in lengths]

    # PADDING
    padded_sequences = pad_sequence(sequences, batch_first=True)

    labels = torch.Tensor([element[1] for element in batch])

    return padded_sequences, labels, torch.Tensor(mask)
|
||||
103
datasets/embedding_dataset.py
Normal file
103
datasets/embedding_dataset.py
Normal file
@@ -0,0 +1,103 @@
|
||||
import torch
|
||||
import torch.utils.data as torch_data
|
||||
from random import sample
|
||||
from typing import List
|
||||
import numpy as np
|
||||
|
||||
from datasets.datasets_utils import load_dataset, tensor_to_dictionary, dictionary_to_tensor, \
|
||||
random_augmentation
|
||||
from normalization.body_normalization import normalize_single_dict as normalize_single_body_dict
|
||||
from normalization.hand_normalization import normalize_single_dict as normalize_single_hand_dict
|
||||
|
||||
|
||||
class SLREmbeddingDataset(torch_data.Dataset):
    """Advanced object representation of the WLASL dataset for loading triplet used in triplet loss utilizing the
    Torch's built-in Dataset properties"""

    data: List[np.ndarray]
    labels: List[np.ndarray]

    def __init__(self, dataset_filename: str, triplet=True, transform=None, augmentations=False,
                 augmentations_prob=0.5, normalize=True):
        """
        Initiates the HPOESDataset with the pre-loaded data from the dataset file.

        :param dataset_filename: Path to the dataset CSV file (parsed by load_dataset, which uses pd.read_csv)
        :param triplet: If True, __getitem__ yields (anchor, positive, negative); otherwise (sample, label)
        :param transform: Any data transformation to be applied (default: None)
        :param augmentations: Whether to randomly augment the anchor sample
        :param augmentations_prob: Probability that a random augmentation is applied
        :param normalize: Whether to apply body and hand landmark normalization
        """
        loaded_data = load_dataset(dataset_filename)
        data, labels = loaded_data[0], loaded_data[1]

        self.data = data
        self.labels = labels
        self.targets = list(labels)
        self.transform = transform
        self.triplet = triplet
        self.augmentations = augmentations
        self.augmentations_prob = augmentations_prob
        self.normalize = normalize

    def __getitem__(self, idx):
        """
        Allocates, potentially transforms and returns the item at the desired index.

        :param idx: Index of the item
        :return: (anchor, positive, negative) tensors in triplet mode, otherwise (depth_map, label)
        """
        depth_map_a = torch.from_numpy(np.copy(self.data[idx]))
        label = torch.Tensor([self.labels[idx]])

        depth_map_a = tensor_to_dictionary(depth_map_a)

        if self.triplet:
            # Hoisted: build the labels array once instead of twice per item.
            labels_array = np.array(self.labels)
            positive_indexes = list(np.where(labels_array == self.labels[idx])[0])
            if len(positive_indexes) > 1:
                positive_index_sample = sample(positive_indexes, 2)
                positive_index = positive_index_sample[0] if positive_index_sample[0] != idx \
                    else positive_index_sample[1]
            else:
                # Robustness fix: with a single sample for this class,
                # sample(positive_indexes, 2) would raise ValueError;
                # fall back to using the anchor itself as the positive.
                positive_index = idx
            negative_indexes = list(np.where(labels_array != self.labels[idx])[0])
            negative_index = sample(negative_indexes, 1)[0]
            # TODO: implement hard triplets

            depth_map_p = torch.from_numpy(np.copy(self.data[positive_index]))
            depth_map_n = torch.from_numpy(np.copy(self.data[negative_index]))

            depth_map_p = tensor_to_dictionary(depth_map_p)
            depth_map_n = tensor_to_dictionary(depth_map_n)

        # TODO: Add Data augmentation to positive and negative ?

        # Apply potential augmentations (anchor only)
        depth_map_a = random_augmentation(self.augmentations, self.augmentations_prob, depth_map_a)

        if self.normalize:
            depth_map_a = normalize_single_body_dict(depth_map_a)
            depth_map_a = normalize_single_hand_dict(depth_map_a)
            if self.triplet:
                depth_map_p = normalize_single_body_dict(depth_map_p)
                depth_map_p = normalize_single_hand_dict(depth_map_p)
                depth_map_n = normalize_single_body_dict(depth_map_n)
                depth_map_n = normalize_single_hand_dict(depth_map_n)

        depth_map_a = dictionary_to_tensor(depth_map_a)
        # Move the landmark position interval to improve performance
        depth_map_a = depth_map_a - 0.5

        if self.triplet:
            depth_map_p = dictionary_to_tensor(depth_map_p)
            depth_map_p = depth_map_p - 0.5
            depth_map_n = dictionary_to_tensor(depth_map_n)
            depth_map_n = depth_map_n - 0.5

        if self.transform:
            depth_map_a = self.transform(depth_map_a)
            if self.triplet:
                depth_map_p = self.transform(depth_map_p)
                depth_map_n = self.transform(depth_map_n)

        if self.triplet:
            return depth_map_a, depth_map_p, depth_map_n

        return depth_map_a, label

    def __len__(self):
        """Return the number of samples in the dataset."""
        return len(self.labels)
|
||||
Reference in New Issue
Block a user