Initial Commit
This commit is contained in:
21
Dockerfile
21
Dockerfile
@@ -1,13 +1,8 @@
|
|||||||
FROM pytorch/pytorch
|
FROM ubuntu:20.04
|
||||||
|
ADD requirements.txt /requirements.txt
|
||||||
WORKDIR /app
|
ARG DEBIAN_FRONTEND=noninteractive
|
||||||
COPY ./requirements.txt /app/
|
RUN apt-get update
|
||||||
|
RUN apt-get install ffmpeg libsm6 libxext6 git -y
|
||||||
RUN pip install -r requirements.txt
|
RUN apt-get install -y libglib2.0-0
|
||||||
RUN apt-get -y update
|
RUN apt-get -y install python3-pip
|
||||||
RUN apt-get -y install git
|
RUN pip install -r /requirements.txt
|
||||||
RUN apt-get install ffmpeg libsm6 libxext6 -y
|
|
||||||
|
|
||||||
COPY . /app/
|
|
||||||
RUN git config --global --add safe.directory /app
|
|
||||||
CMD ./train.sh
|
|
||||||
@@ -30,6 +30,7 @@ class CzechSLRDataset(torch_data.Dataset):
|
|||||||
self.data = data
|
self.data = data
|
||||||
self.labels = labels
|
self.labels = labels
|
||||||
self.targets = list(labels)
|
self.targets = list(labels)
|
||||||
|
|
||||||
self.num_labels = num_labels
|
self.num_labels = num_labels
|
||||||
self.transform = transform
|
self.transform = transform
|
||||||
|
|
||||||
|
|||||||
20
export_label_id.py
Normal file
20
export_label_id.py
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
import os
|
||||||
|
import json
|
||||||
|
# read data/sign_to_prediction_index_map.json (maps each sign to its prediction index)
|
||||||
|
|
||||||
|
labels = {}
|
||||||
|
with open("data/sign_to_prediction_index_map.json", "r") as f:
|
||||||
|
sign_to_prediction_index_map = json.load(f)
|
||||||
|
|
||||||
|
# switch key and value
|
||||||
|
for key, value in sign_to_prediction_index_map.items():
|
||||||
|
labels[value] = key
|
||||||
|
|
||||||
|
|
||||||
|
if os.path.exists("data/processed/id_to_label.json"):
|
||||||
|
os.remove("data/processed/id_to_label.json")
|
||||||
|
|
||||||
|
with open("data/processed/id_to_label.json", "w") as f:
|
||||||
|
json.dump(labels, f)
|
||||||
|
|
||||||
|
|
||||||
@@ -12,12 +12,14 @@ def train_epoch(model, dataloader, criterion, optimizer, device, scheduler=None)
|
|||||||
running_loss = 0.0
|
running_loss = 0.0
|
||||||
model.train(True)
|
model.train(True)
|
||||||
for i, data in enumerate(dataloader):
|
for i, data in enumerate(dataloader):
|
||||||
|
|
||||||
inputs, labels = data
|
inputs, labels = data
|
||||||
inputs = inputs.squeeze(0).to(device)
|
inputs = inputs.squeeze(0).to(device)
|
||||||
labels = labels.to(device, dtype=torch.long)
|
labels = labels.to(device, dtype=torch.long)
|
||||||
|
|
||||||
optimizer.zero_grad()
|
optimizer.zero_grad()
|
||||||
outputs = model(inputs).expand(1, -1, -1)
|
outputs = model(inputs).expand(1, -1, -1)
|
||||||
|
|
||||||
loss = criterion(outputs[0], labels[0])
|
loss = criterion(outputs[0], labels[0])
|
||||||
loss.backward()
|
loss.backward()
|
||||||
optimizer.step()
|
optimizer.step()
|
||||||
@@ -159,7 +161,7 @@ def evaluate(model, dataloader, device, print_stats=False):
|
|||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
pred_correct, pred_all = 0, 0
|
pred_correct, pred_all = 0, 0
|
||||||
stats = {i: [0, 0] for i in range(101)}
|
stats = {i: [0, 0] for i in range(251)}
|
||||||
|
|
||||||
for i, data in enumerate(dataloader):
|
for i, data in enumerate(dataloader):
|
||||||
inputs, labels = data
|
inputs, labels = data
|
||||||
|
|||||||
@@ -62,23 +62,19 @@ def map_blazepose_keypoint(column):
|
|||||||
|
|
||||||
|
|
||||||
def map_blazepose_df(df):
|
def map_blazepose_df(df):
|
||||||
to_drop = []
|
|
||||||
renamings = {}
|
|
||||||
for column in df.columns:
|
|
||||||
mapped_column = map_blazepose_keypoint(column)
|
|
||||||
if mapped_column:
|
|
||||||
renamings[column] = mapped_column
|
|
||||||
else:
|
|
||||||
to_drop.append(column)
|
|
||||||
df = df.rename(columns=renamings)
|
|
||||||
|
|
||||||
for index, row in df.iterrows():
|
for index, row in df.iterrows():
|
||||||
|
|
||||||
sequence_size = len(row["leftEar_Y"])
|
|
||||||
lsx = row["leftShoulder_X"]
|
lsx = row["leftShoulder_X"]
|
||||||
rsx = row["rightShoulder_X"]
|
rsx = row["rightShoulder_X"]
|
||||||
lsy = row["leftShoulder_Y"]
|
lsy = row["leftShoulder_Y"]
|
||||||
rsy = row["rightShoulder_Y"]
|
rsy = row["rightShoulder_Y"]
|
||||||
|
# convert the stringified coordinate lists into Python lists: drop the first and last character, then split on commas
|
||||||
|
lsx = lsx[1:-1].split(",")
|
||||||
|
rsx = rsx[1:-1].split(",")
|
||||||
|
lsy = lsy[1:-1].split(",")
|
||||||
|
rsy = rsy[1:-1].split(",")
|
||||||
|
sequence_size = len(lsx)
|
||||||
|
|
||||||
neck_x = []
|
neck_x = []
|
||||||
neck_y = []
|
neck_y = []
|
||||||
# Treat each element of the sequence (analyzed frame) individually
|
# Treat each element of the sequence (analyzed frame) individually
|
||||||
@@ -88,5 +84,4 @@ def map_blazepose_df(df):
|
|||||||
df.loc[index, "neck_X"] = str(neck_x)
|
df.loc[index, "neck_X"] = str(neck_x)
|
||||||
df.loc[index, "neck_Y"] = str(neck_y)
|
df.loc[index, "neck_Y"] = str(neck_y)
|
||||||
|
|
||||||
df.drop(columns=to_drop, inplace=True)
|
|
||||||
return df
|
return df
|
||||||
|
|||||||
@@ -5,23 +5,30 @@ import pandas as pd
|
|||||||
from normalization.hand_normalization import normalize_hands_full
|
from normalization.hand_normalization import normalize_hands_full
|
||||||
from normalization.body_normalization import normalize_body_full
|
from normalization.body_normalization import normalize_body_full
|
||||||
|
|
||||||
DATASET_PATH = './data'
|
DATASET_PATH = './data/wlasl'
|
||||||
# Load the dataset
|
# Load the dataset
|
||||||
df = pd.read_csv(os.path.join(DATASET_PATH, "WLASL_test_15fps.csv"), encoding="utf-8")
|
df = pd.read_csv(os.path.join(DATASET_PATH, "WLASL100_train.csv"), encoding="utf-8")
|
||||||
|
|
||||||
|
print(df.head())
|
||||||
|
print(df.columns)
|
||||||
|
|
||||||
# Retrieve metadata
|
# Retrieve metadata
|
||||||
video_size_heights = df["video_size_height"].to_list()
|
video_size_heights = df["video_height"].to_list()
|
||||||
video_size_widths = df["video_size_width"].to_list()
|
video_size_widths = df["video_width"].to_list()
|
||||||
|
|
||||||
# Delete redundant (non-related) properties
|
# Delete redundant (non-related) properties
|
||||||
del df["video_size_height"]
|
del df["video_height"]
|
||||||
del df["video_size_width"]
|
del df["video_width"]
|
||||||
|
|
||||||
# Temporarily remove other relevant metadata
|
# Temporarily remove other relevant metadata
|
||||||
labels = df["labels"].to_list()
|
labels = df["labels"].to_list()
|
||||||
video_fps = df["video_fps"].to_list()
|
video_fps = df["fps"].to_list()
|
||||||
del df["labels"]
|
del df["labels"]
|
||||||
del df["video_fps"]
|
del df["fps"]
|
||||||
|
del df["split"]
|
||||||
|
del df["video_id"]
|
||||||
|
del df["label_name"]
|
||||||
|
del df["length"]
|
||||||
|
|
||||||
# Convert the strings into lists
|
# Convert the strings into lists
|
||||||
|
|
||||||
@@ -42,6 +49,6 @@ df, invalid_row_indexes = normalize_body_full(df)
|
|||||||
|
|
||||||
# Return the metadata back to the dataset
|
# Return the metadata back to the dataset
|
||||||
df["labels"] = labels
|
df["labels"] = labels
|
||||||
df["video_fps"] = video_fps
|
df["fps"] = video_fps
|
||||||
|
|
||||||
df.to_csv(os.path.join(DATASET_PATH, "WLASL_test_15fps_normalized.csv"), encoding="utf-8", index=False)
|
df.to_csv(os.path.join(DATASET_PATH, "wlasl_train_norm.csv"), encoding="utf-8", index=False)
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
146
preprocessing/create_google_asl_landmarks_dataset.py
Normal file
146
preprocessing/create_google_asl_landmarks_dataset.py
Normal file
@@ -0,0 +1,146 @@
|
|||||||
|
import os
|
||||||
|
import os.path as op
|
||||||
|
import pandas as pd
|
||||||
|
from tqdm.auto import tqdm
|
||||||
|
import json
|
||||||
|
|
||||||
|
def create(train_landmark_files, train_csv, dataset_folder, test_size):
|
||||||
|
os.makedirs(dataset_folder, exist_ok=True)
|
||||||
|
|
||||||
|
# load the sign_to_prediction_index_map.json file
|
||||||
|
with open('data/sign_to_prediction_index_map.json', 'r') as f:
|
||||||
|
sign_to_prediction_index_map = json.load(f)
|
||||||
|
|
||||||
|
train_df = pd.read_csv(train_csv)
|
||||||
|
video_data = []
|
||||||
|
|
||||||
|
mapping = {
|
||||||
|
'pose_0': 'nose',
|
||||||
|
'pose_1': 'leftEye',
|
||||||
|
'pose_2': 'rightEye',
|
||||||
|
'pose_3': 'leftEar',
|
||||||
|
'pose_4': 'rightEar',
|
||||||
|
'pose_5': 'leftShoulder',
|
||||||
|
'pose_6': 'rightShoulder',
|
||||||
|
'pose_7': 'leftElbow',
|
||||||
|
'pose_8': 'rightElbow',
|
||||||
|
'pose_9': 'leftWrist',
|
||||||
|
'pose_10': 'rightWrist',
|
||||||
|
|
||||||
|
'left_hand_0': 'wrist_left',
|
||||||
|
'left_hand_1': 'thumbCMC_left',
|
||||||
|
'left_hand_2': 'thumbMP_left',
|
||||||
|
'left_hand_3': 'thumbIP_left',
|
||||||
|
'left_hand_4': 'thumbTip_left',
|
||||||
|
'left_hand_5': 'indexMCP_left',
|
||||||
|
'left_hand_6': 'indexPIP_left',
|
||||||
|
'left_hand_7': 'indexDIP_left',
|
||||||
|
'left_hand_8': 'indexTip_left',
|
||||||
|
'left_hand_9': 'middleMCP_left',
|
||||||
|
'left_hand_10': 'middlePIP_left',
|
||||||
|
'left_hand_11': 'middleDIP_left',
|
||||||
|
'left_hand_12': 'middleTip_left',
|
||||||
|
'left_hand_13': 'ringMCP_left',
|
||||||
|
'left_hand_14': 'ringPIP_left',
|
||||||
|
'left_hand_15': 'ringDIP_left',
|
||||||
|
'left_hand_16': 'ringTip_left',
|
||||||
|
'left_hand_17': 'littleMCP_left',
|
||||||
|
'left_hand_18': 'littlePIP_left',
|
||||||
|
'left_hand_19': 'littleDIP_left',
|
||||||
|
'left_hand_20': 'littleTip_left',
|
||||||
|
|
||||||
|
'right_hand_0': 'wrist_right',
|
||||||
|
'right_hand_1': 'thumbCMC_right',
|
||||||
|
'right_hand_2': 'thumbMP_right',
|
||||||
|
'right_hand_3': 'thumbIP_right',
|
||||||
|
'right_hand_4': 'thumbTip_right',
|
||||||
|
'right_hand_5': 'indexMCP_right',
|
||||||
|
'right_hand_6': 'indexPIP_right',
|
||||||
|
'right_hand_7': 'indexDIP_right',
|
||||||
|
'right_hand_8': 'indexTip_right',
|
||||||
|
'right_hand_9': 'middleMCP_right',
|
||||||
|
'right_hand_10': 'middlePIP_right',
|
||||||
|
'right_hand_11': 'middleDIP_right',
|
||||||
|
'right_hand_12': 'middleTip_right',
|
||||||
|
'right_hand_13': 'ringMCP_right',
|
||||||
|
'right_hand_14': 'ringPIP_right',
|
||||||
|
'right_hand_15': 'ringDIP_right',
|
||||||
|
'right_hand_16': 'ringTip_right',
|
||||||
|
'right_hand_17': 'littleMCP_right',
|
||||||
|
'right_hand_18': 'littlePIP_right',
|
||||||
|
'right_hand_19': 'littleDIP_right',
|
||||||
|
'right_hand_20': 'littleTip_right',
|
||||||
|
}
|
||||||
|
|
||||||
|
columns = []
|
||||||
|
for k,v in mapping.items():
|
||||||
|
columns.append(f'{v}_X')
|
||||||
|
columns.append(f'{v}_Y')
|
||||||
|
|
||||||
|
for _, row in tqdm(train_df.head(6000).iterrows(), total=6000):
|
||||||
|
path, participant_id, sequence_id, sign = row['path'], row['participant_id'], row['sequence_id'], row['sign']
|
||||||
|
parquet_file = os.path.join(train_landmark_files, str(participant_id), f"{sequence_id}.parquet")
|
||||||
|
|
||||||
|
if not os.path.exists(parquet_file):
|
||||||
|
print(f"{parquet_file} not found. Skipping.")
|
||||||
|
continue
|
||||||
|
|
||||||
|
landmark_data = pd.read_parquet(parquet_file)
|
||||||
|
|
||||||
|
# replace all NaN values with 0
|
||||||
|
landmark_data = landmark_data.fillna(0)
|
||||||
|
|
||||||
|
# create a new dataframe with the correct column names (each mapping with x and y coordinates)
|
||||||
|
new_landmark_data = pd.DataFrame(columns=columns)
|
||||||
|
|
||||||
|
# add each row of the parquet file to the correct column (use mapping based on {type}_{index})
|
||||||
|
# for each frame, construct the new row
|
||||||
|
|
||||||
|
frame_column = landmark_data['frame']
|
||||||
|
# get unique frames
|
||||||
|
frames = frame_column.unique()
|
||||||
|
# sort
|
||||||
|
frames.sort()
|
||||||
|
new_row = {}
|
||||||
|
|
||||||
|
for frame_id in frames:
|
||||||
|
# get all rows for this frame
|
||||||
|
frame_data = landmark_data.loc[landmark_data['frame'] == frame_id]
|
||||||
|
# construct new row
|
||||||
|
for _, row in frame_data.iterrows():
|
||||||
|
t = f"{row['type']}_{row['landmark_index']}"
|
||||||
|
if t in mapping:
|
||||||
|
c = mapping[t]
|
||||||
|
new_row.setdefault(f"{c}_X", []).append(row['x'])
|
||||||
|
new_row.setdefault(f"{c}_Y", []).append(row['y'])
|
||||||
|
|
||||||
|
|
||||||
|
d = pd.DataFrame({k: [v] for k, v in new_row.items()})
|
||||||
|
|
||||||
|
# add to new dataframe
|
||||||
|
new_landmark_data = pd.concat([new_landmark_data, d], axis=0, ignore_index=True)
|
||||||
|
|
||||||
|
# set nan values to 0
|
||||||
|
new_landmark_data = new_landmark_data.fillna(0)
|
||||||
|
|
||||||
|
video_dict = {'path': path,
|
||||||
|
'participant_id': participant_id,
|
||||||
|
'sequence_id': sequence_id,
|
||||||
|
'sign': sign,
|
||||||
|
'labels': sign_to_prediction_index_map[sign]
|
||||||
|
}
|
||||||
|
|
||||||
|
# add these columns to the landmark data using concat
|
||||||
|
new_landmark_data = pd.concat([pd.DataFrame(video_dict, index=[0]), new_landmark_data], axis=1)
|
||||||
|
|
||||||
|
video_data.append(new_landmark_data)
|
||||||
|
|
||||||
|
video_data = pd.concat(video_data, axis=0, ignore_index=True)
|
||||||
|
video_data.to_csv(os.path.join(dataset_folder, 'spoter.csv'), index=False)
|
||||||
|
|
||||||
|
train_landmark_files = 'data/train_landmark_files'
|
||||||
|
train_csv = 'data/train.csv'
|
||||||
|
dataset_folder = 'data/processed'
|
||||||
|
test_size = 0.25
|
||||||
|
|
||||||
|
create(train_landmark_files, train_csv, dataset_folder, test_size)
|
||||||
@@ -76,8 +76,8 @@ def create(args):
|
|||||||
|
|
||||||
os.makedirs(dataset_folder, exist_ok=True)
|
os.makedirs(dataset_folder, exist_ok=True)
|
||||||
|
|
||||||
shutil.copy(os.path.join(BASE_DATA_FOLDER, 'wlasl/id_to_label.json'), dataset_folder)
|
# shutil.copy(os.path.join(BASE_DATA_FOLDER, 'wlasl/id_to_label.json'), dataset_folder)
|
||||||
shutil.copy(os.path.join(BASE_DATA_FOLDER, 'wlasl/WLASL_v0.3.json'), dataset_folder)
|
# shutil.copy(os.path.join(BASE_DATA_FOLDER, 'wlasl/WLASL_v0.3.json'), dataset_folder)
|
||||||
|
|
||||||
wlasl_json_fn = op.join(dataset_folder, 'WLASL_v0.3.json')
|
wlasl_json_fn = op.join(dataset_folder, 'WLASL_v0.3.json')
|
||||||
|
|
||||||
|
|||||||
32
preprocessing/split_dataset.py
Normal file
32
preprocessing/split_dataset.py
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import json
|
||||||
|
|
||||||
|
from normalization.blazepose_mapping import map_blazepose_df
|
||||||
|
|
||||||
|
# split the dataset into train and test set
|
||||||
|
dataset = "data/processed/spoter.csv"
|
||||||
|
|
||||||
|
# read the dataset
|
||||||
|
df = pd.read_csv(dataset)
|
||||||
|
df = map_blazepose_df(df)
|
||||||
|
|
||||||
|
with open("data/sign_to_prediction_index_map.json", "r") as f:
|
||||||
|
sign_to_prediction_index_max = json.load(f)
|
||||||
|
|
||||||
|
|
||||||
|
# filter df to keep only signs that have more than 4 samples
|
||||||
|
df = df[df["sign"].map(df["sign"].value_counts()) > 4]
|
||||||
|
|
||||||
|
# use the path column to split the dataset
|
||||||
|
paths = df["path"].unique()
|
||||||
|
|
||||||
|
# split the dataset into train and test set
|
||||||
|
train_paths = paths[:int(len(paths) * 0.8)]
|
||||||
|
|
||||||
|
# create the train and test set
|
||||||
|
train_df = df[df["path"].isin(train_paths)]
|
||||||
|
test_df = df[~df["path"].isin(train_paths)]
|
||||||
|
|
||||||
|
# save the train and test set
|
||||||
|
train_df.to_csv("data/processed/spoter_train.csv", index=False)
|
||||||
|
test_df.to_csv("data/processed/spoter_test.csv", index=False)
|
||||||
@@ -1,3 +1,4 @@
|
|||||||
|
pandas
|
||||||
bokeh==2.4.3
|
bokeh==2.4.3
|
||||||
boto3>=1.9
|
boto3>=1.9
|
||||||
clearml==1.6.4
|
clearml==1.6.4
|
||||||
@@ -6,9 +7,8 @@ matplotlib==3.5.3
|
|||||||
mediapipe==0.8.11
|
mediapipe==0.8.11
|
||||||
notebook==6.5.2
|
notebook==6.5.2
|
||||||
opencv-python==4.6.0.66
|
opencv-python==4.6.0.66
|
||||||
pandas==1.1.5
|
|
||||||
pandas==1.1.5
|
|
||||||
plotly==5.11.0
|
plotly==5.11.0
|
||||||
scikit-learn==1.0.2
|
scikit-learn==1.0.2
|
||||||
torchvision==0.13.0
|
torch
|
||||||
|
torchvision
|
||||||
tqdm==4.54.1
|
tqdm==4.54.1
|
||||||
|
|||||||
20
train.sh
20
train.sh
@@ -1,14 +1,14 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
python -m train \
|
python -m train \
|
||||||
--save_checkpoints_every -1 \
|
--save_checkpoints_every 10 \
|
||||||
--experiment_name "augment_rotate_75_x8" \
|
--experiment_name "augment_rotate_75_x8" \
|
||||||
--epochs 10 \
|
--epochs 300 \
|
||||||
--optimizer "SGD" \
|
--optimizer "ADAM" \
|
||||||
--lr 0.001 \
|
--lr 0.001 \
|
||||||
--batch_size 32 \
|
--batch_size 16 \
|
||||||
--dataset_name "wlasl" \
|
--dataset_name "processed" \
|
||||||
--training_set_path "WLASL100_train.csv" \
|
--training_set_path "spoter_train.csv" \
|
||||||
--validation_set_path "WLASL100_test.csv" \
|
--validation_set_path "spoter_test.csv" \
|
||||||
--vector_length 32 \
|
--vector_length 32 \
|
||||||
--epoch_iters -1 \
|
--epoch_iters -1 \
|
||||||
--scheduler_factor 0 \
|
--scheduler_factor 0 \
|
||||||
@@ -16,9 +16,7 @@ python -m train \
|
|||||||
--filter_easy_triplets \
|
--filter_easy_triplets \
|
||||||
--triplet_loss_margin 1 \
|
--triplet_loss_margin 1 \
|
||||||
--dropout 0.2 \
|
--dropout 0.2 \
|
||||||
--start_mining_hard=200 \
|
|
||||||
--hard_mining_pre_batch_multipler=16 \
|
|
||||||
--hard_mining_pre_batch_mining_count=5 \
|
|
||||||
--augmentations_prob=0.75 \
|
--augmentations_prob=0.75 \
|
||||||
--hard_mining_scheduler_triplets_threshold=0 \
|
--hard_mining_scheduler_triplets_threshold=0 \
|
||||||
# --normalize_embeddings \
|
--normalize_embeddings \
|
||||||
|
--num_classes 100 \
|
||||||
Reference in New Issue
Block a user