Some changes to allow training with kaggle data
This commit is contained in:
@@ -61,20 +61,25 @@ def map_blazepose_keypoint(column):
|
|||||||
return f"{mapped}_{hand}{suffix}"
|
return f"{mapped}_{hand}{suffix}"
|
||||||
|
|
||||||
|
|
||||||
def map_blazepose_df(df):
|
def map_blazepose_df(df, rename=True):
|
||||||
|
to_drop = []
|
||||||
|
if rename:
|
||||||
|
renamings = {}
|
||||||
|
for column in df.columns:
|
||||||
|
mapped_column = map_blazepose_keypoint(column)
|
||||||
|
if mapped_column:
|
||||||
|
renamings[column] = mapped_column
|
||||||
|
else:
|
||||||
|
to_drop.append(column)
|
||||||
|
df = df.rename(columns=renamings)
|
||||||
|
|
||||||
for index, row in df.iterrows():
|
for index, row in df.iterrows():
|
||||||
|
|
||||||
|
sequence_size = len(row["leftEar_Y"])
|
||||||
lsx = row["leftShoulder_X"]
|
lsx = row["leftShoulder_X"]
|
||||||
rsx = row["rightShoulder_X"]
|
rsx = row["rightShoulder_X"]
|
||||||
lsy = row["leftShoulder_Y"]
|
lsy = row["leftShoulder_Y"]
|
||||||
rsy = row["rightShoulder_Y"]
|
rsy = row["rightShoulder_Y"]
|
||||||
# convert all to list
|
|
||||||
lsx = lsx[1:-1].split(",")
|
|
||||||
rsx = rsx[1:-1].split(",")
|
|
||||||
lsy = lsy[1:-1].split(",")
|
|
||||||
rsy = rsy[1:-1].split(",")
|
|
||||||
sequence_size = len(lsx)
|
|
||||||
|
|
||||||
neck_x = []
|
neck_x = []
|
||||||
neck_y = []
|
neck_y = []
|
||||||
# Treat each element of the sequence (analyzed frame) individually
|
# Treat each element of the sequence (analyzed frame) individually
|
||||||
@@ -84,4 +89,5 @@ def map_blazepose_df(df):
|
|||||||
df.loc[index, "neck_X"] = str(neck_x)
|
df.loc[index, "neck_X"] = str(neck_x)
|
||||||
df.loc[index, "neck_Y"] = str(neck_y)
|
df.loc[index, "neck_Y"] = str(neck_y)
|
||||||
|
|
||||||
return df
|
df.drop(columns=to_drop, inplace=True)
|
||||||
|
return df
|
||||||
@@ -5,30 +5,30 @@ import pandas as pd
|
|||||||
from normalization.hand_normalization import normalize_hands_full
|
from normalization.hand_normalization import normalize_hands_full
|
||||||
from normalization.body_normalization import normalize_body_full
|
from normalization.body_normalization import normalize_body_full
|
||||||
|
|
||||||
DATASET_PATH = './data/wlasl'
|
DATASET_PATH = './data/processed'
|
||||||
# Load the dataset
|
# Load the dataset
|
||||||
df = pd.read_csv(os.path.join(DATASET_PATH, "WLASL100_train.csv"), encoding="utf-8")
|
df = pd.read_csv(os.path.join(DATASET_PATH, "spoter_train.csv"), encoding="utf-8")
|
||||||
|
|
||||||
print(df.head())
|
print(df.head())
|
||||||
print(df.columns)
|
print(df.columns)
|
||||||
|
|
||||||
# Retrieve metadata
|
# Retrieve metadata
|
||||||
video_size_heights = df["video_height"].to_list()
|
# video_size_heights = df["video_height"].to_list()
|
||||||
video_size_widths = df["video_width"].to_list()
|
# video_size_widths = df["video_width"].to_list()
|
||||||
|
|
||||||
# Delete redundant (non-related) properties
|
# Delete redundant (non-related) properties
|
||||||
del df["video_height"]
|
# del df["video_height"]
|
||||||
del df["video_width"]
|
# del df["video_width"]
|
||||||
|
|
||||||
# Temporarily remove other relevant metadata
|
# Temporarily remove other relevant metadata
|
||||||
labels = df["labels"].to_list()
|
labels = df["labels"].to_list()
|
||||||
video_fps = df["fps"].to_list()
|
signs = df["sign"].to_list()
|
||||||
|
|
||||||
del df["labels"]
|
del df["labels"]
|
||||||
del df["fps"]
|
del df["sign"]
|
||||||
del df["split"]
|
del df["path"]
|
||||||
del df["video_id"]
|
del df["participant_id"]
|
||||||
del df["label_name"]
|
del df["sequence_id"]
|
||||||
del df["length"]
|
|
||||||
|
|
||||||
# Convert the strings into lists
|
# Convert the strings into lists
|
||||||
|
|
||||||
@@ -41,7 +41,7 @@ for column in df.columns:
|
|||||||
|
|
||||||
# Perform the normalizations
|
# Perform the normalizations
|
||||||
df = normalize_hands_full(df)
|
df = normalize_hands_full(df)
|
||||||
df, invalid_row_indexes = normalize_body_full(df)
|
# df, invalid_row_indexes = normalize_body_full(df)
|
||||||
|
|
||||||
# Clear lists of items from deleted rows
|
# Clear lists of items from deleted rows
|
||||||
# labels = [t for i, t in enumerate(labels) if i not in invalid_row_indexes]
|
# labels = [t for i, t in enumerate(labels) if i not in invalid_row_indexes]
|
||||||
@@ -49,6 +49,6 @@ df, invalid_row_indexes = normalize_body_full(df)
|
|||||||
|
|
||||||
# Return the metadata back to the dataset
|
# Return the metadata back to the dataset
|
||||||
df["labels"] = labels
|
df["labels"] = labels
|
||||||
df["fps"] = video_fps
|
df["sign"] = signs
|
||||||
|
|
||||||
df.to_csv(os.path.join(DATASET_PATH, "wlasl_train_norm.csv"), encoding="utf-8", index=False)
|
df.to_csv(os.path.join(DATASET_PATH, "spoter_train_norm.csv"), encoding="utf-8", index=False)
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
@@ -1,5 +1,5 @@
|
|||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
from preprocessing.create_wlasl_landmarks_dataset import parse_create_args, create
|
from preprocessing.create_fingerspelling_dataset import parse_create_args, create
|
||||||
from preprocessing.extract_mediapipe_landmarks import parse_extract_args, extract
|
from preprocessing.extract_mediapipe_landmarks import parse_extract_args, extract
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
172
preprocessing/create_fingerspelling_dataset.py
Normal file
172
preprocessing/create_fingerspelling_dataset.py
Normal file
@@ -0,0 +1,172 @@
|
|||||||
|
import os
|
||||||
|
import os.path as op
|
||||||
|
import json
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
import cv2
|
||||||
|
import mediapipe as mp
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from utils import get_logger
|
||||||
|
from tqdm.auto import tqdm
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from normalization.blazepose_mapping import map_blazepose_df
|
||||||
|
|
||||||
|
BASE_DATA_FOLDER = 'data/'
|
||||||
|
|
||||||
|
mp_drawing = mp.solutions.drawing_utils
|
||||||
|
mp_drawing_styles = mp.solutions.drawing_styles
|
||||||
|
mp_hands = mp.solutions.hands
|
||||||
|
mp_holistic = mp.solutions.holistic
|
||||||
|
pose_landmarks = mp_holistic.PoseLandmark
|
||||||
|
hand_landmarks = mp_holistic.HandLandmark
|
||||||
|
|
||||||
|
|
||||||
|
def get_landmarks_names():
    '''
    Build the comma-separated list of landmark column names.

    For every entry of ``pose_landmarks`` the lowercase landmark name is
    emitted twice, suffixed with ``_x`` and ``_y``. The same is then done for
    every entry of ``hand_landmarks``, once with the ``left_hand_`` prefix and
    once with the ``right_hand_`` prefix.

    Returns:
        A single string of the form ``"<pose names>,<left hand names>,<right hand names>"``.
    '''
    # (column prefix, landmark enumeration) in the order the names are emitted.
    groups = (
        ('', pose_landmarks),
        ('left_hand_', hand_landmarks),
        ('right_hand_', hand_landmarks),
    )
    return ','.join(
        ','.join(
            f'{prefix}{lmk.name.lower()}_{axis}'
            for lmk in landmarks
            for axis in ('x', 'y')
        )
        for prefix, landmarks in groups
    )
|
||||||
|
|
||||||
|
|
||||||
|
def convert_to_str(arr, precision=6):
    '''
    Serialize a value to the compact string form stored in the CSV files.

    Args:
        arr: Value to serialize. ``numpy.ndarray`` instances are rendered as a
            bracketed, comma-separated list; anything else goes through ``str``.
        precision: Number of decimal places used for non-zero array elements.

    Returns:
        For a 1-D array: ``"[v0,v1,...]"`` where exact zeros are written as
        ``"0"`` and every other element is written with ``precision`` decimals.
        Arrays with more dimensions are rendered as nested bracketed lists and
        a 0-d array is rendered as the single formatted element.
        For non-array input: ``str(arr)``.
    '''
    if not isinstance(arr, np.ndarray):
        return str(arr)

    def _format_scalar(val):
        # Exact zeros are written as a bare "0" to keep the CSV small.
        return '0' if val == 0 else f'{val:.{precision}f}'

    if arr.ndim == 0:
        # A 0-d array cannot be iterated; format its single element.
        return _format_scalar(arr.item())
    if arr.ndim > 1:
        # Each element is itself an array: comparing it with 0 would be
        # ambiguous, so serialize row by row instead.
        return f"[{','.join(convert_to_str(row, precision) for row in arr)}]"
    return f"[{','.join(_format_scalar(val) for val in arr)}]"
|
||||||
|
|
||||||
|
|
||||||
|
def parse_create_args(parser):
    '''
    Register the dataset-creation command line options on ``parser``.

    Args:
        parser: An ``argparse.ArgumentParser`` (or compatible object exposing
            ``add_argument``). It is modified in place; nothing is returned.

    Options added:
        --landmarks-dataset / -lmks (required), --dataset-folder / -df,
        --videos-folder / -videos, --num-classes / -nc, --create-new-split,
        --test-size / -ts.
    '''
    # Help texts use implicit string concatenation rather than a backslash
    # inside the literal, so source indentation never leaks into the text.
    parser.add_argument(
        '--landmarks-dataset', '-lmks', required=True,
        help='Path to folder with landmarks npy files. '
             'You need to run `extract_mediapipe_landmarks.py` script first')
    parser.add_argument(
        '--dataset-folder', '-df', default='data/wlasl',
        help='Path to folder where original `WLASL_v0.3.json` and `id_to_label.json` are stored. '
             'Note that final CSV files will be saved in this folder too.')
    parser.add_argument(
        '--videos-folder', '-videos', default=None,
        help='Path to folder with videos. If None, then no information of videos (fps, length, '
             'width and height) will be stored in final csv file')
    parser.add_argument(
        '--num-classes', '-nc', default=100, type=int,
        help='Number of classes to use in WLASL dataset')
    parser.add_argument('--create-new-split', action='store_true')
    parser.add_argument(
        '--test-size', '-ts', default=0.25, type=float,
        help='Test split percentage size. Only required if --create-new-split is set')
|
||||||
|
|
||||||
|
|
||||||
|
# python3 preprocessing.py --landmarks-dataset=data/landmarks -videos data/wlasl/videos
|
||||||
|
def _read_video_info(video_path):
    '''
    Read basic metadata of a video file.

    Returns:
        A dict with keys ``video_width``, ``video_height``, ``fps`` and
        ``length`` (frame count divided by fps), or ``None`` when the video
        cannot be opened. The capture handle is released in every case.
    '''
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            return None
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        return {'video_width': cap.get(cv2.CAP_PROP_FRAME_WIDTH),
                'video_height': cap.get(cv2.CAP_PROP_FRAME_HEIGHT),
                'fps': fps,
                # A reported fps of 0 would raise ZeroDivisionError; record
                # the duration as unknown instead.
                'length': frame_count / float(fps) if fps else float('nan')}
    finally:
        cap.release()


def create(args):
    '''
    Build the fingerspelling train/val CSV files from extracted landmark files.

    Landmark files are expected to be named ``<label>!<split>[...].npy``.
    Files that do not match this pattern are skipped with a warning.

    Args:
        args: Parsed command line namespace. The attributes read are
            ``landmarks_dataset``, ``videos_folder``, ``dataset_folder``,
            ``num_classes``, ``test_size`` and ``create_new_split``.

    Side effects:
        Creates ``dataset_folder`` if needed and writes ``id_to_label.json``,
        ``fingerspelling_train.csv`` and ``fingerspelling_val.csv`` into it.
        The ``val`` file holds the rows of the test split.

    Raises:
        AssertionError: If the train and test splits do not contain the same
            set of labels.
    '''
    logger = get_logger(__name__)

    landmarks_dataset = args.landmarks_dataset
    videos_folder = args.videos_folder
    dataset_folder = args.dataset_folder
    num_classes = args.num_classes
    test_size = args.test_size

    os.makedirs(dataset_folder, exist_ok=True)

    # `id_to_label.json` is generated below from the file names rather than
    # copied from the WLASL data folder.

    # os.listdir returns entries in arbitrary order; sort so that the
    # label -> id mapping derived from it is reproducible between runs.
    landmarks_files = sorted(os.listdir(landmarks_dataset))

    video_data = []
    for file in tqdm(landmarks_files):
        # Expected file name: "<label>!<split>.npy"
        if not file.endswith('.npy') or '!' not in file:
            logger.warning(f'{file} does not match "<label>!<split>.npy". Skipping')
            continue

        label = file.split('!')[0]
        subset = file.split('!')[1].split('.')[0]

        # Strip only the trailing extension to get the id shared with the video.
        video_id = file[:-len('.npy')]

        video_dict = {'video_id': video_id,
                      'label_name': label,
                      'split': subset}

        if videos_folder is not None:
            video_info = _read_video_info(op.join(videos_folder, f'{video_id}.mp4'))
            if video_info is None:
                logger.warning(f'Video {video_id}.mp4 not found')
                continue
            video_dict.update(video_info)
        video_data.append(video_dict)

    df_video = pd.DataFrame(video_data)
    video_ids = df_video['video_id'].unique()
    lmks_data = []
    lmks_names = get_landmarks_names().split(',')

    # Map every distinct label name to an integer id.
    labels = df_video['label_name'].unique()
    label_to_id = {label: i for i, label in enumerate(labels)}

    df_video['labels'] = df_video['label_name'].map(label_to_id)

    # Export the inverse mapping (id -> label name).
    id_to_label = {i: label for label, i in label_to_id.items()}
    with open(op.join(dataset_folder, 'id_to_label.json'), 'w', encoding='utf-8') as f:
        json.dump(id_to_label, f, indent=4)

    for video_id in video_ids:
        lmk_fn = op.join(landmarks_dataset, f'{video_id}.npy')
        if not op.exists(lmk_fn):
            logger.warning(f'{lmk_fn} file not found. Skipping')
            continue
        # Transpose so that iterating yields one sequence per landmark
        # coordinate (assumes files are stored as frames x coordinates - TODO confirm).
        lmk = np.load(lmk_fn).T
        lmks_dict = {'video_id': video_id}
        for lmk_, name in zip(lmk, lmks_names):
            lmks_dict[name] = lmk_
        lmks_data.append(lmks_dict)

    df_lmks = pd.DataFrame(lmks_data)
    print(df_lmks)
    # 'video_id' is the only column the two frames share.
    df = pd.merge(df_video, df_lmks, on='video_id')
    print(df)

    aux_columns = ['split', 'video_id', 'labels', 'label_name']
    if videos_folder is not None:
        aux_columns += ['video_width', 'video_height', 'fps', 'length']
    # Keep the metadata aside and re-attach it after the landmark mapping
    # (presumably map_blazepose_df drops columns it cannot map - verify).
    df_aux = df[aux_columns]
    df = map_blazepose_df(df)
    df = pd.concat([df, df_aux], axis=1)

    if args.create_new_split:
        df_train, df_test = train_test_split(df, test_size=test_size, stratify=df['labels'], random_state=42)
    else:
        print(df['split'].unique())
        df_train = df[(df['split'] == 'train') | (df['split'] == 'val')]
        df_test = df[df['split'] == 'test']

    print(f'Num classes: {num_classes}')
    print(df_train['labels'].value_counts())
    assert set(df_train['labels'].unique()) == set(df_test['labels'].unique()), (
        'The labels for train and test dataframe are different. '
        'We recommend to download the dataset again, or to use '
        'the --create-new-split flag')

    # The test rows are written under the "val" file name.
    for split, df_split in zip(['train', 'val'],
                               [df_train, df_test]):
        fn_out = op.join(dataset_folder, f'fingerspelling_{split}.csv')
        (df_split.reset_index(drop=True)
         .applymap(convert_to_str)
         .to_csv(fn_out, index=False))
|
||||||
@@ -4,6 +4,8 @@ import pandas as pd
|
|||||||
from tqdm.auto import tqdm
|
from tqdm.auto import tqdm
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
from normalization.blazepose_mapping import map_blazepose_df
|
||||||
|
|
||||||
def create(train_landmark_files, train_csv, dataset_folder, test_size):
|
def create(train_landmark_files, train_csv, dataset_folder, test_size):
|
||||||
os.makedirs(dataset_folder, exist_ok=True)
|
os.makedirs(dataset_folder, exist_ok=True)
|
||||||
|
|
||||||
@@ -17,15 +19,15 @@ def create(train_landmark_files, train_csv, dataset_folder, test_size):
|
|||||||
mapping = {
|
mapping = {
|
||||||
'pose_0': 'nose',
|
'pose_0': 'nose',
|
||||||
'pose_1': 'leftEye',
|
'pose_1': 'leftEye',
|
||||||
'pose_2': 'rightEye',
|
'pose_4': 'rightEye',
|
||||||
'pose_3': 'leftEar',
|
'pose_7': 'leftEar',
|
||||||
'pose_4': 'rightEar',
|
'pose_8': 'rightEar',
|
||||||
'pose_5': 'leftShoulder',
|
'pose_11': 'leftShoulder',
|
||||||
'pose_6': 'rightShoulder',
|
'pose_12': 'rightShoulder',
|
||||||
'pose_7': 'leftElbow',
|
'pose_13': 'leftElbow',
|
||||||
'pose_8': 'rightElbow',
|
'pose_14': 'rightElbow',
|
||||||
'pose_9': 'leftWrist',
|
'pose_15': 'leftWrist',
|
||||||
'pose_10': 'rightWrist',
|
'pose_16': 'rightWrist',
|
||||||
|
|
||||||
'left_hand_0': 'wrist_left',
|
'left_hand_0': 'wrist_left',
|
||||||
'left_hand_1': 'thumbCMC_left',
|
'left_hand_1': 'thumbCMC_left',
|
||||||
@@ -77,7 +79,7 @@ def create(train_landmark_files, train_csv, dataset_folder, test_size):
|
|||||||
columns.append(f'{v}_X')
|
columns.append(f'{v}_X')
|
||||||
columns.append(f'{v}_Y')
|
columns.append(f'{v}_Y')
|
||||||
|
|
||||||
for _, row in tqdm(train_df.head(6000).iterrows(), total=6000):
|
for _, row in tqdm(train_df.head(10000).iterrows(), total=10000):
|
||||||
path, participant_id, sequence_id, sign = row['path'], row['participant_id'], row['sequence_id'], row['sign']
|
path, participant_id, sequence_id, sign = row['path'], row['participant_id'], row['sequence_id'], row['sign']
|
||||||
parquet_file = os.path.join(train_landmark_files, str(participant_id), f"{sequence_id}.parquet")
|
parquet_file = os.path.join(train_landmark_files, str(participant_id), f"{sequence_id}.parquet")
|
||||||
|
|
||||||
@@ -136,6 +138,7 @@ def create(train_landmark_files, train_csv, dataset_folder, test_size):
|
|||||||
video_data.append(new_landmark_data)
|
video_data.append(new_landmark_data)
|
||||||
|
|
||||||
video_data = pd.concat(video_data, axis=0, ignore_index=True)
|
video_data = pd.concat(video_data, axis=0, ignore_index=True)
|
||||||
|
video_data = map_blazepose_df(video_data, rename=False)
|
||||||
video_data.to_csv(os.path.join(dataset_folder, 'spoter.csv'), index=False)
|
video_data.to_csv(os.path.join(dataset_folder, 'spoter.csv'), index=False)
|
||||||
|
|
||||||
train_landmark_files = 'data/train_landmark_files'
|
train_landmark_files = 'data/train_landmark_files'
|
||||||
|
|||||||
@@ -110,6 +110,7 @@ def create(args):
|
|||||||
'length': length}
|
'length': length}
|
||||||
video_dict.update(video_info)
|
video_dict.update(video_info)
|
||||||
video_data.append(video_dict)
|
video_data.append(video_dict)
|
||||||
|
|
||||||
df_video = pd.DataFrame(video_data)
|
df_video = pd.DataFrame(video_data)
|
||||||
video_ids = df_video['video_id'].unique()
|
video_ids = df_video['video_id'].unique()
|
||||||
lmks_data = []
|
lmks_data = []
|
||||||
@@ -126,9 +127,7 @@ def create(args):
|
|||||||
lmks_data.append(lmks_dict)
|
lmks_data.append(lmks_dict)
|
||||||
|
|
||||||
df_lmks = pd.DataFrame(lmks_data)
|
df_lmks = pd.DataFrame(lmks_data)
|
||||||
print(df_lmks)
|
|
||||||
df = pd.merge(df_video, df_lmks)
|
df = pd.merge(df_video, df_lmks)
|
||||||
print(df)
|
|
||||||
aux_columns = ['split', 'video_id', 'labels', 'label_name']
|
aux_columns = ['split', 'video_id', 'labels', 'label_name']
|
||||||
if videos_folder is not None:
|
if videos_folder is not None:
|
||||||
aux_columns += ['video_width', 'video_height', 'fps', 'length']
|
aux_columns += ['video_width', 'video_height', 'fps', 'length']
|
||||||
|
|||||||
@@ -132,6 +132,12 @@ def extract(args):
|
|||||||
ret, image_orig = cap.read()
|
ret, image_orig = cap.read()
|
||||||
height, width = image_orig.shape[:2]
|
height, width = image_orig.shape[:2]
|
||||||
landmarks_video = []
|
landmarks_video = []
|
||||||
|
|
||||||
|
# make sure fps is 20 by determining the number of frames to be skipped
|
||||||
|
frame_rate = int(cap.get(cv2.CAP_PROP_FPS))
|
||||||
|
frame_skip = (frame_rate // 20) - 1
|
||||||
|
|
||||||
|
|
||||||
with tqdm(total=int(cap.get(cv2.CAP_PROP_FRAME_COUNT))) as pbar:
|
with tqdm(total=int(cap.get(cv2.CAP_PROP_FRAME_COUNT))) as pbar:
|
||||||
with mp_holistic.Holistic(
|
with mp_holistic.Holistic(
|
||||||
static_image_mode=False,
|
static_image_mode=False,
|
||||||
@@ -145,6 +151,9 @@ def extract(args):
|
|||||||
print(e)
|
print(e)
|
||||||
landmarks = get_landmarks(image_orig, holistic, debug=True)
|
landmarks = get_landmarks(image_orig, holistic, debug=True)
|
||||||
ret, image_orig = cap.read()
|
ret, image_orig = cap.read()
|
||||||
|
for _ in range(frame_skip):
|
||||||
|
ret, image_orig = cap.read()
|
||||||
|
pbar.update(1)
|
||||||
landmarks_video.append(landmarks)
|
landmarks_video.append(landmarks)
|
||||||
pbar.update(1)
|
pbar.update(1)
|
||||||
landmarks_video = np.vstack(landmarks_video)
|
landmarks_video = np.vstack(landmarks_video)
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ dataset = "data/processed/spoter.csv"
|
|||||||
|
|
||||||
# read the dataset
|
# read the dataset
|
||||||
df = pd.read_csv(dataset)
|
df = pd.read_csv(dataset)
|
||||||
df = map_blazepose_df(df)
|
|
||||||
|
|
||||||
with open("data/sign_to_prediction_index_map.json", "r") as f:
|
with open("data/sign_to_prediction_index_map.json", "r") as f:
|
||||||
sign_to_prediction_index_max = json.load(f)
|
sign_to_prediction_index_max = json.load(f)
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
pandas
|
pandas
|
||||||
bokeh==2.4.3
|
bokeh==2.4.3
|
||||||
boto3>=1.9
|
boto3>=1.9
|
||||||
clearml==1.6.4
|
|
||||||
ipywidgets==8.0.4
|
ipywidgets==8.0.4
|
||||||
matplotlib==3.5.3
|
matplotlib==3.5.3
|
||||||
mediapipe==0.8.11
|
mediapipe==0.8.11
|
||||||
@@ -9,6 +8,7 @@ notebook==6.5.2
|
|||||||
opencv-python==4.6.0.66
|
opencv-python==4.6.0.66
|
||||||
plotly==5.11.0
|
plotly==5.11.0
|
||||||
scikit-learn==1.0.2
|
scikit-learn==1.0.2
|
||||||
|
clearml==1.10.3
|
||||||
torch
|
torch
|
||||||
torchvision
|
torchvision
|
||||||
tqdm==4.54.1
|
tqdm==4.54.1
|
||||||
|
|||||||
4
train.py
4
train.py
@@ -15,7 +15,7 @@ from torchvision import transforms
|
|||||||
from torch.utils.data import DataLoader
|
from torch.utils.data import DataLoader
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import copy
|
import copy
|
||||||
|
import numpy as np
|
||||||
from datasets import CzechSLRDataset, SLREmbeddingDataset, collate_fn_triplet_padd, collate_fn_padd
|
from datasets import CzechSLRDataset, SLREmbeddingDataset, collate_fn_triplet_padd, collate_fn_padd
|
||||||
from models import SPOTER, SPOTER_EMBEDDINGS, train_epoch, evaluate, train_epoch_embedding, \
|
from models import SPOTER, SPOTER_EMBEDDINGS, train_epoch, evaluate, train_epoch_embedding, \
|
||||||
train_epoch_embedding_online, evaluate_embedding
|
train_epoch_embedding_online, evaluate_embedding
|
||||||
@@ -32,7 +32,7 @@ except ImportError:
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
PROJECT_NAME = "spoter"
|
PROJECT_NAME = "SpoterEmbedding"
|
||||||
CLEARML = "clearml"
|
CLEARML = "clearml"
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
19
train.sh
19
train.sh
@@ -1,22 +1,21 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
python -m train \
|
python -m train \
|
||||||
--save_checkpoints_every 10 \
|
--save_checkpoints_every 10 \
|
||||||
--experiment_name "augment_rotate_75_x8" \
|
--experiment_name "basic" \
|
||||||
--epochs 300 \
|
--epochs 300 \
|
||||||
--optimizer "ADAM" \
|
--optimizer "ADAM" \
|
||||||
--lr 0.001 \
|
--lr 0.0001 \
|
||||||
--batch_size 16 \
|
--batch_size 16 \
|
||||||
--dataset_name "processed" \
|
--dataset_name "GoogleWLASL" \
|
||||||
--training_set_path "spoter_train.csv" \
|
--training_set_path "spoter_train.csv" \
|
||||||
--validation_set_path "spoter_test.csv" \
|
--validation_set_path "spoter_test.csv" \
|
||||||
--vector_length 32 \
|
--vector_length 32 \
|
||||||
--epoch_iters -1 \
|
--epoch_iters -1 \
|
||||||
--scheduler_factor 0 \
|
--scheduler_factor 0.2 \
|
||||||
--hard_triplet_mining "in_batch" \
|
--hard_triplet_mining "None" \
|
||||||
--filter_easy_triplets \
|
--filter_easy_triplets \
|
||||||
--triplet_loss_margin 1 \
|
--triplet_loss_margin 2 \
|
||||||
--dropout 0.2 \
|
--dropout 0.2 \
|
||||||
--augmentations_prob=0.75 \
|
--tracker=clearml \
|
||||||
--hard_mining_scheduler_triplets_threshold=0 \
|
--dataset_loader=clearml \
|
||||||
--normalize_embeddings \
|
--dataset_project="SpoterEmbedding"
|
||||||
--num_classes 100 \
|
|
||||||
|
|||||||
1632
visualize_data.ipynb
Normal file
1632
visualize_data.ipynb
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user