Some changes to allow training with kaggle data

This commit is contained in:
2023-04-13 14:55:16 +00:00
parent c49645d7bc
commit 7c973f1b88
13 changed files with 1933 additions and 102 deletions

View File

@@ -0,0 +1,172 @@
import os
import os.path as op
import json
import shutil
import cv2
import mediapipe as mp
import numpy as np
import pandas as pd
from utils import get_logger
from tqdm.auto import tqdm
from sklearn.model_selection import train_test_split
from normalization.blazepose_mapping import map_blazepose_df
# Base folder for data files.
BASE_DATA_FOLDER = 'data/'
# Short aliases for the mediapipe solution modules.
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles
mp_hands = mp.solutions.hands
mp_holistic = mp.solutions.holistic
# Landmark enumerations of the holistic solution; get_landmarks_names()
# iterates over them to build the landmark column names.
pose_landmarks = mp_holistic.PoseLandmark
hand_landmarks = mp_holistic.HandLandmark
def get_landmarks_names():
    '''
    Build the landmark column names for the mediapipe holistic model.

    Returns a single comma-separated string. For every member of
    `pose_landmarks` it contains `<name>_x,<name>_y` (name lower-cased),
    followed by the same pair for every member of `hand_landmarks`, first
    prefixed with `left_hand_` and then with `right_hand_`.
    '''
    def _xy_columns(members, prefix=''):
        # One "<prefix><name>_x,<prefix><name>_y" entry per landmark.
        entries = []
        for member in members:
            lowered = member.name.lower()
            entries.append(f'{prefix}{lowered}_x,{prefix}{lowered}_y')
        return ','.join(entries)

    groups = (
        _xy_columns(pose_landmarks),
        _xy_columns(hand_landmarks, prefix='left_hand_'),
        _xy_columns(hand_landmarks, prefix='right_hand_'),
    )
    return ','.join(groups)
def convert_to_str(arr, precision=6):
    '''
    Serialize a value to the string stored in a CSV cell.

    A numpy array is iterated element by element and rendered as
    `[v1,v2,...]`: elements equal to zero become `0`, every other element is
    formatted as a fixed-point number with `precision` decimals. Any value
    that is not a numpy array is returned as `str(arr)`.
    '''
    if not isinstance(arr, np.ndarray):
        return str(arr)

    formatted = ['0' if item == 0 else f'{item:.{precision}f}' for item in arr]
    return '[' + ','.join(formatted) + ']'
def parse_create_args(parser):
    '''
    Register the command line options read by `create` on `parser`.

    `parser` only needs to provide `add_argument` (for example an
    `argparse.ArgumentParser`). It is modified in place; nothing is returned.
    '''
    add = parser.add_argument

    add('--landmarks-dataset', '-lmks', required=True,
        help=('Path to folder with landmarks npy files. '
              'You need to run `extract_mediapipe_landmarks.py` script first'))
    add('--dataset-folder', '-df', default='data/wlasl',
        help=('Path to folder where original `WLASL_v0.3.json` and `id_to_label.json` are stored. '
              'Note that final CSV files will be saved in this folder too.'))
    add('--videos-folder', '-videos', default=None,
        help=('Path to folder with videos. If None, then no information of videos (fps, length, '
              'width and height) will be stored in final csv file'))
    add('--num-classes', '-nc', default=100, type=int,
        help='Number of classes to use in WLASL dataset')
    add('--create-new-split', action='store_true')
    add('--test-size', '-ts', default=0.25, type=float,
        help='Test split percentage size. Only required if --create-new-split is set')
# python3 preprocessing.py --landmarks-dataset=data/landmarks -videos data/wlasl/videos
def _read_video_info(video_path):
    '''
    Read basic metadata of a video file with OpenCV.

    Returns a dict with the keys `video_width`, `video_height`, `fps` and
    `length` (frame count divided by fps), or None when the video cannot be
    opened. The capture handle is released in every case.
    '''
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            return None
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        # A reported fps of 0 would make the division raise ZeroDivisionError;
        # store NaN for the length instead of aborting the whole run.
        length = frame_count / float(fps) if fps > 0 else float('nan')
        return {'video_width': cap.get(cv2.CAP_PROP_FRAME_WIDTH),
                'video_height': cap.get(cv2.CAP_PROP_FRAME_HEIGHT),
                'fps': fps,
                'length': length}
    finally:
        cap.release()


def create(args):
    '''
    Build the train/val landmark CSV files from per-video `.npy` landmark files.

    File names in `args.landmarks_dataset` are parsed as follows: the text
    before the first `!` is the label name, the text between that `!` and the
    next `.` is the split, and the file name without the `.npy` extension is
    the video id. Files that do not end in `.npy` or contain no `!` are
    skipped with a warning.

    Steps:
      1. Collect one metadata row per landmark file. When
         `args.videos_folder` is set, width, height, fps and length of
         `<video_id>.mp4` are added; entries whose video cannot be opened are
         skipped with a warning.
      2. Assign an integer id to every label name and write the id -> label
         mapping to `id_to_label.json` in `args.dataset_folder`.
      3. Load every landmark file, transpose it and store its rows under the
         names returned by `get_landmarks_names()`.
      4. Pass the merged frame through `map_blazepose_df` and re-attach the
         auxiliary (non-landmark) columns.
      5. Split the data: with `args.create_new_split` a stratified
         `train_test_split` of size `args.test_size` is used; otherwise rows
         whose split is `train` or `val` form the train part and rows whose
         split is `test` form the test part.
      6. Write `fingerspelling_train.csv` (train part) and
         `fingerspelling_val.csv` (test part) to `args.dataset_folder`, with
         every cell serialized by `convert_to_str`.

    `args.num_classes` is only printed; it does not filter the data.

    Raises:
        AssertionError: if the train and test parts do not contain the same
            set of labels.
    '''
    logger = get_logger(__name__)

    landmarks_dataset = args.landmarks_dataset
    videos_folder = args.videos_folder
    dataset_folder = args.dataset_folder
    num_classes = args.num_classes
    test_size = args.test_size

    os.makedirs(dataset_folder, exist_ok=True)

    # os.listdir returns entries in arbitrary order and label ids are assigned
    # below in order of first appearance, so sort to keep ids reproducible.
    landmarks_files = sorted(os.listdir(landmarks_dataset))

    video_data = []
    for file in tqdm(landmarks_files):
        video_id, ext = op.splitext(file)
        if ext != '.npy' or '!' not in video_id:
            logger.warning(f'{file} is not a `<label>!<split>` npy file. Skipping')
            continue

        name_parts = file.split('!')
        label = name_parts[0]
        subset = name_parts[1].split('.')[0]

        video_dict = {'video_id': video_id,
                      'label_name': label,
                      'split': subset}

        if videos_folder is not None:
            video_info = _read_video_info(op.join(videos_folder, f'{video_id}.mp4'))
            if video_info is None:
                logger.warning(f'Video {video_id}.mp4 not found')
                continue
            video_dict.update(video_info)

        video_data.append(video_dict)

    df_video = pd.DataFrame(video_data)
    video_ids = df_video['video_id'].unique()

    lmks_data = []
    lmks_names = get_landmarks_names().split(',')

    # Map every label name to an integer id and store it in the `labels` column.
    labels = df_video['label_name'].unique()
    label_to_id = {label: i for i, label in enumerate(labels)}
    df_video['labels'] = df_video['label_name'].map(label_to_id)

    # Export the inverse mapping (id -> label name) next to the CSV files.
    id_to_label = {i: label for label, i in label_to_id.items()}
    with open(op.join(dataset_folder, 'id_to_label.json'), 'w') as f:
        json.dump(id_to_label, f, indent=4)

    for video_id in video_ids:
        lmk_fn = op.join(landmarks_dataset, f'{video_id}.npy')
        if not op.exists(lmk_fn):
            logger.warning(f'{lmk_fn} file not found. Skipping')
            continue
        # NOTE(review): assumes the transposed array has one row per name in
        # lmks_names; zip silently drops any surplus on either side - confirm.
        lmk = np.load(lmk_fn).T
        lmks_dict = {'video_id': video_id}
        for lmk_, name in zip(lmk, lmks_names):
            lmks_dict[name] = lmk_
        lmks_data.append(lmks_dict)

    df_lmks = pd.DataFrame(lmks_data)
    print(df_lmks)
    df = pd.merge(df_video, df_lmks)
    print(df)

    aux_columns = ['split', 'video_id', 'labels', 'label_name']
    if videos_folder is not None:
        aux_columns += ['video_width', 'video_height', 'fps', 'length']
    # Keep the auxiliary columns aside and concatenate them back after the
    # mapping (presumably map_blazepose_df does not carry them over - confirm).
    df_aux = df[aux_columns]
    df = map_blazepose_df(df)
    df = pd.concat([df, df_aux], axis=1)

    if args.create_new_split:
        df_train, df_test = train_test_split(df, test_size=test_size, stratify=df['labels'], random_state=42)
    else:
        print(df['split'].unique())
        df_train = df[(df['split'] == 'train') | (df['split'] == 'val')]
        df_test = df[df['split'] == 'test']

    print(f'Num classes: {num_classes}')
    print(df_train['labels'].value_counts())

    # Raised explicitly (instead of `assert`) so the check also runs under
    # `python -O`; the exception type is kept for existing callers.
    if set(df_train['labels'].unique()) != set(df_test['labels'].unique()):
        raise AssertionError(
            'The labels for train and test dataframe are different. '
            'We recommend to download the dataset again, or to use '
            'the --create-new-split flag')

    # The test part is written under the `val` file name.
    for split, df_split in zip(['train', 'val'],
                               [df_train, df_test]):
        fn_out = op.join(dataset_folder, f'fingerspelling_{split}.csv')
        (df_split.reset_index(drop=True)
                 .applymap(convert_to_str)
                 .to_csv(fn_out, index=False))

View File

@@ -4,6 +4,8 @@ import pandas as pd
from tqdm.auto import tqdm
import json
from normalization.blazepose_mapping import map_blazepose_df
def create(train_landmark_files, train_csv, dataset_folder, test_size):
os.makedirs(dataset_folder, exist_ok=True)
@@ -17,15 +19,15 @@ def create(train_landmark_files, train_csv, dataset_folder, test_size):
mapping = {
'pose_0': 'nose',
'pose_1': 'leftEye',
'pose_2': 'rightEye',
'pose_3': 'leftEar',
'pose_4': 'rightEar',
'pose_5': 'leftShoulder',
'pose_6': 'rightShoulder',
'pose_7': 'leftElbow',
'pose_8': 'rightElbow',
'pose_9': 'leftWrist',
'pose_10': 'rightWrist',
'pose_4': 'rightEye',
'pose_7': 'leftEar',
'pose_8': 'rightEar',
'pose_11': 'leftShoulder',
'pose_12': 'rightShoulder',
'pose_13': 'leftElbow',
'pose_14': 'rightElbow',
'pose_15': 'leftWrist',
'pose_16': 'rightWrist',
'left_hand_0': 'wrist_left',
'left_hand_1': 'thumbCMC_left',
@@ -77,7 +79,7 @@ def create(train_landmark_files, train_csv, dataset_folder, test_size):
columns.append(f'{v}_X')
columns.append(f'{v}_Y')
for _, row in tqdm(train_df.head(6000).iterrows(), total=6000):
for _, row in tqdm(train_df.head(10000).iterrows(), total=10000):
path, participant_id, sequence_id, sign = row['path'], row['participant_id'], row['sequence_id'], row['sign']
parquet_file = os.path.join(train_landmark_files, str(participant_id), f"{sequence_id}.parquet")
@@ -136,6 +138,7 @@ def create(train_landmark_files, train_csv, dataset_folder, test_size):
video_data.append(new_landmark_data)
video_data = pd.concat(video_data, axis=0, ignore_index=True)
video_data = map_blazepose_df(video_data, rename=False)
video_data.to_csv(os.path.join(dataset_folder, 'spoter.csv'), index=False)
train_landmark_files = 'data/train_landmark_files'

View File

@@ -110,6 +110,7 @@ def create(args):
'length': length}
video_dict.update(video_info)
video_data.append(video_dict)
df_video = pd.DataFrame(video_data)
video_ids = df_video['video_id'].unique()
lmks_data = []
@@ -126,9 +127,7 @@ def create(args):
lmks_data.append(lmks_dict)
df_lmks = pd.DataFrame(lmks_data)
print(df_lmks)
df = pd.merge(df_video, df_lmks)
print(df)
aux_columns = ['split', 'video_id', 'labels', 'label_name']
if videos_folder is not None:
aux_columns += ['video_width', 'video_height', 'fps', 'length']

View File

@@ -132,6 +132,12 @@ def extract(args):
ret, image_orig = cap.read()
height, width = image_orig.shape[:2]
landmarks_video = []
# make sure fps is 20 by determining the number of frames to be skipped
frame_rate = int(cap.get(cv2.CAP_PROP_FPS))
frame_skip = (frame_rate // 20) - 1
with tqdm(total=int(cap.get(cv2.CAP_PROP_FRAME_COUNT))) as pbar:
with mp_holistic.Holistic(
static_image_mode=False,
@@ -145,6 +151,9 @@ def extract(args):
print(e)
landmarks = get_landmarks(image_orig, holistic, debug=True)
ret, image_orig = cap.read()
for _ in range(frame_skip):
ret, image_orig = cap.read()
pbar.update(1)
landmarks_video.append(landmarks)
pbar.update(1)
landmarks_video = np.vstack(landmarks_video)

View File

@@ -8,7 +8,6 @@ dataset = "data/processed/spoter.csv"
# read the dataset
df = pd.read_csv(dataset)
df = map_blazepose_df(df)
with open("data/sign_to_prediction_index_map.json", "r") as f:
sign_to_prediction_index_max = json.load(f)