"""Build a frame-level ego/exo pair dataset and dump it as a single JSON file.

For every take in the "test" split, every annotated object, every
(ego camera, exo camera) pair and every annotated frame index of the query
camera, one sample is emitted.  A sample holds:

* ``first_frame_image`` / ``first_frame_anns``: the query view and its
  downscaled object mask (RLE encoded);
* ``image`` / ``anns``: the target view together with an all-zero mask that
  has the size of the target image.

``--setting ego-exo`` uses the ego camera as query and the exo camera as
target; ``--setting exo-ego`` does the opposite.
"""
import argparse
import copy
import json
import os

import cv2
import numpy as np
from natsort import natsorted
from PIL import Image
from pycocotools.mask import encode, decode, frPyObjects
from tqdm import tqdm

# Defaults reproduce the paths that used to be hard-coded in the script.
# NOTE: the default output file name does not depend on --setting.
DEFAULT_ROOT_PATH = '/scratch/yuqian_fu/data_segswap_test'
DEFAULT_SAVE_PATH = "/home/yuqian_fu/Projects/PSALM/exoego_test_framelevel_new.json"
DEFAULT_SPLIT_PATH = "/home/yuqian_fu/Projects/ego-exo4d-relation/correspondence/split.json"

# Integer factor by which the query mask is downscaled, keyed by setting
# (ego query masks are halved, exo query masks are quartered).
QUERY_MASK_DOWNSCALE = {'ego-exo': 2, 'exo-ego': 4}

parser = argparse.ArgumentParser()
parser.add_argument('--setting', required=True, choices=list(QUERY_MASK_DOWNSCALE),
                    help="ego-exo or exo-ego")
parser.add_argument('--root_path', default=DEFAULT_ROOT_PATH,
                    help="directory that contains one sub-directory per take")
parser.add_argument('--split_path', default=DEFAULT_SPLIT_PATH,
                    help="split JSON file; its \"test\" entry lists the takes to process")
parser.add_argument('--save_path', default=DEFAULT_SAVE_PATH,
                    help="where the resulting JSON dataset is written")


def _encode_mask(mask):
    """Return the RLE encoding of a uint8 mask as a JSON-serialisable dict."""
    rle = encode(np.asfortranarray(mask))
    return {
        # pycocotools returns the counts as bytes, which json cannot dump.
        'counts': rle['counts'].decode('ascii'),
        'size': rle["size"],
    }


def _query_annotation(rle, downscale, category_id):
    """Decode ``rle``, shrink it by ``downscale`` and return its annotation dict."""
    mask = decode(rle)
    h, w = mask.shape
    # Nearest-neighbour interpolation keeps the mask binary.
    mask = cv2.resize(mask, (w // downscale, h // downscale), interpolation=cv2.INTER_NEAREST)
    return {
        'segmentation': _encode_mask(mask),
        'area': float(mask.sum()),
        'category_id': category_id,
    }


def _image_size(img_path, size_cache):
    """Return ``(height, width)`` of the image at ``img_path``.

    Sizes are memoised in ``size_cache`` so that an image shared by several
    objects is only decoded once.

    Raises:
        FileNotFoundError: if OpenCV cannot read the image.
    """
    if img_path not in size_cache:
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        size_cache[img_path] = img.shape[:2]
    return size_cache[img_path]


def _collect_take_samples(root_path, take, setting, first_img_id):
    """Return the list of samples of one take.

    Args:
        root_path: directory that contains the take directory.
        take: name of the take (sub-directory of ``root_path``).
        setting: 'ego-exo' or 'exo-ego'.
        first_img_id: ``new_img_id`` given to the first sample of this take;
            following samples are numbered consecutively.

    Raises:
        ValueError: if ``setting`` is not a known setting.
        FileNotFoundError: if a target image cannot be read.
    """
    if setting not in QUERY_MASK_DOWNSCALE:
        raise ValueError(f"Setting {setting} not recognized.")
    downscale = QUERY_MASK_DOWNSCALE[setting]

    # Load the annotations of this take.
    vid_root_path = os.path.join(root_path, take)
    with open(os.path.join(vid_root_path, "annotation.json"), 'r') as fp:
        masks = json.load(fp)["masks"]

    # Map the naturally sorted object names to contiguous ids starting at 1.
    coco_id_to_cont_id = {coco_id: cont_id + 1 for cont_id, coco_id in enumerate(natsorted(masks))}

    size_cache = {}
    samples = []
    for obj, cam_masks in masks.items():
        category_id = float(coco_id_to_cont_id[obj])
        # Cameras whose name contains 'aria' are treated as ego views.
        ego_cams = [cam for cam in cam_masks if 'aria' in cam]
        exo_cams = [cam for cam in cam_masks if 'aria' not in cam]

        for ego in ego_cams:
            for exo in exo_cams:
                if setting == 'ego-exo':
                    query_cam, target_cam = ego, exo
                else:
                    query_cam, target_cam = exo, ego

                for idx, rle in cam_masks[query_cam].items():
                    frame_name = f'{idx}.jpg'
                    query_img_path = os.path.join(vid_root_path, query_cam, frame_name)
                    target_img_path = os.path.join(vid_root_path, target_cam, frame_name)
                    query_img_relpath = os.path.relpath(query_img_path, root_path)
                    target_img_relpath = os.path.relpath(target_img_path, root_path)

                    first_frame_anns = [_query_annotation(rle, downscale, category_id)]

                    # The target annotation is an all-zero mask of the target image size.
                    h, w = _image_size(target_img_path, size_cache)
                    anns = [
                        {
                            'segmentation': _encode_mask(np.zeros((h, w), dtype=np.uint8)),
                            'area': float(0),
                            'category_id': category_id,
                        }
                    ]

                    # Gather everything that describes one query/target pair.
                    samples.append(
                        {
                            'image': target_img_relpath,
                            'image_info': {
                                'file_name': target_img_relpath,
                                'height': h,
                                'width': w,
                            },
                            'anns': anns,
                            'first_frame_image': query_img_relpath,
                            'first_frame_anns': first_frame_anns,
                            'new_img_id': first_img_id + len(samples),
                            'video_name': take,
                        }
                    )
    return samples


def main(args):
    """Build the dataset described by ``args`` and write it to ``args.save_path``."""
    with open(args.split_path, "r") as fp:
        takes = json.load(fp)["test"]

    egoexo_dataset = []
    for take in tqdm(takes):
        # Ids keep counting across takes, so the next id is the current total.
        egoexo_dataset.extend(
            _collect_take_samples(args.root_path, take, args.setting, len(egoexo_dataset))
        )

    with open(args.save_path, 'w') as f:
        json.dump(egoexo_dataset, f)
    print(f'Save at {args.save_path}. Total sample: {len(egoexo_dataset)}')


if __name__ == '__main__':
    # Parse only when run as a script so that importing this module has no
    # command-line side effects.
    args = parser.parse_args()
    main(args)