"""Build a frame-level ego -> exo pair list and save it as a single JSON file.

For every take listed under the chosen split, every ego frame is paired with
the frame of the same name in every exo camera of that take.  Each pair stores
the ego ("first frame") masks as the query and all-zero placeholder masks for
the exo (target) image, since target masks are not needed at inference time.

Expected layout (as read by this script):
    <root_path>/<take>/annotation.json
    <root_path>/<take>/<camera>/<frame_idx>.jpg
Cameras whose folder name contains 'aria' are treated as ego cameras.
"""
import copy
import json
import os

import cv2
import numpy as np
from natsort import natsorted
from PIL import Image
from pycocotools.mask import decode, encode, frPyObjects
from tqdm import tqdm

DEFAULT_ROOT_PATH = '/scratch/yuqian_fu/data_segswap_test'
DEFAULT_SPLIT_PATH = "/home/yuqian_fu/Projects/ego-exo4d-relation/correspondence/split.json"


def _rle_to_json(rle):
    """Turn a pycocotools RLE dict into a JSON-serialisable dict (bytes -> str)."""
    return {
        'counts': rle['counts'].decode('ascii'),
        'size': rle["size"],
    }


def _build_first_frame_anns(masks, objs, ego, idx, coco_id_to_cont_id):
    """Return the query annotations of one ego frame.

    Args:
        masks: ``annotations["masks"]`` of the take, indexed as
            ``masks[obj][camera][frame_idx]`` -> RLE.
        objs: object names of the take, in the order they should be emitted.
        ego: name of the ego camera.
        idx: frame index (string, file name without extension).
        coco_id_to_cont_id: mapping object name -> contiguous 1-based id.

    Returns:
        A list with one dict per object that is annotated in this ego frame.
    """
    first_frame_anns = []
    for obj in objs:
        # An object may have no entry for the ego camera at all; treat that
        # the same as "not visible in this frame" instead of raising KeyError.
        ego_masks = masks[obj].get(ego, {})
        if idx not in ego_masks:
            continue

        binary_mask = decode(ego_masks[idx])
        h, w = binary_mask.shape
        # The mask is stored at half of its annotated resolution
        # (presumably to match the resolution of the ego frames on disk -- TODO confirm).
        binary_mask = cv2.resize(binary_mask, (w // 2, h // 2), interpolation=cv2.INTER_NEAREST)
        first_frame_anns.append(
            {
                'segmentation': _rle_to_json(encode(np.asfortranarray(binary_mask))),
                'area': float(binary_mask.sum()),
                'category_id': float(coco_id_to_cont_id[obj]),
            }
        )
    return first_frame_anns


def _build_empty_anns(first_frame_anns, h, w):
    """Return all-zero target annotations, one per query annotation.

    The zero mask only depends on the image size, so it is encoded once and
    copied for every object.
    """
    empty_mask = np.asfortranarray(np.zeros((h, w), dtype=np.uint8))
    empty_segmentation = _rle_to_json(encode(empty_mask))
    return [
        {
            'segmentation': dict(empty_segmentation),
            'area': float(0),
            'category_id': ann['category_id'],
        }
        for ann in first_frame_anns
    ]


def _process_take(root_path, take, first_img_id):
    """Return the list of ego/exo pair samples of one take.

    Args:
        root_path: dataset root; stored image paths are relative to it.
        take: take (video) name, a sub-folder of ``root_path``.
        first_img_id: ``new_img_id`` to assign to the first sample of this
            take; following samples are numbered consecutively.
    """
    # Load the annotations of this take.
    vid_root_path = os.path.join(root_path, take)
    anno_path = os.path.join(vid_root_path, "annotation.json")
    with open(anno_path, 'r') as fp:
        annotations = json.load(fp)
    masks = annotations["masks"]

    # All objects of this take, mapped to contiguous 1-based ids.
    objs = natsorted(list(masks.keys()))
    coco_id_to_cont_id = {coco_id: cont_id + 1 for cont_id, coco_id in enumerate(objs)}
    print(f"coco_id_to_cont_id:{coco_id_to_cont_id}")

    # Cameras are the sub-directories of the take; plain files such as
    # annotation.json are ignored.
    cams = [
        entry for entry in os.listdir(vid_root_path)
        if os.path.isdir(os.path.join(vid_root_path, entry))
    ]
    ego_cams = [cam for cam in cams if 'aria' in cam]
    exo_cams = [cam for cam in cams if 'aria' not in cam]
    if not ego_cams:
        print(f"Warning: {take} has no ego camera, the take is skipped.")
        return []
    if len(ego_cams) > 1:
        print(f"Warning: {take} has more than one ego camera, only the first one will be used.")
    ego = ego_cams[0]

    # Frame indices are the ego file names without extension.
    vid_ego_path = os.path.join(vid_root_path, ego)
    ego_frames = natsorted(os.listdir(vid_ego_path))
    idxs = [f.split(".")[0] for f in ego_frames]

    # The query annotations depend only on the ego frame, so build them once
    # per frame and reuse them for every exo camera.
    ego_anns_by_idx = {}
    for idx in idxs:
        if idx in ego_anns_by_idx:
            continue
        ego_anns_by_idx[idx] = _build_first_frame_anns(masks, objs, ego, idx, coco_id_to_cont_id)
        print("len of ego_obj:", len(ego_anns_by_idx[idx]))

    samples = []
    # Pair the ego camera with every exo camera, frame by frame.
    for exo in exo_cams:
        vid_exo_path = os.path.join(vid_root_path, exo)
        for idx in idxs:
            filename = f"{idx}.jpg"

            # Query (ego) image.
            first_frame_img_path = os.path.join(vid_ego_path, filename)
            first_frame_img_relpath = os.path.relpath(first_frame_img_path, root_path)
            first_frame_anns = ego_anns_by_idx[idx]

            # Target (exo) image; it is read only to obtain its size.
            sample_img_path = os.path.join(vid_exo_path, filename)
            sample_img_relpath = os.path.relpath(sample_img_path, root_path)
            exo_img = cv2.imread(sample_img_path)
            if exo_img is None:
                # cv2.imread signals a missing/unreadable file by returning None.
                print(f"Warning: cannot read {sample_img_path}, the pair is skipped.")
                continue
            h, w = exo_img.shape[:2]

            # Target masks are not needed for inference: use empty masks.
            anns = _build_empty_anns(first_frame_anns, h, w)

            image_info = {
                'file_name': sample_img_relpath,
                'height': h,
                'width': w,
            }
            samples.append(
                {
                    'image': sample_img_relpath,
                    'image_info': image_info,
                    'anns': anns,
                    'first_frame_image': first_frame_img_relpath,
                    'first_frame_anns': first_frame_anns,
                    'new_img_id': first_img_id + len(samples),
                    'video_name': take,
                }
            )
    return samples


def main(root_path=DEFAULT_ROOT_PATH, split_path=DEFAULT_SPLIT_PATH, split="test", save_path=None):
    """Build the pair list for all takes of ``split`` and write it to ``save_path``.

    Args:
        root_path: dataset root containing one folder per take.
        split_path: JSON file mapping split names to lists of take names.
        split: key of the split to process.
        save_path: output JSON path; defaults to
            ``<root_path>/egoexo_test_framelevel.json``.
    """
    if save_path is None:
        save_path = os.path.join(root_path, 'egoexo_test_framelevel.json')

    with open(split_path, "r") as fp:
        data_split = json.load(fp)
    takes = data_split[split]

    egoexo_dataset = []
    for take in tqdm(takes):
        # Image ids are consecutive over the whole dataset.
        egoexo_dataset.extend(_process_take(root_path, take, len(egoexo_dataset)))

    with open(save_path, 'w') as f:
        json.dump(egoexo_dataset, f)
    print(f'Save at {save_path}. Total sample: {len(egoexo_dataset)}')


if __name__ == '__main__':
    main()