import json
import os
from PIL import Image
import numpy as np
from pycocotools.mask import encode, decode, frPyObjects
from tqdm import tqdm
import copy
from natsort import natsorted
import cv2

def _rle_to_json(rle):
    """Return a JSON-serialisable copy of a pycocotools RLE dict.

    ``encode`` returns ``counts`` as ``bytes``; ``json.dump`` cannot write
    bytes, so the counts are decoded to an ASCII ``str``.
    """
    return {
        'counts': rle['counts'].decode('ascii'),
        'size': list(rle['size']),
    }


def _build_ego_annotations(annotations, objs, ego, idx, coco_id_to_cont_id):
    """Build the query (ego) annotations of one frame.

    Args:
        annotations: parsed ``annotation.json`` of the take.
        objs: object names of the take, in their sorted order.
        ego: name of the ego camera.
        idx: frame index as it appears in the annotation keys (a string).
        coco_id_to_cont_id: mapping object name -> contiguous 1-based id.

    Returns:
        ``(ego_obj, first_frame_anns)`` where ``ego_obj`` lists the objects
        that have a mask for ``idx`` in the ego camera and
        ``first_frame_anns`` holds one annotation dict per such object.
    """
    # An object may have no entry for the ego camera at all; ``.get`` keeps
    # that from raising KeyError and simply treats it as "not visible".
    ego_obj = [
        obj for obj in objs
        if idx in annotations['masks'][obj].get(ego, {})
    ]
    print("len of ego_obj:", len(ego_obj))

    first_frame_anns = []
    for obj in ego_obj:
        binary_mask = decode(annotations['masks'][obj][ego][idx])
        h, w = binary_mask.shape
        # Masks are stored at half of the annotated resolution; nearest
        # neighbour keeps the mask strictly binary.
        binary_mask = cv2.resize(
            binary_mask, (w // 2, h // 2), interpolation=cv2.INTER_NEAREST
        )
        first_frame_anns.append(
            {
                'segmentation': _rle_to_json(encode(np.asfortranarray(binary_mask))),
                'area': float(binary_mask.sum()),
                'category_id': float(coco_id_to_cont_id[obj]),
            }
        )
    return ego_obj, first_frame_anns


if __name__ == '__main__':
    # Paths and the list of test takes.
    root_path = '/scratch/yuqian_fu/data_segswap_test'
    save_path = os.path.join(root_path, 'egoexo_test_framelevel.json')
    split_path = "/home/yuqian_fu/Projects/ego-exo4d-relation/correspondence/split.json"
    with open(split_path, "r") as fp:
        data_split = json.load(fp)
    takes = data_split["test"]

    # Running id assigned to every emitted ego/exo pair.
    new_img_id = 0
    # All emitted samples.
    egoexo_dataset = []
    # Encoded all-zero masks keyed by (height, width); the encoding only
    # depends on the image size, so it is computed once per size.
    null_rle_cache = {}

    for take in tqdm(takes):
        # Load the annotations of this take.
        vid_root_path = os.path.join(root_path, take)
        anno_path = os.path.join(vid_root_path, "annotation.json")
        with open(anno_path, 'r') as fp:
            annotations = json.load(fp)

        # Map every object of the take to a contiguous 1-based id.
        objs = natsorted(list(annotations["masks"].keys()))
        coco_id_to_cont_id = {coco_id: cont_id + 1 for cont_id, coco_id in enumerate(objs)}
        print(f"coco_id_to_cont_id:{coco_id_to_cont_id}")

        # Cameras are the sub-directories of the take; plain files such as
        # annotation.json are ignored.
        valid_cams = [
            entry for entry in os.listdir(vid_root_path)
            if os.path.isdir(os.path.join(vid_root_path, entry))
        ]
        ego_cams = [vc for vc in valid_cams if 'aria' in vc]
        exo_cams = [vc for vc in valid_cams if 'aria' not in vc]
        if not ego_cams:
            print(f"Warning: {take} has no ego camera, skipping this take.")
            continue
        if len(ego_cams) > 1:
            print(f"Warning: {take} has more than one ego camera, only the first one will be used.")
        ego = ego_cams[0]

        # Frame indices are taken from the ego camera's frame files.
        vid_ego_path = os.path.join(vid_root_path, ego)
        ego_frames = natsorted(os.listdir(vid_ego_path))
        idxs = [f.split(".")[0] for f in ego_frames]

        # The ego annotations of a frame do not depend on the exo camera,
        # so they are built once per frame and reused for every exo camera.
        ego_cache = {}

        # Pair the ego camera with every exo camera.
        for exo in exo_cams:
            vid_exo_path = os.path.join(vid_root_path, exo)

            for idx in idxs:
                # --- query (ego) ---
                filename = f"{idx}.jpg"
                first_frame_img_path = os.path.join(vid_ego_path, filename)
                first_frame_img_relpath = os.path.relpath(first_frame_img_path, root_path)
                if idx not in ego_cache:
                    ego_cache[idx] = _build_ego_annotations(
                        annotations, objs, ego, idx, coco_id_to_cont_id
                    )
                ego_obj, first_frame_anns = ego_cache[idx]

                # --- target (exo) ---
                sample_img_path = os.path.join(vid_exo_path, filename)
                sample_img_relpath = os.path.relpath(sample_img_path, root_path)

                # The exo image is only read to obtain its size. cv2.imread
                # returns None (it does not raise) for a missing/unreadable
                # file, so that case is reported and the pair is skipped.
                exo_img = cv2.imread(sample_img_path)
                if exo_img is None:
                    print(f"Warning: cannot read {sample_img_path}, skipping this pair.")
                    continue
                h, w = exo_img.shape[:2]

                # Target annotations are not needed for inference: every
                # object gets an empty (all-zero) mask of the exo image size.
                if (h, w) not in null_rle_cache:
                    null_mask = np.zeros((h, w), dtype=np.uint8)
                    null_rle_cache[(h, w)] = _rle_to_json(encode(np.asfortranarray(null_mask)))
                null_segmentation = null_rle_cache[(h, w)]
                anns = [
                    {
                        'segmentation': dict(null_segmentation),
                        'area': float(0),
                        'category_id': float(coco_id_to_cont_id[obj]),
                    }
                    for obj in ego_obj
                ]

                # --- remaining metadata ---
                image_info = {
                    'file_name': sample_img_relpath,
                    'height': h,
                    'width': w,
                }

                # --- one ego/exo pair ---
                sample = {
                    'image': sample_img_relpath,
                    'image_info': image_info,
                    'anns': anns,
                    'first_frame_image': first_frame_img_relpath,
                    'first_frame_anns': first_frame_anns,
                    'new_img_id': new_img_id,
                    'video_name': take,
                }
                egoexo_dataset.append(sample)
                new_img_id += 1

    with open(save_path, 'w') as f:
        json.dump(egoexo_dataset, f)
    print(f'Save at {save_path}. Total sample: {len(egoexo_dataset)}')