# Copyright (c) Alibaba, Inc. and its affiliates.
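"""Prepare detection labels and pseudo ground-truth masks.

Converts VisDrone / UAVDT / TinyPerson annotations into normalized
YOLO-style label files, builds per-object Gaussian heatmaps (optionally
refined with SAM pseudo masks), and writes train/val/test split files.
Run with, e.g., --dataset /path/to/VisDrone.
"""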
import argparse
import os
import os.path as osp
from os.path import join, exists, basename
from glob import glob
from tqdm import tqdm
import random
import cv2
import numpy as np
import json
import warnings
import math

import torch
import torch.nn.functional as F

import sys
sys.path.append('./')
from utils.general import gaussian2D

try:
    from segment_anything import SamPredictor, sam_model_registry

    sam_checkpoint = "./weights/sam_vit_h_4b8939.pth"
    model_type = "vit_h"
    device = "cuda"

    sam = sam_model_registry[model_type](checkpoint=sam_checkpoint).to(device)  # .half() would cut memory use but degrades mask precision
    dtype = next(sam.parameters()).dtype
    predictor = SamPredictor(sam)
except (ImportError, FileNotFoundError):
    warnings.warn('It is recommended to install segment-anything (and download its checkpoint) '
                  'for better pseudo masks. See instructions in README.md.')
    predictor = None


############ utils ############

def _readlines(path):
    with open(path, 'r') as f:
        lines = f.read().splitlines()
    
    return lines


def check_break(act):
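    """Return True if the 1-D activation `act` contains a gap, i.e. a run of
    positives, then zeros, then positives again (a "broken" mask profile)."""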
    flag = False
    prev_pos, prev_neg = False, False
    for x in act:
        if x:
            if not prev_pos:
                prev_pos = True
            elif prev_neg:
                flag = True
                break
        elif prev_pos:
            prev_neg = True
    
    return flag


def check_center(crop):
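    """Return True if the mask centroid deviates from the crop center by more
    than 15% of the crop size along either axis."""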
    h, w = crop.shape
    indices = torch.nonzero(crop)
    yc, xc = indices.float().mean(dim=0)
    
    s = 0.15
    return ((yc - h/2.).abs() > h * s) | ((xc - w/2.).abs() > w * s)


def segment_image(image, labels, width, height):
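    """Run SAM box-prompted segmentation on `image`.

    `labels` holds rows of (class, xc, yc, w, h) normalized to the full image.
    Images larger than 1024 px are split into tiles and segmented recursively.
    Returns a soft mask (H x W, float16 on `device`) and a per-box `invalid`
    flag marking off-center or broken masks.
    """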
    if len(labels) == 0:
        return torch.zeros(image.shape[:2], dtype=torch.float16).to(device), np.full((0,), False)

    if max(width, height) > 1024:
        mask = torch.zeros((height, width), dtype=torch.float16).to(device)
        invalid = np.full((len(labels),), False)

        # overlap = 20  # pixel
        nx, ny = math.ceil(width / 1024), math.ceil(height / 1024)
        width_, height_ = width // nx, height // ny
        xc, yc, w, h = labels[:, -4:].T
        x1, y1, x2, y2 = xc - w / 2., yc - h / 2., xc + w / 2., yc + h / 2.
        for j in range(ny):
            for i in range(nx):
                grid = np.array([i / nx, j / ny, (i+1) / nx, (j+1) / ny], dtype=labels.dtype)
                indices = (grid[0] < x2) & (x1 < grid[2]) & (grid[1] < y2) & (y1 < grid[3])
                if indices.sum() == 0:
                    continue
                
                # re-normalize the clipped boxes from full-image to per-tile coordinates
                x1_, y1_, x2_, y2_ = ((x1[indices] - grid[0]) * nx).clip(0, 1), \
                                        ((y1[indices] - grid[1]) * ny).clip(0, 1), \
                                        ((x2[indices] - grid[0]) * nx).clip(0, 1), \
                                        ((y2[indices] - grid[1]) * ny).clip(0, 1)
                xc_, yc_, w_, h_ = (x1_ + x2_) / 2., (y1_ + y2_) / 2., (x2_ - x1_), (y2_ - y1_)
                labels_ = np.stack((labels[indices, 0], xc_, yc_, w_, h_), axis=1)

                x1_, y1_, x2_, y2_ = width_*i, height_*j, width_*(i+1), height_*(j+1)
                mask_k, invalid_k = segment_image(image[y1_:y2_, x1_:x2_], labels_, width_, height_)
                mask[y1_:y2_, x1_:x2_] = mask_k
                invalid[indices] |= invalid_k

        return mask, invalid
    
    c, xc, yc, w, h = labels.T
    x1, y1, x2, y2 = (xc - w / 2.) * width, (yc - h / 2.) * height, \
                    (xc + w / 2.) * width, (yc + h / 2.) * height
    input_boxes = np.stack((x1, y1, x2, y2), axis=1)
    input_boxes = torch.from_numpy(input_boxes).long().to(device)

    predictor.set_image(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))  # SamPredictor expects RGB; cv2.imread returns BGR
    transformed_boxes = predictor.transform.apply_boxes_torch(input_boxes, image.shape[:2]).to(dtype)
    masks, _, _ = predictor.predict_torch(
        point_coords=None,
        point_labels=None,
        boxes=transformed_boxes,
        multimask_output=False,
        return_logits=True
    )
    # (batch_size) x (num_predicted_masks_per_input=1) x H x W
    mask = masks.sigmoid().squeeze(1).max(dim=0)[0].half()
    
    invalid = np.full((len(masks),), False)
    for i, (x1, y1, x2, y2) in enumerate(input_boxes.cpu()):
        crop = mask[y1:y2, x1:x2] > 0.5
        invalid[i] = check_center(crop) | check_break(crop.sum(dim=1)) | check_break(crop.sum(dim=0))
    
    return mask, invalid


def gen_mask(label_path, image, cls_ratio=False, thresh=0.5, sam_only=False):
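    """Build the training target for one image: a heatmap `mask` (per-object
    Gaussians, optionally intersected with SAM pseudo masks) and a per-pixel
    loss `weight` that up-weights small objects and rare classes.
    The (H, W, 2) result is saved next to the label under 'masks/'.
    """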
    if cls_ratio:
        cls_ratio = [1.83, 5.35, 13.82, 1.00, 5.80, 11.25, 30.11, 44.63, 24.45, 4.89]  # train set
    stride = 1
    area_min, area_max = 4 * 4 * 100, 6 * 6 * 50  # reference object areas at 1920x1080 (area_max is currently unused)

    save_path = label_path.replace('/labels/', '/masks/').replace('.txt', '.npy')
    os.makedirs(osp.dirname(save_path), exist_ok=True)
    
    height, width, _ = image.shape
    nx, ny = width // stride, height // stride

    labels = np.loadtxt(label_path, delimiter=' ').reshape(-1, 5)

    mask = np.zeros((ny, nx), dtype=np.float16)
    weight = np.ones_like(mask)
    
    if predictor is not None:
        sam_res, invalid = segment_image(image, labels, width, height)
        if stride != 1:
            sam_res = F.interpolate(sam_res[None, None, ...].float(), size=(ny, nx), mode='bilinear', align_corners=False)[0, 0]
            # sam_res = F.interpolate(sam_res[None, None, ...].float(), size=(ny, nx), mode='nearest')[0, 0]
        sam_res = (sam_res > 0.5).half().cpu().numpy()  # move off the GPU before converting to numpy

    c, xc, yc, w, h = labels.T
    x1, y1, x2, y2 = ((xc - w / 2.) * nx).astype(np.int32).clip(0), \
                        ((yc - h / 2.) * ny).astype(np.int32).clip(0), \
                        np.ceil((xc + w / 2.) * nx).astype(np.int32).clip(0, nx), \
                        np.ceil((yc + h / 2.) * ny).astype(np.int32).clip(0, ny)
    input_boxes = np.stack((x1, y1, x2, y2), axis=1)
    
    for i, (x1, y1, x2, y2) in enumerate(input_boxes):
        w, h = x2 - x1, y2 - y1
        gaussian = gaussian2D((h, w), sigma=None, thresh=thresh).astype(mask.dtype)
        
        if predictor is not None:
            sam_mask = sam_res[y1:y2, x1:x2].copy()
            if sam_only:
                gaussian = sam_mask
            else:
                if not invalid[i] and sam_mask.sum() / (w * h) > 0.25:
                    gaussian *= sam_mask  # keep the Gaussian only where SAM agrees
                np.maximum(gaussian, sam_mask * thresh, out=gaussian)
        
        masked_hm = mask[y1:y2, x1:x2]
        np.maximum(masked_hm, gaussian, out=masked_hm)

        # re-scale the box area to a 1920x1080 reference resolution
        area = w * h / (width * height) * (1920 * 1080)
        r_size = max(area_min / area, 1.) ** 2  # up-weight objects smaller than area_min
        if cls_ratio:
            r_cls = cls_ratio[int(c[i])] ** 0.7
        else:
            r_cls = 1.0
        r = max(r_size, r_cls)
        masked_wt = weight[y1:y2, x1:x2]
        curr_wt = np.full_like(masked_wt, math.log(r) + 1.)
        curr_wt *= (gaussian > 0).astype(mask.dtype)
        np.maximum(masked_wt, curr_wt, out=masked_wt)
    
    np.save(save_path, np.stack((mask, weight), axis=-1))


############ scripts ############


def prepare_visdrone():
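    """Convert VisDrone2019-DET annotations: gray out 'ignored regions' and
    'others' boxes in the images, write YOLO-style labels and masks, and
    create split files under <root>/split/."""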
    
    # class-id to name mapping, kept for reference (ids 0 and 11 are masked out below)
    name_dict = {'0': 'ignored regions', '1': 'pedestrian', '2': 'people',
                 '3': 'bicycle', '4': 'car', '5': 'van', '6': 'truck',
                 '7': 'tricycle', '8': 'awning-tricycle', '9': 'bus',
                 '10': 'motor', '11': 'others'}
    split_dict = {'test-dev': 'test-dev.txt', 'val': 'val.txt', 'train': 'train.txt'}
    root = opt.dataset

    os.makedirs(join(root, 'split'), exist_ok=True)
    for sub_dir in glob(join(root, 'VisDrone2019-DET-*')):
        os.makedirs(join(sub_dir, 'labels'), exist_ok=True)
        images = sorted(glob(join(sub_dir, 'images', '*.jpg')))
        if 'test-challenge' in sub_dir:
            with open(join(root, 'split', 'test-challenge.txt'), 'w+') as f:
                f.writelines([line + '\n' for line in images])
            continue

        data_paths = []
        for image_path in tqdm(images):
            image = cv2.imread(image_path)
            height, width, _ = image.shape
            label_path = image_path.replace('images', 'annotations').replace('.jpg', '.txt')
            # if "masked" in label_path:  # avoid repeated processing
            #     continue
            assert exists(label_path)
            label_lines = []
            masked = False
            # <bbox_left>,<bbox_top>,<bbox_width>,<bbox_height>,<score>,<object_category>,<truncation>,<occlusion>
            for line in _readlines(label_path):
                if line.endswith(','):
                    line = line[:-1]
                x1, y1, w, h, score, cls, truncation, occlusion = list(map(int, line.split(',')))
                if cls in [0, 11]:
                    image[y1:y1 + h, x1:x1 + w, :] = 85
                    masked = True
                elif truncation < 2:  # occlusion is intentionally left unfiltered
                    xc, yc = x1 + w / 2., y1 + h / 2.
                    label_lines.append(('%d' + ' %.6f' * 4 + '\n') %
                                       (cls - 1, xc / width, yc / height, w / width, h / height))
            if masked:
                image_path = image_path.replace('.jpg', '_masked.jpg')
                cv2.imwrite(image_path, image)
            # recompute label_path so it matches the (possibly '_masked') image name
            label_path = image_path.replace('images', 'annotations').replace('.jpg', '.txt')
            with open(label_path, 'w+') as f:
                f.writelines(label_lines)

            gen_mask(label_path, image, cls_ratio=True)

            data_paths.append(image_path + '\n')
        
        # basename(sub_dir)[17:] strips the 'VisDrone2019-DET-' prefix, leaving the split name
        with open(join(root, 'split', split_dict[basename(sub_dir)[17:]]), 'w+') as f:
            f.writelines(data_paths)


def prepare_uavdt():
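    """Convert UAV-benchmark-M (UAVDT): gray out ignore regions, drop boxes
    centered inside them, merge all classes into 'car', and write labels,
    masks, and video-level train/valid/test splits (valid is 1/10 of train)."""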
    root = opt.dataset
    data_dir = join(root, 'UAV-benchmark-M')
    attr_dir = join(root, 'M_attr')
    label_dir = join(root, 'UAV-benchmark-MOTD_v1.0', 'GT')
    split_dir = join(root, 'split')
    
    # gray out ignore regions in the images; this takes a few minutes
    for i, path in enumerate(glob(join(label_dir, '*ignore.txt'))):
        labels = np.loadtxt(path, usecols=(0, 2, 3, 4, 5, 8), dtype=int, delimiter=',').reshape(-1, 6)  # guard against single-row files
        vid_name = basename(path).split('_')[0]
        for frameID, x1, y1, w, h, _ in tqdm(labels, desc='%02d/50' % (i + 1)):
            masked_path = join(data_dir, vid_name, 'img%06d_masked.jpg' % frameID)
            input_path = masked_path if exists(masked_path) else masked_path.replace('_masked.jpg', '.jpg')
            image = cv2.imread(input_path)
            image[y1:y1 + h, x1:x1 + w] = (127, 127, 127)
            cv2.imwrite(masked_path, image)

    data_split = {}
    for mode in ['train', 'test']:
        data_split[mode] = []
        for path in glob(join(attr_dir, mode, '*.txt')):
            data_split[mode].append(basename(path)[:5])
    k = 10
    sep = len(data_split['train']) // k
    random.shuffle(data_split['train'])
    data_split['train'], data_split['valid'] = data_split['train'][sep:], data_split['train'][:sep]

    os.makedirs(split_dir, exist_ok=True)
    for mode in ['train', 'valid', 'test']:
        with open(join(split_dir, '%s_video.txt' % mode), 'w+') as f:
            f.writelines(vid + '\n' for vid in data_split[mode])
        image_paths = []
        for video_name in tqdm(data_split[mode], desc=mode):
            ignore_path = join(label_dir, '%s_gt_ignore.txt' % video_name)
            label_path = join(label_dir, '%s_gt_whole.txt' % video_name)
        
            # np.loadtxt may warn on an empty file; that is harmless
            ignores = np.loadtxt(ignore_path, usecols=(0, 2, 3, 4, 5), dtype=int, delimiter=',').reshape(-1, 5)
            ignore_dict = {}
            for frameID, x1, y1, w, h in ignores:
                xyxy = np.array([[x1, y1, x1 + w, y1 + h]])
                if frameID in ignore_dict:
                    ignore_dict[frameID] = np.concatenate((ignore_dict[frameID], xyxy), axis=0)
                else:
                    ignore_dict[frameID] = xyxy
        
            labels = np.loadtxt(label_path, usecols=(0, 2, 3, 4, 5, 8), dtype=int, delimiter=',').reshape(-1, 6)
            label_dict = {}
            for frameID, x1, y1, w, h, cls in labels:
                xc, yc = x1 + w / 2., y1 + h / 2.
                if frameID in ignore_dict:
                    ignore_regions = ignore_dict[frameID]
                    if np.logical_and(
                            np.logical_and(ignore_regions[:, 0] < xc, xc < ignore_regions[:, 2]),
                            np.logical_and(ignore_regions[:, 1] < yc, yc < ignore_regions[:, 3])
                    ).sum() > 0:
                        continue
            
                box = [cls, xc, yc, w, h]
                if frameID in label_dict:
                    label_dict[frameID].append(box)
                else:
                    label_dict[frameID] = [box]
        
            for frameID, bboxes in label_dict.items():
                image_path = join(data_dir, video_name, 'img%06d_masked.jpg' % frameID)
                if not exists(image_path):
                    image_path = image_path.replace('_masked.jpg', '.jpg')
            
                image = cv2.imread(image_path)
                height, width, _ = image.shape
                label_path = image_path.replace('.jpg', '.txt')
                with open(label_path, 'w+') as f:
                    for cls, xc, yc, w, h in bboxes:
                        assert 1 <= cls <= 3  # UAVDT uses three vehicle classes (1-3)
                        # merge all categories into a single class, 'car'
                        f.write('%d %.6f %.6f %.6f %.6f\n' % (0, xc / width, yc / height, w / width, h / height))

                gen_mask(label_path, image)
                image_paths.append(image_path + '\n')
    
        with open(join(split_dir, '%s.txt' % mode), 'w+') as f:
            f.writelines(image_paths)
        if mode == 'train':
            with open(join(split_dir, '%s_ds.txt' % mode), 'w+') as f:
                f.writelines(image_paths[::10])


def prepare_tinyperson():
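    """Convert TinyPerson: read the COCO-style mini_annotations, skip ignored
    or uncertain boxes, write single-class labels and masks, and create
    trainval/train/valid/test splits (valid is a random 10% of train)."""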
    root = opt.dataset
    label_file_dict = {'train': join(root, 'mini_annotations', 'tiny_set_train_all_erase.json'),
                       'test': join(root, 'mini_annotations', 'tiny_set_test_all.json')}
    image_dir = join(root, 'erase_with_uncertain_dataset')
    split_dir = join(root, 'split')

    os.makedirs(split_dir, exist_ok=True)
    for mode in ['train', 'test']:
        with open(label_file_dict[mode], 'r') as f:
            anno = json.load(f)
    
        image_dict = {}
        for item in anno['images']:
            file_name, width, height = item['file_name'], item['width'], item['height']
            file_path = join(image_dir, mode, file_name)
            image_dict[item['id']] = {'shape': [width, height], 'bboxes': [], 'image_path': file_path}
        
        for item in anno['annotations']:
            if item['ignore'] or item['uncertain']:
                continue
            _id, (x1, y1, w, h) = item['image_id'], item['bbox']
            (width, height) = image_dict[_id]['shape']
            xc, yc, w, h = (x1 + w / 2.) / width, (y1 + h / 2.) / height, w / width, h / height
            image_dict[_id]['bboxes'].append('0 %.6f %.6f %.6f %.6f\n' % (xc, yc, w, h))
    
        paths = []
        for item in image_dict.values():
            image_path = item['image_path']
            label_path = item['image_path'][:-4] + '.txt'
            with open(label_path, 'w+') as f:
                f.writelines(item['bboxes'])
            
            image = cv2.imread(image_path)
            gen_mask(label_path, image)
            paths.append(image_path + '\n')
        
        if mode == 'train':
            with open(join(split_dir, 'trainval.txt'), 'w+') as f:
                f.writelines(paths)
            k = 10
            random.shuffle(paths)
            sep = len(paths) // k
            with open(join(split_dir, 'train.txt'), 'w+') as f:
                f.writelines(paths[sep:])
            with open(join(split_dir, 'valid.txt'), 'w+') as f:
                f.writelines(paths[:sep])
        else:
            with open(join(split_dir, 'test.txt'), 'w+') as f:
                f.writelines(paths)
                

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='VisDrone', help='path to the dataset root, e.g., VisDrone, UAVDT, or TinyPerson')
    opt = parser.parse_args()

    assert exists(opt.dataset)
    dataset = opt.dataset.lower()
    if 'visdrone' in dataset:
        prepare_visdrone()
    elif 'uavdt' in dataset:
        prepare_uavdt()
    elif 'tinyperson' in dataset:
        prepare_tinyperson()
    else:
        print('%s is coming soon.' % opt.dataset)
