import torch as tt
from torch.utils.data import DataLoader
import os
import json
import numpy as np
import random
from tqdm.auto import tqdm
import sys

from python.utils import CustomTripletLoss
from python.data_loader import GraphDataset
from python.model import GGNN

# Fix every random number generator used by this script so runs are repeatable.
seed = 0
random.seed(seed)
np.random.seed(seed)
tt.manual_seed(seed)

# Use the GPU only when one is actually available; a hard-coded True makes
# every `.cuda()` call below fail on CPU-only machines.
CUDA = tt.cuda.is_available()


def load_dataset(directory, filename, batch_size, shuffle, targets, hidden_size, annotation_size,
                 max_nodes=300, edge_types=3, target_edge_type=1, num_workers=4, max_targets=7):
    """
    Load a .json file into memory, create a GraphDataset out of it and return a DataLoader for it
    :param directory:       the directory from which to load the file
    :param filename:        the name of the file
    :param batch_size:      batch size used to initialize the DataLoader
    :param shuffle:         if True, shuffle the graphs on each pass
    :param targets:         Can be either "generate", "generateOnPass", or a key to the json dictionary
                            from which to load the targets
                            "generate": generate targets once and keep them this way (validation)
                            "generateOnPass": generate new targets at each epoch (training)
    :param hidden_size:     the size of node embedding in GGNN that will be used on this dataset
    :param annotation_size: the size of annotations (initial embeddings) for each node
    :param max_nodes:       maximum number of nodes per graph
    :param edge_types:      number of different edge-types. Does not include the edges added to
                            the undirected graph
    :param target_edge_type:the type of edge that is to be predicted
    :param num_workers:     number of worker processes handed to the DataLoader
    :param max_targets:     forwarded unchanged to GraphDataset as `max_targets`
                            (presumably the cap on candidate targets per graph -- confirm
                            against GraphDataset)
    :return:                a dataloader object
    """
    full_path = os.path.join(directory, filename)  # full path to the file
    print("Loading data from %s" % full_path)
    # JSON text is UTF-8 by specification; do not rely on the platform's locale encoding.
    with open(full_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    dataset = GraphDataset(data,
                           hidden_size=hidden_size,
                           max_nodes=max_nodes,
                           edge_types=edge_types,
                           annotation_size=annotation_size,
                           targets=targets,
                           target_edge_type=target_edge_type,
                           max_targets=max_targets)
    return DataLoader(dataset, shuffle=shuffle, batch_size=batch_size, num_workers=num_workers)


def run_epoch(model, optimizer, criterion, data, epoch, is_training, useTqdm=False):
    """
    Run a given model for one epoch on the given dataset and return the loss and the accuracy
    :param model:        the model to run; it is called as
                         model(features, adj_matrix, src, batch_size, option_size)
    :param optimizer:    optimizer for the model's parameters (only used when is_training)
    :param criterion:    callable taking (distances, mask) and returning (loss, accuracy),
                         both single-element tensors
    :param data:         a DataLoader object that works on a GraphDataset
    :param epoch:        epoch id (for logging purposes)
    :param is_training:  whether to train or just evaluate
    :param useTqdm:      use tqdm for output
    :return:             mean loss, mean accuracy
    :raises ZeroDivisionError: if `data` yields no batches
    """
    if CUDA:
        model.cuda()

    if is_training:
        model.train()
    else:
        model.eval()
    if useTqdm:
        print("Epoch %d. %s" % (epoch, "Training" if is_training else "Evaluating"))

    total_loss = 0
    total_acc = 0
    step = 0
    batches = tqdm(data) if useTqdm else data

    for adj_matrix, features, src, mask in batches:
        step += 1

        if is_training:
            # Gradients are only accumulated by backward(), which runs only when training.
            optimizer.zero_grad()
            model.zero_grad()

        batch_size = adj_matrix.shape[0]
        option_size = adj_matrix.shape[1]

        # TODO: move these view functions in the GraphDataset
        # Collapse the first two dimensions (batch, options) into a single leading dimension.
        adj_matrix = adj_matrix.view(-1, adj_matrix.shape[2], adj_matrix.shape[3]).float()
        src = src.view(-1).long()
        features = features.view(-1, features.shape[2], features.shape[3]).float()

        if CUDA:  # move to CUDA, if possible
            mask = mask.cuda()
            features = features.cuda()
            adj_matrix = adj_matrix.cuda()
            src = src.cuda()

        # Build the autograd graph only while training; evaluation never calls backward(),
        # so tracking gradients there would only waste memory and time.
        with tt.set_grad_enabled(bool(is_training)):
            distances = model(features, adj_matrix, src, batch_size, option_size)
            loss, acc = criterion(distances, mask)

        step_acc = acc.item()
        total_acc += step_acc
        total_loss += loss.item()

        if useTqdm:
            batches.set_description("Acc: step=%.2f, m.=%.3f. M. loss=%.3f"
                                    % (step_acc, total_acc / step, total_loss / step))

        if is_training:
            # NOTE(review): retain_graph=True is kept from the original; it is only needed
            # if the model reuses graph pieces across backward calls -- confirm and drop if not.
            loss.backward(retain_graph=True)
            tt.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

    if not useTqdm:
        print("Epoch: %d, Acc: %.4f, Loss: %.4f" % (epoch, total_acc/step, total_loss/step))
    return total_loss/step, total_acc/step


def train(model, epochs, optimizer, criterion, patience, train_data, val_data, best_model_file,
          useTqdm=False):
    """
    Train a given model for a given number of epochs. Use early stopping with given patience.
    The model's state dict is written to `best_model_file` every time the validation loss
    improves on the best value seen so far.
    :param model:     a GGNN model
    :param epochs:    maximum number of epochs to run the model for
    :param optimizer: optimizer passed through to run_epoch
    :param criterion: loss/accuracy criterion passed through to run_epoch
    :param patience:  patience value to use for early stopping
    :param train_data:training dataset
    :param val_data:  validation dataset
    :param best_model_file: file to save the best model to
    :param useTqdm:   use tqdm for output
    """

    best_val_loss, best_val_loss_epoch = float("inf"), 0
    for epoch in range(epochs):
        run_epoch(model, optimizer, criterion, train_data, epoch, True, useTqdm)
        val_loss, _ = run_epoch(model, optimizer, criterion, val_data, epoch, False, useTqdm)

        if val_loss < best_val_loss:
            tt.save(model.state_dict(), best_model_file)
            if useTqdm:
                print("Best epoch so far. Saving to '%s'" % best_model_file)
            best_val_loss = val_loss
            best_val_loss_epoch = epoch
        elif epoch - best_val_loss_epoch >= patience:  # early stopping
            # `epoch` is 0-based, so epoch + 1 epochs have actually been run.
            print("Early Stopping after %i epochs." % (epoch + 1))
            break


def train_test(dim, lr, steps):
    """
    Train a GGNN with early stopping, then evaluate the best checkpoint on the test split.
    The test loss and accuracy are printed at the end.
    :param dim:   selects the data directory ("data/graphs/newMethod<dim>/"); the model and
                  the datasets use dim * 2 as state and annotation size
    :param lr:    learning rate for the Adam optimizer
    :param steps: passed to GGNN as n_steps
    """
    data_dir = "data/graphs/newMethod{}/".format(dim)
    embedding_size = dim * 2

    # Settings shared by all three splits.
    shared = dict(batch_size=20,
                  hidden_size=embedding_size,
                  annotation_size=embedding_size,
                  max_targets=15,
                  num_workers=1)

    test_data = load_dataset(data_dir, 'test.json', shuffle=False, targets="targets_1",
                             **shared)
    train_data = load_dataset(data_dir, "train.json", shuffle=True, targets="generateOnPass",
                              **shared)
    val_data = load_dataset(data_dir, 'valid.json', shuffle=False, targets="generate",
                            **shared)

    model = GGNN(state_dim=embedding_size,
                 annotation_dim=embedding_size,
                 n_edge_types=3,
                 n_nodes=300,
                 n_steps=steps)
    criterion = CustomTripletLoss(margin=0.5)
    optimizer = tt.optim.Adam(model.parameters(), lr=lr)

    # Checkpoint name encodes the hyper-parameters of this run.
    best_filename = "best{}-{}_{}.model".format(dim, lr, steps)

    train(model,
          epochs=30,
          optimizer=optimizer,
          criterion=criterion,
          patience=3,
          train_data=train_data,
          val_data=val_data,
          best_model_file=best_filename,
          useTqdm=False)

    # Reload the best weights saved during training before the final evaluation.
    best_state = tt.load(best_filename)
    model.load_state_dict(best_state)
    loss, acc = run_epoch(model, optimizer, criterion, test_data, 1, False, useTqdm=False)
    print(loss, acc)


def test_model(dim, steps, candidates, model_file):
    """
    Evaluate a saved GGNN checkpoint on the test split and print the loss and accuracy.
    :param dim:        selects the data directory ("../data/graphs/newMethod<dim>/"); the
                       model and the dataset use dim * 2 as state and annotation size
    :param steps:      passed to GGNN as n_steps
    :param candidates: selects the json key "targets_<candidates>" and is passed to the
                       dataset as max_targets
    :param model_file: path of the saved state dict to load into the model
    """
    data_dir = "../data/graphs/newMethod{}/".format(dim)
    embedding_size = dim * 2
    target_key = "targets_{}".format(candidates)

    test_data = load_dataset(data_dir,
                             'test.json',
                             batch_size=1,
                             shuffle=False,
                             targets=target_key,
                             hidden_size=embedding_size,
                             annotation_size=embedding_size,
                             num_workers=1,
                             max_targets=candidates)

    model = GGNN(state_dim=embedding_size,
                 annotation_dim=embedding_size,
                 n_edge_types=3,
                 n_nodes=300,
                 n_steps=steps)
    criterion = CustomTripletLoss(margin=0.5, binary_acc=False)
    # run_epoch requires an optimizer argument even though evaluation never steps it.
    optimizer = tt.optim.Adam(model.parameters(), lr=0.001)

    saved_state = tt.load(model_file)
    model.load_state_dict(saved_state)
    loss, acc = run_epoch(model, optimizer, criterion, test_data, 1, False, useTqdm=True)
    print(loss, acc)


def main(argv=None):
    """
    Command-line entry point.
    :param argv: list of arguments without the program name; defaults to sys.argv[1:]
                 "train <dim> <lr> <steps>" trains and then tests a model
                 "test <dim> <steps> <candidates> <model_file>" tests a saved model
    :return:     0 on success, 2 if the arguments do not match either form
    """
    args = sys.argv[1:] if argv is None else list(argv)
    mode = args[0] if args else None

    # Extra trailing arguments are ignored, as before.
    if mode == "train" and len(args) >= 4:
        train_test(int(args[1]), float(args[2]), int(args[3]))
    elif mode == "test" and len(args) >= 5:
        test_model(int(args[1]), int(args[2]), int(args[3]), args[4])
    else:
        # Missing arguments or an unknown mode: explain instead of raising IndexError
        # or silently doing nothing.
        prog = sys.argv[0]
        print("usage: %s train <dim> <lr> <steps>\n"
              "       %s test <dim> <steps> <candidates> <model_file>" % (prog, prog),
              file=sys.stderr)
        return 2
    return 0


if __name__ == "__main__":
    sys.exit(main())