/* This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 *
 * Copyright (C) 2014 Nanjing University, Nanjing, China
 */
 
package napping.core;

import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import napping.utills.Stats;

/**
 * Runs policy rollouts on tasks and aggregates per-iteration test statistics.
 *
 * @author Qing Da <daq@lamda.nju.edu.cn>
 */
public class Experiment {

    /** Number of worker threads in the pool used when rollouts run in parallel. */
    private static final int THREAD_POOL_SIZE = 23;

    /**
     * A {@link Runnable} that performs a single rollout through
     * {@link #runTask} and keeps the resulting {@link Trajectory} so it can be
     * fetched with {@link #getRollout()} once {@link #run()} has finished.
     */
    class ParallelExecute implements Runnable {

        /** Result of the rollout; stays {@code null} until {@link #run()} completes normally. */
        private Trajectory rollout;
        private final Task task;
        private final Policy mp;
        private final State initialState;
        private final int maxStep;
        private final Random random;
        boolean isStochastic;

        /**
         * @param task the task to roll out on
         * @param mp the policy that chooses actions
         * @param initialState the state the rollout starts from
         * @param maxStep upper bound on the number of steps in the rollout
         * @param isStochastic selects {@code makeDecisionS} when true, {@code makeDecisionD} otherwise
         * @param seed seed of the private {@link Random} used by this rollout
         */
        public ParallelExecute(Task task, Policy mp, State initialState, int maxStep, boolean isStochastic, int seed) {
            this.task = task;
            this.mp = mp;
            this.initialState = initialState;
            this.maxStep = maxStep;
            this.isStochastic = isStochastic;
            this.random = new Random(seed);
        }

        @Override
        public void run() {
            List<Tuple> samples = runTask(task, initialState, mp, maxStep, isStochastic, random);
            rollout = new Trajectory(task, samples);
        }

        /**
         * @return the trajectory produced by {@link #run()}, or {@code null} if
         *         {@link #run()} has not completed normally
         */
        public Trajectory getRollout() {
            return rollout;
        }
    }

    /**
     * Rolls a policy out on a task and records every transition.
     *
     * <p>At each step the action is chosen with {@code mp.makeDecisionS} when
     * {@code isStochastic} is true and with {@code mp.makeDecisionD} otherwise,
     * the next state comes from {@code task.transition}, and the reward is
     * {@code task.immediateReward} evaluated on that next state. The rollout
     * stops after {@code maxStep} steps or as soon as {@code task.isComplete}
     * holds for the state just reached, whichever comes first.
     *
     * @param task the task providing transitions, rewards and the completion test
     * @param initalState the state the rollout starts from
     * @param mp the policy that chooses actions
     * @param maxStep maximum number of steps; a value {@code <= 0} yields an empty list
     * @param isStochastic whether to use the policy's {@code makeDecisionS} or {@code makeDecisionD}
     * @param random random source handed to the policy and the task
     * @return the recorded (state, action, reward, next state) tuples in order
     */
    public static List<Tuple> runTask(Task task, State initalState, Policy mp, int maxStep, boolean isStochastic, Random random) {
        List<Tuple> samples = new ArrayList<Tuple>();

        State s = initalState;
        for (int step = 0; step < maxStep; step++) {
            Action action = isStochastic ? mp.makeDecisionS(s, task, random) : mp.makeDecisionD(s, task, random);
            State sPrime = task.transition(s, action, random);
            double reward = task.immediateReward(sPrime);
            samples.add(new Tuple(s, action, reward, sPrime));

            s = sPrime;
            // The completion test is applied to the state just reached.
            if (task.isComplete(s)) {
                break;
            }
        }

        return samples;
    }

    /**
     * Evaluates a policy on a list of tasks at a sequence of iteration counts.
     *
     * <p>For {@code iteration = 0, interval, 2*interval, ...} up to
     * {@code mp.getNumIteration() - bais} the policy's iteration counter is set
     * to {@code iteration + bais} and one stochastic rollout is run per task,
     * each seeded with {@code random.nextInt()} and started from that task's
     * own {@code getInitialState()}. The policy's iteration counter is put
     * back to its original value before this method returns, including when a
     * rollout throws.
     *
     * <p>Each result row holds five values: the iteration, the two values
     * returned by {@code Stats.mean_std} for the per-task average rewards, and
     * the two values returned by {@code Stats.mean_std} for the per-task step
     * counts (presumably mean and standard deviation, going by the helper's
     * name). A task whose rollout has no samples contributes {@code NaN} as
     * its average reward.
     *
     * @param mp the policy under test
     * @param tasks the tasks to roll out on
     * @param initialState not used; every rollout starts from its task's own initial state
     * @param maxStep maximum number of steps per rollout
     * @param bais offset added to {@code iteration} when setting the policy's iteration counter
     * @param isPara run the rollouts on a thread pool when true, sequentially on the calling thread otherwise
     * @param interval step between evaluated iterations; must be positive
     * @param random source of the per-rollout seeds
     * @return one row of five statistics per evaluated iteration
     * @throws IllegalArgumentException if {@code interval <= 0}
     * @throws IllegalStateException if the calling thread is interrupted while waiting for
     *         parallel rollouts, or if a rollout finished without producing a trajectory
     */
    public double[][] conductTesting(Policy mp, List<Task> tasks,
            State initialState, int maxStep, int bais, boolean isPara, int interval, Random random) {
        if (interval <= 0) {
            // A non-positive interval would keep the iteration loop from ever advancing past numIteration.
            throw new IllegalArgumentException("interval must be positive, got " + interval);
        }

        int numIteration = mp.getNumIteration() - bais;
        List<double[]> resultList = new ArrayList<double[]>();

        double[] avaRewards = new double[tasks.size()];
        double[] avaSteps = new double[tasks.size()];
        try {
            for (int iteration = 0; iteration <= numIteration; iteration += interval) {
                mp.setNumIteration(iteration + bais);
                System.out.println("numIteration = " + mp.numIteration);

                List<ParallelExecute> list = runRollouts(mp, tasks, maxStep, isPara, random);

                int totalSteps = 0;
                for (int i = 0; i < list.size(); i++) {
                    Trajectory rollout = list.get(i).getRollout();
                    if (rollout == null) {
                        // Only possible when the rollout's run() did not complete normally (e.g. it threw on a pool thread).
                        throw new IllegalStateException("Rollout for task index " + i + " did not produce a trajectory");
                    }
                    int length = rollout.samples.size();
                    System.out.print(length + " ");
                    totalSteps += length;

                    double rewardSum = 0;
                    for (Tuple t : rollout.samples) {
                        rewardSum += t.reward;
                    }
                    // Floating-point division: an empty trajectory yields NaN here.
                    avaRewards[i] = rewardSum / length;
                    avaSteps[i] = length;
                }
                // Integer average; guarded so an empty task list no longer divides by zero.
                int avaStep = list.isEmpty() ? 0 : totalSteps / list.size();
                System.out.println("->" + avaStep);

                double[] mean_std_reward = Stats.mean_std(avaRewards);
                double[] mean_std_step = Stats.mean_std(avaSteps);
                double[] records = new double[5];
                records[0] = iteration;
                records[1] = mean_std_reward[0];
                records[2] = mean_std_reward[1];
                records[3] = mean_std_step[0];
                records[4] = mean_std_step[1];
                resultList.add(records);
            }
        } finally {
            // Restore the counter the policy had on entry, even if a rollout failed.
            mp.setNumIteration(numIteration + bais);
        }

        double[][] results = new double[resultList.size()][];
        for (int i = 0; i < resultList.size(); i++) {
            results[i] = resultList.get(i);
        }
        return results;
    }

    /**
     * Runs one stochastic rollout per task, either on a thread pool or
     * sequentially on the calling thread, and returns the finished runners in
     * task order. Seeds are drawn from {@code random} in task order in both modes.
     */
    private List<ParallelExecute> runRollouts(Policy mp, List<Task> tasks, int maxStep, boolean isPara, Random random) {
        List<ParallelExecute> list = new ArrayList<ParallelExecute>();
        // The pool is only created when it is actually going to be used.
        ExecutorService exec = isPara ? Executors.newFixedThreadPool(THREAD_POOL_SIZE) : null;
        try {
            for (int i = 0; i < tasks.size(); i++) {
                Task task = tasks.get(i);
                ParallelExecute run = new ParallelExecute(task, mp, task.getInitialState(), maxStep, true, random.nextInt());
                list.add(run);
                if (exec != null) {
                    exec.execute(run);
                } else {
                    run.run();
                }
            }
            if (exec != null) {
                awaitCompletion(exec);
            }
        } finally {
            // Make sure no pool outlives this call if something above threw.
            if (exec != null && !exec.isShutdown()) {
                exec.shutdownNow();
            }
        }
        return list;
    }

    /**
     * Shuts the executor down and blocks until every submitted rollout has
     * finished. If the calling thread is interrupted, the remaining rollouts
     * are cancelled, the interrupt flag is restored and an
     * {@link IllegalStateException} is thrown.
     */
    private static void awaitCompletion(ExecutorService exec) {
        exec.shutdown();
        try {
            while (!exec.awaitTermination(10, TimeUnit.SECONDS)) {
                // Keep waiting; rollouts may legitimately take longer than one timeout period.
            }
        } catch (InterruptedException ex) {
            exec.shutdownNow();
            Thread.currentThread().interrupt();
            throw new IllegalStateException("Interrupted while waiting for parallel rollouts to finish", ex);
        }
    }
}
