/*
 * Decompiled with CFR 0.152.
 */
package aima.learning.reinforcement;

import aima.learning.reinforcement.MDPAgent;
import aima.probability.decision.MDP;
import aima.probability.decision.MDPPerception;
import aima.probability.decision.MDPPolicy;
import aima.probability.decision.MDPUtilityFunction;
import aima.util.FrequencyCounter;

/**
 * Passive temporal-difference (TD) learning agent.
 *
 * <p>The agent never chooses actions on its own: every action comes from the
 * fixed {@link MDPPolicy} supplied at construction. While following that
 * policy it keeps a per-state utility estimate and adjusts the estimate of the
 * previously visited state after each observed transition.
 *
 * <p>NOTE(review): this class reads and writes the inherited fields
 * {@code mdp}, {@code currentState}, {@code currentReward},
 * {@code previousState} and {@code previousAction}. It presumably relies on
 * {@code MDPAgent} to refresh {@code currentState}/{@code currentReward} from
 * the perception before {@link #decideAction} runs; confirm against the
 * superclass.
 *
 * @param <STATE_TYPE>  type used to identify states
 * @param <ACTION_TYPE> type used to identify actions
 */
public class PassiveTDAgent<STATE_TYPE, ACTION_TYPE>
extends MDPAgent<STATE_TYPE, ACTION_TYPE> {
    /** Fixed policy the agent follows; never modified by learning. */
    private final MDPPolicy<STATE_TYPE, ACTION_TYPE> policy;
    /** Current utility estimates; replaced by an updated copy after each transition. */
    private MDPUtilityFunction<STATE_TYPE> utilityFunction;
    /** Visit counter for states the agent has left; feeds the update weight. */
    private final FrequencyCounter<STATE_TYPE> stateCount;
    /** Reward recorded for {@code previousState}; {@code null} when no transition is pending. */
    private Double previousReward;

    /**
     * Creates an agent that evaluates {@code policy}.
     *
     * @param mdp    source of the model handed to the superclass via
     *               {@code mdp.emptyMdp()}
     * @param policy fixed policy that supplies every action
     */
    public PassiveTDAgent(MDP<STATE_TYPE, ACTION_TYPE> mdp, MDPPolicy<STATE_TYPE, ACTION_TYPE> policy) {
        super(mdp.emptyMdp());
        this.policy = policy;
        // Explicit type arguments instead of raw types keep the fields type-safe.
        this.utilityFunction = new MDPUtilityFunction<STATE_TYPE>();
        this.stateCount = new FrequencyCounter<STATE_TYPE>();
    }

    /**
     * Processes one perception and returns the policy's action for the
     * current state.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>If the perceived state has no utility yet, its utility and its
     *       reward in the agent's model are both set to the perceived
     *       reward.</li>
     *   <li>If a previous state is recorded, its visit count is incremented
     *       and its utility estimate is updated (with a discount of 1.0).</li>
     *   <li>If the current state is terminal the previous state, action and
     *       reward are cleared; otherwise they are set from the current
     *       state, the policy's action for it, and the current reward.</li>
     * </ol>
     *
     * @param perception the state and reward just observed
     * @return the action chosen by the policy, or {@code null} when the
     *         current state is terminal
     */
    @Override
    public ACTION_TYPE decideAction(MDPPerception<STATE_TYPE> perception) {
        if (!this.utilityFunction.hasUtilityFor(perception.getState())) {
            // First visit: seed the estimate with the observed reward.
            this.utilityFunction.setUtility(perception.getState(), perception.getReward());
            this.mdp.setReward(perception.getState(), perception.getReward());
        }
        if (this.previousState != null) {
            this.stateCount.incrementFor(this.previousState);
            this.utilityFunction = this.updateUtilityFunction(1.0);
        }
        if (this.mdp.isTerminalState(this.currentState)) {
            // Clearing these means the next perception triggers no update.
            this.previousState = null;
            this.previousAction = null;
            this.previousReward = null;
        } else {
            this.previousState = this.currentState;
            this.previousAction = this.policy.getAction(this.currentState);
            this.previousReward = this.currentReward;
        }
        return (ACTION_TYPE)this.previousAction;
    }

    /**
     * Builds a copy of the utility function in which the estimate for
     * {@code previousState} has been moved toward the observed sample:
     *
     * <pre>
     * U(s) = U(s) + alpha * (R(s) + gamma * U(s') - U(s))
     * </pre>
     *
     * where {@code s} is {@code previousState}, {@code s'} is
     * {@code currentState}, {@code R(s)} is {@code previousReward} and
     * {@code alpha} is {@code stateCount.probabilityOf(s)}.
     *
     * <p>NOTE(review): {@code alpha} is whatever
     * {@code FrequencyCounter.probabilityOf} returns, presumably the relative
     * visit frequency of {@code s}; confirm that this is the intended
     * learning rate.
     *
     * @param gamma discount factor applied to the utility of the current state
     * @return a new utility function; the one held by this agent is not mutated
     */
    private MDPUtilityFunction<STATE_TYPE> updateUtilityFunction(double gamma) {
        // Must be parameterized with STATE_TYPE (not Object) to match the return type.
        MDPUtilityFunction<STATE_TYPE> updated = this.utilityFunction.copy();
        double previousUtility = this.utilityFunction.getUtility(this.previousState);
        double discountedDifference = gamma * this.utilityFunction.getUtility(this.currentState) - previousUtility;
        double alphaTerm = this.stateCount.probabilityOf(this.previousState) * (this.previousReward + discountedDifference);
        updated.setUtility(this.previousState, previousUtility + alphaTerm);
        return updated;
    }

    /**
     * Returns the agent's current utility estimates.
     *
     * @return the live utility function held by this agent (not a copy)
     */
    public MDPUtilityFunction<STATE_TYPE> getUtilityFunction() {
        return this.utilityFunction;
    }
}

