package aima.learning.reinforcement;

import aima.probability.Randomizer;
import aima.probability.decision.MDP;
import aima.probability.decision.MDPPerception;

/* loaded from: input_file:aima/learning/reinforcement/MDPAgent.class */
/**
 * Base class for agents that act inside a Markov decision process.
 *
 * <p>The agent tracks the state it is currently in and the reward most recently
 * observed. Slots for the previous state and previous action are also provided;
 * this class only ever resets them to {@code null} at the start of a trial, so
 * keeping them up to date is presumably the job of subclasses (via the setters
 * or direct field access) -- confirm against the concrete agents.
 *
 * @param <STATE_TYPE>  type used to represent MDP states
 * @param <ACTION_TYPE> type used to represent MDP actions
 */
public abstract class MDPAgent<STATE_TYPE, ACTION_TYPE> {
    /** The decision process this agent acts in. */
    protected MDP<STATE_TYPE, ACTION_TYPE> mdp;

    /** State the agent is currently in. */
    protected STATE_TYPE currentState;

    /** Reward attached to the most recent perception (or to the initial state). */
    protected Double currentReward;

    /** Previous state slot; cleared at the start of every trial. */
    protected STATE_TYPE previousState;

    /** Previous action slot; cleared at the start of every trial. */
    protected ACTION_TYPE previousAction;

    /**
     * Creates an agent positioned at the initial state of the given MDP, with the
     * current reward set to the reward the MDP reports for that state.
     *
     * @param mdp the decision process to act in
     */
    public MDPAgent(MDP<STATE_TYPE, ACTION_TYPE> mdp) {
        this.mdp = mdp;
        STATE_TYPE start = mdp.getInitialState();
        this.currentState = start;
        this.currentReward = Double.valueOf(mdp.getRewardFor(start));
    }

    /**
     * Performs one action from the current state and absorbs the resulting
     * perception into this agent's current state and reward.
     *
     * @param action_type the action to perform
     * @param randomizer  randomness source handed through to the MDP
     * @return the perception produced by the MDP for this step
     */
    public MDPPerception<STATE_TYPE> execute(ACTION_TYPE action_type, Randomizer randomizer) {
        MDPPerception<STATE_TYPE> perception = this.mdp.execute(this.currentState, action_type, randomizer);
        updateFromPerception(perception);
        return perception;
    }

    /**
     * Copies the state and reward carried by a perception into this agent.
     *
     * @param mDPPerception the perception to adopt
     */
    public void updateFromPerception(MDPPerception<STATE_TYPE> mDPPerception) {
        STATE_TYPE perceivedState = mDPPerception.getState();
        this.currentState = perceivedState;
        this.currentReward = Double.valueOf(mDPPerception.getReward());
    }

    /**
     * Runs a single trial: resets the agent to the MDP's initial state, then keeps
     * asking {@link #decideAction} for an action and executing it until
     * {@code decideAction} returns {@code null}.
     *
     * @param randomizer randomness source handed through to the MDP on every step
     */
    public void executeTrial(Randomizer randomizer) {
        // Reset all per-trial bookkeeping before the first decision.
        this.currentState = this.mdp.getInitialState();
        this.currentReward = Double.valueOf(this.mdp.getRewardFor(this.mdp.getInitialState()));
        this.previousState = null;
        this.previousAction = null;

        // The first perception is synthesized from the freshly reset state and reward.
        MDPPerception<STATE_TYPE> perception =
                new MDPPerception<>(this.currentState, this.currentReward.doubleValue());

        // A null action is the signal that the trial is over.
        ACTION_TYPE nextAction = decideAction(perception);
        while (nextAction != null) {
            perception = execute(nextAction, randomizer);
            nextAction = decideAction(perception);
        }
    }

    /**
     * Chooses the action to take in response to a perception.
     *
     * @param mDPPerception the latest perception
     * @return the action to execute next, or {@code null} to end the current trial
     */
    public abstract ACTION_TYPE decideAction(MDPPerception<STATE_TYPE> mDPPerception);

    /** @return the reward currently recorded by this agent */
    public Double getCurrentReward() {
        return this.currentReward;
    }

    /** @param d the reward value to record as current */
    public void setCurrentReward(Double d) {
        this.currentReward = d;
    }

    /** @return the recorded previous action; {@code null} after a trial reset */
    public ACTION_TYPE getPreviousAction() {
        return this.previousAction;
    }

    /** @param action_type the action to record as the previous action */
    public void setPreviousAction(ACTION_TYPE action_type) {
        this.previousAction = action_type;
    }

    /** @return the recorded previous state; {@code null} after a trial reset */
    public STATE_TYPE getPreviousState() {
        return this.previousState;
    }

    /** @param state_type the state to record as the previous state */
    public void setPreviousState(STATE_TYPE state_type) {
        this.previousState = state_type;
    }

    /** @return the state this agent is currently in */
    public STATE_TYPE getCurrentState() {
        return this.currentState;
    }
}
