// -*- Mode: c++ -*- // copyright (c) 2004 by Christos Dimitrakakis // $Id: ann_policy.h,v 1.3 2005/08/05 09:02:58 berniw Exp $ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * ***************************************************************************/ #ifndef ANN_POLICY_H #define ANN_POLICY_H #include /** A type of discrete action policy using a neural network for function approximation. Constructor arguments offer the additional option \c separate_actions. This is useful for the case of eligibility traces. It allows the use of clearing action traces, since it uses a separate approximator for each action, rather than a single approximator with many outputs. The class has essentially the same interface as DiscretePolicy. A major difference is the fact that you must supply a \c real \em vector that represents the state. Note that using Q-learning with eligibility traces in this class can, in theory, result in divergence. 
*/ class ANN_Policy : public DiscretePolicy { protected: ANN* J; ///< Evaluation network ANN** Ja; ///< Evaluation networks (for \c separate_actions case) real* ps; ///< Previous state vector \deprecated real* JQs; ///< Placeholder for evaluation vector (\c separate_actions) real J_ps_pa; ///< Evaluation of last action real* delta_vector; ///< Scratch vector for TD error bool eligibility; ///< eligibility option bool separate_actions; ///< Single/separate evaluation option public: /// Make a new policy ANN_Policy (int n_states, int n_actions, int n_hidden = 0, real alpha=0.1, real gamma=0.8, real lambda=0.8, bool eligibility = false, bool softmax = false, real randomness=0.1, real init_eval=0.0, bool separate_actions = false); virtual ~ANN_Policy(); /// Select an action, given a vector of real numbers which /// represents the state. virtual int SelectAction(real* s, real r, int forced_a=-1); /// Reset eligibility traces. virtual void Reset(); /// Return the last action value. virtual real getLastActionValue () {return J_ps_pa;} /// \deprecated Get the probabilities of all actions - call after SelectAction(). virtual real* getActionProbabilities () { real sum = 0.0; int i; for (i=0; i