// NOTE(review): this copy of the file appears to be damaged and should be
// restored from version control before any logic is changed here.
//  * Line breaks seem to have been collapsed, so every `//` comment that used
//    to end at a line break now swallows the rest of its physical line.
//  * Text that sat between a `<` and a later `>` seems to be missing: the
//    #include target, the bound/increment (and first statements) of each
//    `for (int i=0; i...` loop, the end of the destructor together with the
//    signature and first half of the action-selection routine, whatever
//    preceded `eligibility_traces` in the update step, and the end of Reset()
//    together with the header of the routine that stores `confidence` and
//    `zeta`. The text also stops in the middle of a loop.
//  TODO confirm each gap against the upstream file; nothing below fills them in.
//
// What the surviving text shows:
//  * ANN_Policy derives from DiscretePolicy and forwards n_states, n_actions,
//    alpha, gamma, lambda, softmax, randomness and init_eval to its constructor.
//  * Constructor, separate_actions == true: J is NULL, Ja is an array of
//    n_actions ANN pointers and JQs an array of n_actions reals.
//    Constructor, separate_actions == false: Ja and JQs are NULL and a single
//    network J = NewANN(n_states, n_actions) is created.
//    Every network gets a hidden layer only when n_hidden > 0, is initialised
//    with ANN_Init, switched to linear outputs, has batch mode turned off, has
//    its eligibility_traces flag set from `eligibility`, and receives
//    lambda*gamma through ANN_SetLambda and alpha through ANN_SetLearningRate.
//  * ps (n_states reals) and delta_vector (n_actions reals) are allocated in
//    the constructor and released with delete[] in the destructor; J_ps_pa
//    starts at 0.0.
//  * Action-selection routine (header missing): the out-of-bounds check only
//    prints "Action %d out of bounds" to stderr and execution continues.
//    amax is `a` for Sarsa and `argmax` for QLearning; any other
//    learning_method falls back to `a` and prints "Unknown learning method".
//  * No training happens while pa < 0 (start of an episode). Otherwise
//    delta = r + gamma*Q_s[amax] - J_ps_pa is stored in tdError. In the
//    visible single-network branch, delta_vector[pa] is 1.0 and delta is
//    passed as the last argument of ANN_Delta_Train when the
//    eligibility_traces test holds; otherwise delta_vector[pa] is delta and
//    0.0 is passed.
//  * Before returning `a`, the routine records J_ps_pa = Q_s[a] and pa = a.
// -*- Mode: c++ -*- // copyright (c) 2004 by Christos Dimitrakakis // $Id: ann_policy.cpp,v 1.3 2005/08/05 09:02:58 berniw Exp $ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * * * ***************************************************************************/ #include ANN_Policy::ANN_Policy (int n_states, int n_actions, int n_hidden, real alpha, real gamma, real lambda, bool eligibility, bool softmax, real randomness, real init_eval, bool separate_actions) : DiscretePolicy (n_states, n_actions, alpha, gamma, lambda, softmax, randomness, init_eval) { this->separate_actions = separate_actions; this->eligibility = eligibility; if (eligibility) { message ("Using eligibility traces"); } if (separate_actions) { message ("Separate actions"); J = NULL; Ja = new ANN* [n_actions]; JQs = new real [n_actions]; for (int i=0; i 0) { ANN_AddHiddenLayer (Ja[i], n_hidden); } ANN_Init (Ja[i]); ANN_SetOutputsToLinear(Ja[i]); ANN_SetBatchMode(Ja[i], false); Ja[i]->eligibility_traces = eligibility; ANN_SetLambda(Ja[i],lambda*gamma); ANN_SetLearningRate (Ja[i], alpha); } } else { JQs = NULL; Ja = NULL; J = NewANN (n_states, n_actions); if (n_hidden > 0) { ANN_AddHiddenLayer (J, n_hidden); } ANN_Init (J); ANN_SetOutputsToLinear(J); ANN_SetBatchMode(J, false); J->eligibility_traces = eligibility; ANN_SetLambda(J,lambda*gamma); ANN_SetLearningRate (J, alpha); } ps = new real [n_states]; delta_vector = new real [n_actions]; J_ps_pa = 0.0; } ANN_Policy::~ANN_Policy() { delete [] ps; delete [] delta_vector; if (separate_actions) { for (int i=0; i=n_actions) { fprintf (stderr, "Action %d out of bounds\n", a); } switch (learning_method) { case Sarsa: amax = a; break; case QLearning: amax = argmax; break; 
default: amax = a; fprintf (stderr, "Unknown learning method\n"); } if (pa>=0) { // do not update at start of episode real delta = r + gamma*Q_s[amax] - J_ps_pa; tdError = delta; for (int j=0; jeligibility_traces) { delta_vector[pa] = 1.0; ANN_Delta_Train (J, delta_vector, delta); } else { delta_vector[pa] = delta; ANN_Delta_Train (J, delta_vector, 0.0); } } } //printf ("%d %d #STATE\n", min_el_state, max_el_state); // printf ("Q[%d,%d]=%f r=%f e=%f ad=%f gl=%f #QV\n", // ps, pa, Q[ps][pa], r, e[ps][pa], ad, gl); J_ps_pa = Q_s[a]; pa = a; return a; } void ANN_Policy::Reset() { if (separate_actions) { for (int i=0; iconfidence = confidence; this->zeta = zeta; if (separate_actions) { for (int i=0; i