// // $Source: /cvsroot/gambit/gambit/sources/libgambit/behav.imp,v $ // $Date: 2006/03/28 15:16:46 $ // $Revision: 1.20 $ // // DESCRIPTION: // Implementation of behavior profile classes // // This file is part of Gambit // Copyright (c) 2002, The Gambit Project // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. // #include "behav.h" namespace Gambit { //======================================================================== // MixedBehavProfile: Lifecycle //======================================================================== template MixedBehavProfile::MixedBehavProfile(const MixedBehavProfile &p_profile) : DVector(p_profile), m_support(p_profile.m_support), m_cacheValid(false), m_realizProbs(p_profile.m_realizProbs), m_beliefs(p_profile.m_beliefs), m_nvals(p_profile.m_nvals), m_bvals(p_profile.m_bvals), m_nodeValues(p_profile.m_nodeValues), m_infosetValues(p_profile.m_infosetValues), m_actionValues(p_profile.m_actionValues), m_gripe(p_profile.m_gripe) { m_realizProbs = (T) 0.0; m_beliefs = (T) 0.0; m_nodeValues = (T) 0.0; m_infosetValues = (T) 0.0; m_actionValues = (T) 0.0; m_gripe = (T) 0.0; } template MixedBehavProfile::MixedBehavProfile(const BehavSupport &p_support) : DVector(p_support.NumActions()), m_support(p_support), m_cacheValid(false), m_realizProbs(p_support.GetGame()->NumNodes()), m_beliefs(p_support.GetGame()->NumNodes()), m_nvals(p_support.GetGame()->NumNodes()), m_bvals(p_support.GetGame()->NumNodes()), m_nodeValues(p_support.GetGame()->NumNodes(), p_support.GetGame()->NumPlayers()), m_infosetValues(p_support.GetGame()->NumInfosets()), m_actionValues(p_support.GetGame()->NumActions()), m_gripe(p_support.GetGame()->NumActions()) { m_realizProbs = (T) 0.0; m_beliefs = (T) 0.0; m_nodeValues = (T) 0.0; m_infosetValues = (T) 0.0; m_actionValues = (T) 0.0; m_gripe = (T) 0.0; Centroid(); } template void MixedBehavProfile::BehaviorStrat(int pl, GameNodeRep *p_node) { for (int i = 1; i <= p_node->children.Length(); i++) { GameNodeRep *child = p_node->children[i]; if (p_node->GetPlayer() && p_node->GetPlayer()->GetNumber() == pl) { if (m_nvals[p_node->number] > (T) 0 && m_nvals[child->number] > (T) 0) { (*this)(p_node->GetPlayer()->GetNumber(), p_node->GetInfoset()->GetNumber(), m_support.GetIndex(p_node->GetInfoset()->GetAction(i))) = m_nvals[child->number] / m_nvals[p_node->number]; } } BehaviorStrat(pl, child); } } template void MixedBehavProfile::RealizationProbs(const MixedStrategyProfile &mp, int pl, const Array &actions, GameNodeRep *node) { T prob; for (int i = 1; i <= node->children.Length(); i++) { if (node->GetPlayer() && !node->GetPlayer()->IsChance()) { if (node->GetPlayer()->GetNumber() == pl) { if (actions[node->GetInfoset()->GetNumber()] == i) prob = (T) 1; else prob = (T) 0; } else if (GetSupport().Contains(node->GetInfoset()->GetAction(i))) prob = (T) 1 / (T) GetSupport().NumActions(node->GetPlayer()->GetNumber(), node->GetInfoset()->GetNumber()); else { prob = (T) 0; } } else { // n.GetPlayer() == 0 prob = node->infoset->GetActionProb(i); } GameNode child = node->children[i]; m_bvals[child->number] = prob * m_bvals[node->number]; m_nvals[child->number] += m_bvals[child->number]; RealizationProbs(mp, pl, actions, child); } } template MixedBehavProfile::MixedBehavProfile(const MixedStrategyProfile &p_profile) : DVector(p_profile.GetGame()->NumActions()), m_support(p_profile.GetGame()), m_cacheValid(false), m_realizProbs(m_support.GetGame()->NumNodes()), m_beliefs(m_support.GetGame()->NumNodes()), m_nvals(m_support.GetGame()->NumNodes()), m_bvals(m_support.GetGame()->NumNodes()), m_nodeValues(m_support.GetGame()->NumNodes(), m_support.GetGame()->NumPlayers()), m_infosetValues(m_support.GetGame()->NumInfosets()), m_actionValues(m_support.GetGame()->NumActions()), m_gripe(m_support.GetGame()->NumActions()) { m_realizProbs = (T) 0.0; m_beliefs = (T) 0.0; m_nodeValues = (T) 0.0; m_infosetValues = (T) 0.0; m_actionValues = (T) 0.0; m_gripe = (T) 0.0; ((Vector &) *this).operator=((T)0); GameNodeRep *root = m_support.GetGame()->GetRoot(); const StrategySupport &support = p_profile.GetSupport(); GameRep *game = m_support.GetGame(); for (GamePlayerIterator player = game->Players(); !player.AtEnd(); player++) { m_nvals = (T) 0; m_bvals = (T) 0; for (SupportStrategyIterator strategy = support.Strategies(player); !strategy.AtEnd(); strategy++) { if (p_profile[strategy] > (T) 0) { const Array &actions = strategy->m_behav; m_bvals[root->GetNumber()] = p_profile[strategy]; RealizationProbs(p_profile, player->GetNumber(), actions, root); } } m_nvals[1] = (T) 1; // set the root nval BehaviorStrat(player->GetNumber(), root); } } template MixedBehavProfile &MixedBehavProfile::operator=(const MixedBehavProfile &p_profile) { if (this != &p_profile && m_support == p_profile.m_support) { Invalidate(); DVector::operator=(p_profile); m_support = p_profile.m_support; } return *this; } //======================================================================== // MixedBehavProfile: Operator overloading //======================================================================== template bool MixedBehavProfile::operator==(const MixedBehavProfile &p_profile) const { return (m_support == p_profile.m_support && (DVector &) *this == (DVector &) p_profile); } //======================================================================== // MixedBehavProfile: General data access //======================================================================== template void MixedBehavProfile::Centroid(void) { T center; for (int pl = 1; pl <= this->dvlen.Length(); pl++) for (int iset = 1; iset <= this->dvlen[pl]; iset++) if (m_support.NumActions(pl,iset) > 0) { center = ((T) 1 / (T) m_support.NumActions(pl, iset)); int act; for (act = 1; act <= this->svlen[this->dvidx[pl] + iset - 1]; act++) this->dvptr[pl][iset][act] = center; } } //======================================================================== // MixedBehavProfile: Interesting quantities //======================================================================== // // The p_definedOnly parameter allows us to compute the LiapValue for profiles // which are incomplete. Some methods -- such as the sequence form // methods -- return all zeroes for all action probabilities at // information sets sufficiently far off the equilibrium path. // In such cases, *any* completion is Nash. // // This is really a hack because we don't have a proper way yet of // indicating this. // template T MixedBehavProfile::GetLiapValue(bool p_definedOnly) const { static const T BIG1 = (T) 10000; static const T BIG2 = (T) 100; T x, result = ((T) 0), avg, sum; // HACK: force it to recompute data. FIX THIS. m_cacheValid = false; ComputeSolutionData(); for (int i = 1; i <= m_support.GetGame()->NumPlayers(); i++) { for (int iset = 1; iset <= m_support.GetGame()->GetPlayer(i)->NumInfosets(); iset++) { avg = sum = (T)0; for (int act = 1; act <= m_support.NumActions(i, iset); act++) { GameActionRep *action = m_support.GetAction(i, iset, act); x = GetActionProb(action); avg += x * m_actionValues(action->GetInfoset()->GetPlayer()->GetNumber(), action->GetInfoset()->GetNumber(), action->m_number); sum += x; if (x > (T)0) x = (T)0; result += BIG1 * x * x; // add penalty for neg probabilities } for (int act = 1; act <= m_support.NumActions(i, iset); act++) { x = ActionValue(m_support.GetAction(i, iset, act)) - avg; if (x < (T)0) x = (T)0; result += x * x; // add penalty if not best response } x = sum - (T)1; if (!p_definedOnly || sum >= (T) 1.0e-4) { result += BIG2 * x * x; // add penalty for sum not equal to 1 } } } return result; } template const T &MixedBehavProfile::GetRealizProb(const GameNode &node) const { ComputeSolutionData(); return m_realizProbs[node->number]; } template const T &MixedBehavProfile::GetBeliefProb(const GameNode &node) const { ComputeSolutionData(); return m_beliefs[node->number]; } template Vector MixedBehavProfile::GetNodeValue(const GameNode &node) const { ComputeSolutionData(); return m_nodeValues.Row(node->number); } template T MixedBehavProfile::GetInfosetProb(const GameInfoset &iset) const { ComputeSolutionData(); T prob = (T) 0; for (int i = 1; i <= iset->NumMembers(); i++) { prob += m_realizProbs[iset->GetMember(i)->number]; } return prob; } template const T &MixedBehavProfile::GetInfosetValue(const GameInfoset &iset) const { ComputeSolutionData(); return m_infosetValues(iset->GetPlayer()->GetNumber(), iset->GetNumber()); } template T MixedBehavProfile::GetActionProb(const GameAction &action) const { if (action->GetInfoset()->GetPlayer()->IsChance()) { GameInfosetRep *infoset = action->GetInfoset(); return infoset->GetActionProb(action->GetNumber()); } else if (!m_support.Contains(action)) { return (T) 0.0; } else { return (*this)(action->GetInfoset()->GetPlayer()->GetNumber(), action->GetInfoset()->GetNumber(), m_support.GetIndex(action)); } } template const T &MixedBehavProfile::GetActionValue(const GameAction &act) const { ComputeSolutionData(); return m_actionValues(act->GetInfoset()->GetPlayer()->GetNumber(), act->GetInfoset()->GetNumber(), act->m_number); } template const T &MixedBehavProfile::GetRegret(const GameAction &act) const { ComputeSolutionData(); return m_gripe(act->GetInfoset()->GetPlayer()->GetNumber(), act->GetInfoset()->GetNumber(), act->m_number); } template void MixedBehavProfile::GetPayoff(GameNodeRep *node, const T &prob, int player, T &value) const { if (node->outcome) { value += prob * node->outcome->GetPayoff(player); } if (node->children.Length()) { int pl = node->infoset->m_player->m_number, iset = node->infoset->m_number; if (pl == 0) { // chance player for (int act = 1; act <= node->NumChildren(); act++) { GetPayoff(node->GetChild(act), prob * node->infoset->GetActionProb(act), player, value); } } else { for (int act = 1; act <= m_support.NumActions(pl, iset); act++) { GameActionRep *action = m_support.GetAction(pl, iset, act); GetPayoff(node->GetChild(action->GetNumber()), prob * GetActionProb(action), player, value); } } } } template T MixedBehavProfile::GetPayoff(int player) const { T value = (T) 0; GetPayoff(m_support.GetGame()->GetRoot(), (T) 1, player, value); return value; } // // The following routines compute the derivatives of quantities as // the probability of the action 'p_oppAction' is changed. // See Turocy (2001), "Computing the Quantal Response Equilibrium // Correspondence" for details. // These assume that the profile is interior (totally mixed), // and that the game is of perfect recall // template T MixedBehavProfile::DiffActionValue(const GameAction &p_action, const GameAction &p_oppAction) const { ComputeSolutionData(); T deriv = (T) 0; GameInfoset infoset = p_action->GetInfoset(); GamePlayer player = p_action->GetInfoset()->GetPlayer(); for (int i = 1; i <= infoset->NumMembers(); i++) { GameNode member = infoset->GetMember(i); GameNode child = member->GetChild(p_action->m_number); deriv += DiffRealizProb(member, p_oppAction) * (m_nodeValues(child->number, player->GetNumber()) - m_actionValues(p_action->GetInfoset()->GetPlayer()->GetNumber(), p_action->GetInfoset()->GetNumber(), p_action->m_number)); deriv += m_realizProbs[member->number] * DiffNodeValue(member->GetChild(p_action->m_number), player, p_oppAction); } return deriv / GetInfosetProb(p_action->GetInfoset()); } template T MixedBehavProfile::DiffRealizProb(const GameNode &p_node, const GameAction &p_oppAction) const { ComputeSolutionData(); T deriv = (T) 1; bool isPrec = false; GameNode node = p_node; while (node->GetParent()) { GameAction prevAction = node->GetPriorAction(); if (prevAction != p_oppAction) { deriv *= GetActionProb(prevAction); } else { isPrec = true; } node = node->GetParent(); } return (isPrec) ? deriv : (T) 0.0; } template T MixedBehavProfile::DiffNodeValue(const GameNode &p_node, const GamePlayer &p_player, const GameAction &p_oppAction) const { ComputeSolutionData(); if (p_node->NumChildren() > 0) { GameInfoset infoset = p_node->GetInfoset(); if (infoset == p_oppAction->GetInfoset()) { // We've encountered the action; since we assume perfect recall, // we won't encounter it again, and the downtree value must // be the same. return m_nodeValues(p_node->GetChild(p_oppAction->GetNumber())->number, p_player->GetNumber()); } else { T deriv = (T) 0; for (int act = 1; act <= infoset->NumActions(); act++) { deriv += (DiffNodeValue(p_node->GetChild(act), p_player, p_oppAction) * GetActionProb(infoset->GetAction(act))); } return deriv; } } else { // If we reach a terminal node and haven't encountered p_oppAction, // derivative wrt this path is zero. return (T) 0; } } //======================================================================== // MixedBehavProfile: Cached profile information //======================================================================== template void MixedBehavProfile::ComputeSolutionDataPass2(const GameNode &node) const { if (node->outcome) { for (int pl = 1; pl <= m_support.GetGame()->NumPlayers(); pl++) { m_nodeValues(node->number, pl) += node->outcome->GetPayoff(pl); } } GameInfoset iset = node->infoset; if (iset) { T infosetProb = (T) 0; for (int i = 1; i <= iset->NumMembers(); i++) { infosetProb += m_realizProbs[iset->GetMember(i)->number]; } if (infosetProb != infosetProb * (T) 0) { m_beliefs[node->number] = m_realizProbs[node->number] / infosetProb; } // push down payoffs from outcomes attached to non-terminal nodes for (int child = 1; child <= node->NumChildren(); child++) { m_nodeValues.SetRow(node->GetChild(child)->number, m_nodeValues.Row(node->number)); } for (int pl = 1; pl <= m_support.GetGame()->NumPlayers(); pl++) { m_nodeValues(node->number, pl) = (T) 0; } for (int child = 1; child <= node->NumChildren(); child++) { GameNode childNode = node->GetChild(child); ComputeSolutionDataPass2(childNode); GameAction act = childNode->GetPriorAction(); for (int pl = 1; pl <= m_support.GetGame()->NumPlayers(); pl++) { m_nodeValues(node->number, pl) += GetActionProb(act) * m_nodeValues(childNode->number, pl); } if (!iset->IsChanceInfoset()) { T &cpay = m_actionValues(act->GetInfoset()->GetPlayer()->GetNumber(), act->GetInfoset()->GetNumber(), act->m_number); if (infosetProb != infosetProb * (T) 0) { cpay += m_beliefs[node->number] * m_nodeValues(childNode->number, iset->GetPlayer()->GetNumber()); } else { cpay = (T) 0; } } } } } // compute realization probabilities for nodes and isets. template void MixedBehavProfile::ComputeSolutionDataPass1(const GameNode &node) const { if (node->GetParent()) { m_realizProbs[node->number] = m_realizProbs[node->GetParent()->number] * GetActionProb(node->GetPriorAction()); } else { m_realizProbs[node->number] = (T) 1; } if (node->GetInfoset()) { for (int i = 1; i <= node->NumChildren(); i++) { ComputeSolutionDataPass1(node->GetChild(i)); } } } template void MixedBehavProfile::ComputeSolutionData(void) const { if (!m_cacheValid) { m_actionValues = (T) 0; m_nodeValues = (T) 0; m_infosetValues = (T) 0; m_gripe = (T) 0; ComputeSolutionDataPass1(m_support.GetGame()->GetRoot()); ComputeSolutionDataPass2(m_support.GetGame()->GetRoot()); // At this point, mark the cache as value, so calls to GetInfosetValue() // don't create a loop. m_cacheValid = true; for (int pl = 1; pl <= m_support.GetGame()->NumPlayers(); pl++) { for (int iset = 1; iset <= m_support.GetGame()->NumInfosets()[pl]; iset++) { GameInfoset infoset = m_support.GetGame()->GetPlayer(pl)->GetInfoset(iset); m_infosetValues(infoset->GetPlayer()->GetNumber(), infoset->GetNumber()) = (T) 0; for (int act = 1; act <= infoset->NumActions(); act++) { GameAction action = infoset->GetAction(act); m_infosetValues(infoset->GetPlayer()->GetNumber(), infoset->GetNumber()) += GetActionProb(action) * ActionValue(action); } for (int act = 1; act <= infoset->NumActions(); act++) { GameAction action = infoset->GetAction(act); m_gripe(action->GetInfoset()->GetPlayer()->GetNumber(), action->GetInfoset()->GetNumber(), action->m_number) = (ActionValue(action) - GetInfosetValue(infoset)) * GetInfosetProb(infoset); } } } } } template bool MixedBehavProfile::IsDefinedAt(GameInfoset p_infoset) const { for (int act = 1; act <= p_infoset->NumActions(); act++) { if (GetActionProb(p_infoset->GetAction(act)) > (T) 0) { return true; } } return false; } } // end namespace Gambit