(*
Daniel Reeves 2001.03.15
Empirically determine best-response strategies to given strategies for
games where the action (eg, a bid) is a single number and the payoff
depends on a private piece of information, also a number (eg, your
valuation), known as the agent's type.
TODO: empirically find a nash equilibrium by starting with a seed
strategy and proceeding to best-response fixed-point.
*)
WriteString["stderr", "[Loading Equilibria.m]\n"];
BeginPackage["Equilibria`", {"MonteCarlo`", "Util`"}];
(* All non-private symbols must be listed below... *)
(* The following should be redefined by the program using this library... *)
payoff;
minType;
maxType;
minAction;
maxAction;
randType;
defaultConfidenceLevel;
(* The following are the functions available in this library, with their
options listed after... *)
psHash; (* makeData *)
psHashDomain; (* TODO: see other to-do's about this *)
allTypes;
ps;
makeTypeActionPairs;
(*
probLessThan;
maxRange;
*)
showBestAction; {confidence, compareFuncs};
bestActionRange; {confidence};
showBestResponse; {confidence, compareFuncs};
(* TODO: Protect["*"]; Or do Protect on all the symbols above... *)
Begin["`Private`"]; (****************************************************)
payoff::usage = "payoff[myType, myAction, otherActions] gives the payoff to an
agent of type myType doing action myAction when its opponents
are doing otherActions. This should be redefined by the
program using this library.";
payoff[myType_, myAction_, otherActions_List] := 0
minType::usage = "minType gives the minimum value for an agent's type. This
can be redefined by the program using this library.";
minType = 0;
maxType::usage = "maxType give the maximum value for an agent's type. This
can be redefined by the program using this library.";
maxType = 1;
minAction::usage = "minAction[myType] is a function mapping an agent's type
to the minimum value for an action that it can/would do.
This can be redefined by the program using this library.";
minAction[myType_:0] := 0
(* TODO: both this definition and the overriding one have to be defined in
exactly the same way (eg, both need to have the default arg like that)
otherwise the new one won't override this one. *)
maxAction::usage = "maxAction[myType] is a function mapping an agent's type
to the maximum value for an action that it can/would do.
This can be redefined by the program using this library.";
maxAction[myType_:0] := 1
randType::usage = "randType[] is a function which generates an agent type
according to the common knowledge distribution from which
agent types are drawn. This can be redefined by the program
using this library.";
randType[] := Random[]
defaultConfidenceLevel::usage = "defaultConfidenceLevel is used for functions
that compute mean confidence intervals, either
to show error bars (showBestAction,
showBestResponse) or to compute a range
directly (bestActionRange). This can be
redefined by the program using this library.";
defaultConfidenceLevel = .95;
(****************************************************************************)
(* TODO: psHash[strategies][type, action] or psHash[s][t][a] *)
psHash::usage = "psHash maps type, action, and strategy list to statistics
{n, s, ss} for the expected payoff for an agent of that type
using that strategy against those strategies.";
psHash[type_, action_, strategies_] = {0, 0, 0}
baHash::usage = "(ba = best action). caches action range we need to try";
baHash[type_, strategies_] = {minAction[type], maxAction[type]}
makeData::usage = "makeData[myType, strategies] returns a list of
{action, {n,s,ss}} pairs for the given type, based on
psHash.";
makeData[myType_, strategies_List] := {#[[2]], psHash@@#}& /@
Union[{{myType, minAction[myType], strategies},
{myType, maxAction[myType], strategies}},
Select[psHashDomain, (#[[1]]==myType && #[[3]]==strategies)&]];
(* TODO: using the global psHashDomain updated by ps[] instead of
making the expensive domain[psHash] call each time; below too *)
allTypes::usage = "allTypes[strategies] returns a list of the types cached
in psHash.";
allTypes[strategies_] := Union[First /@ Select[psHashDomain,
#[[3]]===strategies&]]
(****************************************************************************)
(* TODO: we might be able to simulate more points in less time by flooring
the time constraint to an integer so we can use TimeConstrained. *)
(* TODO: allow an arbitrary distribution for agent types *)
ps::usage = "ps[myType, myAction, strategies] (ps = payoff stats) does a
montecarlo simulation to estimate expected payoff for an agent of
a certain type doing a certain action, when up against given
opponent strategies (functions mapping a type to an action).
It computes and caches (in psHash) the list {n,s,ss} for each
type-action simulated -- {n,s,ss} is the number of trials
simulated for the type-action pair, the sum of the payoffs, and
the sum of the squares of the payoffs. To do the simulation,
this function uses the random type generator randType which
returns a random agent type according to the common knowledge
distribution. It also assumes the existence of the payoff
function which maps my type, my action, and opponent actions to
my payoff. Additionally, myType and myAction can be lists of
types/actions, in which case the function returns a list of
pairs -- {type,{n,s,ss}} if given a list of types, or
{action,{n,s,ss}} if given a list of actions, or
{{type,action},{n,s,ss}} if given both a list of types and a
list of actions (same thing if given a list of type-action
pairs).";
ps[myType_/;Head[myType]=!=List,
myAction_/;Head[myAction]=!=List, strategies_List] :=
Module[{x, n = 0, s = 0, ss = 0},
(* TODO: simulate till the point of diminishing returns *)
While[n < 5000, (* TODO: don't hardcode *)
x = payoff[myType, myAction, #[randType[]]& /@ strategies];
n++; s += x; ss += x^2;];
AbortProtect[psHash[myType, myAction, strategies] += {n, s, ss};
(* TODO: maybe also update a global list for domain[psHash]
since it's so expensive to call domain[psHash] a lot *)
(* TODO: did it but this might be too expensive itself... *)
If[psHash[myType, myAction, strategies] == {n, s, ss},
psHashDomain = Union[psHashDomain, {{myType, myAction, strategies}}];
]
]
]
ps[myTypeList_List, myAction_/;Head[myAction]=!=List, strategies_List] :=
Scan[ps[#, myAction, strategies]&, myTypeList]
ps[myType_/;Head[myType]=!=List, myActionList_List, strategies_List] :=
Scan[ps[myType, #, strategies]&, myActionList]
(* note this does the type-action pairs with the least simulations first *)
(* TODO: could bias the time spent on each type-action pair so that they
even themselves out. *)
(* TODO: for now, I just leave off the one that has the most simulations *)
ps[typeActionPairs:{{_,_}..}, strategies_List] :=
Scan[ps[Sequence @@ #, strategies]&,
Drop[Sort[typeActionPairs,
typeActionPairNeedierFunc[strategies]], -1]]
ps[myTypeList_List, myActionList_List, strategies_List] :=
ps[Distribute[{myTypeList, myActionList}, List], strategies]
(* TODO: this should look at how big the 95% confidence intervals are and
choose the type-action pair that has the biggest interval.
But {0,0,0} should always be considered neediest. *)
typeActionPairNeedierFunc::usage = "typeActionPairNeedierFunc[strategies][ta1,
ta2] returns whether tp1 should be simulated
before tp2, ie, tp1 has fewer simulations
under its belt so far.";
typeActionPairNeedierFunc[strategies_List] := Function[{tp1, tp2},
psHash[Sequence@@tp1, strategies][[1]] <
psHash[Sequence@@tp2, strategies][[1]]
]
tryActions::usage = "tryActions[type, strategies, actionDelta] returns a list
of actions to try (returns {type,action} pairs where all
the types are the same) for the current type. It decides
on an action range that we're almost certain contains the
best action.";
tryActions[type_, strategies_List, actionDelta_] := Module[{b, min, max, l},
{b, {min, max}} = bestActionRange[type, strategies, confidence->1.-1/10^6];
(*
l = Range[min, max, actionDelta]; (* TODO: don't make the whole range *)
If[Last[l]=!=max, AppendTo[l, max]];
If[Length[l]>=5, l = Drop[l, {3, -3}]];
l = Union[l, {b}];
{type, #}& /@ l
*)
{type, #}& /@ Union[Range[min, max, actionDelta], {max}]
]
makeTypeActionPairs::usage = "makeTypeActionPairs[typeDelta, actionDelta]
constructs a list of type-action pairs. Options
for minimum and maximum type can be specified,
but only matter if they are within the ranges set
by minType and maxType.";
makeTypeActionPairs[typeDelta_, actionDelta_, strategies_List,
tMin_:-Infinity, tMax_:Infinity] :=
Module[{start, t1, t2, ans},
start = TimeUsed[];
t1 = Max[minType, Min[maxType, tMin]];
t2 = Max[minType, Min[maxType, tMax]];
ans = tryActions[#, strategies, actionDelta]& /@ Range[t1, t2, typeDelta];
Print["[makeTypeActionPairs: ", Plus@@(Length/@ans), " -> ", Length /@ ans,
(*
Quotient[t2-t1+typeDelta, typeDelta], " types, ",
Length[ans], " pairs, ",
N[Length[ans]/Quotient[t2-t1+typeDelta, typeDelta]],
*)
" pairs per type",
" in ", seconds2str[TimeUsed[] - start], "]"]; (* TODO *)
Join @@ ans
]
(****************************************************************************)
showBestAction::usage = "showBestAction[myType, strategies] plots expected
payoff vs myAction based on psHash. Options allow
setting the confidence level to use in showing the
range for the true expected payoff for given actions.";
Options[showBestAction] = { confidence -> defaultConfidenceLevel,
compareFuncs -> {}};
showBestAction[myType_, strategies_List, opts___] :=
Module[{opt, x, p1, p2, p3, t, best, min, max, reallyMin, reallyMax},
opt[x_] := x /. {opts} /. Options[showBestAction];
p1 = plotWithErrorBars[opt[confidence], minAction, maxAction,
makeData[myType, strategies],
Identity];
(* TODO: color code the dots based on confidence... *)
{best, {min, max}} = bestActionRange[myType, strategies,
confidence->.95]; (* TODO hardcode? *)
{best, {reallyMin, reallyMax}} = bestActionRange[myType, strategies,
confidence->1.-1/10^6];
p2 = Graphics[{AbsolutePointSize[6],
RGBColor[0,0,1],
Point[{min, maxLikelihoodValue[psHash[myType, min, strategies]]}],
Point[{max, maxLikelihoodValue[psHash[myType, max, strategies]]}],
Point[{reallyMin, maxLikelihoodValue[psHash[myType, reallyMin,
strategies]]}],
Point[{reallyMax, maxLikelihoodValue[psHash[myType, reallyMax,
strategies]]}],
RGBColor[0,1,0],
Point[{best, maxLikelihoodValue[psHash[myType, best, strategies]]}]}];
p3 = Plot[Evaluate[#[t]& /@ opt[compareFuncs]], {t, minAction[myType],
maxAction[myType]},
PlotStyle->RGBColor[0,0,1],
DisplayFunction->Identity];
Show[p1, p2, p3, PlotRange -> All, DisplayFunction -> $DisplayFunction]
(* TODO: pass on any of opts except the ones
in Options[showBestResponse] to Show *)
]
maxLikelihoodValue[{n_, s_, ss_}] := If[n==0, 0, s/n]
maxLikelihoodValue[{x_, {n_, s_, ss_}}] := If[n==0, 0, s/n]
(* TODO: This is currently a very conservative estimate of the probability.
call the maxLikelihood values a and b
and let c be the midpoint between a and b, ie, (a+b)/2
This gives the probability that the true mean for a is less than
c times the probability that the true mean for b is greater than c.
(so the true probability of a < b will be greater)
If we could OR the above for all values of c from -inf to +inf, we'd
have the true answer... TODO!
TODO: actually, let's just use the meanDifferenceTest adapted for
when we only know {n,s,ss}...
*)
probLessThan::usage = "probLessThan[stats1, stats2] gives the probability that
the true mean of the distribution from which the sample
statistics stats1 were taken is less than the true mean
of the distribution from which the sample statistics
stats2 were taken.";
probLessThan[{n1_, s1_, ss1_}, {n2_, s2_, ss2_}] := Module[{c},
c = (maxLikelihoodValue[{n1,s1,ss1}] + maxLikelihoodValue[{n2,s2,ss2}]) / 2;
Which[n1 == 0 || n2 == 0,
1/2, (* TODO: does this make sense? *)
probRange[n1,s1,ss1,c,c] == 1 && probRange[n2,s2,ss2,c,c] == 1,
0,
True,
probRange[n1, s1, ss1, -Infinity, c] * probRange[n2, s2, ss2, c, Infinity]
]
]
offset::usage = "offset[n, me, list] finds me's position, adds n to that, and
returns the element at that position in the list, or the first
(last) if that would take us out of the list. This will crash
if me is not in list.";
offset[n_, me_, list_List] := Module[{pos},
pos = Position[list, me][[1,1]];
list[[Max[1, Min[Length[list], pos + n]]]]
]
maxRange::usage = "maxRange[conf, points] takes a set of x values with their
goodnesses expressed as {n, s, ss} and return a range of x
values that has probability conf of containing the best x --
actual return value is {maximumLikelihoodMax,
{lowerBoundOnMax, upperBoundOnMax}}.";
maxRange[conf_, points:{{_, {_, _, _}}..}] :=
Module[{sorted, likelyBest, candidates},
sorted = Sort[points, #1[[1]] < #2[[1]]&];
(* Print["sorted is ", First/@sorted]; *)
likelyBest = argMax[maxLikelihoodValue, sorted];
(* Print["likely best is ", likelyBest]; *)
candidates = (* points we're not so sure are worse than the likelyBest *)
Select[sorted,
(probLessThan[#[[2]], likelyBest[[2]]] <= conf
|| #[[2,1]] < 4)&]; (* almost no trials for this point *)
likelyBest = First[likelyBest];
candidates = Append[First /@ candidates, likelyBest];
sorted = First /@ sorted;
(* Print["candidates are ", candidates]; *)
{likelyBest,
{offset[-1, Min[candidates], sorted],
offset[+1, Max[candidates], sorted]}}
]
bestActionRange::usage = "bestActionRange[myType, strategies] is similar to
showBestAction but it automatically looks at the
graph and returns a range of actions that is likely
to contain the best action. (return value is of the
same form as maxRange)";
Options[bestActionRange] = { confidence -> defaultConfidenceLevel};
bestActionRange[myType_, strategies_List, opts___Rule] :=
Module[{opt, x, b, min, max},
opt[x_] := x /. {opts} /. Options[bestActionRange];
{b, {min, max}} = maxRange[opt[confidence], makeData[myType, strategies]];
(* TODO: baHash[myType, strategies] = {min, max} -- no, need higher conf *)
{b, {min, max}}
]
showBestResponse::usage = "showBestResponse[strategies] plots a best response
strategy (a strategy is a mapping from type to
action) to the given opponent strategies.";
Options[showBestResponse] = {confidence -> defaultConfidenceLevel,
compareFuncs -> {}};
showBestResponse[strategies_List, opts___Rule] :=
Module[{opt, x, p1, p2, p3, t, a, b},
opt[x_] := x /. {opts} /. Options[showBestResponse];
p1 = Plot[Evaluate[#[t]& /@ strategies], {t, minType, maxType},
Frame -> True, Axes -> False, DisplayFunction -> Identity];
p3 = Plot[#[t]& /@ opt[compareFuncs], {t, minType, maxType},
Frame -> True, Axes -> False, PlotStyle->RGBColor[0,0,1],
DisplayFunction->Identity];
p2 = plotWithErrorBars[
{#, bestActionRange[#, strategies, confidence -> opt[confidence]]}& /@
allTypes[strategies], Identity];
Show[p1, p2, p3, PlotRange -> All, DisplayFunction -> $DisplayFunction]
]
End[]; (* Private context *)
EndPackage[];