(*
   Daniel Reeves  2001.03.15
   Empirically determine best-response strategies to given strategies for 
   games where the action (eg, a bid) is a single number and the payoff 
   depends on a private piece of information, also a number (eg, your 
   valuation), known as the agent's type.

   TODO: empirically find a nash equilibrium by starting with a seed 
         strategy and proceeding to best-response fixed-point.
*)

WriteString["stderr", "[Loading Equilibria.m]\n"];

BeginPackage["Equilibria`", {"MonteCarlo`", "Util`"}];
(* All non-private symbols must be listed below... *)

(* The following should be redefined by the program using this library... *)
payoff;
minType;
maxType;
minAction;
maxAction;
randType;
defaultConfidenceLevel;

(* The following are the functions available in this library, with their
   options listed after... *)
psHash;   (* makeData *)
psHashDomain;  (* TODO: see other to-do's about this *)
allTypes;
ps;
makeTypeActionPairs;

(*
probLessThan;
maxRange;
*)

showBestAction;     {confidence, compareFuncs};
bestActionRange;    {confidence};
showBestResponse;   {confidence, compareFuncs};

(* TODO: Protect["*"];  Or do Protect on all the symbols above... *)


Begin["`Private`"];  (****************************************************)

payoff::usage = "payoff[myType, myAction, otherActions] gives the payoff to an 
                agent of type myType doing action myAction when its opponents 
                are doing otherActions.  This should be redefined by the 
                program using this library.";
payoff[myType_, myAction_, otherActions_List] := 0

minType::usage = "minType gives the minimum value for an agent's type.  This 
                 can be redefined by the program using this library.";
minType = 0;

maxType::usage = "maxType give the maximum value for an agent's type.  This 
                 can be redefined by the program using this library.";
maxType = 1;

minAction::usage = "minAction[myType] is a function mapping an agent's type 
                   to the minimum value for an action that it can/would do.  
                   This can be redefined by the program using this library.";
minAction[myType_:0] := 0
(* TODO: both this definition and the overriding one have to be defined in 
exactly the same way (eg, both need to have the default arg like that) 
otherwise the new one won't override this one. *)

maxAction::usage = "maxAction[myType] is a function mapping an agent's type 
                   to the maximum value for an action that it can/would do.  
                   This can be redefined by the program using this library.";
maxAction[myType_:0] := 1

randType::usage = "randType[] is a function which generates an agent type 
                  according to the common knowledge distribution from which 
                  agent types are drawn.  This can be redefined by the program 
                  using this library.";
randType[] := Random[]

defaultConfidenceLevel::usage = "defaultConfidenceLevel is used for functions 
                                that compute mean confidence intervals, either 
                                to show error bars (showBestAction, 
                                showBestResponse) or to compute a range 
                                directly (bestActionRange).  This can be 
                                redefined by the program using this library.";
defaultConfidenceLevel = .95;

(****************************************************************************)

(* TODO: psHash[strategies][type, action]  or psHash[s][t][a] *)
psHash::usage = "psHash maps type, action, and strategy list to statistics 
                {n, s, ss} for the expected payoff for an agent of that type 
                using that strategy against those strategies.";
psHash[type_, action_, strategies_] = {0, 0, 0}

baHash::usage = "(ba = best action). caches action range we need to try";
baHash[type_, strategies_] = {minAction[type], maxAction[type]}

makeData::usage = "makeData[myType, strategies] returns a list of 
                  {action, {n,s,ss}} pairs for the given type, based on 
                  psHash.";
makeData[myType_, strategies_List] := {#[[2]], psHash@@#}& /@ 
  Union[{{myType, minAction[myType], strategies}, 
         {myType, maxAction[myType], strategies}}, 
        Select[psHashDomain, (#[[1]]==myType && #[[3]]==strategies)&]];
  (* TODO: using the global psHashDomain updated by ps[] instead of 
           making the expensive domain[psHash] call each time; below too *)

allTypes::usage = "allTypes[strategies] returns a list of the types cached 
                  in psHash.";
allTypes[strategies_] := Union[First /@ Select[psHashDomain, 
                                               #[[3]]===strategies&]]

(****************************************************************************)

(* TODO: we might be able to simulate more points in less time by flooring
         the time constraint to an integer so we can use TimeConstrained. *)
(* TODO: allow an arbitrary distribution for agent types *)
ps::usage = "ps[myType, myAction, strategies] (ps = payoff stats) does a 
            montecarlo simulation to estimate expected payoff for an agent of 
            a certain type doing a certain action, when up against given 
            opponent strategies (functions mapping a type to an action).  
            It computes and caches (in psHash) the list {n,s,ss} for each 
            type-action simulated -- {n,s,ss} is the number of trials 
            simulated for the type-action pair, the sum of the payoffs, and 
            the sum of the squares of the payoffs.  To do the simulation, 
            this function uses the random type generator randType which 
            returns a random agent type according to the common knowledge 
            distribution.  It also assumes the existence of the payoff 
            function which maps my type, my action, and opponent actions to 
            my payoff.  Additionally, myType and myAction can be lists of 
            types/actions, in which case the function returns a list of 
            pairs -- {type,{n,s,ss}} if given a list of types, or 
            {action,{n,s,ss}} if given a list of actions, or 
            {{type,action},{n,s,ss}} if given both a list of types and a 
            list of actions (same thing if given a list of type-action 
            pairs).";
ps[myType_/;Head[myType]=!=List, 
   myAction_/;Head[myAction]=!=List, strategies_List] :=
  Module[{x, n = 0, s = 0, ss = 0},
    (* TODO: simulate till the point of diminishing returns *)
    While[n < 5000, (* TODO: don't hardcode *)
      x = payoff[myType, myAction, #[randType[]]& /@ strategies];
      n++; s += x; ss += x^2;];
    AbortProtect[psHash[myType, myAction, strategies] += {n, s, ss};
      (* TODO: maybe also update a global list for domain[psHash]
               since it's so expensive to call domain[psHash] a lot *)
      (* TODO: did it but this might be too expensive itself... *)
      If[psHash[myType, myAction, strategies] == {n, s, ss},
        psHashDomain = Union[psHashDomain, {{myType, myAction, strategies}}];
      ]
    ]
  ]

ps[myTypeList_List, myAction_/;Head[myAction]=!=List, strategies_List] :=
  Scan[ps[#, myAction, strategies]&, myTypeList]

ps[myType_/;Head[myType]=!=List, myActionList_List, strategies_List] :=
  Scan[ps[myType, #, strategies]&, myActionList]

(* note this does the type-action pairs with the least simulations first *)
(* TODO: could bias the time spent on each type-action pair so that they
         even themselves out. *)
(* TODO: for now, I just leave off the one that has the most simulations *)
ps[typeActionPairs:{{_,_}..}, strategies_List] := 
  Scan[ps[Sequence @@ #, strategies]&, 
       Drop[Sort[typeActionPairs, 
                 typeActionPairNeedierFunc[strategies]], -1]]

ps[myTypeList_List, myActionList_List, strategies_List] :=
  ps[Distribute[{myTypeList, myActionList}, List], strategies]


(* TODO: this should look at how big the 95% confidence intervals are and
         choose the type-action pair that has the biggest interval.
         But {0,0,0} should always be considered neediest. *)
typeActionPairNeedierFunc::usage = "typeActionPairNeedierFunc[strategies][ta1, 
                                   ta2] returns whether tp1 should be simulated
                                   before tp2, ie, tp1 has fewer simulations 
                                   under its belt so far.";
typeActionPairNeedierFunc[strategies_List] := Function[{tp1, tp2},
  psHash[Sequence@@tp1, strategies][[1]] < 
  psHash[Sequence@@tp2, strategies][[1]]
]

tryActions::usage = "tryActions[type, strategies, actionDelta] returns a list 
                    of actions to try (returns {type,action} pairs where all 
                    the types are the same) for the current type.  It decides 
                    on an action range that we're almost certain contains the 
                    best action.";
tryActions[type_, strategies_List, actionDelta_] := Module[{b, min, max, l},
  {b, {min, max}} = bestActionRange[type, strategies, confidence->1.-1/10^6];
 
(*
  l = Range[min, max, actionDelta];  (* TODO: don't make the whole range *)
  If[Last[l]=!=max, AppendTo[l, max]];
  If[Length[l]>=5, l = Drop[l, {3, -3}]];
  l = Union[l, {b}];

  {type, #}& /@ l
*)

  {type, #}& /@ Union[Range[min, max, actionDelta], {max}]
]

makeTypeActionPairs::usage = "makeTypeActionPairs[typeDelta, actionDelta] 
                             constructs a list of type-action pairs.  Options 
                             for minimum and maximum type can be specified, 
                             but only matter if they are within the ranges set 
                             by minType and maxType.";
makeTypeActionPairs[typeDelta_, actionDelta_, strategies_List, 
                    tMin_:-Infinity, tMax_:Infinity] := 
  Module[{start, t1, t2, ans},
    start = TimeUsed[];
    t1 = Max[minType, Min[maxType, tMin]];
    t2 = Max[minType, Min[maxType, tMax]]; 

    ans = tryActions[#, strategies, actionDelta]& /@ Range[t1, t2, typeDelta];

    Print["[makeTypeActionPairs: ", Plus@@(Length/@ans), " -> ", Length /@ ans,
(*
          Quotient[t2-t1+typeDelta, typeDelta], " types, ", 
          Length[ans], " pairs, ", 
          N[Length[ans]/Quotient[t2-t1+typeDelta, typeDelta]],
*)
          " pairs per type", 
          " in ", seconds2str[TimeUsed[] - start], "]"];   (* TODO *)

    Join @@ ans
  ]

(****************************************************************************)

showBestAction::usage = "showBestAction[myType, strategies] plots expected 
                        payoff vs myAction based on psHash.  Options allow 
                        setting the confidence level to use in showing the 
                        range for the true expected payoff for given actions.";
Options[showBestAction] = { confidence -> defaultConfidenceLevel, 
                            compareFuncs -> {}};
showBestAction[myType_, strategies_List, opts___] := 
Module[{opt, x, p1, p2, p3, t, best, min, max, reallyMin, reallyMax},
  opt[x_] := x /. {opts} /. Options[showBestAction];
  p1 = plotWithErrorBars[opt[confidence], minAction, maxAction,
                         makeData[myType, strategies], 
                         Identity];
  (* TODO: color code the dots based on confidence... *)
  {best, {min, max}} = bestActionRange[myType, strategies, 
                                       confidence->.95];  (* TODO hardcode? *)
  {best, {reallyMin, reallyMax}} = bestActionRange[myType, strategies,
                                                   confidence->1.-1/10^6];
                                       
  p2 = Graphics[{AbsolutePointSize[6],
         RGBColor[0,0,1],
         Point[{min, maxLikelihoodValue[psHash[myType, min, strategies]]}],
         Point[{max, maxLikelihoodValue[psHash[myType, max, strategies]]}],
         Point[{reallyMin, maxLikelihoodValue[psHash[myType, reallyMin, 
                                                     strategies]]}],
         Point[{reallyMax, maxLikelihoodValue[psHash[myType, reallyMax,
                                                     strategies]]}],
         RGBColor[0,1,0],
         Point[{best, maxLikelihoodValue[psHash[myType, best, strategies]]}]}];
  p3 = Plot[Evaluate[#[t]& /@ opt[compareFuncs]], {t, minAction[myType], 
                                                      maxAction[myType]}, 
                                                  PlotStyle->RGBColor[0,0,1],
                                                  DisplayFunction->Identity];
  Show[p1, p2, p3, PlotRange -> All, DisplayFunction -> $DisplayFunction]
     (* TODO: pass on any of opts except the ones 
              in Options[showBestResponse] to Show *)
]


maxLikelihoodValue[{n_, s_, ss_}] := If[n==0, 0, s/n]
maxLikelihoodValue[{x_, {n_, s_, ss_}}] := If[n==0, 0, s/n]


(* TODO: This is currently a very conservative estimate of the probability.
         call the maxLikelihood values a and b
         and let c be the midpoint between a and b, ie, (a+b)/2 
         This gives the probability that the true mean for a is less than 
           c times the probability that the true mean for b is greater than c.
         (so the true probability of a < b will be greater)
         If we could OR the above for all values of c from -inf to +inf, we'd 
           have the true answer... TODO!
         TODO: actually, let's just use the meanDifferenceTest adapted for
               when we only know {n,s,ss}...
*)
probLessThan::usage = "probLessThan[stats1, stats2] gives the probability that 
                      the true mean of the distribution from which the sample 
                      statistics stats1 were taken is less than the true mean 
                      of the distribution from which the sample statistics 
                      stats2 were taken.";
probLessThan[{n1_, s1_, ss1_}, {n2_, s2_, ss2_}] := Module[{c},
  c = (maxLikelihoodValue[{n1,s1,ss1}] + maxLikelihoodValue[{n2,s2,ss2}]) / 2;
  Which[n1 == 0 || n2 == 0,
    1/2,  (* TODO: does this make sense? *)
  probRange[n1,s1,ss1,c,c] == 1 && probRange[n2,s2,ss2,c,c] == 1,
    0,
  True,  
    probRange[n1, s1, ss1, -Infinity, c] * probRange[n2, s2, ss2, c, Infinity]
  ]
]

offset::usage = "offset[n, me, list] finds me's position, adds n to that, and 
                returns the element at that position in the list, or the first 
                (last) if that would take us out of the list.  This will crash 
                if me is not in list.";
offset[n_, me_, list_List] := Module[{pos},
  pos = Position[list, me][[1,1]];
  list[[Max[1, Min[Length[list], pos + n]]]]
]

maxRange::usage = "maxRange[conf, points] takes a set of x values with their 
                  goodnesses expressed as {n, s, ss} and return a range of x 
                  values that has probability conf of containing the best x --
                  actual return value is {maximumLikelihoodMax, 
                  {lowerBoundOnMax, upperBoundOnMax}}.";
maxRange[conf_, points:{{_, {_, _, _}}..}] := 
  Module[{sorted, likelyBest, candidates},
    sorted = Sort[points, #1[[1]] < #2[[1]]&];
      (* Print["sorted is ", First/@sorted]; *)
    likelyBest = argMax[maxLikelihoodValue, sorted];
      (* Print["likely best is ", likelyBest]; *)
    candidates = (* points we're not so sure are worse than the likelyBest *)
      Select[sorted, 
        (probLessThan[#[[2]], likelyBest[[2]]] <= conf 
         || #[[2,1]] < 4)&];   (* almost no trials for this point *)
    likelyBest = First[likelyBest];
    candidates = Append[First /@ candidates, likelyBest];
    sorted = First /@ sorted;
      (* Print["candidates are ", candidates]; *)
    {likelyBest, 
      {offset[-1, Min[candidates], sorted], 
       offset[+1, Max[candidates], sorted]}}
  ]


bestActionRange::usage = "bestActionRange[myType, strategies] is similar to 
                         showBestAction but it automatically looks at the 
                         graph and returns a range of actions that is likely 
                         to contain the best action. (return value is of the 
                         same form as maxRange)";
Options[bestActionRange] = { confidence -> defaultConfidenceLevel};
bestActionRange[myType_, strategies_List, opts___Rule] := 
Module[{opt, x, b, min, max},
  opt[x_] := x /. {opts} /. Options[bestActionRange];
  {b, {min, max}} = maxRange[opt[confidence], makeData[myType, strategies]];
  (* TODO: baHash[myType, strategies] = {min, max} -- no, need higher conf *)
  {b, {min, max}}
]

showBestResponse::usage = "showBestResponse[strategies] plots a best response 
                          strategy (a strategy is a mapping from type to 
                          action) to the given opponent strategies.";
Options[showBestResponse] = {confidence -> defaultConfidenceLevel,
                             compareFuncs -> {}};
showBestResponse[strategies_List, opts___Rule] := 
Module[{opt, x, p1, p2, p3, t, a, b},
  opt[x_] := x /. {opts} /. Options[showBestResponse];

  p1 = Plot[Evaluate[#[t]& /@ strategies], {t, minType, maxType}, 
            Frame -> True, Axes -> False, DisplayFunction -> Identity];

  p3 = Plot[#[t]& /@ opt[compareFuncs], {t, minType, maxType}, 
         Frame -> True, Axes -> False, PlotStyle->RGBColor[0,0,1],
         DisplayFunction->Identity];

  p2 = plotWithErrorBars[
         {#, bestActionRange[#, strategies, confidence -> opt[confidence]]}& /@
            allTypes[strategies], Identity];
   
  Show[p1, p2, p3, PlotRange -> All, DisplayFunction -> $DisplayFunction]
]

End[];   (* Private context *)
EndPackage[];