-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathNode.py
More file actions
74 lines (65 loc) · 2.48 KB
/
Copy pathNode.py
File metadata and controls
74 lines (65 loc) · 2.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
from math import *
import numpy as np
#TODO
#Add Probabilities
# 1. total Visit Count (Done) N(s,a)
# 2. Total Action Value W(s,a)
# 3. Mean Action Value Q(s,a)
# 4. Prob from Model P(s,a)
#
# s = board representation
# a = move
class UCTNode:
    """A node in the game tree.

    Note wins is always from the viewpoint of playerJustMoved.
    Crashes (AttributeError) if state is not specified, because the
    constructor reads ``state.get_plays()`` and ``state.player1_turn``.
    """

    def __init__(self, move=None, parent=None, state=None, prev_score=None):
        """Create a node reached by playing ``move`` from ``parent``.

        Args:
            move: The move that got us to this node; None for the root node.
            parent: The parent UCTNode; None for the root node.
            state: Game state at this node. Must provide ``get_plays()`` and
                a ``player1_turn`` attribute; it is required even though the
                default is None.
            prev_score: Weight applied to this node's exploration term when
                its parent selects among children (presumably the model's
                prior P(s,a) -- confirm against the caller). It must be a
                number for any node that is used as a child in
                UCTSelectChild.
        """
        self.move = move
        self.parentNode = parent
        self.childNodes = []
        self.wins = 0
        self.visits = 0
        # Moves not yet expanded into child nodes (future child nodes).
        self.untriedMoves = state.get_plays()
        # The only part of the state that the node needs later.
        self.player1_turn = state.player1_turn
        # Multiplier on the exploration term in UCTSelectChild.
        self.puct_ratio = 2.8
        self.prev_score = prev_score

    def UCTSelectChild(self):
        """Select the child with the highest prior-weighted UCT score.

        The score of a child ``c`` is::

            c.wins / c.visits
                + puct_ratio * c.prev_score * sqrt(self.visits) / (1 + c.visits)

        A child with zero visits contributes a mean value of 0 rather than
        raising ZeroDivisionError, so it is ranked by its exploration term
        alone. When several children share the best score, the one added
        last is returned.

        Returns:
            The selected child UCTNode.

        Raises:
            IndexError: If this node has no children.
        """
        if not self.childNodes:
            raise IndexError("UCTSelectChild called on a node with no children")

        # Same for every child, so compute it once.
        sqrt_parent_visits = sqrt(self.visits)

        def score(child):
            # No results recorded yet: use 0 instead of dividing by zero.
            mean_value = child.wins / child.visits if child.visits else 0
            exploration = (self.puct_ratio * child.prev_score
                           * sqrt_parent_visits / (1 + child.visits))
            return mean_value + exploration

        # max() returns the first maximal item it sees; scanning in reverse
        # therefore returns the last-added child on ties.
        return max(reversed(self.childNodes), key=score)

    def AddChild(self, m, s, p):
        """Remove m from untriedMoves and add a new child node for this move.

        Args:
            m: The move being expanded; must currently be in untriedMoves.
            s: The game state reached after playing ``m``.
            p: The prev_score to store on the new child.

        Returns:
            The added child node.
        """
        child = UCTNode(move=m, parent=self, state=s, prev_score=p)
        self.untriedMoves.remove(m)
        self.childNodes.append(child)
        return child

    def Update(self, result):
        """Record one additional visit and ``result`` additional wins.

        result must be from the viewpoint of playerJustMoved.
        """
        self.visits += 1
        self.wins += result

    def __repr__(self):
        """Return ``[M:<move> W/V:<wins>/<visits> U:<untriedMoves>]``."""
        return "[M:{} W/V:{}/{} U:{}]".format(
            str(self.move), str(self.wins), str(self.visits),
            str(self.untriedMoves))

    def TreeToString(self, indent):
        """Return this subtree as text, one node per line.

        Each node is rendered with ``str()`` and prefixed by
        ``IndentString`` of its depth, where this node's depth is ``indent``.
        """
        pieces = [self.IndentString(indent) + str(self)]
        pieces.extend(child.TreeToString(indent + 1)
                      for child in self.childNodes)
        return "".join(pieces)

    def IndentString(self, indent):
        """Return a newline followed by ``indent`` copies of ``"| "``."""
        return "\n" + "| " * indent

    def ChildrenToString(self):
        """Return the direct children, one ``str(child)`` per line."""
        return "".join(str(child) + "\n" for child in self.childNodes)