Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 15 additions & 29 deletions agents4e.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,17 @@
# What if the grabber moves?
# Speed control in GUI does not have any effect -- fix it.

from utils4e import distance_squared, turn_heading
import collections
import copy
import numbers
import random
from statistics import mean
from ipythonblocks import BlockGrid
from IPython.display import HTML, display, clear_output
from time import sleep

import random
import copy
import collections
import numbers
from IPython.display import HTML, display, clear_output
from ipythonblocks import BlockGrid

from utils4e import distance_squared, turn_heading


# ______________________________________________________________________________
Expand Down Expand Up @@ -645,23 +646,6 @@ def get_world(self):
result.append(row)
return result

"""
def run(self, steps=1000, delay=1):
"" "Run the Environment for given number of time steps,
but update the GUI too." ""
for step in range(steps):
sleep(delay)
if self.visible:
self.reveal()
if self.is_done():
if self.visible:
self.reveal()
return
self.step()
if self.visible:
self.reveal()
"""

def run(self, steps=1000, delay=1):
"""Run the Environment for given number of time steps,
but update the GUI too."""
Expand All @@ -679,7 +663,9 @@ def update(self, delay=1):
def reveal(self):
"""Display the BlockGrid for this world - the last thing to be added
at a location defines the location color."""

self.draw_world()

# wait for the world to update and
# apply changes to the same grid instead
# of making a new one.
Expand Down Expand Up @@ -875,10 +861,10 @@ def __init__(self, agent_program, width=6, height=6):
def init_world(self, program):
"""Spawn items in the world based on probabilities from the book"""

"WALLS"
# WALLS
self.add_walls()

"PITS"
# PITS
for x in range(self.x_start, self.x_end):
for y in range(self.y_start, self.y_end):
if random.random() < self.pit_probability:
Expand All @@ -888,18 +874,18 @@ def init_world(self, program):
self.add_thing(Breeze(), (x + 1, y), True)
self.add_thing(Breeze(), (x, y + 1), True)

"WUMPUS"
# WUMPUS
w_x, w_y = self.random_location_inbounds(exclude=(1, 1))
self.add_thing(Wumpus(lambda x: ""), (w_x, w_y), True)
self.add_thing(Stench(), (w_x - 1, w_y), True)
self.add_thing(Stench(), (w_x + 1, w_y), True)
self.add_thing(Stench(), (w_x, w_y - 1), True)
self.add_thing(Stench(), (w_x, w_y + 1), True)

"GOLD"
# GOLD
self.add_thing(Gold(), self.random_location_inbounds(exclude=(1, 1)), True)

"AGENT"
# AGENT
self.add_thing(Explorer(program), (1, 1), True)

def get_world(self, show_walls=True):
Expand Down
97 changes: 48 additions & 49 deletions games4e.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,16 @@ def max_value(state):
if game.terminal_test(state):
return game.utility(state, player)
v = -np.inf
for a in game.actions(state):
v = max(v, min_value(game.result(state, a)))
for action in game.actions(state):
v = max(v, min_value(game.result(state, action)))
return v

def min_value(state):
if game.terminal_test(state):
return game.utility(state, player)
v = np.inf
for a in game.actions(state):
v = min(v, max_value(game.result(state, a)))
for action in game.actions(state):
v = min(v, max_value(game.result(state, action)))
return v

# Body of minmax_decision:
Expand All @@ -56,14 +56,14 @@ def expect_minmax(state, game):

def max_value(state):
v = -np.inf
for a in game.actions(state):
v = max(v, chance_node(state, a))
for action in game.actions(state):
v = max(v, chance_node(state, action))
return v

def min_value(state):
v = np.inf
for a in game.actions(state):
v = min(v, chance_node(state, a))
for action in game.actions(state):
v = min(v, chance_node(state, action))
return v

def chance_node(state, action):
Expand All @@ -83,7 +83,7 @@ def chance_node(state, action):
return sum_chances / num_chances

# Body of expect_min_max:
return max(game.actions(state), key=lambda a: chance_node(state, a), default=None)
return max(game.actions(state), key=lambda action: chance_node(state, action), default=None)


def alpha_beta_search(state, game):
Expand All @@ -97,8 +97,8 @@ def max_value(state, alpha, beta):
if game.terminal_test(state):
return game.utility(state, player)
v = -np.inf
for a in game.actions(state):
v = max(v, min_value(game.result(state, a), alpha, beta))
for action in game.actions(state):
v = max(v, min_value(game.result(state, action), alpha, beta))
if v >= beta:
return v
alpha = max(alpha, v)
Expand All @@ -108,8 +108,8 @@ def min_value(state, alpha, beta):
if game.terminal_test(state):
return game.utility(state, player)
v = np.inf
for a in game.actions(state):
v = min(v, max_value(game.result(state, a), alpha, beta))
for action in game.actions(state):
v = min(v, max_value(game.result(state, action), alpha, beta))
if v <= alpha:
return v
beta = min(beta, v)
Expand All @@ -119,11 +119,11 @@ def min_value(state, alpha, beta):
best_score = -np.inf
beta = np.inf
best_action = None
for a in game.actions(state):
v = min_value(game.result(state, a), best_score, beta)
for action in game.actions(state):
v = min_value(game.result(state, action), best_score, beta)
if v > best_score:
best_score = v
best_action = a
best_action = action
return best_action


Expand All @@ -138,8 +138,8 @@ def max_value(state, alpha, beta, depth):
if cutoff_test(state, depth):
return eval_fn(state)
v = -np.inf
for a in game.actions(state):
v = max(v, min_value(game.result(state, a), alpha, beta, depth + 1))
for action in game.actions(state):
v = max(v, min_value(game.result(state, action), alpha, beta, depth + 1))
if v >= beta:
return v
alpha = max(alpha, v)
Expand All @@ -149,8 +149,8 @@ def min_value(state, alpha, beta, depth):
if cutoff_test(state, depth):
return eval_fn(state)
v = np.inf
for a in game.actions(state):
v = min(v, max_value(game.result(state, a), alpha, beta, depth + 1))
for action in game.actions(state):
v = min(v, max_value(game.result(state, action), alpha, beta, depth + 1))
if v <= alpha:
return v
beta = min(beta, v)
Expand All @@ -163,11 +163,11 @@ def min_value(state, alpha, beta, depth):
best_score = -np.inf
beta = np.inf
best_action = None
for a in game.actions(state):
v = min_value(game.result(state, a), best_score, beta, 1)
for action in game.actions(state):
v = min_value(game.result(state, action), best_score, beta, 1)
if v > best_score:
best_score = v
best_action = a
best_action = action
return best_action


Expand All @@ -176,19 +176,18 @@ def min_value(state, alpha, beta, depth):


def monte_carlo_tree_search(state, game, N=1000):
def select(n):
def select(node):
"""select a leaf node in the tree"""
if n.children:
return select(max(n.children.keys(), key=ucb))
else:
return n
if node.children:
return select(max(node.children.keys(), key=ucb))
return node

def expand(n):
def expand(node):
"""expand the leaf node by adding all its children states"""
if not n.children and not game.terminal_test(n.state):
n.children = {MCT_Node(state=game.result(n.state, action), parent=n): action
for action in game.actions(n.state)}
return select(n)
if not node.children and not game.terminal_test(node.state):
node.children = {MCT_Node(state=game.result(node.state, action), parent=node): action
for action in game.actions(node.state)}
return select(node)

def simulate(game, state):
"""simulate the utility of current state by random picking a step"""
Expand All @@ -199,15 +198,15 @@ def simulate(game, state):
v = game.utility(state, player)
return -v

def backprop(n, utility):
def backprop(node, utility):
"""passing the utility back to all parent nodes"""
if utility > 0:
n.U += utility
node.U += utility
# if utility == 0:
# n.U += 0.5
n.N += 1
if n.parent:
backprop(n.parent, -utility)
# node.U += 0.5
node.N += 1
if node.parent:
backprop(node.parent, -utility)

root = MCT_Node(state=state)

Expand Down Expand Up @@ -275,7 +274,7 @@ class Game:
be done in the constructor."""

def actions(self, state):
"""Return a list of the allowable moves at this point."""
"""Return a list of the legal moves at this point."""
raise NotImplementedError

def result(self, state, move):
Expand Down Expand Up @@ -348,18 +347,18 @@ def play_game(self, *players):
class Fig52Game(Game):
"""The game represented in [Figure 5.2]. Serves as a simple test case."""

succs = dict(A=dict(a1='B', a2='C', a3='D'),
B=dict(b1='B1', b2='B2', b3='B3'),
C=dict(c1='C1', c2='C2', c3='C3'),
D=dict(d1='D1', d2='D2', d3='D3'))
successors = dict(A=dict(a1='B', a2='C', a3='D'),
B=dict(b1='B1', b2='B2', b3='B3'),
C=dict(c1='C1', c2='C2', c3='C3'),
D=dict(d1='D1', d2='D2', d3='D3'))
utils = dict(B1=3, B2=12, B3=8, C1=2, C2=4, C3=6, D1=14, D2=5, D3=2)
initial = 'A'

def actions(self, state):
return list(self.succs.get(state, {}).keys())
return list(self.successors.get(state, {}).keys())

def result(self, state, move):
return self.succs[state][move]
return self.successors[state][move]

def utility(self, state, player):
if player == 'MAX':
Expand All @@ -377,14 +376,14 @@ def to_move(self, state):
class Fig52Extended(Game):
"""Similar to Fig52Game but bigger. Useful for visualisation"""

succs = {i: dict(l=i * 3 + 1, m=i * 3 + 2, r=i * 3 + 3) for i in range(13)}
successors = {i: dict(l=i * 3 + 1, m=i * 3 + 2, r=i * 3 + 3) for i in range(13)}
utils = dict()

def actions(self, state):
return sorted(list(self.succs.get(state, {}).keys()))
return sorted(list(self.successors.get(state, {}).keys()))

def result(self, state, move):
return self.succs[state][move]
return self.successors[state][move]

def utility(self, state, player):
if player == 'MAX':
Expand Down
Loading