aimacode · pctablet505 · Jan 20, 2021 · Jan 20, 2021 · Jan 20, 2021 · Jan 23, 2021
diff --git a/agents4e.py b/agents4e.py
@@ -34,16 +34,17 @@
 # What if the grabber moves?
 # Speed control in GUI does not have any effect -- fix it.
 
-from utils4e import distance_squared, turn_heading
+import collections
+import copy
+import numbers
+import random
 from statistics import mean
-from ipythonblocks import BlockGrid
-from IPython.display import HTML, display, clear_output
 from time import sleep
 
-import random
-import copy
-import collections
-import numbers
+from IPython.display import HTML, display, clear_output
+from ipythonblocks import BlockGrid
+
+from utils4e import distance_squared, turn_heading
 
 
 # ______________________________________________________________________________
@@ -645,23 +646,6 @@ def get_world(self):
             result.append(row)
         return result
 
-    """
-    def run(self, steps=1000, delay=1):
-        "" "Run the Environment for given number of time steps,
-        but update the GUI too." ""
-        for step in range(steps):
-            sleep(delay)
-            if self.visible:
-                self.reveal()
-            if self.is_done():
-                if self.visible:
-                    self.reveal()
-                return
-            self.step()
-        if self.visible:
-            self.reveal()
-    """
-
     def run(self, steps=1000, delay=1):
         """Run the Environment for given number of time steps,
         but update the GUI too."""
@@ -679,7 +663,9 @@ def update(self, delay=1):
     def reveal(self):
         """Display the BlockGrid for this world - the last thing to be added
         at a location defines the location color."""
+
         self.draw_world()
+
         # wait for the world to update and
         # apply changes to the same grid instead
         # of making a new one.
@@ -875,10 +861,10 @@ def __init__(self, agent_program, width=6, height=6):
     def init_world(self, program):
         """Spawn items in the world based on probabilities from the book"""
 
-        "WALLS"
+        # WALLS
         self.add_walls()
 
-        "PITS"
+        # PITS
         for x in range(self.x_start, self.x_end):
             for y in range(self.y_start, self.y_end):
                 if random.random() < self.pit_probability:
@@ -888,18 +874,18 @@ def init_world(self, program):
                     self.add_thing(Breeze(), (x + 1, y), True)
                     self.add_thing(Breeze(), (x, y + 1), True)
 
-        "WUMPUS"
+        # WUMPUS
         w_x, w_y = self.random_location_inbounds(exclude=(1, 1))
         self.add_thing(Wumpus(lambda x: ""), (w_x, w_y), True)
         self.add_thing(Stench(), (w_x - 1, w_y), True)
         self.add_thing(Stench(), (w_x + 1, w_y), True)
         self.add_thing(Stench(), (w_x, w_y - 1), True)
         self.add_thing(Stench(), (w_x, w_y + 1), True)
 
-        "GOLD"
+        # GOLD
         self.add_thing(Gold(), self.random_location_inbounds(exclude=(1, 1)), True)
 
-        "AGENT"
+        # AGENT
         self.add_thing(Explorer(program), (1, 1), True)
 
     def get_world(self, show_walls=True):

diff --git a/games4e.py b/games4e.py
@@ -27,16 +27,16 @@ def max_value(state):
         if game.terminal_test(state):
             return game.utility(state, player)
         v = -np.inf
-        for a in game.actions(state):
-            v = max(v, min_value(game.result(state, a)))
+        for action in game.actions(state):
+            v = max(v, min_value(game.result(state, action)))
         return v
 
     def min_value(state):
         if game.terminal_test(state):
             return game.utility(state, player)
         v = np.inf
-        for a in game.actions(state):
-            v = min(v, max_value(game.result(state, a)))
+        for action in game.actions(state):
+            v = min(v, max_value(game.result(state, action)))
         return v
 
     # Body of minmax_decision:
@@ -56,14 +56,14 @@ def expect_minmax(state, game):
 
     def max_value(state):
         v = -np.inf
-        for a in game.actions(state):
-            v = max(v, chance_node(state, a))
+        for action in game.actions(state):
+            v = max(v, chance_node(state, action))
         return v
 
     def min_value(state):
         v = np.inf
-        for a in game.actions(state):
-            v = min(v, chance_node(state, a))
+        for action in game.actions(state):
+            v = min(v, chance_node(state, action))
         return v
 
     def chance_node(state, action):
@@ -83,7 +83,7 @@ def chance_node(state, action):
         return sum_chances / num_chances
 
     # Body of expect_min_max:
-    return max(game.actions(state), key=lambda a: chance_node(state, a), default=None)
+    return max(game.actions(state), key=lambda action: chance_node(state, action), default=None)
 
 
 def alpha_beta_search(state, game):
@@ -97,8 +97,8 @@ def max_value(state, alpha, beta):
         if game.terminal_test(state):
             return game.utility(state, player)
         v = -np.inf
-        for a in game.actions(state):
-            v = max(v, min_value(game.result(state, a), alpha, beta))
+        for action in game.actions(state):
+            v = max(v, min_value(game.result(state, action), alpha, beta))
             if v >= beta:
                 return v
             alpha = max(alpha, v)
@@ -108,8 +108,8 @@ def min_value(state, alpha, beta):
         if game.terminal_test(state):
             return game.utility(state, player)
         v = np.inf
-        for a in game.actions(state):
-            v = min(v, max_value(game.result(state, a), alpha, beta))
+        for action in game.actions(state):
+            v = min(v, max_value(game.result(state, action), alpha, beta))
             if v <= alpha:
                 return v
             beta = min(beta, v)
@@ -119,11 +119,11 @@ def min_value(state, alpha, beta):
     best_score = -np.inf
     beta = np.inf
     best_action = None
-    for a in game.actions(state):
-        v = min_value(game.result(state, a), best_score, beta)
+    for action in game.actions(state):
+        v = min_value(game.result(state, action), best_score, beta)
         if v > best_score:
             best_score = v
-            best_action = a
+            best_action = action
     return best_action
 
 
@@ -138,8 +138,8 @@ def max_value(state, alpha, beta, depth):
         if cutoff_test(state, depth):
             return eval_fn(state)
         v = -np.inf
-        for a in game.actions(state):
-            v = max(v, min_value(game.result(state, a), alpha, beta, depth + 1))
+        for action in game.actions(state):
+            v = max(v, min_value(game.result(state, action), alpha, beta, depth + 1))
             if v >= beta:
                 return v
             alpha = max(alpha, v)
@@ -149,8 +149,8 @@ def min_value(state, alpha, beta, depth):
         if cutoff_test(state, depth):
             return eval_fn(state)
         v = np.inf
-        for a in game.actions(state):
-            v = min(v, max_value(game.result(state, a), alpha, beta, depth + 1))
+        for action in game.actions(state):
+            v = min(v, max_value(game.result(state, action), alpha, beta, depth + 1))
             if v <= alpha:
                 return v
             beta = min(beta, v)
@@ -163,11 +163,11 @@ def min_value(state, alpha, beta, depth):
     best_score = -np.inf
     beta = np.inf
     best_action = None
-    for a in game.actions(state):
-        v = min_value(game.result(state, a), best_score, beta, 1)
+    for action in game.actions(state):
+        v = min_value(game.result(state, action), best_score, beta, 1)
         if v > best_score:
             best_score = v
-            best_action = a
+            best_action = action
     return best_action
 
 
@@ -176,19 +176,18 @@ def min_value(state, alpha, beta, depth):
 
 
 def monte_carlo_tree_search(state, game, N=1000):
-    def select(n):
+    def select(node):
         """select a leaf node in the tree"""
-        if n.children:
-            return select(max(n.children.keys(), key=ucb))
-        else:
-            return n
+        if node.children:
+            return select(max(node.children, key=ucb))
+        return node
 
-    def expand(n):
+    def expand(node):
         """expand the leaf node by adding all its children states"""
-        if not n.children and not game.terminal_test(n.state):
-            n.children = {MCT_Node(state=game.result(n.state, action), parent=n): action
-                          for action in game.actions(n.state)}
-        return select(n)
+        if not node.children and not game.terminal_test(node.state):
+            node.children = {MCT_Node(state=game.result(node.state, action), parent=node): action
+                             for action in game.actions(node.state)}
+        return select(node)
 
     def simulate(game, state):
         """simulate the utility of current state by random picking a step"""
@@ -199,15 +198,15 @@ def simulate(game, state):
         v = game.utility(state, player)
         return -v
 
-    def backprop(n, utility):
+    def backprop(node, utility):
         """passing the utility back to all parent nodes"""
         if utility > 0:
-            n.U += utility
+            node.U += utility
         # if utility == 0:
-        #     n.U += 0.5
-        n.N += 1
-        if n.parent:
-            backprop(n.parent, -utility)
+        #     node.U += 0.5
+        node.N += 1
+        if node.parent:
+            backprop(node.parent, -utility)
 
     root = MCT_Node(state=state)
 
@@ -275,7 +274,7 @@ class Game:
     be done in the constructor."""
 
     def actions(self, state):
-        """Return a list of the allowable moves at this point."""
+        """Return a list of the legal moves at this point."""
         raise NotImplementedError
 
     def result(self, state, move):
@@ -348,18 +347,18 @@ def play_game(self, *players):
 class Fig52Game(Game):
     """The game represented in [Figure 5.2]. Serves as a simple test case."""
 
-    succs = dict(A=dict(a1='B', a2='C', a3='D'),
-                 B=dict(b1='B1', b2='B2', b3='B3'),
-                 C=dict(c1='C1', c2='C2', c3='C3'),
-                 D=dict(d1='D1', d2='D2', d3='D3'))
+    successors = dict(A=dict(a1='B', a2='C', a3='D'),
+                      B=dict(b1='B1', b2='B2', b3='B3'),
+                      C=dict(c1='C1', c2='C2', c3='C3'),
+                      D=dict(d1='D1', d2='D2', d3='D3'))
     utils = dict(B1=3, B2=12, B3=8, C1=2, C2=4, C3=6, D1=14, D2=5, D3=2)
     initial = 'A'
 
     def actions(self, state):
-        return list(self.succs.get(state, {}).keys())
+        return list(self.successors.get(state, {}))
 
     def result(self, state, move):
-        return self.succs[state][move]
+        return self.successors[state][move]
 
     def utility(self, state, player):
         if player == 'MAX':
@@ -377,14 +376,14 @@ def to_move(self, state):
 class Fig52Extended(Game):
     """Similar to Fig52Game but bigger. Useful for visualisation"""
 
-    succs = {i: dict(l=i * 3 + 1, m=i * 3 + 2, r=i * 3 + 3) for i in range(13)}
+    successors = {i: dict(l=i * 3 + 1, m=i * 3 + 2, r=i * 3 + 3) for i in range(13)}
     utils = dict()
 
     def actions(self, state):
-        return sorted(list(self.succs.get(state, {}).keys()))
+        return sorted(self.successors.get(state, {}))
 
     def result(self, state, move):
-        return self.succs[state][move]
+        return self.successors[state][move]
 
     def utility(self, state, player):
         if player == 'MAX':