A snake game was created using Pygame and I tried to solve it using an AI. Initially I didn't increase the body length to check if the snake head moves towards the food. Grid size is 5*5. DDQN network was used. Most of time the head moves towards the wall or gets struck in a continuous loop.The maximum score attained was 4 even if I train it for 5000 episodes.
State: It is an array of size 16. The first 8 values has the distance between the head and wall at 8 directions(left , left top, top, right top, right , right bottom, bottom, left bottom). Next 8 values has the distance between head and food at 8 directions. All the values are in the range 0 to 1. 1 means the object is near and 0 means it is very far.
Action : There are 3 actions 0,1,2. 0- Head moves in same direction. 1- Head turns left. 2- Head turns right.
Reward: Reward of +50 if it collects the food and reward of -200 if it touches the wall.
I am not able to understand why my neural network learns in the wrong way. Please do help me solve this issue. I have attached the code here.
Code:
import pygame
pygame.font.init()
import time
import random
import numpy as np
from math import hypot
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
import tensorflow as tf
import os
seeds = 1001
os.environ['PYTHONHASHSEED']=str(seeds)
np.random.seed(seeds)
random.seed(seeds)
tf.random.set_seed(seeds)
batch = 16
class Food:
def __init__(self,width,n):
self.width = width
self.n = n
self.size = width //n
self.blocks = self.blocks_total()
def blocks_total(self):# Total available positions
x = y = 0
s=[]
for i in range(self.n):
for j in range(self.n):
s.append([x,y])
y += self.size
x += self.size
y = 0
return s
def food_pos(self,s):# Random food position
food_blocks = self.blocks_total()
try:
for i in self.blocks:
for j in s:
if i[0] == j[0] and i[1] == j[1]:
food_blocks.remove([j[0],j[1]])
break
a = random.choice(food_blocks)
return a
except:
return 0.1,0.1
class Agent:
def __init__(self,width,n,state_size,action_size=3,gamma = 0.98):
self.width = width
self.n = n
self.size = width //n
self.state_size = state_size
self.action_size = action_size
self.gamma = gamma
self.epsilon = 1
self.epsilon_min = 0
self.epsilon_decay = 0.99
self.memory = deque(maxlen=5000)
self.model = self.build_model()
self.train_model = self.build_model()
def reshape(self,state):# Reshaping state for input in nueral network
return np.reshape(state,[1,state.shape[0]])
def build_model(self):# Nueral network
model = Sequential()
model.add(Dense(16,input_shape=(self.state_size,),activation='relu'))
model.add(Dense(12,activation='relu'))
model.add(Dense(12,activation='relu'))
model.add(Dense(3,activation='softmax'))
model.compile(loss='mse',optimizer=Adam(0.0001))
return model
def get_action(self,state):
if np.random.rand() <= self.epsilon:
return random.randint(0,2)
a = self.reshape(state)
p = self.model.predict(a)
return np.argmax(p[0])
def remember(self,state,action,reward,new_state,done): # Saving in memory
self.memory.append((state,action,reward,new_state,done))
def replay(self): # Training of nueral network
minibatch = random.sample(self.memory,batch)
for state,action,reward,new_state,done in minibatch:
target = reward
state = self.reshape(state)
new_state = self.reshape(new_state)
if not done:
target = reward +(self.gamma*(np.max(self.train_model.predict(new_state)[0])))
target_f = self.model.predict(state)
target_f[0][action] = target
self.model.fit(state,target_f,epochs =1, verbose = 0)
if self.epsilon > self.epsilon_min:
self.epsilon *= self.epsilon_decay
def save_model(self):
self.model.save_weights('nn1.h5')
def load_model(self):
self.train_model.load_weights('nn1.h5')
class Game:
def __init__(self,width=500,n=5):
self.width = width
self.n = n
self.size = width//n
self.display = pygame.display.set_mode((width,width))
pygame.display.set_caption('A')
self.food = Food(width,n)
self.agent = Agent(width, n, state_size=16)
self.game_over = False
self.dirc = 'r'# starting direction of snake
self.snake_list = []
self.snake_length = 1
self.wall_touch = False
def display_player(self,disp,s_list,dirc):# displaying the snake head and its eyes
l = len(s_list)
if l > self.snake_length:
del s_list[0]
l -=1
for idx,i in enumerate(s_list):
if idx == l-1:
pygame.draw.rect(disp,(255,255,255),(i[0],i[1],self.size-1,self.size-1))
else:
pygame.draw.rect(disp,(255,165,0),(i[0],i[1],self.size-1,self.size-1))
a = s_list.copy()
x,y = a.pop()
rad = 10
if dirc == 'u':
pygame.draw.circle(disp,(0,0,0),(x+50,y+30),rad)
elif dirc == 'd':
pygame.draw.circle(disp,(0,0,0),(x+50,y+70),rad)
elif dirc == 'r':
pygame.draw.circle(disp,(0,0,0),(x+70,y+49),rad)
elif dirc == 'l':
pygame.draw.circle(disp,(0,0,0),(x+30,y+49),rad)
def move(self,dirc,s_list):# Constantly moving the snake on that particular direction
x,y = s_list.pop()
if dirc == 'l':
x -= self.size
if x <0:
x = 0
self.wall_touch = True
elif dirc == 'r':
x += self.size
if x > self.width - self.size:
x = self.width - self.size
self.wall_touch = True
elif dirc == 'u':
y -= self.size
if y <0:
y = 0
self.wall_touch = True
elif dirc == 'd':
y += self.size
if y > self.width - self.size:
y = self.width - self.size
self.wall_touch = True
self.snake_list.append([x,y])
def check_food_collect(self,fx,fy,s_list):# Check if head position and food position are same
x,y =s_list.pop()
if x == fx and y == fy:
return True
return False
def display_msg(self,msg,font='freesansbold.ttf',size=15,color=(255,255,255),loc=(410,15)):
mymsg = pygame.font.Font(font,size).render(msg,True,color)
self.display.blit(mymsg,loc)
def get_direction(self,change):# Changing the direction of snake based on the current direction
if self.dirc == 'r':
if change == 'r':
self.dirc= 'd'
elif change == 'l':
self.dirc= 'u'
elif self.dirc == 'l':
if change == 'r':
self.dirc= 'u'
elif change == 'l':
self.dirc= 'd'
elif self.dirc == 'd' :
if change == 'r':
self.dirc= 'l'
elif change == 'l':
self.dirc= 'r'
elif self.dirc == 'u':
if change == 'r':
self.dirc= 'r'
elif change == 'l':
self.dirc= 'l'
def near_wall(self,n,s_list): # Distance of nearby wall
a,b = n
x,y = s_list.pop()
i =0
while True:
xx = x +(self.size * i * a)
yy = y +(self.size * i * b)
dis = abs(xx-x)/self.size,abs(yy-y)/self.size
i +=1
if xx <0 or yy <0 or xx > self.width - self.size or yy > self.width - self.size:
return 1/ hypot(dis[0],dis[1])
def get_wall_dis(self,s_list): # Wall distance at 8 direction
j = [[-1,0],[-1,-1],[0,-1],[1,-1],[1,0],[1,1],[0,1],[-1,1]]
s = []
for i in j:
s.append(self.near_wall(i, s_list.copy()))
s = np.asarray(s)
return s
def near_food(self,fx,fy,n,s_list):# Head looks at 8 direction for the food
a,b = n
x,y = s_list.pop()
i =0
while True:
xx = x +(self.size * i * a)
yy = y +(self.size * i * b)
dis = abs(x-fx)/self.size,abs(fy-y)/self.size
i +=1
if xx <0 or yy <0 or xx > self.width - self.size or yy > self.width - self.size:
return 0
else:
if xx == fx and yy == fy:
if dis[0] ==0 and dis[1] ==0:
return 0
return 1/hypot(dis[0],dis[1])
def get_state(self,fx,fy,s_list,w): # Array of size 16
j = [[-1,0],[-1,-1],[0,-1],[1,-1],[1,0],[1,1],[0,1],[-1,1]]
s = []
for i in j:
s.append(self.near_food(fx,fy,i, s_list.copy()))
s = np.asarray(s)
a = np.append(w,s)
return a
def reset(self): # Initialising the value when a new game is started
self.agent.load_model()
self.game_over = False
self.wall_touch = False
self.dirc = 'r'
self.snake_length = 1
self.snake_list = []
def startgame(self,e):
sx,sy = 0,0 # Starting position of snake
self.snake_list.append([sx,sy])
fx,fy = 200,200 # Initial position of food
step = 0
action = 0
score = 0
change = None
wall = self.get_wall_dis(self.snake_list.copy())
state = self.get_state(fx, fy, self.snake_list.copy(), wall)
reward = 0
t = 0
save = False
j = 0
while not self.game_over:
j +=1
if j >500: # If snake struck in continuous loop
print('Ended')
break
action = 0
action = self.agent.get_action(state)
if action ==1:
change = 'l'
step = 1
elif action ==2:
change = 'r'
step = 1
else:
action = 0
change = None
if step == 0:
step = 1
else:
t +=1
save = True
self.get_direction(change)
change = None
self.move(self.dirc,self.snake_list.copy())
wall = self.get_wall_dis(self.snake_list.copy())
new_state = self.get_state(fx, fy, self.snake_list.copy(), wall)
if self.wall_touch:
reward = -200
print('Walled')
self.agent.remember(state, action, reward, new_state, True)
break
food_collect = self.check_food_collect(fx, fy, self.snake_list.copy())
if food_collect:
reward += 50
self.agent.remember(state, action, reward, new_state, False)
save = False
score +=1
step = 0
t = 0
reward = 0
fx,fy = self.food.food_pos(self.snake_list.copy())
if fx == 0.1 and fy ==0.1:
print('COMPLETED')
pygame.quit()
if save:
save = False
self.agent.remember(state, action, reward, new_state, False)
self.display.fill((0,0,0))
self.display_msg('Score :'+str(score))
pygame.draw.rect(self.display,(0,255,0),(fx,fy,self.size-1,self.size-1))
self.display_player(self.display, self.snake_list,self.dirc)
pygame.display.update()
#time.sleep(1)
state = new_state
if e > 2500:
time.sleep(0.1)
print('E : {} , Epsilon :{:.2} , Score : {}'.format(e,np.float32(self.agent.epsilon),score))
if e%10 ==0:
self.agent.save_model()
if len(self.agent.memory) > batch:
self.agent.replay()
game = Game()
for e in range(10000):
game.startgame(e)
game.reset()
pygame.quit()
A few things you could try:
The way you have defined your state looks a little complicated. Won't top and bottom give the same information, one being negative of the other? Also if your snake head is at (1,1) and your fruit at (3,4), well then the fruit won't show up in the state at all. There will be very limited times when the snake agent can actually see the fruit. Maybe you can try defining the state another way?
In RL, things go south very frequently so it often makes sense to start with basic agents and basic games and move up the ladder. Try using the same agent for a simple openai gym environment like mountaincar, to check if the agent class works as intended.
I am attempting to make minesweeper in Python by using tkinter. When the program checks for bombs, it works just fine unless the tile clicked is at 0, 0 (top left), in which case the program always has tileNorth and tileWest True, causing the program to check a variable that doesn't exist. This causes an error and leaves the 0, 0 tile blank. The checking works in every other tile, including corners, just not the top left. This should not be happening.
TLDR:
My minesweeper program works just fine, but it always messes up at 0, 0 and creates an error. I don't understand what's wrong...
The Error:
Exception in Tkinter callback
Traceback (most recent call last):
File "/usr/lib/python3.7/tkinter/__init__.py", line 1705, in __call__
return self.func(*args)
File "<string>", line 11, in <lambda>
File "/home/pi/Documents/Python/Minesweeper/Minesweeper.py", line 133, in tileClicked
stringVar_{x}_{y}.set(tileValue)""")
File "<string>", line 56
if bomb_-1_-1 == True:
^
SyntaxError: invalid token
It mentions bomb_-1_-1 which doesn't exist and can't exist... This is why that one if statement needs to work.
My Code:
import random
import tkinter
# Functions
def tileClicked(x, y): # Function is ran when a tile is clicked. The tile is defined by the inputted 'x' and 'y' values.
exec(f"""
global tileNorth, tileEast, tileSouth, tileWest
if y > 0:
tileNorth = True
else:
tileNorth = False
if x < game.size[0] - 1:
tileEast = True
else:
tileEast = False
if y < game.size[1] - 1:
tileSouth = True
else:
tileSouth = False
if x > 0:
tileWest = True
else:
tileWest = False""")
print(f"""{tileNorth}
{tileEast}
{tileSouth}
{tileWest}
DIV""")
exec(f"""
print("{x}, {y}")
if bomb_{x}_{y} == True:
stringVar_{x}_{y}.set("Bomb")
game.failed = True
if x == 0 and y == 0:
tileValue = int(0)
if tileNorth == True:
if tileEast == True:
if bomb_{x + 1}_{y - 1} == True:
tileValue += 1
if tileEast == True:
if bomb_{x + 1}_{y} == True:
tileValue += 1
if tileSouth == True:
if tileEast == True:
if bomb_{x + 1}_{y + 1} == True:
tileValue += 1
if tileWest == True:
if bomb_{x - 1}_{y + 1} == True:
tileValue += 1
if bomb_{x}_{y + 1} == True:
tileValue += 1
if tileWest == True:
if bomb_{x - 1}_{y} == True:
tileValue += 1
else:
tileValue = int(0)
if tileNorth == True:
if tileEast == True:
if bomb_{x + 1}_{y - 1} == True:
tileValue += 1
if tileWest == True:
if bomb_{x - 1}_{y - 1} == True:
tileValue += 1
if bomb_{x}_{y - 1} == True:
tileValue += 1
if tileEast == True:
if bomb_{x + 1}_{y} == True:
tileValue += 1
if tileSouth == True:
if tileEast == True:
if bomb_{x + 1}_{y + 1} == True:
tileValue += 1
if tileWest == True:
if bomb_{x - 1}_{y + 1} == True:
tileValue += 1
if bomb_{x}_{y + 1} == True:
tileValue += 1
if tileWest == True:
if bomb_{x - 1}_{y} == True:
tileValue += 1
if tileValue == 0:
tileValue = "Clear"
stringVar_{x}_{y}.set(tileValue)""")
# Classes
class game:
title = "Minesweeper"
bg = "white"
fg = "black"
size = [10, 10]
tileWidth = 3
tileHeight = 2
failed = False
bombFrequency = 4
flagMode = False
# Execution
window = tkinter.Tk() # The window.
window.title(game.title)
window.config(bg = game.bg)
mainFrame = tkinter.Frame(window, bg = game.bg) # Main frame that everything is located in.
titleFrame = tkinter.Frame(mainFrame, bg = game.bg) # Title frame.
titleLabel = tkinter.Label(titleFrame, bg = game.bg, fg = game.fg, text = game.title, font = "none 20").grid(row = 0, column = 0)
titleFrame.grid(row = 0, column = 0)
tileFrame = tkinter.Frame(mainFrame, bg = game.bg) # Frame where tiles are located.
x = 0
y = 0
for tiles_x in range(game.size[0]): # Generates tiles.
for tiles_y in range(game.size[1]):
exec(f"""global tile_{x}_{y}, stringVar_{x}_{y}, bomb_{x}_{y}
bomb_{x}_{y} = random.randint(1, game.bombFrequency)
if bomb_{x}_{y} == 1:
bomb_{x}_{y} = True
else:
bomb_{x}_{y} = False
stringVar_{x}_{y} = tkinter.StringVar(tileFrame)
tile_{x}_{y} = tkinter.Button(tileFrame, bg = 'lightgrey', fg = 'black', width = game.tileWidth, height = game.tileHeight, textvariable = stringVar_{x}_{y}, command = lambda: tileClicked({x}, {y})).grid(row = {y}, column = {x})""")
y += 1
x += 1
y = 0
tileFrame.grid(row = 1, column = 0)
mainFrame.pack() # The main frame is packed so everything is centered.
window.mainloop()
I don't care if you think dynamic variables are inefficient, it's my choice. I don't want people to comment on my methods of accomplishing a task... unless it's causing the problem...
Thanks!
Using dynamic variables is bad practice, and your experience is a good demonstration why.
Variable names cannot have a minus sign in them. The minus sign is interpreted as the arithmetic operator. So bomb_-1_-1 is interpreted as bomb_ - 1_ - 1. The bomb_ part is understood as a variable name, the 1 as a number, but the underscore following that number is triggering the syntax error.
This also demonstrates that dynamic code is not that great: syntax errors only pop up when certain circumstances are created (like selecting a particular cell).
A quick fix, just to show a work around, is to test first the values of x and y:
if {x} >= 0 and {y} >= 0 and bomb_{x}_{y} == True:
You would have to do similar tests for any other place where you create a dynamic reference like that. So also:
if {x} >= 1 and {y} >= 1 and bomb_{x-1}_{y-1} == True:
...etc.
But this is really patching a terrible design.
Note that even if only one of the variables is negative, you'll evaluate an expression that you did not really intend. You could get this for when only y == -1: bomb_5_-1. This produces no syntax error, but it evaluates as bomb_5_ minus 1. Obviously that is not intended by the algorithm.
Instead of dynamic variables and parsing code at run-time, use lists. They can be nested to have the 2D coverage.
I've made a program that counts weighted average and required weighted value to average being equal to our preference. If I want the average be equal to 85 from (the first value in the list is the weight of next values) [[4,72,78],[3,56],[6,93]] and x value of 6 weight it does not output the right value.
def choice(x):
c = 0
Choice = True
choices = []
while Choice:
if choices == []:
if x != 0:
fill = "weight of required value"
else:
fill = "weight of next values"
else:
if x != 0:
fill = "value of wanted weighted average"
else:
fill = "value"
try:
c = input("Give {}\n" .format(fill))
except:
continue
if isinstance(c, str):
if c == "":
Choice = False
if choices == []:
choices = False
break
else:
try:
choices.append(float(c))
except:
continue
if x != 0 and len(choices) == x:
break
c = 0
return choices
def av(x):
c = 0
alist = x[:]
alist.pop(0)
for a in alist:
c += a*x[0]
return c
def average(k,args):
c = 0
n = 0
for y in range(len(args)):
for a in range(len(args)):
c += (av(args[a]))/2
for b in range(len(args)):
n += (args[b][0]*(len(args[b])-1))/2
if k == 1:
return ([float("{0:.2f}".format(c/n)),c,n])
else:
j = float("{0:.2f}".format(c/n))
print("Weighted average {} from {}" .format(j,args))
def rmark(q,args):
alist = average(1,args)
a = float("{:.2f}" .format((((q[1]*(alist[2]+q[0]))-alist[1])/q[0])))
print("To get weighted average {}, u have to add the value equal to {} of weight {}" .format(q[1],a,q[0]))
# return a
Continue = True
list_choices = []
while Continue:
x = 0
x = choice(0)
if isinstance(x, list):
list_choices.append(x)
elif x == False:
break
print(list_choices)
rmark(choice(2),list_choices)
average(0,list_choices)
Let me break it down for you.
av function is reducing the size of your lists (x1, x2 and x3) to 1 by popping (alist.pop(0)) one element.
Hence, value of len(x1)-1 is 0, which means value of all multipliers in the denominator of (av(x1) + av(x2) + av(x3))/((x1[0]*(len(x1)-1)) + (x2[0]*(len(x2)-1)) + (x3[0]*(len(x3)-1))) is 0. Thus, the error divide by zero.