正文
tuple_board = list(zip(*[board[6:9], board[3:6], board[0:3]]))
return([value for item in tuple_board for value in item], new_response)
elif transformation == 'rotate270':
new_response = [2, 5, 8, 1, 4, 7, 0, 3, 6].index(response)
tuple_board = list(zip(*[board[0:3], board[3:6],
board[6:9]]))[::-1]
return([value for item in tuple_board for value in item], new_response)
elif transformation == 'flip_v':
new_response = [6, 7, 8, 3, 4, 5, 0, 1, 2].index(response)
return(board[6:9] + board[3:6] + board[0:3], new_response)
elif transformation == 'flip_h':
# flip_h = rotate180, then flip_v
new_response = [2, 1, 0, 5, 4, 3, 8, 7, 6].index(response)
new_board = board[::-1]
return(new_board[6:9] + new_board[3:6] + new_board[0:3],
new_response)
else:
raise ValueError('Method not implmented.')
5.棋盘位置列表和对应的最佳落子点数据位于.csv文件中。我们将创建get_moves_from_csv()函数来加载文件中的棋盘和最佳落子点数据,并保存成元组,代码如下:
def get_moves_from_csv(csv_file):
    '''
    Load boards and their best responses from a CSV file.

    Every non-empty row is read as comma-separated integers: the first nine
    fields are the board cells and the tenth field is the index of the best
    response. Empty rows (for example a trailing blank line) are skipped.

    :param csv_file: csv file location containing the boards w/responses
    :return: moves: list of (board, response) tuples, where board is a list
             of ints taken from the first nine fields and response is an int
    :raises ValueError: if a used field cannot be converted to int
    :raises IndexError: if a non-empty row has fewer than ten fields
    '''
    moves = []
    # newline='' leaves line-ending handling to the csv module, as the csv
    # documentation recommends for files passed to csv.reader.
    with open(csv_file, 'rt', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        for row in reader:
            # csv.reader yields [] for blank lines; skip them instead of
            # failing on row[9].
            if not row:
                continue
            moves.append(([int(x) for x in row[0:9]], int(row[9])))
    return moves
6.创建一个get_rand_move()函数,返回一个随机变换棋盘和落子点,代码如下:
def get_rand_move(moves, rand_transforms=2):
    '''
    Pick a random move and apply a chain of random symmetry transformations.

    :param moves: sequence of (board, response) pairs to choose from
    :param rand_transforms: how many randomly chosen transformations to apply
    :return: (board, response) tuple after the transformations
    '''
    board, response = random.choice(moves)
    transform_names = ('rotate90', 'rotate180', 'rotate270',
                       'flip_v', 'flip_h')
    for _ in range(rand_transforms):
        # Each step feeds the previous result back into get_symmetry.
        chosen = random.choice(transform_names)
        board, response = get_symmetry(board, response, chosen)
    return board, response
7.初始化计算图会话,加载数据文件,创建训练集,代码如下:
# Open a TensorFlow session and load the base boards with their responses.
sess = tf.Session()
moves = get_moves_from_csv('base_tic_tac_toe_moves.csv')

# Create a train set: every sample is one randomly transformed base move.
train_length = 500
train_set = [get_rand_move(moves) for _ in range(train_length)]
8.前面提到,我们将从训练集中移除一个棋盘位置和对应的最佳落子点,来看训练的模型是否可以生成最佳走棋。下面棋盘的最佳落子点是棋盘位置索引为6的位置,代码如下:
# Board held out of the training data, written row by row.
test_board = [-1, 0, 0,
              1, -1, -1,
              0, 0, 1]
# Keep only the samples whose board differs from the held-out board.
train_set = [sample for sample in train_set if sample[0] != test_board]
9.创建init_weights()函数和model()函数,分别实现初始化模型变量和模型操作。注意,模型中并没有包含softmax()激励函数,因为softmax()激励函数会在损失函数中出现,代码如下:
def init_weights(shape):
    '''
    Create a model variable initialised with random normal values.

    :param shape: shape passed to tf.random_normal for the initial value
    :return: tf.Variable wrapping the random initial tensor
    '''
    initial_values = tf.random_normal(shape)
    return tf.Variable(initial_values)
def model(X, A1, A2, bias1, bias2):
layer1 = tf.nn.sigmoid(tf.add(tf.matmul(X, A1), bias1))
layer2 = tf.add(tf.matmul(layer1, A2), bias2)