# [Hosting-page notice, not part of the program] Code pull complete; the page will refresh automatically.
# -*- coding: utf-8 -*-
"""
Created on Sat Nov 28 20:43:31 2020
@author: dzyzh
"""
import numpy as np
import random as rd
import time
class player(object):
    """Tabular learning agent for the two-player betting game in this file.

    The agent keeps two running-average reward tables, each paired with a
    visit-count table of the same shape:

    * ``value`` / ``frequency1`` -- indexed ``[concealed_card, open_card,
      chip - 1]``; consulted by :meth:`choose_action`.
    * ``decision`` / ``frequency2`` -- indexed ``[concealed_card, open_card,
      chip - 1, decision]``; consulted by :meth:`decision_choose`.

    Actions are picked with an upper-confidence-bound style score
    ``mean + sqrt(explor) * sqrt(ln(total_visits) / visits)``.
    """

    # Exploration multiplier used in the confidence bonus.  Set to ``None``
    # to use the adaptive 100 / 10 / 1 schedule in ``_exploration_factor``.
    # NOTE(review): the source this class was restored from had lost its
    # indentation; the trailing ``explor = 100`` statement is read here as an
    # unconditional override of the adaptive schedule -- confirm.
    fixed_exploration = 100

    def __init__(self, number):
        """Create an agent with randomly initialised tables.

        Args:
            number: identifier stored on the instance as ``self.number``.
        """
        self.number = number
        self.average_reward = 0.0   # running mean of every reward seen
        self.count = 0              # number of rewards folded into the mean
        self.value = np.random.rand(10, 19, 20)
        self.decision = np.random.rand(10, 19, 20, 2)
        # Counts start at 1..9 (never 0) so the bonus never divides by zero.
        self.frequency1 = np.random.randint(1, 10, [10, 19, 20])
        self.frequency2 = np.random.randint(1, 10, [10, 19, 20, 2])
        # Sliding window of the last 100 rewards.  Zero-filled rather than
        # np.empty so np.mean never reads uninitialised memory.
        self.memory = np.zeros(100)

    def _exploration_factor(self):
        """Return the exploration multiplier for the confidence bonus."""
        if self.fixed_exploration is not None:
            return self.fixed_exploration
        # Adaptive schedule: explore more when the recent mean reward has
        # drifted away from the long-run mean.
        drift = abs(np.mean(self.memory) - self.average_reward)
        baseline = abs(self.average_reward)
        if drift > baseline:
            return 100
        if drift > 0.1 * baseline:
            return 10
        return 1

    @staticmethod
    def _ucb_argmax(values, counts, total, explor):
        """Return the index maximising ``values`` plus the confidence bonus."""
        bonus = np.sqrt(explor) * np.sqrt(np.log(total) / counts)
        return (values + bonus).argmax()

    def choose_action(self, concealed_card, open_card):
        """Pick a chip amount for the given cards.

        Args:
            concealed_card: first index into ``value`` / ``frequency1``.
            open_card: second index into ``value`` / ``frequency1``.

        Returns:
            The best-scoring index along the last axis plus one (1..20).
        """
        best = self._ucb_argmax(
            self.value[concealed_card, open_card],
            self.frequency1[concealed_card, open_card],
            self.frequency1.sum(),   # loop-invariant: computed once
            self._exploration_factor(),
        )
        return best + 1

    def decision_choose(self, concealed_card, open_card, chip):
        """Pick a decision (0 or 1) in response to a bet of ``chip``.

        Args:
            concealed_card: first index into ``decision`` / ``frequency2``.
            open_card: second index into ``decision`` / ``frequency2``.
            chip: 1-based bet; ``chip - 1`` is the third table index.

        Returns:
            The best-scoring index along the last axis (0 or 1).
        """
        return self._ucb_argmax(
            self.decision[concealed_card, open_card, chip - 1],
            self.frequency2[concealed_card, open_card, chip - 1],
            self.frequency2.sum(),
            self._exploration_factor(),
        )

    def update_value(self, banker, concealed_card, open_card, chip, decision, reward):
        """Fold one observed reward into the tables and reward statistics.

        Args:
            banker: ``1`` updates ``value`` / ``frequency1``; any other value
                updates ``decision`` / ``frequency2``.
            concealed_card: first table index.
            open_card: second table index.
            chip: 1-based bet; ``chip - 1`` is the third table index.
            decision: fourth index, used only when ``banker != 1``.
            reward: reward obtained for this round.
        """
        if banker == 1:
            cell = (concealed_card, open_card, chip - 1)
            counts, means = self.frequency1, self.value
        else:
            cell = (concealed_card, open_card, chip - 1, decision)
            counts, means = self.frequency2, self.decision

        # Incremental running mean: new = (old * n + reward) / (n + 1).
        previous_count = counts[cell]
        previous_mean = means[cell]
        counts[cell] = previous_count + 1
        means[cell] = (previous_mean * previous_count + reward) / counts[cell]

        # NOTE(review): these statistics are read as applying to both roles
        # (function level, not inside the ``else``) -- confirm.
        self.average_reward = (self.average_reward * self.count + reward) / (self.count + 1)
        self.count = self.count + 1
        # Slide the window left by one and append the newest reward.
        self.memory[0:99] = self.memory[1:100]
        self.memory[99] = reward
def game(player1, player2, i):
    """Play one round between two players and print a summary of it.

    One player (chosen at random) places a bet via ``choose_action``; the
    other answers via ``decision_choose``.  If the answer is ``1`` the round
    is settled by comparing ``concealed_card_1 - concealed_card_2`` against
    ``-open_card`` for the bet amount (a tie pays 0); otherwise the bettor
    gains 1 and the responder loses 1.

    Args:
        player1: object exposing ``number``, ``choose_action`` and
            ``decision_choose``.
        player2: same interface as ``player1``.
        i: round number, used only in the printed summary.

    Returns:
        A 12-tuple ``(banker_1, banker_2, concealed_card_1, concealed_card_2,
        open_card + 9, -open_card + 9, chip_1, chip_2, decision_1,
        decision_2, reward_1, reward_2)``.  ``banker_x`` is 1 for the player
        who placed the bet; the chip of the non-betting player and the
        decision of the betting player are 0.
    """
    # The order of the random draws is kept as in the original so seeded
    # runs produce the same sequence.
    banker = rd.choice([0, 1])
    card_library = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    open_card = rd.choice([-1, 1]) * rd.choice(card_library)
    concealed_card_1 = rd.choice(card_library)
    concealed_card_2 = rd.choice(card_library)

    # +1 when player1 wins the showdown, -1 when player2 wins, 0 on a tie.
    margin = concealed_card_1 - concealed_card_2 + open_card
    outcome = (margin > 0) - (margin < 0)

    chip_1 = chip_2 = 0
    decision_1 = decision_2 = 0
    # open_card lies in -9..9; the +9 shift turns it into a table index 0..18.
    if banker == 0:
        banker_1, banker_2 = 1, 0
        chip_1 = player1.choose_action(concealed_card_1, open_card + 9)
        decision_2 = player2.decision_choose(concealed_card_2, -open_card + 9, chip_1)
        reward_1 = outcome * chip_1 if decision_2 == 1 else 1
        reward_2 = -reward_1
    else:
        banker_1, banker_2 = 0, 1
        chip_2 = player2.choose_action(concealed_card_2, -open_card + 9)
        decision_1 = player1.decision_choose(concealed_card_1, open_card + 9, chip_2)
        reward_2 = -outcome * chip_2 if decision_1 == 1 else 1
        reward_1 = -reward_2

    print("现在是第{0}局游戏,当前正在游戏的玩家是:player{1},player{2}".format(i, player1.number, player2.number))
    print("当前的庄家是:{0},{1}; player{2}的底牌是:{4},player{3}的底牌是:{5},player{2}的公共牌是:{6},player{3}的公共牌是:{7}".format(
        banker_1, banker_2, player1.number, player2.number,
        concealed_card_1, concealed_card_2, open_card, -open_card))
    # The chips are already the 1-based bet amounts used for the rewards, so
    # they are printed as-is (the earlier ``chip + 1`` displayed every bet
    # one too high and showed 1 for the player who did not bet).
    print("player{0}下注{2}; player{1}下注{3}; player{0}跟注为{4},player{1}跟注为{5},player{0}收益为{6},player{1}收益为{7}".format(
        player1.number, player2.number, chip_1, chip_2,
        decision_1, decision_2, reward_1, reward_2))
    return (banker_1, banker_2, concealed_card_1, concealed_card_2,
            open_card + 9, -open_card + 9, chip_1, chip_2,
            decision_1, decision_2, reward_1, reward_2)
# Training script: ten agents are repeatedly paired at random, play one round
# per pair, and learn from the result; player5's decision table is then
# written to value2.txt.
d = {}
for i in range(10):
    d['player' + str(i)] = player(i)

k = 0
while k < 5000000:
    # Every outer iteration pairs all ten players off at random.
    current_waiting = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    while current_waiting:
        a = rd.choice(current_waiting)
        current_waiting.remove(a)
        b = rd.choice(current_waiting)
        current_waiting.remove(b)
        result = game(d['player' + str(a)], d['player' + str(b)], k)
        # game() reports chip 0 for the player who did not bet.  Passing that
        # 0 to update_value made the responder update index chip-1 == -1
        # (the last slot) instead of the entry for the bet it actually
        # answered, so both players are given the real bet here.
        bet = result[6] if result[0] == 1 else result[7]
        d['player' + str(a)].update_value(result[0], result[2], result[4], bet, result[8], result[10])
        d['player' + str(b)].update_value(result[1], result[3], result[5], bet, result[9], result[11])
    k = k + 1

# Dump player5's decision table, one index triple followed by its two values.
with open("value2.txt", 'w') as doc:
    for i in range(0, 10):
        for j in range(0, 19):
            for k in range(0, 20):
                print([i, j, k], file=doc)
                print(d['player' + str(5)].decision[i, j, k], file=doc)
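# [Hosting-page notice, not part of the program] This section may contain content unsuitable for display, so the page does not show it. You can review and revise it using the relevant editing features.
# [Hosting-page notice, not part of the program] If you confirm the content does not involve inappropriate language / pure advertising / violence / vulgar or pornographic material / infringement / piracy / false information / worthless content, or content that violates national laws and regulations, you can click submit to appeal and it will be handled as soon as possible.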