[MCTS] 不围棋NoGoAI
计概课的大作业
学了一下MCTS,随便调了下参数(
upd:被锤爆了 需要修改一下随机后的估值(咕咕咕
MCTS参考资料
https://blog.csdn.net/baidu_40614951/article/details/105481498
https://www.cnblogs.com/yifdu25/p/8303462.html
界面相关
https://blog.csdn.net/luoshengkim/article/details/50412354
平台
https://www.botzone.org.cn/game/NoGo
扔一下代码
1 #include <cstdio> 2 #include <cstring> 3 #include <algorithm> 4 #include <cmath> 5 #include <vector> 6 #include <string> 7 #include <iostream> 8 #include "jsoncpp/json.h" 9 using namespace std; 10 const int N=9; 11 const int M=10000; 12 const int dx[4]={-1,0,1,0},dy[4]={0,-1,0,1}; 13 int currBotColor; 14 15 class Grid { 16 public: 17 int grid[N][N],avasz,curCol,lose;//,turn=1; //turn 记为我方已落子数+1 18 bool use[N*N]; 19 vector<int> ava; 20 bool vis[N][N]; 21 bool ingrid(int x,int y) {return x>=0 && y>=0 && x<9 && y<9;} 22 bool dfsqi(int x,int y) { //判断是否有气 23 int tx,ty; vis[x][y]=1; bool flag=0; 24 for (int k=0; k<4; k++) if (ingrid(tx=x+dx[k],ty=y+dy[k])) { 25 if (!grid[tx][ty]) flag=1; 26 else if (grid[x][y]==grid[tx][ty] && !vis[tx][ty] && dfsqi(tx,ty)) flag=1; 27 } 28 return flag; 29 } 30 bool judgeAvailable(int x,int y,int col) { 31 if (grid[x][y]) return 0; 32 grid[x][y]=col; memset(vis,0,sizeof vis); 33 if (!dfsqi(x,y)) {grid[x][y]=0; return 0;} 34 int tx,ty; 35 for (int k=0; k<4; k++) if (ingrid(tx=x+dx[k],ty=y+dy[k])&&!vis[tx][ty]) 36 if (col==-grid[tx][ty]&&!dfsqi(tx,ty)) {grid[x][y]=0;return 0;} 37 grid[x][y]=0; return 1; 38 } 39 void checkavailable() { 40 if (ava.size()) ava.clear(); 41 for (int i=0; i<9; i++) for (int j=0; j<9; j++) if (judgeAvailable(i,j,curCol)) 42 ava.push_back(i*9+j); 43 avasz=ava.size(); if (!avasz) lose=1; else lose=0; 44 memset(use,0,sizeof use); 45 } 46 int getaction() { 47 if (!avasz) return -1; 48 return ava[rand()%avasz]; 49 } 50 } G; 51 52 // const double Con = 0.70710678; //调参 53 const double Con = 0.8; 54 const int SearchDepth = 60; 55 const int TimeLimit = 15000; 56 int cnt,rt,act[M],A[M],B[M],fa[M];// A/B表胜率 57 Grid g[M]; 58 vector<int> son[M]; 59 60 int BestChild(int v,bool flag,double c) {//c是参数 61 int id; double mx=-1,val; 62 if (g[v].curCol != currBotColor) { 63 if (flag) for (int i=son[v].size()-1; ~i; i--) { 64 if (!B[son[v][i]]) val=1e5; 65 else val=1.0*(B[son[v][i]]-A[son[v][i]])/B[son[v][i]]+c*sqrt(2*log(B[v])/B[son[v][i]]);//需要保证B非零!! 66 if (val-mx>1e-8) mx=val,id=i; 67 } else for (int i=son[v].size()-1; ~i; i--) { 68 if (!B[son[v][i]]) val=1e5; 69 else val=1.0*(B[son[v][i]]-A[son[v][i]])/B[son[v][i]];//需要保证B非零!! 70 if (val-mx>1e-8) mx=val,id=i; 71 } 72 } else { 73 if (flag) for (int i=son[v].size()-1; ~i; i--) { 74 if (!B[son[v][i]]) val=1e5; 75 else val=1.0*A[son[v][i]]/B[son[v][i]]+c*sqrt(2*log(B[v])/B[son[v][i]]);//需要保证B非零!! 76 if (val-mx>1e-8) mx=val,id=i; 77 } else for (int i=son[v].size()-1; ~i; i--) { 78 if (!B[son[v][i]]) val=1e5; 79 else val=1.0*A[son[v][i]]/B[son[v][i]];//需要保证B非零!! 80 if (val-mx>1e-8) mx=val,id=i; 81 } 82 } 83 return son[v][id]; 84 } 85 int expand(int v) {// Expansion 86 for (int i=son[v].size()-1; ~i; i--) g[v].use[act[son[v][i]]]=1; 87 vector<int> tmp; 88 for (int i=g[v].avasz-1; ~i; i--) if (!g[v].use[g[v].ava[i]]) tmp.push_back(g[v].ava[i]); 89 int ac = tmp[rand()%tmp.size()]; 90 g[++cnt]=g[v],g[cnt].grid[ac/9][ac%9]=g[v].curCol,g[cnt].curCol=-g[v].curCol; 91 fa[cnt]=v; son[v].push_back(cnt); act[cnt]=ac; g[cnt].checkavailable(); 92 return cnt; 93 } 94 int TreePolicy(int v) { 95 int dep=SearchDepth; 96 while (g[v].avasz && dep--) //调参(控制层数) 97 if (son[v].size()<g[v].avasz) return expand(v); 98 else v=BestChild(v,1,Con); 99 return v; 100 } 101 int DefaultPolicy(int v) { 102 Grid gt=g[v]; 103 vector<int> a,b; int pa=0,pb=0; bool cur=1; 104 for (int i=0; i<81; i++) if (!gt.grid[i/9][i%9]) a.push_back(i),b.push_back(i); 105 for (int i=0; i<a.size(); i++) swap(a[i],a[rand()%a.size()]); 106 for (int i=0; i<b.size(); i++) swap(b[i],b[rand()%b.size()]); 107 while (pa<a.size()&&pb<b.size()) { 108 if (cur) { 109 while (pa<a.size() && !gt.judgeAvailable(a[pa]/9,a[pa]%9,gt.curCol)) pa++; 110 if (pa==a.size()) return currBotColor == gt.curCol ? 0:1; 111 gt.grid[a[pa]/9][a[pa]%9]=gt.curCol,pa++,cur=0; 112 } 113 else { 114 while (pb<b.size() && !gt.judgeAvailable(b[pb]/9,b[pb]%9,-gt.curCol)) pb++; 115 if (pb==b.size()) return currBotColor == -gt.curCol ? 0:1; 116 gt.grid[b[pb]/9][b[pb]%9]=-gt.curCol,pb++,cur=1; 117 } 118 } 119 if (pa==a.size()) return currBotColor == gt.curCol ? 0:1; 120 else return currBotColor == -gt.curCol ? 0:1; 121 } 122 inline void BackUp(int v,int dt) {for (; v; v=fa[v]) A[v]+=dt,B[v]++;} //判断胜负以currBotColor计 123 int MCTS(int runtime) { 124 cnt=rt=1; g[1]=G; 125 while (runtime--) { 126 int v=TreePolicy(rt); //Selection 127 int dt=DefaultPolicy(v); //Simulation 128 BackUp(v,dt); //Backpropagation 129 } 130 return act[BestChild(rt,0,0)]; 131 } 132 133 int main() { 134 srand((unsigned)time(0)); 135 string str; 136 getline(cin,str); 137 Json::Reader reader; 138 Json::Value input; 139 reader.parse(str, input); 140 141 int x,y; 142 int turnID = input["responses"].size(); 143 G.curCol = currBotColor = input["requests"][0]["x"].asInt()<0 ? 1:-1; 144 for (int i = 0; i < turnID; i++) { 145 x=input["requests"][i]["x"].asInt(), y=input["requests"][i]["y"].asInt(); 146 if (x!=-1) G.grid[x][y]=-currBotColor; 147 x=input["responses"][i]["x"].asInt(), y=input["responses"][i]["y"].asInt(); 148 if (x!=-1) G.grid[x][y]=currBotColor;//,turn++; 149 } 150 x=input["requests"][turnID]["x"].asInt(), y=input["requests"][turnID]["y"].asInt(); 151 if (x!=-1) G.grid[x][y]=-currBotColor; 152 153 G.checkavailable(); 154 int decision=MCTS(TimeLimit);//调参 155 156 Json::Value ret; 157 Json::Value action; 158 action["x"]=decision/9; action["y"]=decision%9; 159 ret["response"] = action; 160 Json::FastWriter writer; 161 cout << writer.write(ret) << endl; 162 return 0; 163 }