[MCTS] 不围棋(NoGo)AI

计概课的大作业

学了一下 MCTS(蒙特卡洛树搜索),随便调了下参数。

upd:被锤爆了,需要修改随机模拟(rollout)之后的估值方式(咕咕咕)


MCTS参考资料

https://blog.csdn.net/baidu_40614951/article/details/105481498

https://www.cnblogs.com/yifdu25/p/8303462.html

界面相关

https://blog.csdn.net/luoshengkim/article/details/50412354

平台

https://www.botzone.org.cn/game/NoGo


扔一下代码

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <iostream>
#include <string>
#include <vector>

#include "jsoncpp/json.h"
using namespace std;
 10 const int N=9;
 11 const int M=10000;
 12 const int dx[4]={-1,0,1,0},dy[4]={0,-1,0,1};
 13 int currBotColor;
 14 
 15 class Grid {
 16 public:
 17     int grid[N][N],avasz,curCol,lose;//,turn=1; //turn 记为我方已落子数+1
 18     bool use[N*N];
 19     vector<int> ava;
 20     bool vis[N][N];
 21     bool ingrid(int x,int y) {return x>=0 && y>=0 && x<9 && y<9;}
 22     bool dfsqi(int x,int y) { //判断是否有气
 23         int tx,ty; vis[x][y]=1; bool flag=0;
 24         for (int k=0; k<4; k++) if (ingrid(tx=x+dx[k],ty=y+dy[k])) {
 25             if (!grid[tx][ty]) flag=1;
 26             else if (grid[x][y]==grid[tx][ty] && !vis[tx][ty] && dfsqi(tx,ty)) flag=1;
 27         }
 28         return flag;
 29     }
 30     bool judgeAvailable(int x,int y,int col) {
 31         if (grid[x][y]) return 0;
 32         grid[x][y]=col; memset(vis,0,sizeof vis);
 33         if (!dfsqi(x,y)) {grid[x][y]=0; return 0;}
 34         int tx,ty;
 35         for (int k=0; k<4; k++) if (ingrid(tx=x+dx[k],ty=y+dy[k])&&!vis[tx][ty]) 
 36             if (col==-grid[tx][ty]&&!dfsqi(tx,ty)) {grid[x][y]=0;return 0;}
 37         grid[x][y]=0; return 1;
 38     }
 39     void checkavailable() {
 40         if (ava.size()) ava.clear();
 41         for (int i=0; i<9; i++) for (int j=0; j<9; j++) if (judgeAvailable(i,j,curCol))
 42             ava.push_back(i*9+j);
 43         avasz=ava.size(); if (!avasz) lose=1; else lose=0;
 44         memset(use,0,sizeof use);
 45     }
 46     int getaction() {
 47         if (!avasz) return -1;
 48         return ava[rand()%avasz];
 49     }
 50 } G;
 51 
 52 // const double Con = 0.70710678; //调参
 53 const double Con = 0.8;
 54 const int SearchDepth = 60;
 55 const int TimeLimit = 15000;
 56 int cnt,rt,act[M],A[M],B[M],fa[M];// A/B表胜率
 57 Grid g[M];
 58 vector<int> son[M];
 59 
 60 int BestChild(int v,bool flag,double c) {//c是参数
 61     int id; double mx=-1,val;
 62     if (g[v].curCol != currBotColor) {
 63         if (flag) for (int i=son[v].size()-1; ~i; i--) {
 64             if (!B[son[v][i]]) val=1e5;
 65             else val=1.0*(B[son[v][i]]-A[son[v][i]])/B[son[v][i]]+c*sqrt(2*log(B[v])/B[son[v][i]]);//需要保证B非零!!
 66             if (val-mx>1e-8) mx=val,id=i;
 67         } else for (int i=son[v].size()-1; ~i; i--) {
 68             if (!B[son[v][i]]) val=1e5;
 69             else val=1.0*(B[son[v][i]]-A[son[v][i]])/B[son[v][i]];//需要保证B非零!!
 70             if (val-mx>1e-8) mx=val,id=i;
 71         }
 72     } else {
 73         if (flag) for (int i=son[v].size()-1; ~i; i--) {
 74             if (!B[son[v][i]]) val=1e5;
 75             else val=1.0*A[son[v][i]]/B[son[v][i]]+c*sqrt(2*log(B[v])/B[son[v][i]]);//需要保证B非零!!
 76             if (val-mx>1e-8) mx=val,id=i;
 77         } else for (int i=son[v].size()-1; ~i; i--) {
 78             if (!B[son[v][i]]) val=1e5;
 79             else val=1.0*A[son[v][i]]/B[son[v][i]];//需要保证B非零!!
 80             if (val-mx>1e-8) mx=val,id=i;
 81         }
 82     }
 83     return son[v][id];
 84 }
 85 int expand(int v) {// Expansion
 86     for (int i=son[v].size()-1; ~i; i--) g[v].use[act[son[v][i]]]=1;
 87     vector<int> tmp;
 88     for (int i=g[v].avasz-1; ~i; i--) if (!g[v].use[g[v].ava[i]]) tmp.push_back(g[v].ava[i]);
 89     int ac = tmp[rand()%tmp.size()];
 90     g[++cnt]=g[v],g[cnt].grid[ac/9][ac%9]=g[v].curCol,g[cnt].curCol=-g[v].curCol;
 91     fa[cnt]=v; son[v].push_back(cnt); act[cnt]=ac; g[cnt].checkavailable();
 92     return cnt;
 93 }
 94 int TreePolicy(int v) {
 95     int dep=SearchDepth;
 96     while (g[v].avasz && dep--) //调参(控制层数)
 97         if (son[v].size()<g[v].avasz) return expand(v);
 98         else v=BestChild(v,1,Con);
 99     return v;
100 }
101 int DefaultPolicy(int v) {
102     Grid gt=g[v];
103     vector<int> a,b; int pa=0,pb=0; bool cur=1;
104     for (int i=0; i<81; i++) if (!gt.grid[i/9][i%9]) a.push_back(i),b.push_back(i);
105     for (int i=0; i<a.size(); i++) swap(a[i],a[rand()%a.size()]);
106     for (int i=0; i<b.size(); i++) swap(b[i],b[rand()%b.size()]);
107     while (pa<a.size()&&pb<b.size()) {
108         if (cur) {
109             while (pa<a.size() && !gt.judgeAvailable(a[pa]/9,a[pa]%9,gt.curCol)) pa++;
110             if (pa==a.size()) return currBotColor == gt.curCol ? 0:1;
111             gt.grid[a[pa]/9][a[pa]%9]=gt.curCol,pa++,cur=0;
112         }
113         else {
114             while (pb<b.size() && !gt.judgeAvailable(b[pb]/9,b[pb]%9,-gt.curCol)) pb++;
115             if (pb==b.size()) return currBotColor == -gt.curCol ? 0:1;
116             gt.grid[b[pb]/9][b[pb]%9]=-gt.curCol,pb++,cur=1;
117         }
118     }
119     if (pa==a.size()) return currBotColor == gt.curCol ? 0:1;
120     else return currBotColor == -gt.curCol ? 0:1;
121 }
122 inline void BackUp(int v,int dt) {for (; v; v=fa[v]) A[v]+=dt,B[v]++;} //判断胜负以currBotColor计
123 int MCTS(int runtime) {
124     cnt=rt=1; g[1]=G;
125     while (runtime--) {
126         int v=TreePolicy(rt); //Selection
127         int dt=DefaultPolicy(v); //Simulation
128         BackUp(v,dt); //Backpropagation
129     }
130     return act[BestChild(rt,0,0)];
131 }
132 
133 int main() {
134     srand((unsigned)time(0));
135     string str;
136     getline(cin,str);
137     Json::Reader reader;
138     Json::Value input;
139     reader.parse(str, input);
140 
141     int x,y;
142     int turnID = input["responses"].size();
143     G.curCol = currBotColor = input["requests"][0]["x"].asInt()<0 ? 1:-1;
144     for (int i = 0; i < turnID; i++) {
145         x=input["requests"][i]["x"].asInt(), y=input["requests"][i]["y"].asInt();
146         if (x!=-1) G.grid[x][y]=-currBotColor;
147         x=input["responses"][i]["x"].asInt(), y=input["responses"][i]["y"].asInt();
148         if (x!=-1) G.grid[x][y]=currBotColor;//,turn++;
149     }
150     x=input["requests"][turnID]["x"].asInt(), y=input["requests"][turnID]["y"].asInt();
151     if (x!=-1) G.grid[x][y]=-currBotColor;
152     
153     G.checkavailable();
154     int decision=MCTS(TimeLimit);//调参
155     
156     Json::Value ret;
157     Json::Value action;
158     action["x"]=decision/9; action["y"]=decision%9;
159     ret["response"] = action;
160     Json::FastWriter writer;
161     cout << writer.write(ret) << endl;
162     return 0;
163 }

 

posted @ 2021-01-27 18:46  HNOOO  阅读(261)  评论(0编辑  收藏  举报