pachi 学习
修改
Makefile
DCNN=1 -> DCNN=0 // 禁用DCNN
#BOARD_SIZE=19 -> BOARD_SIZE=19 // 棋盘大小19x19
OPT ?= -O3 -> OPT ?= -O0 // 优化gcc编译选项
运行参数: prior=eqex=0,dynkomi=none
参数
pachi.c
int debug_level = 3; -> int debug_level = 5; // 内部debug等级,主要用来打印相关信息
uct.c
u->fast_alloc = true; -> u->fast_alloc = false; // 不预先生成树节点
方法:
board.c
board_init_data
if(size % 2){
board->symmetry.d = 1;
board->symmetry.x1 = board->symmetry.y1 = board_size(board) / 2;
board->symmetry.x2 = board->symmetry.y2 = board_size(board) - 1;
board->symmetry.type = SYM_FULL;
}
->
if(size % 2){
board->symmetry.d = 0;
board->symmetry.x1 = board->symmetry.y1 = 1;
board->symmetry.x2 = board->symmetry.y2 = board_size(board) - 1;
board->symmetry.type = SYM_NONE;
}
文件:
joseki19.pdict -> joseki19.pdict.bak
命令 gtp.c
protocol_version name echo version list_commands known_command quit boardsize clear_board komi play genmove time_left time_settings set_free_handicap place_free_handicap fixed_handicap final_score final_status_list undo kgs-game_over kgs-rules kgs-genmove_cleanup kgs-time_settings kgs-chat pachi-predict pachi-tunit pachi-gentbook pachi-dumptbook pachi-evaluate pachi-result predict tunit gogui-analyze_commands gogui-best_moves gfx/gfx Best Moves gogui-winrates gfx/gfx Winrates gogui-ownermap gfx/gfx Influence gogui-score_est gfx/gfx Score Est gogui-livegfx best_moves gfx/Live gfx = Best Moves gogui-livegfx best_seq gfx/Live gfx = Best Sequence gogui-livegfx winrate gfx/Live gfx = Winrates/gogui-livegfx winrates gogui-livegfx gfx/Live gfx = None final_score string/Final Score
数据结构
internal
/* How many games to consider at minimum before judging groups. */ #define GJ_MINGAMES 500
playout
// Maximal simulation length
#define MAX_GAMELEN 600
/* Playout policy descriptor: a table of callbacks plus policy-private data. */
struct playout_policy {
int debug_level; // debug level used by this policy
// Callbacks:
// - setboard is called when we start a new playout
// - choose is called when we ask the policy about the next move
// - assess is called when we ask the policy how good a given move is
// - permit is called when we ask the policy whether a randomly chosen move may be made
playoutp_setboard setboard;
playoutp_choose choose;
playoutp_assess assess;
playoutp_permit permit;
playoutp_done done; // NOTE(review): presumably the policy teardown hook - confirm
// By default, with setboard set we will refuse to make (random)
// moves outside of the *choose routine in order not to mess up
// state tracking. If you use *setboard but do not track state
// (e.g. you just initialize some per-playout data, like the Moggy
// policy), set setboard_randomok too.
bool setboard_randomok;
// The particular playout policy's internal data
void *data;
};
/* Record of the move sequence of one game, used for AMAF. */
struct playout_amafmap{
// We keep a record of the game so that we can examine nakade moves; really going out of our way to
// implement nakade AMAF properly turns out to be crucial when reading some tactical positions in
// depth (even if they are just one-stone snapback).
coord_t game[MAX_GAMELEN];
bool is_ko_capture[MAX_GAMELEN];
int gamelen;
// Our current position in the game sequence; in AMAF, we search the
// half-open range [game_baselen, gamelen).
int game_baselen;
};
/* Per-playout configuration: length limit, mercy rule and optional hooks. */
struct playout_setup {
unsigned int gamelen; // Maximal # of moves in playout
int mercymin; // Minimal difference between captures to terminate the playout; 0 means don't check
void *hook_data; // for the hooks to reference their state
playouth_prepolicy prepolicy_hook;
playouth_postpolicy postpolicy_hook;
};
stone
/* Contents of a board intersection.
 * The numeric values match the neighbour-colour index convention used
 * elsewhere in these notes: 0 = none, 1 = black, 2 = white, 3 = border. */
enum stone {
    S_NONE = 0,     // no stone
    S_BLACK = 1,    // black stone
    S_WHITE = 2,    // white stone
    S_OFFBOARD = 3, // border point around the playable area
    S_MAX = 4       // number of stone values; usable as an array size
};
board
/* Go rulesets. */
enum go_ruleset {
    RULES_CHINESE = 0, // default ruleset
    RULES_AGA = 1,
    RULES_NEW_ZEALAND = 2,
    RULES_JAPANESE = 3,
    RULES_STONES_ONLY = 4,
    RULES_SIMING = 5
};
/* Board symmetry classes; values are consecutive, starting at zero. */
enum e_sym {
    SYM_FULL,      /* 0 */
    SYM_DIAG_UP,   /* 1 */
    SYM_DIAG_DOWN, /* 2 */
    SYM_HORIZ,     /* 3 */
    SYM_VERT,      /* 4 */
    SYM_NONE       /* 5 */
};
// Liberty information of one group.
struct group {
coord_t lib[10]; // liberty coordinates; at most 10 are stored
int libs; // NOTE(review): presumably the number of valid entries in lib[] - confirm
};
// Symmetry descriptor of a board position.
// NOTE(review): x1..x2 / y1..y2 look like coordinate bounds and d like a
// diagonal flag, judging by how board_init_data assigns them - confirm.
struct board_symmetry{
int x1,x2,y1,y2;
int d;
enum e_sym type; // symmetry class
};
// board
struct board{
int size; // 棋盘大小包含边缘 21 int size2; // 21*21 int bits2; //
int captures[4]; //
float komi; // 贴目
int handicap; // 让子
enum go_ruleset rules; // 围棋规则
char *fbookfile; //
struct fbook *fbook; //
int moves; // 走了多少步
struct move last_move;
struct move last_move2;
struct move last_move3;
struct move last_move4;
_Bool superko_violation
// 初始化 x=S_OFFBOARD,0=S_NONE
// x(420) x x x(440)
// x 0(400) 0(418) x
// x 0(22) 0(40) x
// x(0) x x x(20)
enum stone b[441]; // stones played on the board
// 初始化
// 0(420) 0 0(440)
// 0 0 0
// 0(0) 0 0(20)
group_t g[441]; // 围棋所属的groupid,为0表示没有group
// 初始化
// 0(420) 0 0(440)
// 0 0 0
// 0(0) 0 0(20)
coord_t p[441]; // Positions of next stones in the stone group; 0 == last stone
// 初始化 0=S_NONE,1=S_BLACK,2=S_WHITE,3=S_OFFBOARD
// [0,0,0,0](420) [0,0,0,0] [0,0,0,0] [0,0,0,0](440)
// [0,0,0,0] [2,0,0,2] [2,0,0,2] [0,0,0,0]
// [0,0,0,0] [3,0,0,1] [3,0,0,1] [0,0,0,0]
// [0,0,0,0] [2,0,0,2] [2,0,0,2] [0,0,0,0]
// [0,0,0,0](0) [0,0,0,0] [0,0,0,0] [0,0,0,0](20)
struct neighbor_colors n[441]; // 上下左右 邻居的不同颜色的棋子的个数 0:没有,1:黑,2:白,3:边缘
hash3_t pat3[441]; // pat3 格式化
struct group gi[441]; //
// 初始化
// 0(361) 0(440)
// 418(360)
//
// 22(0) 42(20)
coord_t f[441]; // 可以移动的位置 [0]=22,[360]=418
int flen; // 19x19 = 361
// 初始化
// 0(420) 0 0 0(440)
// 0 359 360 0
// 0 0 1 0
// 0(0) 0 0 0(20)
int fmap[441]; // 将可以移动的位置映射到列表索引 边缘为0,其余 0~360
group_t c[240]; // 可以捕捉的group队列
int clen;
struct board_symmetry symmetry; //
struct move last_ko; // 最后一个劫
int last_ko_age; //
struct move ko; // 劫
void *es; // Engine-specific state;
void *ps; // Playout-specific state;
hash_t history_hash[4096];
hash_t hash; // Hash of current board position;
hash_t qhash[4]; // Hash of current board position quadrants
};
// Neighbour colours: one counter per stone value (the array has S_MAX entries).
struct neighbor_colors {
char colors[S_MAX];
};
/* Board data that depends only on the board size. */
struct board_statics {
    int size;
    int nei8[8], dnei[4];
    hash_t h[BOARD_MAX_COORDS][2];      // Zobrist hashes per coordinate: black, white
    uint8_t coord[BOARD_MAX_COORDS][2]; // x,y of each coordinate, from 0,0 to 20,20
};
move
#define pass -1
#define resign -2
// 落子 struct move { coord_t coord; enum stone color; };
stats
/* Move statistics; we track how good value each move has. */ /* These operations are supposed to be atomic - reasonably * safe to perform by multiple threads at once on the same stats. * What this means in practice is that perhaps the value will get * slightly wrong, but not drastically corrupted. */
// 移动统计 struct move_stats{ floating_t value; // BLACK wins/playouts; int playouts; // # of playouts };
tree
/* One node of the search tree. */
struct tree_node {
    hash_t hash; // used only for debugging; very likely (but not guaranteed) to be unique
    struct tree_node *parent, *sibling, *children;
    struct move_stats u;
    struct move_stats prior;
    struct move_stats amaf;
    struct move_stats pu;
    struct move_stats winner_owner; // owner == winner
    struct move_stats black_owner;  // owner == black
    short coord;
    unsigned short depth;
    signed char descents; // number of parallel descents going through this node at the moment; used for virtual loss computation
    unsigned char d;
    unsigned char hints;
    /* In case multiple threads walk the tree, is_expanded is set
     * atomically. Only the first thread setting it expands the node.
     * The node goes through 3 states:
     *   1) children == null, is_expanded == false: leaf node
     *   2) children == null, is_expanded == true: one thread currently expanding
     *   3) children != null, is_expanded == true: fully expanded node */
    bool is_expanded;
};
/* The search tree together with its allocation bookkeeping. */
struct tree {
    struct board *board;
    struct tree_node *root;
    struct board_symmetry root_symmetry;
    enum stone root_color; // stone colour at the tree root
    bool use_extra_komi;
    /* A single-move-valid flag that marks a tree that is potentially
     * badly skewed and should be used with care. Currently, we never
     * resign on untrustworthy_tree and do not reuse the tree on next
     * move. */
    bool untrustworthy_tree;
    floating_t extra_komi;
    struct move_stats avg_score; // average score

    // We merge local (non-tenuki) sequences for both colors, occurring
    // anywhere in the tree; nodes are created on-demand, special 'pass'
    // nodes represent tenuki. Only u move_stats are used, prior and amaf
    // is ignored. Values in root node are ignored.
    // The value corresponds to black-to-play as usual; i.e. if white
    // succeeds in its replies, the values will be low.
    struct tree_node *ltree_black;
    // ltree_white has white-first sequences as children
    struct tree_node *ltree_white;
    // Aging factor; 2 means halve all playout values after each turn,
    // 1 means don't age at all.
    floating_t ltree_aging;

    /* Hash table used when working as slave for the distributed engine.
     * Maps coordinate path to tree node. */
    struct tree_hash *htable;
    int hbits;

    int max_depth;
    volatile size_t nodes_size; // byte size of all allocated nodes
    size_t max_tree_size;       // maximum byte size for entire tree, > 0 only for fast_alloc
    size_t max_pruned_size;
    size_t pruning_threshold;
    void *nodes;                // nodes buffer, only for fast_alloc
};
uct
#define MC_GAMELEN MAX_GAMELEN // Maximal simulation length
internal
/* State and configuration of the UCT engine. */
struct uct {
    int debug_level; // debug level
    enum uct_reporting {
        UR_TEXT,
        UR_JSON,
        UR_JSON_BIG
    } reporting;
    int reportfreq;

    int games;
    int gamelen;
    float resign_threshold;
    float sure_win_threshold;
    double best2_ratio;
    double bestr_ratio;
    float max_maintime_ratio;
    _Bool pass_all_alive;
    _Bool allow_losing_pass;
    _Bool territory_scoring;
    int expand_p;
    _Bool playout_amaf;
    _Bool amaf_prior;
    int playout_amaf_cutoff;
    double dumpthres;
    int force_seed;
    _Bool no_tbook;
    _Bool fast_alloc;
    size_t max_tree_size;
    size_t max_pruned_size;
    size_t pruning_threshold;
    int mercymin;
    int significant_threshold;

    int threads;
    enum uct_thread_model {
        TM_TREE,   // Tree parallelization w/o virtual loss
        TM_TREEVL, // Tree parallelization with virtual loss
    } thread_model;
    int virtual_loss;
    bool pondering_opt; // User wants pondering
    bool pondering;     // Actually pondering now
    bool slave;         // act as slave in distributed engine
    int max_slaves;     // optional, -1 if not set
    enum stone my_color;

    int fuseki_end;
    int yose_start;

    int dynkomi_mask;
    int dynkomi_interval;
    struct uct_dynkomi *dynkomi;
    floating_t initial_extra_komi;

    floating_t val_scale;
    int val_points;
    bool val_extra;
    bool val_byavg;
    bool val_bytemp;
    floating_t val_bytemp_min;

    int random_policy_chance;
    bool local_tree;
    int tenuki_d;
    floating_t local_tree_aging;
#define LTREE_PLAYOUTS_MULTIPLIER 100
    floating_t local_tree_depth_decay;
    bool local_tree_allseq;
    bool local_tree_neival;
    enum {
        LTE_ROOT,
        LTE_EACH,
        LTE_TOTAL
    } local_tree_eval;
    bool local_tree_rootchoose;

    struct {
        int level;
        int playouts;
    } debug_after;

    char *banner;

    struct uct_policy *policy;
    struct uct_policy *random_policy;
    struct playout_policy *playout;
    struct uct_prior *prior;
    struct uct_pluginset *plugins;
    struct joseki_dict *jdict;

    struct pattern_setup pat;
    bool want_pat; // Various modules (prior, policy, ...) set this if they want the pattern database to be loaded

    // Used within frame of single genmove
    struct board_ownermap ownermap;

    // Used for coordination among slaves of the distributed engine
    // (the notes mark this as not used).
    int stats_hbits;
    int shared_nodes;
    int shared_levels;
    double stats_delay;
    int played_own;
    int played_all;

    // Saved dead groups, for final_status_list dead
    struct move_queue dead_groups;
    int dead_groups_move;

    struct tree *t; // Game state - maintained by setup_state(), reset_state()
};
struct uct_policy { struct uct *uct; uctp_choose choose; uctp_winner winner; uctp_evaluate evaluate; uctp_descend descend; uctp_update update; uctp_prior prior; uctp_done done; bool wants_amaf; void *data; }; // This is the state used for descending the tree;we use this wrapper // structure in order to be able to easily descend in multiple trees // in parallel(e.g. main tree and local tree) or compute cummulative // "path value" throughout the tree descent struct uct_descend { // Active tree nodes struct tree_node *node; // Main tree struct tree_node *lnode // local tree // Value of main tree node (with all value factors,but unbiased - // without exploration factor),from black's perspective struct move_stats value; };
ownermap
/* Ownership counters: a playout count plus four counters for each of the
 * 441 board coordinates. */
struct board_ownermap {
    sig_atomic_t playouts;
    sig_atomic_t map[441][4];
};
policy
moggy
// Move queue tags.Some may be even undesirable - these moves then receive // a penalty;penalty tags should be used only when it is certain the move would // be considered anyway enum mq_tag { MQ_KO=0, MQ_LATARI, MQ_L2LIB, #define MQ_LADDER MQ_L2LIB MQ_LNLIB, MQ_PAT3, MQ_GATARI, MQ_JOSEKI, MQ_NAKADE, MQ_MAX }; #define PAT3_N 15 struct moggy_policy { unsigend int lcapturerate,atarirate,nlibrate,ladderrate,capturerate,patternrate,korate,josekirate,nakaderate,eyefixrate; unsigned int selfatarirate,eyefillrate,alwaysccapture; unsigned int fillboardtries; int koage; // whether to look for patterns around second-to-last move bool pattern2; // whether,when self-atari attempt is detected, to play the other group's liberty if that is non-self-atari bool selfatari_other; // whether to read out ladders elsewhere than near the board in the playouts.note that such ladder testing is currently a fairly expensive operation bool middle_ladder; // 1lib settings: // whether to always pick from moves capturing all groups in global_atari_check() bool capcheckall; // Prior stone weighting. weight of each stone between cap_stone_min and cap_stone_max is (assess*100)/cap_stone_denom int cap_stone_min,cap_stone_max; int cap_stone_denom; // 2lib settings bool atari_def_no_hopeless; bool atari_miaisafe; // nlib settings int nlib_count; struct joseki_dict *jdict; struct pattern3s patterns;
double pat3_gammas[PAT3_N];
bool fullchoose;
double mq_prob[MQ_MAX], tenuki_prob; };
prior
/* Configuration of prior knowledge sources. */
struct uct_prior {
    // Equivalent experience for prior knowledge. MoGo paper recommends
    // 50 playouts per source; in practice, esp. with RAVE, about 6 playouts
    // per source seems best.
    int eqex;
    int even_eqex, policy_eqex, b19_eqex, eye_eqex, ko_eqex, plugin_eqex, joseki_eqex, pattern_eqex;
    int dcnn_eqex;
    int cfgdn;
    int *cfgd_eqex;
    bool prune_ladders;
};
/* Per-position working set used while assigning prior values to moves. */
struct prior_map {
struct board *b;
enum stone to_play;
int parity;
// [board_size2(b)] array; move_stats are the prior values to be assigned to individual moves.
// move_stats.value is not updated.
struct move_stats *prior;
// [board_size2(b)] array; whether to compute the prior for the given value.
bool *consider;
// [board_size2(b)] array from cfg_distances()
int *distances;
};
ucb1amaf
// This implements the UCB1 policy with an extra AMAF heuristics struct ucb1_policy_amaf { // this is what the modification of UCT with patterns in monte carlo go paper // calls 'p'. Original UCB has this on 2,but this seems to produce way too // wide searches; reduce this to get deeper and narrower readouts - try 0.2 floating_t explore_p; // rescale virtual loss value to square root of #threads. This mitigates the number // of virtual losses added in case of a large amount of threads; it seems that with // linear virtual losses, overly diverse exploration caused by this may cause a wrong // mean value computed for the parent node. bool vloss_sqrt; // in distributed mode,encourage different slaves to work on different parts of the // tree by adding virtual wins to different nodes int virtual_win; int root_virtual_win; int vwin_min_playouts; // First Play Urgency - if set to less than infinity( the MoGo paper above reports 1.0 as // the best), new branches are explored only if none of the existing ones has higher // urgency than fpu. floating_t fpu; unsigned int equiv_rave; bool sylvain_rave; /* Give more weight to moves played earlier */ int distance_rave; // Give 0 or negative rave bonus to ko threats before taking the ko. // 1=normal bonus, 0= no bonus,-1= invert rave bonus,-2= double penalty int threat_rave; // Coefficient of local tree values embedded in RAVE floating_t ltree_rave; // Coefficient of criticality embedded in RAVE floating_t crit_rave; int crit_min_playouts; floating_t crit_plthres_coef; bool crit_negative; bool crit_negflip; bool crit_amaf; bool crit_lvalue; };
pattern
/* Pattern matcher configuration. */
struct pattern_config {
    unsigned int bdist_max;
    unsigned int spat_min, spat_max;
    bool spat_largest;
    // The spatial patterns dictionary used by FEAT_SPATIAL
    struct spatial_dict *spat_dict;
};
patternsp
// Maximum spatial pattern diameter
#define MAX_PATTERN_DIST 7
// Maximum number of points in spatial pattern (upper bound)
#define MAX_PATTERN_AREA (MAX_PATTERN_DIST*MAX_PATTERN_DIST)

/* One spatial pattern. */
struct spatial {
    // Gridcular radius of matched pattern
    unsigned char dist;
    // MAX_PATTERN_AREA / 4 bytes (integer division: 49 / 4 == 12)
    unsigned char points[MAX_PATTERN_AREA/4];
};
/* Dictionary of spatial patterns with a hash table of 2^spatial_hash_bits slots. */
struct spatial_dict {
    unsigned int nspatials;
    struct spatial *spatials;
    uint32_t hash[1 << spatial_hash_bits];
    int fills, collisions;
};
dynkomi
struct uct_dynkomi { struct uct *uct; uctd_permove permove; uctd_persim persim; uctd_done done; void *data; // Game state for dynkomi use: // Information on average score at the simulation end (black's perspective) // since last dynkomi adjustment struct move_stats score; // Information on average winrate of simulations since last dynkomi adjustment struct move_stats value; };
josekibase
// single joseki situation - moves for S_BLACK-1,S_WHITE-1 struct joseki_pattern { coord_t *moves[2]; // moves[] is a pass-terminated list or NULL }; struct joseki_dict { int bisze; #define joseki_hash_bits 20 // 8M w/ 32-bit pointers #define joseki_hash_maks ((1 << joseki_hash_bits) - 1) struct joseki_pattern *patterns; };
slave
/* Hash table entry mapping path to node */
struct tree_hash {
    path_t coord_path;
    struct tree_node *node;
};
fbook
/* Opening book: board size, handicap, a move count, and two parallel
 * 1048576-entry tables of moves and hashes. */
struct fbook {
    int bsize;
    int handicap;
    int movecnt;
    coord_t moves[1048576];
    hash_t hashes[1048576];
};
util
// likely(x) behaves like x in a condition, i.e. if (likely(x)) is equivalent
// to if (x), but it tells gcc that x is more likely to be true.
// unlikely(x) behaves like x, i.e. if (unlikely(x)) is equivalent to if (x),
// but it tells gcc that x is more likely to be 0.
#define likely(x) __builtin_expect(!!(x),1)
#define unlikely(x) __builtin_expect((x),0)
函数
pachi
static struct engine *init_engine(enum engine_id engine,char *e_arg,struct board *b);
board
struct board *board_init(char *fbookfile);
static void board_setup(struct board *b); void board_clear(struct board *board); void board_done_noalloc(struct board *board); static void board_init_data(struct board *board); static void board_statics_init(struct board *board);
int board_play(struct board *board,struct move *m);
static int board_play_(struct board *board,struct move *m,struct board_undo *u);
static int __attribute__((flatten)) board_play_f(struct board *board,struct move *m,int f,struct board_undo *u);
static group_t profiling_noinline board_play_outside(struct board *board,struct move *m,int f,struct board_undo *u);
static inline void board_rmf(struct board *b,int f);
static group_t profiling_noinline new_group(struct board *board,coord_t coord,struct board_undo *u);
static void profiling_noinline board_hash_update(struct board *board,coord_t coord,enum stone color);
static inline bool board_is_eyelike(struct board *board,coord_t coord,enum stone eye_color);
uct
struct engine *engine_uct_init(char *arg,struct board *b); struct uct *uct_state_init(char *arg,struct board *b);
static char *uct_notify_play(struct engine *e,struct board *b,struct move *m,char *enginearg);
void uct_prepare_move(struct uct *u,struct board *b,enum stone color);
// 主要设置 uct->t
// uct->t = tree_init;
static void setup_state(struct uct *u,struct board *b,enum stone color);
static void uct_board_print(struct engine *e,struct board *b,FILE *f);
static char *uct_notify_play(struct engine *e,struct board *b,struct move *m,char *enginearg);
static char *uct_undo(struct engine *e,struct board *b);
static char *uct_result(struct engine *e,struct board *b);
static coord_t uct_genmove(struct engine *e,struct board *b,struct time_info *ti,enum stone color,bool pass_all_alive);
char *uct_genmoves(struct engine *e,struct board *b,struct time_info *ti,enum stone color,char *args,bool pass_all_alive,void **stats_buf,int *stats_size);
void uct_evaluate(struct engine *e,struct board *b,struct time_info *ti,floating_t *vals,enum stone color);
static void uct_dead_group_list(struct engine *e,struct board *b,struct move_queue *mq);
static void uct_stop(struct engine *e);
static void uct_done(struct engine *e);
static struct board_ownermap *uct_ownermap(struct engine *e,struct board *b);
tree
// 创建一个新的树
struct tree *tree_init(struct board *board,enum stone color,size_t max_tree_size,size_t max_pruned_size,size_t pruning_threshold,floating_t ltree_aging,int hbits);
static struct tree_node *tree_init_node(struct tree *t,coord_t coord,int depth,bool fast_alloc);
static struct tree_node *tree_alloc_node(struct tree *t,int count,bool fast_alloc);
static void tree_setup_node(struct tree *t,struct tree_node *n,coord_t coord,int depth);
bool tree_promote_at(struct tree *tree,struct board *b,coord_t c);
void tree_promote_node(struct tree *tree,struct tree_node **node);
static void tree_fix_symmetry(struct tree *tree,struct board *b,coord_t c);
static void tree_fix_node_symmetry(struct board *b,struct tree_node *node,bool flip_horiz,bool flip_vert,int flip_diag);
generic
struct tree_node *uctp_generic_choose(struct uct_policy *p,struct tree_node *node,struct board *b,enum stone color,coord_t exclude);
void uctp_generic_winner(struct uct_policy *p,struct tree *tree,struct uct_descent *descent);
ucb1amaf
void ucb1amaf_done(struct uct_policy *p);
static inline floating_t ucb1rave_evaluate(struct uct_policy *p,struct tree *tree,struct uct_descent *descent,int parity);
void ucb1rave_descend(struct uct_policy *p,struct tree *tree,struct uct_descent *descent,int parity,bool allow_pass);
void ucb1amaf_update(struct uct_policy *p,struct tree *tree,struct tree_node *node,enum stone node_color,enum stone player_color,struct playout_amafmap *map,struct board *final_board,floating_t result);
moggy
struct playout_policy *playout_moggy_init(char *arg,struct board *b,struct joseki_dict *jdict); static void playout_moggy_setboard(struct playout_policy *playout_policy,struct board *b); static coord_t playout_moggy_seqchoose(struct playout_policy *p,struct playout_setup *s,struct board *b,enum stone to_play); static void playout_moggy_assess(struct playout_policy *p,struct prior_map *map,int games); static bool playout_moggy_permit(struct playout_policy *p,struct board *b,struct move *m,bool alt);
pattern
static inline hash3_t pattern3_hash(struct board *b,coord_t c);
josekibase
struct joseki_dict *joseki_load(int bsize);
算法
流程