#include "includes.h"
#include "knightcap.h"

/*
 * Temporal-difference learning support: dumping the learned evaluation
 * coefficients and (when LEARN_EVAL is set) updating them from the
 * positions stored during a game.
 *
 * NOTE(review): this copy of the file has lost several spans of text
 * (everything between a '<' and a later '>' in a few loops of
 * td_gradient() and td_update()).  Those spots are reproduced verbatim
 * and flagged individually below; they must be restored from a pristine
 * copy before this file can be built.
 */

#define TD_LAMBDA 0.7
#define TD_ALPHA (10/(EVAL_SCALE))

#define MAX_ROUNDS 4
#define MAX_SIZE 50

static int total_rounds;

struct max_struct {
    double val;
    int i,j,k;
};

/* Three-way comparison of two max_struct entries by their val field. */
static int max_compare(struct max_struct *m1, struct max_struct *m2)
{
    if (m1->val > m2->val)
        return 1;
    else if (m1->val == m2->val)
        return 0;
    return -1;
}

extern struct state *state;
extern int player;
extern int dont_change[];

/* printable names of the game stages, indexed OPENING..MATING */
char *stage_name[] = {"OPENING", "MIDDLE", "ENDING", "MATING"};

#include "names.h"

/*
 * Print one coefficient group on a single line.
 *
 * The group starts at cn->index and runs up to the index of the next
 * entry in the name table.  Full values go to `large`; if `small` is
 * non-NULL the same values divided by 100 are written there.
 */
static void p_coeff_vector(struct coefficient_name *cn, FILE *large, FILE *small)
{
    int x;

    fprintf(large,"/* %s */\n", cn->name);
    if (small)
        fprintf(small,"/* %s */\n", cn->name);
    for (x=0; x<(cn+1)->index - cn->index; x++) {
        fprintf(large,"%7d,", coefficients[cn->index + x]);
        if (small)
            fprintf(small,"%7d,", coefficients[cn->index + x]/100);
    }
    fprintf(large,"\n");
    if (small)
        fprintf(small,"\n");
}

/*
 * Same as p_coeff_vector(), but breaks the output line after every
 * tenth value.
 */
static void p_coeff_array(struct coefficient_name *cn, FILE *large, FILE *small)
{
    int x;

    fprintf(large,"/* %s */\n", cn->name);
    if (small)
        fprintf(small,"/* %s */\n", cn->name);
    for (x=0; x<(cn+1)->index - cn->index; x++) {
        fprintf(large,"%7d,", coefficients[cn->index + x]);
        if (small)
            fprintf(small,"%7d,", coefficients[cn->index + x]/100);
        if ((x+1)%10 == 0) {
            fprintf(large, "\n");
            if (small)
                fprintf(small, "\n");
        }
    }
    fprintf(large,"\n");
    if (small)
        fprintf(small,"\n");
}

/* Print a 64-entry coefficient group as 8 rows of 8 values. */
static void p_coeff_board(struct coefficient_name *cn, FILE *large, FILE *small)
{
    int x, y;

    fprintf(large,"/* %s */\n", cn->name);
    if (small)
        fprintf(small,"/* %s */\n", cn->name);
    for (y=0; y<8; y++) {
        for (x=0; x<8; x++) {
            fprintf(large,"%7d,", coefficients[cn->index + x + y*8]);
            if (small)
                fprintf(small,"%7d,", coefficients[cn->index + x + y*8]/100);
        }
        fprintf(large,"\n");
        if (small)
            fprintf(small,"\n");
    }
}

/* Print a 32-entry coefficient group as 8 rows of 4 values. */
static void p_coeff_half_board(struct coefficient_name *cn, FILE *large, FILE *small)
{
    int x, y;

    fprintf(large,"/* %s */\n", cn->name);
    if (small)
        fprintf(small,"/* %s */\n", cn->name);
    for (y=0; y<8; y++) {
        for (x=0; x<4; x++) {
            fprintf(large,"%7d,", coefficients[cn->index + x + y*4]);
            if (small)
                fprintf(small,"%7d,", coefficients[cn->index + x + y*4]/100);
        }
        fprintf(large,"\n");
        if (small)
            fprintf(small,"\n");
    }
}

/*
 * Write the current coefficients (new_coefficients) out in two forms:
 *
 *  - as C source ("etype orig_coefficients[] = {...};") into a header
 *    file whose name depends on LARGE_ETYPE and on `round` (a numbered
 *    file under /usr/local/chess when round >= 0, otherwise a file in the
 *    current directory).  With LARGE_ETYPE a second header,
 *    small_coeffs.h, receives the values divided by 100.
 *  - as a raw binary image of new_coefficients[] into `fname`.
 *
 * Side effects: stores total_rounds in state->total_rounds and leaves the
 * global `coefficients` pointing at the last stage's slice.
 * Errors opening a file are reported with perror() and abort the dump.
 */
void dump_coeffs(char *fname, int round)
{
    struct coefficient_name *cn;
    FILE *large, *small;
    int fd;
    int i;
    char fn[160];

#if LARGE_ETYPE
    if (round >= 0)
        snprintf(fn, sizeof(fn), "/usr/local/chess/large_coeffs%d.h", round);
    else
        snprintf(fn, sizeof(fn), "large_coeffs.h");
#else
    if (round >= 0)
        snprintf(fn, sizeof(fn), "/usr/local/chess/small_coeffs%d.h", round);
    else
        snprintf(fn, sizeof(fn), "small_coeffs.h");
#endif

    large = fopen(fn, "w");
    if (large == NULL) {
        /* report the path that actually failed, not the binary file name */
        perror(fn);
        return;
    }

#if LARGE_ETYPE
    /* the reduced-precision header is best effort: carry on without it */
    small = fopen("small_coeffs.h", "w");
    if (small == NULL)
        perror("small_coeffs.h");
#else
    small = NULL;
#endif

    state->total_rounds = total_rounds;

    fprintf(large, "etype orig_coefficients[] = {\n");
    if (small)
        fprintf(small, "etype orig_coefficients[] = {\n");

    for (i=OPENING; i<=MATING; i++) {
        fprintf(large, "\n/* %%%s%% */\n", stage_name[i]);
        if (small)
            fprintf(small, "\n/* %%%s%% */\n", stage_name[i]);
        cn = &coefficient_names[0];
        /* the p_coeff_* helpers read through the global `coefficients` */
        coefficients = new_coefficients + i*__COEFFS_PER_STAGE__;
        while (cn->name) {
            /* group size = distance to the next entry's start index */
            int n = cn[1].index - cn[0].index;
            if (n == 1) {
                fprintf(large, "/* %s */ %d,\n",
                        cn[0].name, coefficients[cn[0].index]);
                if (small)
                    fprintf(small, "/* %s */ %d,\n",
                            cn[0].name, coefficients[cn[0].index]/100);
            } else if (n == 64) {
                p_coeff_board(cn,large,small);
            } else if (n == 32) {
                p_coeff_half_board(cn,large,small);
            } else if (n % 10 == 0) {
                p_coeff_array(cn,large,small);
            } else {
                p_coeff_vector(cn,large,small);
            }
            cn++;
        }
    }

    fprintf(large, "};\n");
    if (small)
        fprintf(small, "};\n");

    fclose(large);
    if (small)
        fclose(small);

    fd = open(fname, O_WRONLY | O_CREAT | O_TRUNC, 0666);
    if (fd == -1) {
        perror(fname);
        return;
    }
    Write(fd, (char *)new_coefficients,
          __TOTAL_COEFFS__*sizeof(new_coefficients[0]));
    close(fd);
    return;
}

/*
 * Dump the coefficients via dump_coeffs(fname, total_rounds) and print
 * the summed absolute difference between new_coefficients and
 * orig_coefficients.  Always returns 1.
 */
int td_dump(char *fname)
{
    int i;
    etype sum;

    dump_coeffs(fname, total_rounds);
    sum = 0;
    for (i=0; i<__TOTAL_COEFFS__; i++) {
        sum += ABS(new_coefficients[i] - orig_coefficients[i]);
    }
    /* NOTE(review): "%d" assumes etype promotes to int - confirm */
    cprintf(0,"%d\n", sum);
    return 1;
}

/* routines for updating the evaluation function according to the
   method of temporal differences */
#if LEARN_EVAL
/*
 * Append a copy of position *b to state->leaf_pos[], print its board,
 * and remember which side the computer is playing (state->td_comp) when
 * state->computer is non-zero.  Always returns 1.
 *
 * NOTE(review): no bound check on state->stored_move_num is done here;
 * the capacity of leaf_pos[] is not visible in this file.
 */
int td_store_pos(Position *b)
{
    state->leaf_pos[state->stored_move_num] = *b;
    print_board(state->leaf_pos[state->stored_move_num].board);
    ++state->stored_move_num;
    if (state->computer != 0)
        state->td_comp = state->computer;
    return 1;
}

/*
 * Calculate the partial derivative of the eval function with respect to
 * each of the coefficients, numerically, for every stored position.
 *
 * big_grad is laid out as one row of __TOTAL_COEFFS__ floats per stored
 * position; only the slice belonging to the position's stage is written.
 * Also records the (sign-adjusted) evaluation of each position in
 * state->leaf_eval[].v.
 *
 * Returns 0 if a stored position has an out-of-range stage, otherwise
 * __COEFFS_PER_STAGE__.
 */
int td_gradient(float *big_grad)
{
    etype v, v2;
    int i, n, m;
    etype delta = 100;
    float *grad;
    Position *b, b1;
#if TEST_GRADIENT
    float error;
    etype v3, v4;
#endif

    n = __COEFFS_PER_STAGE__;

    for (m = 0; m < state->stored_move_num; m++) {
        b = state->leaf_pos+m;
        lprintf(0, "%d ", m);
        /* sanity check */
        if (b->stage < OPENING || b->stage > MATING) {
            lprintf(0, "**Wrong stage in gradient calc: %d\n", b->stage);
            return 0;
        }

        /* clear the cached-evaluation flags before re-evaluating */
        b->flags &= ~FLAG_EVAL_DONE;
        b->flags &= ~FLAG_DONE_TACTICS;
        b1 = (*b);
        v = eval_etype(&b1, INFINITY, MAX_DEPTH);
        lprintf(0, "%d %d\n", v, b1.stage);
        state->leaf_eval[m].v = next_to_play(b)*v;
        if (!state->demo_mode) {
            state->leaf_eval[m].v *= state->td_comp;
        }

        coefficients = new_coefficients + b->stage*__COEFFS_PER_STAGE__;
        grad = big_grad + __TOTAL_COEFFS__*m + b->stage*__COEFFS_PER_STAGE__;

        /*
         * NOTE(review): the next line is truncated in this copy of the
         * file - the text between "i" and "IPIECE_VALUES" is missing,
         * apparently including the loop condition and the opening brace
         * of the loop body (two closing braces follow the #endif below
         * but only one opening brace is visible).  Reproduced verbatim.
         */
        for (i=0;i IPIECE_VALUES && i < IPIECE_VALUES+KING) create_pboard(&b1);
            v2 = eval_etype(&b1, INFINITY, MAX_DEPTH);
            grad[i] = next_to_play(&b1)*(v2 - v) / (float)delta;
            if (!state->demo_mode) {
                grad[i] *= state->td_comp;
            }
#if TEST_GRADIENT
            coefficients[i] += delta;
            b1 = (*b);
            if (i > IPIECE_VALUES && i < IPIECE_VALUES+KING)
                create_pboard(&b1);
            v3 = eval_etype(&b1, INFINITY, MAX_DEPTH);
            coefficients[i] -= 2*delta;
            b1 = (*b);
            if (i > IPIECE_VALUES && i < IPIECE_VALUES+KING)
                create_pboard(&b1);
            v4 = eval_etype(&b1, INFINITY, MAX_DEPTH);
            error = next_to_play(&b1)*(v3 - v);
            if (!state->demo_mode)
                error *= state->td_comp;
            error -= 2*delta*grad[i];
            error /= delta;
            if (ABS(error)>0.05) {
                lprintf(0,"***coeff: %d grad: %f error: %f %e %e %e %e\n",
                        i, grad[i], error, v, v2, v3, v4);
            }
#else
            coefficients[i] -= delta;
#endif
        }
    }

    return n;
}

/*
 * Append position *b1 to the end of the file open on descriptor fd,
 * logging an error if the write is short.
 */
void td_save_bad(int fd, Position *b1)
{
    int x;

    lseek(fd, 0, SEEK_END);
    if ((x = Write(fd, (char *)b1, sizeof(Position))) != sizeof(Position)) {
        /* sizeof yields size_t; cast so the argument matches "%d" */
        lprintf(0,"***Error saving bad eval position %d %d\n",
                (int)sizeof(Position), x);
    }
}

/*
 * Updates the coefficients according to the TD(lambda) algorithm.
 *
 * Does nothing if the game was already analysed (state->analysed) or if
 * there are no stored positions (or more than 300).  Returns 0 on every
 * path visible in this copy of the file.
 *
 * NOTE(review): grad[] is a large automatic array
 * (300*__TOTAL_COEFFS__ floats); its actual size depends on
 * __TOTAL_COEFFS__, which is defined elsewhere.
 */
int td_update()
{
    int fd;
    int i,j,n,t;
    int argmax = 0;    /* was uninitialised, yet is always logged below */
    int num_moves;
    int rounds = 0;
    float grad[300*__TOTAL_COEFFS__];
    double c, max;
    double dw[__TOTAL_COEFFS__];
    double olddw[__TOTAL_COEFFS__];
    double tanhv[MAX_GAME_MOVES];
    double d[MAX_GAME_MOVES];
    double oldnorm, newnorm, dotprod, angle;
    FILE *f;

    if (state->analysed)
        return 0;

    if ((f = fopen("rounds.dat", "r")) != NULL) {
        fscanf(f, "%d\n", &rounds);
        fclose(f);
    }

    if ((f = fopen("total_rounds.dat", "r")) != NULL) {
        fscanf(f, "%d\n", &total_rounds);
        fclose(f);
    }

    memset(dw, 0, sizeof(dw));
    memset(olddw, 0, sizeof(olddw));

#if DUMPING_TD_UPDATES
    fd = open("update.dat", O_RDONLY);
    if (fd != -1) {
        if (read(fd, olddw, __TOTAL_COEFFS__*sizeof(olddw[0])) !=
            __TOTAL_COEFFS__*sizeof(olddw[0])) {
            lprintf(0, "update file corrupt\n");
        } else {
            memcpy(dw, olddw, __TOTAL_COEFFS__*sizeof(olddw[0]));
        }
        /* only close a descriptor that was actually opened */
        close(fd);
    }
#endif

    if (state->stored_move_num == 0 || state->stored_move_num > 300) {
        lprintf(0, "no gradient information: %d\n", state->stored_move_num);
        return 0;
    }

    memset(grad, 0, 300*__TOTAL_COEFFS__*sizeof(grad[0]));

    /* on a time forfeit by the ICS robot the last stored move is dropped */
    if (state->ics_robot && result() == TIME_FORFEIT)
        num_moves = state->stored_move_num-1;
    else
        num_moves = state->stored_move_num;

    lprintf(0,"***moves: %d\n", num_moves);
    n = __TOTAL_COEFFS__;
    if (td_gradient(grad)) {
        lprintf(0,"gradients calculated\n");
    } else {
        lprintf(0,"gradient error\n");
        return 0;
    }

    /* Squash the evals and compute the temporal differences */
    tanhv[0] = tanh(EVAL_SCALE*state->leaf_eval[0].v);
    /*
     * NOTE(review): the loop header below is truncated in this copy of
     * the file (text between "t" and "leaf_eval" is missing).
     * Reproduced verbatim.
     */
    for (t=0; tleaf_eval[t+1].v);
        d[t] = tanhv[t+1] - tanhv[t];
        if (state->predicted_move[t+1] == -1 && !state->demo_mode &&
            state->rating_change < 0)
            d[t] = RAMP(d[t]);
    }

    /* work out the outcome */
    if (state->demo_mode) {
        switch (state->won) {
        case STALEMATE: {
            if (NO_STALEMATE_LEARN)
                return 0;
            d[num_moves-1] =
                tanh(EVAL_SCALE*DRAW_VALUE) - tanhv[num_moves-1];
            break;
        }
        case 1: {
            d[num_moves-1] = 1.0 - tanhv[num_moves-1];
            break;
        }
        case 0: {
            d[num_moves-1] = -1.0 - tanhv[num_moves-1];
            break;
        }
        default:
            break;
        }
    } else {
        switch (result()) {
        case STALEMATE: {
            if (NO_STALEMATE_LEARN)
                return 0;
            d[num_moves-1] =
                tanh(EVAL_SCALE*DRAW_VALUE) - tanhv[num_moves-1];
            break;
        }
        case 1: {
            d[num_moves-1] = 1.0 - tanhv[num_moves-1];
            break;
        }
        case 0: {
            d[num_moves-1] = -1.0 - tanhv[num_moves-1];
            break;
        }
        /* for time forfeited or resigned games we just assume
           the final eval was correct */
        case TIME_FORFEIT: {
            d[num_moves-1] = 0.0;
            break;
        }
        default:
            break;
        }
    }

    if (state->predicted_move[num_moves] == -1 && !state->demo_mode &&
        state->rating_change < 0)
        d[num_moves-1] = RAMP(d[num_moves-1]);

    lprintf(0,"outcome: %d %d %d\n", state->won, state->colour,
            state->position.winner);
    /*
     * NOTE(review): truncated in this copy of the file (text between "i"
     * and "leaf_eval" is missing).  Reproduced verbatim.
     */
    for (i=0; ileaf_eval[i].v, d[i]);
    }

    /* calculate the coefficient updates */
    max = 0.0;
    j=0;
    /*
     * NOTE(review): truncated in this copy of the file (text between "i"
     * and "max)" is missing - the code that fills dw[] is not visible).
     * Reproduced verbatim.
     */
    for (i=0; i max) {
            max = ABS(dw[i]);
            argmax = i;
        }
    }
    lprintf(0,"max: %lf %d\n", TD_ALPHA*max, argmax);

    oldnorm = 0.0;
    newnorm = 0.0;
    dotprod = 0.0;
    /*
     * NOTE(review): truncated in this copy of the file (everything between
     * "i" and "analysed = 1" is missing).  Reproduced verbatim.
     */
    for (i=0; ianalysed = 1;
    return 0;
}
#else
/* placeholder so the translation unit is not empty without LEARN_EVAL */
void td_dummy(void)
{
}
#endif