// This file is part of fityk program. Copyright (C) Marcin Wojdyr // Licence: GNU General Public License version 2 // $Id: GAfit.cpp 322 2007-07-24 00:17:11Z wojdyr $ #include #include #include #include #include #include #include #include "common.h" #include "GAfit.h" #include "logic.h" #include "settings.h" #include "numfuncs.h" using namespace std; GAfit::GAfit(Ftk* F) : Fit(F, "Genetic-Algorithms"), popsize (100), elitism(0), mutation_type('u'), p_mutation(0.1), mutate_all_genes(false), mutation_strength(0.1), crossover_type('u'), p_crossover(0.3), selection_type('r'), rank_scoring(false), tournament_size(2), window_size(-1), linear_scaling_a(1.), linear_scaling_c(2.), linear_scaling_b (1.), std_dev_stop(0), iter_with_no_progresss_stop(0), autoplot_indiv_nr(-1), pop(0), opop(0), best_indiv(0) { /* irpar["population-size"] = IntRange (&popsize, 2, 9999); irpar["steady-size"] = IntRange (&elitism, 0, 9999); epar.insert(pair("mutation-type", Enum_string (Distrib_enum, &mutation_type))); fpar["p-mutation"] = &p_mutation; bpar["mutate-all-genes"] = &mutate_all_genes; fpar["mutation-strength"] = &mutation_strength; */ Crossover_enum ['u'] = "uniform"; Crossover_enum ['o'] = "one-point"; Crossover_enum ['t'] = "two-point"; Crossover_enum ['a'] = "arithmetic1"; Crossover_enum ['A'] = "arithmetic2"; Crossover_enum ['g'] = "guaranteed-avg"; /* epar.insert(pair("crossover-type", Enum_string (Crossover_enum, &crossover_type))); fpar["p-crossover"] = &p_crossover; */ Selection_enum ['r'] = "roulette"; Selection_enum ['t'] = "tournament"; Selection_enum ['s'] = "SRS"; Selection_enum ['d'] = "DS"; /* epar.insert (pair("selection-type", Enum_string (Selection_enum, &selection_type))); bpar["rank-scoring"] = &rank_scoring; irpar["tournament-size"] = IntRange (&tournament_size, 2, 999); irpar["window-size"] = IntRange (&window_size, -1, 199); fpar["linear-scaling-a"] = &linear_scaling_a; fpar["linear-scaling-c"] = &linear_scaling_c; fpar["linear-scaling-b"] = &linear_scaling_b; fpar["rel-std-dev-stop"] = &std_dev_stop; ipar["iterations-with-no-progresss-stop"] = &iter_with_no_progresss_stop; fpar["wssr-stop"] = &wssr_stop; irpar["autoplot-indiv-nr"] = IntRange(&autoplot_indiv_nr, -1, 999999999); */ } GAfit::~GAfit() {} fp GAfit::init() { pop = &pop1; opop = &pop2; pop->resize (popsize); vector::iterator best = pop->begin(); for (vector::iterator i = pop->begin(); i != pop->end(); i++) { i->g.resize(na); for (int j = 0; j < na; j++) i->g[j] = draw_a_from_distribution(j); compute_wssr_for_ind (i); if (i->raw_score < best->raw_score) best = i; } best_indiv = *best; return 0; } void GAfit::autoiter() { wssr_before = compute_wssr(a_orig, datsums); F->msg ("WSSR before starting GA: " + S(wssr_before)); assert (pop && opop); if (elitism >= popsize) { F->warn ("hmm, now elitism >= popsize, setting elitism = 1"); elitism = 1; } for (int iter = 0; !termination_criteria_and_print_info(iter); iter++) { autoplot_in_autoiter(); iter_nr ++; pre_selection(); crossover(); mutation(); post_selection(); } post_fit (best_indiv.g, best_indiv.raw_score); } void GAfit::compute_wssr_for_ind (vector::iterator ind) { ind->raw_score = compute_wssr(ind->g, datsums); ind->generation = iter_nr; } void GAfit::autoplot_in_autoiter() { const Individual& indiv = is_index(autoplot_indiv_nr, *pop) ? (*pop)[autoplot_indiv_nr] : best_indiv; iteration_plot(indiv.g); } void GAfit::mutation() { for (vector::iterator i = pop->begin(); i != pop->end(); i++) { if (mutate_all_genes) { if (rand() < RAND_MAX * p_mutation) { for (int j = 0; j < na; j++) i->g[j] = draw_a_from_distribution(j, mutation_type, mutation_strength); compute_wssr_for_ind (i); } } else for (int j = 0; j < na; j++) if (rand() < RAND_MAX * p_mutation) { i->g[j] = draw_a_from_distribution(j, mutation_type, mutation_strength); compute_wssr_for_ind (i); } } } void GAfit::crossover() { for (vector::iterator i = pop->begin(); i != pop->end(); i++) if (rand() < RAND_MAX / 2 * p_crossover) { vector::iterator i2 = pop->begin() + rand()%pop->size(); switch (crossover_type) { case 'u': uniform_crossover (i, i2); break; case 'o': one_point_crossover (i, i2); break; case 't': two_points_crossover (i, i2); break; case 'a': arithmetic_crossover1 (i, i2); break; case 'A': arithmetic_crossover2 (i, i2); break; case 'g': guaranteed_avarage_crossover (i, i2); break; default: F->warn ("No such crossover-type: " + S(crossover_type) + ". Setting to 'u'"); crossover_type = 'u'; uniform_crossover (i, i2); break; } compute_wssr_for_ind (i); compute_wssr_for_ind (i2); } } void GAfit::uniform_crossover (vector::iterator c1, vector::iterator c2) { for (int i = 0; i < na; i++) if (rand() % 2) swap(c1->g[i], c2->g[i]); } void GAfit::one_point_crossover (vector::iterator c1, vector::iterator c2) { int p = rand() % na; for (int j = 0; j < p; j++) swap(c1->g[j], c2->g[j]); } void GAfit::two_points_crossover (vector::iterator c1, vector::iterator c2) { int p1 = rand() % na; int p2 = rand() % na; for (int j = min(p1, p2); j < max(p1, p2); j++) swap(c1->g[j], c2->g[j]); } void GAfit::arithmetic_crossover1 (vector::iterator c1, vector::iterator c2) { fp a = rand_0_1(); for (int j = 0; j < na; j++) { c1->g[j] = a * c1->g[j] + (1 - a) * c2->g[j]; c2->g[j] = (1 - a) * c1->g[j] + a * c2->g[j]; ; } } void GAfit::arithmetic_crossover2 (vector::iterator c1, vector::iterator c2) { for (int j = 0; j < na; j++) { fp a = rand_0_1(); c1->g[j] = a * c1->g[j] + (1 - a) * c2->g[j]; c2->g[j] = (1 - a) * c1->g[j] + a * c2->g[j]; ; } } void GAfit::guaranteed_avarage_crossover (vector::iterator c1, vector::iterator c2) { for (int j = 0; j < na; j++) c1->g[j] = c2->g[j] = (c1->g[j] + c2->g[j]) / 2; } void GAfit::pre_selection() { vector next(popsize - elitism); switch (selection_type) { case 'r': scale_score(); roulette_wheel_selection(next); break; case 't': tournament_selection(next); break; case 's': scale_score(); stochastic_remainder_sampling(next); break; case 'd': scale_score(); deterministic_sampling_selection(next); break; default: F->warn ("No such selection-type: " + S((char)selection_type) + ". Setting to 'r'"); selection_type = 'r'; pre_selection (); return; } opop->resize(next.size(), Individual(na)); for (int i = 0; i < size(next); i++) (*opop)[i] = (*pop)[next[i]]; swap (pop, opop); } void GAfit::post_selection() { if (elitism == 0) return; do_rank_scoring (opop); for (vector::iterator i = opop->begin(); i != opop->end(); i++) if (i->phase_2_score < elitism) pop->push_back (*i); assert (size(*pop) == popsize); } struct ind_raw_sc_cmp { bool operator() (Individual* a, Individual* b) { return a->raw_score < b->raw_score; } }; void GAfit::do_rank_scoring(std::vector *popp) { // rank in population is assigned to phase_2_score // e.g. 0 - the best, 1 - second, (popp.size() - 1) - worst static vector ind_p; ind_p.resize(popp->size()); for (unsigned int i = 0; i < popp->size(); i++) ind_p[i] = &(*popp)[i]; sort (ind_p.begin(), ind_p.end(), ind_raw_sc_cmp()); for (unsigned int i = 0; i < popp->size(); i++) ind_p[i]->phase_2_score = i; } void GAfit::roulette_wheel_selection(vector& next) { vector roulette(pop->size()); //preparing roulette unsigned int t = 0; for (int i = 0; i < size(*pop) - 1; i++) { t += static_cast ((*pop)[i].norm_score * RAND_MAX / size(*pop)); roulette[i] = t; } roulette[size(*pop) - 1] = RAND_MAX; //end of preparing roulette for (vector::iterator i = next.begin(); i != next.end(); i++) *i = lower_bound (roulette.begin(), roulette.end(), static_cast(rand())) - roulette.begin(); } void GAfit::tournament_selection(vector& next) { for (vector::iterator i = next.begin(); i != next.end(); i++) { int best = rand() % pop->size(); for (int j = 1; j < tournament_size; j++) { int n = rand() % pop->size(); if ((*pop)[n].raw_score < (*pop)[best].raw_score) best = n; } *i = best; } } vector::iterator GAfit::SRS_and_DS_common (vector& next) { vector::iterator r = next.begin(); fp f = 1.0 * next.size() / pop->size(); // rescaling for steady-state for (unsigned int i = 0; i < pop->size(); i++) { int n = static_cast((*pop)[i].norm_score * f); fill (r, min (r + n, next.end()), i); r += n; } return min (r, next.end()); } void GAfit::stochastic_remainder_sampling(vector& next) { vector::iterator r = SRS_and_DS_common (next); if (r == next.end()) return; vector rest_of_next (next.end() - r); copy (rest_of_next.begin(), rest_of_next.end(), r); } struct Remainder_and_ptr { int ind; fp r; bool operator< (const Remainder_and_ptr &b) { return r < b.r; } }; void GAfit::deterministic_sampling_selection(vector& next) { vector::iterator r = SRS_and_DS_common (next); if (r == next.end()) return; static vector rem; rem.resize(pop->size()); for (unsigned int i = 0; i < pop->size(); i++) { rem[i].ind = i; fp x = (*pop)[i].norm_score; rem[i].r = x - floor(x); } int rest = next.end() - r; partial_sort (rem.begin(), rem.begin() + rest, rem.end()); for (int i = 0; i < rest; i++, r++) *r = rem[i].ind; assert (r == next.end()); } void GAfit::scale_score () //return value - are individuals varying? { if (rank_scoring) do_rank_scoring(pop); else for (vector::iterator i = pop->begin(); i !=pop->end(); i++) i->phase_2_score = i->raw_score; //scaling p -> q - p; p -> p /

fp q = max_in_window(); if (q < 0) q = std_dev_based_q(); q += linear_scaling_b; fp sum = 0; for (vector::iterator i = pop->begin(); i != pop->end(); i++) { i->reversed_score = max (q - i->phase_2_score, 0.); sum += i->reversed_score; } if (sum == 0) //to avoid x/0 return; fp avg_rev_sc = sum / pop->size(); for (vector::iterator i = pop->begin(); i != pop->end(); i++) i->norm_score = i->reversed_score / avg_rev_sc; } fp GAfit::std_dev_based_q() { fp sum_p = 0, sum_p2 = 0; for (vector::iterator i = pop->begin(); i != pop->end(); i++) { sum_p += i->phase_2_score; sum_p2 += i->phase_2_score * i->phase_2_score; } fp avg_p2 = sum_p2 / pop->size(); fp avg_p = sum_p / pop->size(); fp sq_sigma = avg_p2 - avg_p * avg_p; fp sigma = sq_sigma > 0 ? sqrt (sq_sigma) : 0; //because of problem with return linear_scaling_a * avg_p + linear_scaling_c * sigma; // sqrt(0) } fp GAfit::max_in_window () { // stores the worst raw_score in every of last hist_len generations // return the worst (max) raw_score in last window_size generations const int hist_len = 200; static deque max_raw_history (hist_len, 0); max_raw_history.push_front (tmp_max); max_raw_history.pop_back(); assert (window_size <= hist_len); if (window_size > 0) { if (rank_scoring) return size(*pop) - 1; else return *max_element (max_raw_history.begin(), max_raw_history.begin() + window_size); } else return -1; } bool GAfit::termination_criteria_and_print_info (int iter) { static int no_progress_iters = 0; fp sum = 0; fp min = pop->front().raw_score; tmp_max = min; vector::iterator ibest = pop->begin(); for (vector::iterator i = pop->begin(); i != pop->end(); i++) { if (i->raw_score < min) { min = i->raw_score; ibest = i; } if (i->raw_score > tmp_max) tmp_max = i->raw_score; sum += i->raw_score; } fp avg = sum / pop->size(); fp sq_sum = 0; fp generations_sum = 0; for (vector::iterator i = pop->begin(); i != pop->end(); i++) { fp d = i->raw_score - avg; sq_sum += d * d; generations_sum += i->generation; } fp std_dev = sq_sum > 0 ? sqrt (sq_sum / pop->size()) : 0; F->msg("Population #" + S(iter_nr) + ": best " + S(min) + ", avg " + S(avg) + ", worst " + S(tmp_max) + ", std dev. " + S(std_dev)); if (min < best_indiv.raw_score) { best_indiv = *ibest; no_progress_iters = 0; } else no_progress_iters ++; //checking stop conditions bool stop = false; if (common_termination_criteria(iter)) stop = true; if (std_dev < std_dev_stop * avg) { F->msg("Standard deviation of results is small enough to stop"); stop = true; } if (iter_with_no_progresss_stop > 0 && no_progress_iters >= iter_with_no_progresss_stop) { F->msg("No progress in " + S(no_progress_iters) + " iterations. Stop"); stop = true; } if (min <= wssr_stop) { F->msg("WSSR is small enough to stop"); stop = true; } return stop; }