/* atlc - arbitrary transmission line calculator, for the analysis of transmission lines are directional couplers. Copyright (C) 2002. Dr. David Kirkby, PhD (G8WRB). This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either package_version 2 of the License, or (at your option) any later package_version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. Dr. David Kirkby, e-mail drkirkby at ntlworld.com */ #include "config.h" #define SLOW #ifndef ENABLE_POSIX_THREADS /* If we are not comiling for multi-threaded use, a function is defined, but it is not used. This keeps the linker happy */ double finite_difference_multi_threaded() { return(0.0); } #endif #ifdef ENABLE_POSIX_THREADS #ifdef HAVE_SYS_TYPES_H #include #endif #ifdef HAVE_STDLIB_H #include #endif #include #ifdef HAVE_SCHED_H #include #endif #ifdef HAVE_STRING_H #include #endif #include "definitions.h" #include "exit_codes.h" extern int coupler; extern int width, height; extern double **Vij, **Er; extern signed char **oddity; extern int number_of_workers; /* The algorithm for this is based on one in the book 'Foundations of Multithraded, Parallel and Distributed Programming' by G. A. Andrews, (2000). Especially chapter 11, Fig 11.6, "Red/Black Gauss-Seidel using shared vairables'. Basically the array is a considered a chess board. Since we only use in the computation the values above, below, to the left and the right of the current pixel, these will all be the same colour on the chess board (black or white). So we compute all the white squares, which can all be done in parallel, since they don't depend on each other. Once the white squares are done, the black ones can be done. Again, these don't depend on the white squares, but only the black ones. So the algorithm does 1) Covers the array with a large chess board with black and white squares. B W B W B W B W B W B W B W B W B W B W B W B W B W B W W B W B W B W B W B W B W B W B W B W B W B W B W B W B B W B W B W B W B W B W B W B W B W B W B W B W B W B W W B W B W B W B W B W B W B W B W B W B W B W B W B W B B W B W B W B W B W B W B W B W B W B W B W B W B W B W W B W B W B W B W B W B W B W B W B W B W B W B W B W B B W B W B W B W B W B W B W B W B W B W B W B W B W B W W B W B W B W B W B W B W B W B W B W B W B W B W B W B B W B W B W B W B W B W B W B W B W B W B W B W B W B W W B W B W B W B W B W B W B W B W B W B W B W B W B W B B W B W B W B W B W B W B W B W B W B W B W B W B W B W W B W B W B W B W B W B W B W B W B W B W B W B W B W B 2) Split the array into a number of columns, one for each CPU. B W B W B W B W B W B W B W B W B W B W B W B W B W B W W B W B W B W B W B W B W B W B W B W B W B W B W B W B B W B W B W B W B W B W B W B W B W B W B W B W B W B W W B W B W B W B W B W B W B W B W B W B W B W B W B W B B W B W B W B W B W B W B W B W B W B W B W B W B W B W W B W B W B W B W B W B W B W B W B W B W B W B W B W B B W B W B W B W B W B W B W B W B W B W B W B W B W B W W B W B W B W B W B W B W B W B W B W B W B W B W B W B B W B W B W B W B W B W B W B W B W B W B W B W B W B W W B W B W B W B W B W B W B W B W B W B W B W B W B W B B W B W B W B W B W B W B W B W B W B W B W B W B W B W W B W B W B W B W B W B W B W B W B W B W B W B W B W B 3) Do all columns in parallel on the black squares. B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B B Thread 0 Thread 1 Thread 2 Thread 3 4) Wait until all the black squares are done. 5) Compute all the white squares in parallel. W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W W Thread 0 Thread 1 Thread 2 Thread 3 6) Repeat a large number (typically 100) times. 7) Check for convergence. 8) Stop if the solution has converged. */ extern double r; pthread_mutex_t barrier; /* mutex semaphore for the barrier */ pthread_cond_t go; /* condition variable for leaving */ int numArrived = 0; /* count of the number who have arrived */ #define BARRIER2 #ifdef BARRIER1 void Barrier() { #ifndef DEBUG pthread_mutex_lock(&barrier); #else if( pthread_mutex_lock(&barrier) != 0 ) exit_with_msg_and_exit_code("pthread_mutex_lock failed",PTHREAD_MUTEX_LOCK_FAILED); #endif numArrived++; if (numArrived == number_of_workers) { numArrived = 0; pthread_cond_broadcast(&go); } else pthread_cond_wait(&go, &barrier); #ifndef DEBUG pthread_mutex_unlock(&barrier); #else if( pthread_mutex_unlock(&barrier) != 0 ) exit_with_msg_and_exit_code("pthread_mutex_unlock failed",PTHREAD_MUTEX_UNLOCK_FAILED); #endif } #endif #ifdef BARRIER2 int numArrived2[2] = {0, 0}; int barrierNdx = 0; void Barrier() { int localNdx; #ifndef DEBUG pthread_mutex_lock(&barrier); #else if( pthread_mutex_lock(&barrier) != 0 ) exit_with_msg_and_exit_code("pthread_mutex_lock failed in finite_difference_multi_threaded.c",PTHREAD_MUTEX_LOCK_FAILED); #endif numArrived2[barrierNdx]++; if (numArrived2[barrierNdx] == number_of_workers) { barrierNdx = (barrierNdx + 1)%2; /* toggle */ numArrived2[barrierNdx] = 0; /* reset other count */ #ifndef DEBUG pthread_cond_broadcast(&go); #else if( pthread_cond_broadcast(&go) != 0) exit_with_msg_and_exit_code("pthread_cond_broadcast failed in finite_difference_multi_threaded.c",PTHREAD_COND_BROADCAST_FAILED); #endif } else { localNdx = barrierNdx; /* wait on "current" numArrived. */ while (numArrived2[localNdx] != number_of_workers) #ifndef DEBUG pthread_cond_wait(&go, &barrier); #else if( pthread_cond_wait(&go, &barrier) != 0) exit_with_msg_and_exit_code("pthread_cond_wait failed finite_difference_multi_threaded.c",PTHREAD_COND_WAIT_FAILED); #endif } #ifndef DEBUG pthread_mutex_unlock(&barrier); #else if( pthread_mutex_unlock(&barrier) != 0 ) exit_with_msg_and_exit_code("pthread_mutex_unlock failed finite_difference_multi_threaded.c",PTHREAD_MUTEX_UNLOCK_FAILED); #endif } #endif /* Each Worker computes values in one strip of the grids. The main worker loop does two computations to avoid copying from one grid to the other. */ void *worker(void *arg) { int myid = (int) arg; double r_over_4,a,b,c,d,e,f,g,h; double one_minus_r; int i, j, iters, jstart; int firstcol, lastcol; int strip_width=width/number_of_workers; firstcol = myid*strip_width+1; lastcol = firstcol + strip_width - 1; r_over_4=r/4.0; one_minus_r=1-r; if(myid == number_of_workers -1) lastcol=width-2; Barrier(); for (iters = 1; iters <= ITERATIONS; iters++) { for(i= firstcol ; i <= lastcol; ++i){ if(i%2 ==1 ) jstart=1; else jstart=2; for(j=jstart ; j < height-1 ; j+=2){ if(oddity[i][j] == ORDINARY_INTERIOR_POINT) /* Same dielectric all around */ Vij[i][j]=r_over_4*(Vij[i][j+1]+Vij[i+1][j]+Vij[i][j-1]+Vij[i-1][j])+one_minus_r*Vij[i][j]; else if(oddity[i][j] > ORDINARY_INTERIOR_POINT) /* only update dielectrics, not conductors */ { a=(Er[i][j] * Er[i][j-1] * Vij[i][j-1])/(Er[i][j] + Er[i][j-1]); b=(Er[i][j] * Er[i][j+1] * Vij[i][j+1])/(Er[i][j] + Er[i][j+1]); c=(Er[i][j] * Er[i-1][j] * Vij[i-1][j])/(Er[i][j] + Er[i-1][j]); d=(Er[i][j] * Er[i+1][j] * Vij[i+1][j])/(Er[i][j] + Er[i+1][j]); e=(Er[i][j] * Er[i][j-1])/(Er[i][j]+Er[i][j-1]); f=(Er[i][j] * Er[i][j+1])/(Er[i][j]+Er[i][j+1]); g=(Er[i][j] * Er[i-1][j])/(Er[i][j]+Er[i-1][j]); h=(Er[i][j] * Er[i+1][j])/(Er[i][j]+Er[i+1][j]); Vij[i][j]=r*(a+b+c+d)/(e+f+g+h) + (1-r)*Vij[i][j]; } } } Barrier(); for(i= firstcol ; i <= lastcol; ++i){ if(i%2 ==1 ) jstart=2; else jstart=1; for(j=jstart ; j < height -1; j+=2){ if(oddity[i][j] == ORDINARY_INTERIOR_POINT) /* Same dielectric all around */ Vij[i][j]=r_over_4*(Vij[i][j+1]+Vij[i+1][j]+Vij[i][j-1]+Vij[i-1][j])+one_minus_r*Vij[i][j]; else if(oddity[i][j] > ORDINARY_INTERIOR_POINT) /* only update dielectrics, not conductors */ { a=(Er[i][j] * Er[i][j-1] * Vij[i][j-1])/(Er[i][j] + Er[i][j-1]); b=(Er[i][j] * Er[i][j+1] * Vij[i][j+1])/(Er[i][j] + Er[i][j+1]); c=(Er[i][j] * Er[i-1][j] * Vij[i-1][j])/(Er[i][j] + Er[i-1][j]); d=(Er[i][j] * Er[i+1][j] * Vij[i+1][j])/(Er[i][j] + Er[i+1][j]); e=(Er[i][j] * Er[i][j-1])/(Er[i][j]+Er[i][j-1]); f=(Er[i][j] * Er[i][j+1])/(Er[i][j]+Er[i][j+1]); g=(Er[i][j] * Er[i-1][j])/(Er[i][j]+Er[i-1][j]); h=(Er[i][j] * Er[i+1][j])/(Er[i][j]+Er[i+1][j]); Vij[i][j]=r*(a+b+c+d)/(e+f+g+h) + (1-r)*Vij[i][j]; } } } Barrier(); } Barrier(); return(0); } double finite_difference_multi_threaded() { int i, j, ret, thread_number; pthread_t thread_id[MAXIMUM_PROCESSING_DEVICES]; double capacitance_per_metre, energy_per_metre; /* initialize mutex and condition variable */ pthread_mutex_init(&barrier, NULL); pthread_cond_init(&go, NULL); for(thread_number=0;thread_number