UltraScan III
ga_worker.cpp
Go to the documentation of this file.
1 #include "us_mpi_analysis.h"
2 #include "us_math2.h"
3 
4 QDateTime elapsed = QDateTime::currentDateTime();
5 #define ELAPSEDNOW (elapsed.msecsTo(QDateTime::currentDateTime()))
6 #define ELAPSEDSEC (elapsed.secsTo(QDateTime::currentDateTime()))
7 #define DbTimMsg(a) DbTiming << my_rank << generation << ELAPSEDNOW << a;
8 #define DRTiming DbTiming << my_rank
9 
10 #if 1
11 #define UPDATE_FIT
12 #endif
13 
14 // Minimize prints debug level
15 //#define DL 0
16 #define DL 1
17 
19 {
20  current_dataset = 0;
22 
23  // Initialize grid values
24  QStringList keys = parameters.keys();
25 
26  s_grid = ( keys.contains( "s_grid" ) )
27  ? parameters[ "s_grid" ].toInt() : 100;
28  k_grid = ( keys.contains( "k_grid" ) )
29  ? parameters[ "k_grid" ].toInt() : 100;
30 
31  static const double extn_s = (double)( s_grid - 1 );
32  static const double extn_k = (double)( k_grid - 1 );
33 
34  for ( int b = 0; b < buckets.size(); b++ )
35  {
36  double x_min = buckets[ b ].x_min;
37  double x_max = buckets[ b ].x_max;
38  double y_min = buckets[ b ].y_min;
39  double y_max = buckets[ b ].y_max;
40 
41  buckets[ b ].ds = ( x_max - x_min ) / extn_s;
42  buckets[ b ].dk = ( y_max - y_min ) / extn_k;
43  }
44 
45  MPI_GA_MSG msg;
46  MPI_Status status;
47  MPI_Job job;
48  bool finished = false;
49  int grp_nbr = ( my_rank / gcores_count );
50  int deme_nbr = my_rank - grp_nbr * gcores_count;
51 
52  while ( ! finished )
53  {
55 
56  msg.size = max_rss();
57  DbgLv(0) << "Deme" << grp_nbr << deme_nbr
58  << ": Generations finished, second" << ELAPSEDSEC;
59 
60  MPI_Send( &msg, // This iteration is finished
61  sizeof( msg ), // to MPI #1
62  MPI_BYTE,
64  FINISHED,
66 
67  MPI_Recv( &job, // Find out what to do next
68  sizeof( job ), // from MPI #0, MPI #7, MPI #9
69  MPI_BYTE,
70  MPI_ANY_SOURCE,
71  MPI_ANY_TAG,
73  &status );
74 
75  int dataset = job.dataset_offset;
76  int count = job.dataset_count;
77  int length = job.length;
78  int index = 0;
79 
80  switch ( status.MPI_TAG )
81  {
82  case FINISHED:
83  finished = true;
84  DbgLv(0) << " Deme" << grp_nbr << deme_nbr << ":"
85  << fitness_hits << "fitness hits of" << fitness_count
86  << " fitness checks maxrss" << maxrss;
87  break;
88 
89  case UPDATE:
90  // Update data for global fit or Monte Carlo
91  // Global fit comes before MC (if necessary), one dataset at a time
92  // Monte Carlo always comes as a sequence of all datasets
93 
94  mc_data.resize( length );
95 
96  MPI_Barrier( my_communicator );
97 
98  // This is a receive
99  MPI_Bcast( mc_data.data(), // from MPI #8, #10
100  length,
101  MPI_DOUBLE,
103  my_communicator );
104 
105  for ( int e = dataset; e < dataset + count; e++ )
106  {
107  US_DataIO::EditedData* data = &data_sets[ e ]->run_data;
108 
109  int scan_count = data->scanCount();
110  int radius_points = data->pointCount();
111 
112  for ( int s = 0; s < scan_count; s++ )
113  {
114  US_DataIO::Scan* scan = &data->scanData[ s ];
115 
116  for ( int r = 0; r < radius_points; r++ )
117  {
118  scan->rvalues[ r ] = mc_data[ index++ ];
119  }
120  }
121  }
122 
123  if ( count == data_sets.size() ) // Next iteration will be global
124  {
125  current_dataset = 0;
127  }
128  else // Next dataset is a part of global fit
129  {
130  current_dataset = dataset;
131  // datasets_to_process will stay at 1
132  }
133 
134  break;
135 
136  case GENERATION:
137  break;
138 
139  default:
140  abort( "Unknown message at end of GA worker loop" );
141  break;
142 
143  } // end switch
144  } // end while
145 }
146 
148 {
149  // Initialize genes
150  genes.clear();
151  population = parameters[ "population" ].toInt();
152 
153  for ( int i = 0; i < population; i++ ) genes << new_gene();
154 
155 // // Let calc_residuals get the meniscus directly from the edited data for
156 // // each data set
157 // meniscus_value = -1.0;
158 
159  fitness.reserve( population );
160 
161  Fitness empty_fitness;
162  empty_fitness.fitness = LARGE;
163 
164  for ( int i = 0; i < population; i++ )
165  {
166  empty_fitness.index = i;
167  fitness << empty_fitness;
168  }
169 
170  int generations = parameters[ "generations" ].toInt();
171  int crossover = parameters[ "crossover" ].toInt();
172  int mutation = parameters[ "mutation" ].toInt();
173  int plague = parameters[ "plague" ].toInt();
174  int elitism = parameters[ "elitism" ].toInt();
175 
176  int p_mutate = mutation;
177  int p_crossover = p_mutate + crossover;
178  int p_plague = p_crossover + plague;
179  int grp_nbr = ( my_rank / gcores_count );
180  int deme_nbr = my_rank - grp_nbr * gcores_count;
181 
182  fitness_map.clear();
183  fitness_hits = 0;
184  fitness_count = 0;
185 
186  max_rss();
187 
188  QDateTime start = QDateTime::currentDateTime();
189  MPI_GA_MSG msg;
190 
191  for ( generation = 0; generation < generations; generation++ )
192  {
193  max_rss();
194 
195 DbTimMsg("Worker start rank/generation/elapsed-secs");
196  // Calculate fitness
197  for ( int i = 0; i < population; i++ )
198  {
199  fitness[ i ].index = i;
200  fitness[ i ].fitness = get_fitness( genes[ i ] );
201  }
202 
203  // Sort fitness
204  qSort( fitness );
205 DbTimMsg("Worker after get_fitness loop + sort");
206 
207  // Refine with gradient search method (gsm) on last generation
208  if ( generation == generations - 1 )
209  {
210 DbgLv(DL) << "Deme" << grp_nbr << deme_nbr
211  << ": At last generation minimize.";
212 DbTimMsg("Worker before gsm rank/generation/elapsed");
213 
214  fitness[ 0 ].fitness = minimize( genes[ fitness[ 0 ].index ],
215  fitness[ 0 ].fitness );
216 DbgLv(DL) << "Deme" << grp_nbr << deme_nbr
217  << ": last generation minimize fitness=" << fitness[0].fitness;
218 DbTimMsg("Worker after gsm rank/generation/elapsed");
219  }
220 
221  max_rss();
222 
223  // Ensure gene is on grid
224  align_gene( genes[ fitness[ 0 ].index ] );
225 DbTimMsg("Worker after align_gene");
226 
227  // Send best gene to master
228 DbgLv(1) << "Best gene to master: gen" << generation << "worker" << deme_nbr;
230  msg.generation = generation;
231  msg.size = genes[ fitness[ 0 ].index ].size();
232  msg.fitness = fitness[ 0 ].fitness;
233 
234  MPI_Send( &msg, // to MPI #1
235  sizeof( msg ),
236  MPI_BYTE,
238  GENERATION,
239  my_communicator );
240 
241  MPI_Send( genes[ fitness[ 0 ].index ].data(), // to MPI #2
242  solute_doubles * buckets.size(),
243  MPI_DOUBLE,
245  GENE,
246  my_communicator );
247 
248 DbTimMsg("Worker after send fitness,genes");
249  // Receive instructions from master (continue or finish)
250  MPI_Status status;
251 
252  MPI_Recv( &msg, // from MPI #3
253  sizeof( msg ),
254  MPI_BYTE,
255  MPI_ANY_SOURCE,
256  MPI_ANY_TAG,
258  &status );
259 DbTimMsg("Worker after receive instructions");
260 
261  max_rss();
262 
263  if ( status.MPI_TAG == FINISHED )
264  {
265  DbgLv(0) << "Deme" << grp_nbr << deme_nbr
266  << ": Finish signalled at deme generation" << generation + 1;
267  break;
268  }
269 
270  // See if we are really done
271  if ( generation == generations - 1 )
272  {
273  DbgLv(0) << "Deme" << grp_nbr << deme_nbr
274  << ": At last generation";
275  continue;
276  }
277 
278  // Mark duplicate genes
279  int f0 = 0; // An index into the fitness array
280  int f1 = 1; // A second index
281  // The value of 1.0e-8 for close fitness is arbitrary. Parameterize?
282  const double NEAR_MATCH = 1.0e-8;
283  const double EPSF_SCALE = 1.0e-3;
284  double fitpwr = (double)qRound( log10( fitness[ 0 ].fitness ) );
285  double epsilon_f = pow( 10.0, fitpwr ) * EPSF_SCALE;
286 DbgLv(1) << "gw:" << my_rank << ": Dup best-gene clean: fitness0 fitpwr epsilon_f"
287  << fitness[0].fitness << fitpwr << epsilon_f;
288 
289  while ( f1 < population )
290  {
291  double fitdiff = qAbs( fitness[ f0 ].fitness - fitness[ f1 ].fitness );
292 
293  if ( fitdiff < epsilon_f )
294  {
295  bool match = true;
296  int g0 = fitness[ f0 ].index;
297  int g1 = fitness[ f1 ].index;
298 
299  for ( int ii = 0; ii < buckets.size(); ii++ )
300  {
301  double sdif = qAbs( genes[ g0 ][ ii ].s - genes[ g1 ][ ii ].s );
302  double kdif = qAbs( genes[ g0 ][ ii ].k - genes[ g1 ][ ii ].k );
303 
304  if ( sdif > NEAR_MATCH || kdif > NEAR_MATCH )
305  {
306 DbgLv(1) << "gw:" << my_rank << ": Dup NOT cleaned: f0 f1 fit0 fit1"
307  << f0 << f1 << fitness[f0].fitness << fitness[f1].fitness << "ii g0 g1 g0s g1s"
308  << ii << g0 << g1 << genes[g0][ii].s << genes[f1][ii].s;
309  match = false;
310  f0 = f1;
311  break;
312  }
313  }
314 
315  if ( match )
316  {
317 DbgLv(1) << "gw:" << my_rank << ": Dup cleaned: f0 f1 fit0 fit1"
318  << f0 << f1 << fitness[f0].fitness << fitness[f1].fitness;
319  fitness[ f1 ].fitness = LARGE; // Invalidate gene/sim_values
320  }
321  }
322  else
323  f0 = f1;
324 
325  f1++;
326  }
327 
328  // Re-sort
329  qSort( fitness );
330 
331  QList< Gene > old_genes = genes;
332 
333  // Create new generation from old
334  // First copy elite genes
335  for ( int g = 0; g < elitism; g++ )
336  genes[ g ] = old_genes[ fitness[ g ].index ];
337 
338  int immigrants = migrate_genes();
339 
340 DbTimMsg("Worker before elitism loop");
341 
342  for ( int g = elitism + immigrants; g < population; g++ )
343  {
344  // Select a random gene from old population using exponential
345  // distribution
346  int gene_index = e_random();
347  Gene gene = old_genes[ gene_index ];
348  int probability = u_random( p_plague );
349 
350  if ( probability < p_mutate ) mutate_gene( gene );
351  else if ( probability < p_crossover ) cross_gene ( gene, old_genes );
352  else gene = new_gene();
353 
354  genes[ g ] = gene;
355  }
356 DbTimMsg("Worker after elitism loop");
357 
358  max_rss();
359 
360  } // End of generation loop
361 DbTimMsg(" +++Worker after generation loop");
362 }
363 
365 {
366  int grid_es = s_grid - 1;
367  int grid_ek = k_grid - 1;
368 
369  for ( int i = 0; i < gene.size(); i++ )
370  {
371  double s = gene[ i ].s;
372  double s_min = buckets[ i ].x_min;
373  double ds = s - s_min;
374 
375  if ( ds < 0 )
376  s = s_min;
377  else
378  {
379  int gridpoint = qRound( ds / buckets[ i ].ds );
380  gridpoint = qMin( grid_es, gridpoint );
381  s = s_min + gridpoint * buckets[ i ].ds;
382  }
383 
384  double k = gene[ i ].k;
385  double k_min = buckets[ i ].y_min;
386  double dk = k - k_min;
387 
388  if ( dk < 0 )
389  k = k_min;
390  else
391  {
392  int gridpoint = qRound( dk / buckets[ i ].dk );
393  gridpoint = qMin( grid_ek, gridpoint );
394  k = k_min + gridpoint * buckets[ i ].dk;
395  }
396 
397  gene[ i ].s = s;
398  gene[ i ].k = k;
399 //*DEBUG*
400 //if(gene[i].s<0.0) {
401 //int grp_nbr = ( my_rank / gcores_count );
402 //int deme_nbr = my_rank - grp_nbr * gcores_count;
403 //DbgLv(0) << "Dem :" << grp_nbr << deme_nbr << "gene.s" << gene[i].s << "ALIGN"
404 // << "ds s_min gridpt" << ds << s_min << qRound( ds / buckets[i].ds );
405 //}
406 //*DEBUG*
407  }
408 }
409 
410 double US_MPI_Analysis::get_fitness( const Gene& gene )
411 {
413 sim.dbg_level = qMax(0,dbg_level-1);
414  sim.solutes = gene;
415  qSort( sim.solutes );
416 
417  fitness_count++;
418  int nisols = gene.size();
419  QString key = "";
420  QString str;
421 
422  for ( int cc = 0; cc < nisols; cc++ )
423  { // Concatenate all solute s,k values to form fitness key
424  key += str.sprintf( "%.5f%.5f", sim.solutes[ cc ].s,
425  sim.solutes[ cc ].k );
426  }
427 
428 DbgLv(2) << "get_fitness: nisols" << nisols << "key" << key;
429  if ( fitness_map.contains( key ) )
430  { // We already have a match to this key, so use its fitness value
431  fitness_hits++;
432 DbgLv(2) << "get_fitness: HIT! new hits" << fitness_hits;
433  return fitness_map.value( key );
434  }
435 
436  solutes_from_gene( sim.solutes, nisols );
437 
438 //DbTimMsg(" ++ call gf calc_residuals");
440 //DbTimMsg(" ++ return calc_residuals");
441 
442  double fitness = sim.variance;
443  int solute_count = 0;
444  int nosols = sim.solutes.size();
445 
446  if ( data_sets.size() == 1 )
447  { // Count solutes whose concentration is at least the threshold
448  for ( int cc = 0; cc < nosols; cc++ )
449  {
450  if ( sim.solutes[ cc ].c >= concentration_threshold ) solute_count++;
451  }
452 DbgLv(2) << "get_fitness: sol_count conc_thresh" << solute_count << concentration_threshold;
453  }
454 
455  else
456  { // For multiple datasets, use a weighted average fitness
457  double concen = 0.0;
458  fitness = 0.0;
459 
460  for ( int ee = 0; ee < datasets_to_process; ee++ )
461  {
462  fitness += ( sim.variances[ ee ] / maxods[ ee ] );
463  }
464 
465  fitness /= (double)datasets_to_process;
466 
467  for ( int cc = 0; cc < nosols; cc++ )
468  concen += sim.solutes[ cc ].c;
469 
470  double cthresh = concentration_threshold * concen;
471 
472  for ( int cc = 0; cc < nosols; cc++ )
473  {
474  if ( sim.solutes[ cc ].c >= cthresh ) solute_count++;
475  }
476  }
477 
478  fitness *= ( 1.0 + sq( regularization * solute_count ) );
479  fitness_map.insert( key, fitness );
480 DbgLv(2) << "get_fitness: out fitness" << fitness;
481 //*DEBUG*
482 if(dbg_level>0 && fitness_map.size()==20 )
483 {
484  int n=nosols-1;
485  DbgLv(1) << "w:" << my_rank << generation << ": fmapsize fitness nsols"
486  << fitness_map.size() << fitness << nisols << nosols
487  << "s0 s,k,v" << sim.solutes[0].s << sim.solutes[0].k << sim.solutes[0].v
488  << "sn s,k,v" << sim.solutes[n].s << sim.solutes[n].k << sim.solutes[n].v;
489 }
490 //*DEBUG*
491 
492 //*DEBUG*
493 //if(datasets_to_process>1 && generation<11 )
494 if(dbg_level>1 && datasets_to_process>1 && generation<11 )
495 {
496  DbgLv(0) << "rank generation" << my_rank << generation << "fitness" << fitness
497  << "vari0 vari1" << sim.variances[0] << sim.variances[1];
498 }
499 //*DEBUG*
500  return fitness;
501 }
502 
503 // Compute the fitness of a gene represented as a vector of doubles
505 {
506  // Convert from US_Vector to Gene
507  int size = vv.size() / 2;
508  int index = 0;
509  Gene gene( size );
510 
511  for ( int ii = 0; ii < size; ii++ )
512  {
513  gene[ ii ].s = vv[ index++ ];
514  gene[ ii ].k = vv[ index++ ];
515  }
516 
517 //*DEBUG*
518 if(gene[0].s<0.0) {
519 int grp_nbr = ( my_rank / gcores_count );
520 int deme_nbr = my_rank - grp_nbr * gcores_count;
521 DbgLv(DL) << "Dem :" << grp_nbr << deme_nbr << "gene.s" << gene[0].s << "FIT_V";
522 }
523 //*DEBUG*
524 // return get_fitness( gene );
525  int dbgsv = dbg_level;
526 // dbg_level = 2;
527  double fitn = get_fitness( gene );
528  dbg_level = dbgsv;
529  return fitn;
530 }
531 
532 int US_MPI_Analysis::u_random( int modulo )
533 {
534  // Returns a uniform random integer between 0 and modulo - 1
535  // Default modulo is 100
536  return (int)qFloor( (double)modulo * US_Math2::ranf() );
537 }
538 
540 {
541  // Exponential distribution
542  double randx = US_Math2::ranf();
543  const double divisor = 8.0; // Parameterize?
544  static const double
545  beta = population / divisor;
546  int gnsize = ( buckets.size() > 0 ) ? genes.size() : dgenes.size();
547 
548  int gene_index = (int)( -log( 1.0 - randx ) * beta );
549  gene_index = qMin( ( gnsize - 1 ), qMax( 0, gene_index ) );
550 
551  return gene_index;
552 }
553 
555 {
556  static const int p_mutate_s = parameters[ "p_mutate_s" ].toInt();
557  static const int p_mutate_k = parameters[ "p_mutate_k" ].toInt();
558  static const int p_mutate_sk = parameters[ "p_mutate_sk" ].toInt();
559 
560  static const int p_k = p_mutate_s + p_mutate_k; // e.g., 40
561  static const int p_total = p_k + p_mutate_sk; // e.g., 60
562 
563  int solute = u_random( gene.size() );
564  int rand = u_random( p_total );
565 
566  if ( rand < p_mutate_s )
567  mutate_s( gene[ solute ], solute );
568  else if ( rand < p_k )
569  mutate_k( gene[ solute ], solute );
570  else
571  {
572  mutate_s( gene[ solute ], solute );
573  mutate_k( gene[ solute ], solute );
574  }
575 //*DEBUG*
576 //if(gene[0].s<0.0) {
577 //int grp_nbr = ( my_rank / gcores_count );
578 //int deme_nbr = my_rank - grp_nbr * gcores_count;
579 //DbgLv(0) << "Dem :" << grp_nbr << deme_nbr << "gene.s" << gene[0].s << "MUTATE";
580 //}
581 //*DEBUG*
582 }
583 
584 void US_MPI_Analysis::mutate_s( US_Solute& solute, int b )
585 {
586  // Consider paramaterizing the sigma and x calculations
587  double sigma = ( s_grid - 1 ) / ( 6.0 * ( log2( generation + 2 ) ) );
588  double x = US_Math2::box_muller( 0.0, sigma );
589  double delta = qRound( x );
590 //DbgLv(1) << " MUTATE_S x" << x << "sg sigma delta"
591 // << s_grid << sigma << delta;
592 
593  // Ensure new value is at a grid point and within bucket
594  solute.s += delta * buckets[ b ].ds;
595  solute.s = qMax( solute.s, buckets[ b ].x_min );
596  solute.s = qMin( solute.s, buckets[ b ].x_max );
597 //*DEBUG*
598 //if(solute.s<0.0) {
599 //int grp_nbr = ( my_rank / gcores_count );
600 //int deme_nbr = my_rank - grp_nbr * gcores_count;
601 //DbgLv(0) << "Dem :" << grp_nbr << deme_nbr << " solute.s" << solute.s << "MUTATE_S";
602 //}
603 //*DEBUG*
604 }
605 
606 void US_MPI_Analysis::mutate_k( US_Solute& solute, int b )
607 {
608  //static const double mutate_sigma = parameters[ "mutate_sigma" ].toDouble();
609 
610  double sigma = ( k_grid - 1 ) / ( 6.0 * ( log2( generation + 2 ) ) );
611  double x = US_Math2::box_muller( 0.0, sigma );
612  double delta = qRound( x );
613 //DbgLv(1) << " MUTATE_K x" << x << "kg sigma delta"
614 // << k_grid << sigma << delta;
615 
616  // Ensure new value is at a grid point and within bucket
617  solute.k += delta * buckets[ b ].dk;
618  solute.k = qMax( solute.k, buckets[ b ].y_min );
619  solute.k = qMin( solute.k, buckets[ b ].y_max );
620 }
621 
622 void US_MPI_Analysis::cross_gene( Gene& gene, QList< Gene > old_genes )
623 {
624  // Get the crossing gene according to an exponential distribution
625  int gene_index = e_random();
626  Gene cross_from = old_genes[ fitness[ gene_index ].index ];
627 
628  // Select a random solute. The number will always be between
629  // 1 and size - 1.
630  int solute_index = u_random( gene.size() - 1 ) + 1;
631 
632  // Swap the genes half the time before cross so if we have
633  // (abcde, ABCDE) the crossed gene will be either abCDE or ABcde
634  if ( u_random() < 50 )
635  {
636  Gene temp = gene;
637  gene = cross_from;
638  cross_from = temp;
639  }
640 
641  for ( int i = solute_index; i < buckets.size(); i++ )
642  {
643  gene[ i ] = cross_from[ i ];
644  }
645 //*DEBUG*
646 //if(gene[0].s<0.0) {
647 //int grp_nbr = ( my_rank / gcores_count );
648 //int deme_nbr = my_rank - grp_nbr * gcores_count;
649 //DbgLv(0) << "Dem :" << grp_nbr << deme_nbr << "gene.s" << gene[0].s << "CROSS";
650 //}
651 //*DEBUG*
652 }
653 
655 {
656  static const int migrate_pcent = parameters[ "migration" ].toInt();
657  static const int elitism_count = parameters[ "elitism" ].toInt();
658 
659  QVector< US_Solute > emmigres;
660  int migrate_count = ( migrate_pcent * population + 50 ) / 100;
661 
662  // Send genes to master
663  for ( int i = 0; i < migrate_count; i++ )
664  {
665  int gene_index = e_random();
666  emmigres += genes[ fitness[ gene_index ].index ];
667  }
668 //*DEBUG*
669 //if(emmigres[0].s<0.0) {
670 //int grp_nbr = ( my_rank / gcores_count );
671 //int deme_nbr = my_rank - grp_nbr * gcores_count;
672 //DbgLv(0) << "Dem :" << grp_nbr << deme_nbr << "gene.s" << emmigres[0].s << "EMMIGRES";
673 //}
674 //*DEBUG*
675 
676  // MPI send msg type
677  MPI_GA_MSG msg;
678  msg.size = migrate_count;
679  int bucket_sols = buckets.size();
680  int migrate_sols = migrate_count * bucket_sols;
681 
682  MPI_Send( &msg, // to MPI #1
683  sizeof( msg ),
684  MPI_BYTE,
686  EMMIGRATE,
687  my_communicator );
688 
689  // MPI send emmigrants
690  MPI_Send( emmigres.data(), // to MPI #4
691  migrate_sols * solute_doubles,
692  MPI_DOUBLE,
694  EMMIGRATE,
695  my_communicator );
696 
697  // Get genes from master as concatenated genes
698  QVector< US_Solute > immigres( migrate_sols );
699  MPI_Status status;
700 
701  MPI_Recv( immigres.data(), // from MPI #5
702  migrate_sols * solute_doubles,
703  MPI_DOUBLE,
705  IMMIGRATE,
707  &status );
708 
709  int solutes_sent;
710  MPI_Get_count( &status, MPI_DOUBLE, &solutes_sent );
711  solutes_sent /= solute_doubles;
712  int mgenes_count = solutes_sent / bucket_sols;
713 
714  // The number returned equals the number sent or zero.
715  if ( mgenes_count > 0 )
716  {
717  int grp_nbr = ( my_rank / gcores_count );
718  int deme_nbr = my_rank - grp_nbr * gcores_count;
719 DbgLv(1) << "MG:Deme" << deme_nbr << ": solsent mg_count" << solutes_sent
720  << mgenes_count << " elit" << elitism_count << "sol_dbls" << solute_doubles;
721  for ( int i = 0; i < mgenes_count; i++ )
722  {
723  Gene gene = immigres.mid( i * bucket_sols, bucket_sols );
724  genes[ elitism_count + i ] = gene;
725  }
726 //*DEBUG*
727 //if(genes[elitism_count][0].s<0.0) {
728 //int grp_nbr = ( my_rank / gcores_count );
729 //int deme_nbr = my_rank - grp_nbr * gcores_count;
730 //DbgLv(0) << "Dem :" << grp_nbr << deme_nbr << "gene.s" << genes[elitism_count][0].s << "IMMIGRANT";
731 //}
732 //*DEBUG*
733  }
734 
735  return mgenes_count;
736 }
737 
739 {
740  Gene gene;
741 
742  for ( int b = 0; b < buckets.size(); b++ )
743  {
744  US_Solute solute;
745 
746  double x_min = buckets[ b ].x_min;
747  double y_min = buckets[ b ].y_min;
748 
749  solute.s = x_min + u_random( s_grid ) * buckets[ b ].ds;
750  solute.k = y_min + u_random( k_grid ) * buckets[ b ].dk;
751 
752  gene << solute;
753  }
754 //*DEBUG*
755 //if(gene[0].s<0.0) {
756 //int grp_nbr = ( my_rank / gcores_count );
757 //int deme_nbr = my_rank - grp_nbr * gcores_count;
758 //DbgLv(0) << "Dem :" << grp_nbr << deme_nbr << "gene.s" << gene[0].s << "NEW";
759 //}
760 //*DEBUG*
761 
762  return gene;
763 }
764 
765 // Find the minimum fitness value close to a gene using
766 // inverse hessian minimization
767 double US_MPI_Analysis::minimize( Gene& gene, double fitness )
768 {
769 #if 0
770 #define TIMING_MZ
771 #endif
772 #ifdef TIMING_MZ
773 static int ncalls=0;
774 static long totms=0L;
775 static long totT1=0L;
776 static long totT2=0L;
777 static long totT3=0L;
778 static long totT4=0L;
779 static long totT5=0L;
780 static long totT6=0L;
781 long insms;
782 QDateTime clcSt0=QDateTime::currentDateTime();
783 #endif
784  int gsize = gene.size();
785  int vsize = gsize * 2;
786  US_Vector vv( vsize ); // Input values
787  US_Vector uu( vsize ); // Vector of derivatives
788 
789  // Create hessian as identity matrix
790  QVector< QVector< double > > hessian( vsize );
791 
792  for ( int ii = 0; ii < vsize; ii++ )
793  {
794  hessian[ ii ] = QVector< double >( vsize, 0.0 );
795  hessian[ ii ][ ii ] = 1.0;
796  }
797 
798 int grp_nbr = ( my_rank / gcores_count );
799 int deme_nbr = my_rank - grp_nbr * gcores_count;
800 QString Phd = "MIN:" + QString().sprintf("%d:%d",grp_nbr,deme_nbr) + ":";
801 DbgLv(DL) << Phd << "vsize" << vsize << "fitness" << fitness
802  << "gene0.s" << gene[0].s;
803  int index = 0;
804 
805  // Convert gene to array of doubles
806  for ( int ii = 0; ii < gsize; ii++ )
807  {
808  vv.assign( index++, gene[ ii ].s );
809  vv.assign( index++, gene[ ii ].k );
810  }
811 
812 DbgLv(DL) << Phd << " (0)call lamm_gsm_df";
813  lamm_gsm_df( vv, uu ); // uu is vector of derivatives
814 DbgLv(DL) << Phd << " (0) rtn fr lamm_gsm_df u0" << uu[0];
815 #ifdef TIMING_MZ
816 QDateTime clcSt1=QDateTime::currentDateTime();
817 totT1+=clcSt0.msecsTo(clcSt1);
818 #endif
819 
820  static const double epsilon_f = 1.0e-7;
821  static const int max_iterations = 20;
822  int iteration = 0;
823  double epsilon = epsilon_f * fitness * 4.0;
824 DbgLv(DL) << Phd << "epsilon" << epsilon << "uL2norm" << uu.L2norm();
825  bool neg_cnstr = ( vv[ 0 ] < 0.1 ); // Negative constraint?
826 
827  while ( uu.L2norm() >= epsilon_f && iteration < max_iterations )
828  {
829 #ifdef TIMING_MZ
830 clcSt1=QDateTime::currentDateTime();
831 #endif
832  iteration++;
833 DbgLv(DL) << Phd << " iter" << iteration << "fitness" << fitness;
834  if ( fitness == 0.0 ) break;
835 
836  US_Vector v_s1 = vv;
837  double g_s1 = fitness;
838  double s1 = 0.0;
839  double s2 = 0.5;
840  double s3 = 1.0;
841 
842  // v_s2 = vv - uu * s2
843  US_Vector v_s2( vsize );
844  vector_scaled_sum( v_s2, uu, -s2, vv );
845 
846  if ( neg_cnstr && v_s2[ 0 ] < 0.1 )
847  {
848 DbgLv(DL) << Phd << " NEG-CNSTR:01: v_s2[0]" << v_s2[0];
849  v_s2.assign( 0, 0.1 + u_random( 100 ) * 0.001 );
850  }
851 
852 DbgLv(DL) << Phd << " iter" << iteration << "v_s2[0]" << v_s2[0];
853  double g_s2 = get_fitness_v( v_s2 );
854 DbgLv(DL) << Phd << " iter" << iteration << "g_s2" << g_s2;
855 
856  // Cut down until we have a decrease
857  while ( s2 > epsilon && g_s2 > g_s1 )
858  {
859  s3 = s2;
860  s2 *= 0.5;
861  // v_s2 = vv - uu * s2
862  vector_scaled_sum( v_s2, uu, -s2, vv );
863 DbgLv(DL) << Phd << " s2 g_s2 g_s1" << s2 << g_s2 << g_s1;
864  if ( neg_cnstr && v_s2[ 0 ] < 0.1 )
865  {
866 DbgLv(DL) << Phd << " NEG-CNSTR:02: v_s2[0]" << v_s2[0];
867  v_s2.assign( 0, 0.1 + u_random( 100 ) * 0.001 );
868  }
869 
870  g_s2 = get_fitness_v( v_s2 );
871  }
872 #ifdef TIMING_MZ
873 QDateTime clcSt2=QDateTime::currentDateTime();
874 totT2+=clcSt1.msecsTo(clcSt2);
875 #endif
876 DbgLv(DL) << Phd << " g_s2" << g_s2 << "s2 s3" << s2 << s3;
877 
878  // Test for initial decrease
879  if ( s2 <= epsilon || ( s3 - s2 ) < epsilon ) break;
880 
881  US_Vector v_s3( vsize );
882 
883  // v_s3 = vv - uu * s3
884  vector_scaled_sum( v_s3, uu, -s3, vv );
885 
886  if ( neg_cnstr && v_s3[ 0 ] < 0.1 )
887  {
888 DbgLv(DL) << Phd << " NEG-CNSTR:03: v_s3[0]" << v_s3[0];
889  v_s3.assign( 0, 0.1 + u_random( 100 ) * 0.001 );
890  }
891 
892  double g_s3 = get_fitness_v( v_s3 );
893 #ifdef TIMING_MZ
894 QDateTime clcSt3=QDateTime::currentDateTime();
895 totT3+=clcSt2.msecsTo(clcSt3);
896 #endif
897 DbgLv(DL) << Phd << " g_s3" << g_s3 << "s1 s2 s3" << s1 << s2 << s3;
898 
899  int reps = 0;
900  static const int max_reps = 100;
901 
902  while ( ( ( s2 - s1 ) > epsilon ) &&
903  ( ( s3 - s2 ) > epsilon ) &&
904  ( reps++ < max_reps ) )
905  {
906  double s1_s2 = 1.0 / ( s1 - s2 );
907  double s1_s3 = 1.0 / ( s1 - s3 );
908  double s2_s3 = 1.0 / ( s2 - s3 );
909 
910  double s1_2 = sq( s1 );
911  double s2_2 = sq( s2 );
912  double s3_2 = sq( s3 );
913 
914  double aa = ( ( g_s1 - g_s3 ) * s1_s3 -
915  ( g_s2 - g_s3 ) * s2_s3
916  ) * s1_s2;
917 
918  double bb = ( g_s3 * ( s2_2 - s1_2 ) +
919  g_s2 * ( s1_2 - s3_2 ) +
920  g_s1 * ( s3_2 - s2_2 )
921  ) *
922  s1_s2 * s1_s3 * s2_s3;
923 
924  static const double max_a = 1.0e-25;
925 
926 DbgLv(DL) << Phd << " reps" << reps << "aa" << aa;
927  if ( qAbs( aa ) < max_a )
928  {
929  index = 0;
930 
931  // Restore gene from array of doubles
932  for ( int ii = 0; ii < gsize; ii++ )
933  {
934  gene[ ii ].s = vv[ index++ ];
935  gene[ ii ].k = vv[ index++ ];
936  }
937 
938 #ifdef TIMING_MZ
939 QDateTime clcSt4=QDateTime::currentDateTime();
940 totT4+=clcSt3.msecsTo(clcSt4);
941 insms=(long)clcSt0.msecsTo(clcSt4);
942 totms+=insms;
943 ncalls++;
944 DRTiming << "MINIMIZE: msecs" << insms << "totmsecs calls" << totms << ncalls;
945 DRTiming << " MMIZE: t1 t2 t3 t4 t5 t6"
946  << totT1 << totT2 << totT3 << totT4 << totT5 << totT6;
947 #endif
948  return fitness;
949  }
950 
951  double xx = -bb / ( 2.0 * aa );
952  double prev_g_s2 = g_s2;
953 
954  if ( xx < s1 )
955  {
956  if ( xx < ( s1 + s1 - s2 ) ) // Keep it close
957  {
958  xx = s1 + s1 - s2; // xx <- s1 + ds
959  if ( xx < 0 ) xx = s1 / 2.0;
960  }
961 
962  if ( xx < 0 ) // Wrong direction!
963  {
964  if ( s1 < 0 ) s1 = 0.0;
965  xx = 0;
966  }
967 
968  // OK, take xx, s1, s2
969  v_s3 = v_s2;
970  g_s3 = g_s2; // 3 <- 2
971  s3 = s2;
972  v_s2 = v_s1;
973  g_s2 = g_s1;
974  s2 = s1; // 2 <- 1
975  s1 = xx; // 1 <- xx
976 
977  // v_s1 = vv - uu * s1
978  vector_scaled_sum( v_s1, uu, -s1, vv );
979 
980  if ( neg_cnstr && v_s1[ 0 ] < 0.1 )
981  {
982 DbgLv(DL) << Phd << " NEG-CNSTR:04: v_s1[0]" << v_s1[0];
983  v_s1.assign( 0, 0.1 + u_random( 100 ) * 0.001 );
984  }
985 
986 DbgLv(DL) << Phd << " x<s1 get_fitness";
987  g_s1 = get_fitness_v( v_s1 );
988  }
989  else if ( xx < s2 ) // Take s1, xx, s2
990  {
991  v_s3 = v_s2;
992  g_s3 = g_s2; // 3 <- 2
993  s3 = s2;
994  s2 = xx; // 2 <- xx
995 
996  // v_s2 = vv - uu * s2
997  vector_scaled_sum( v_s2, uu, -s2, vv );
998 
999  if ( neg_cnstr && v_s2[ 0 ] < 0.1 )
1000  {
1001 DbgLv(DL) << Phd << " NEG-CNSTR:05: v_s2[0]" << v_s2[0];
1002  v_s2.assign( 0, 0.1 + u_random( 100 ) * 0.001 );
1003  }
1004 
1005 DbgLv(DL) << Phd << " xx<s2 get_fitness";
1006  g_s2 = get_fitness_v( v_s2 );
1007  }
1008  else if ( xx < s3 ) // Take s2, xx, s3
1009  {
1010  v_s1 = v_s2;
1011  g_s1 = g_s2;
1012  s1 = s2; // 2 <- 1
1013  s2 = xx; // 2 <- xx
1014 
1015  // v_s2 = vv - uu * s2
1016  vector_scaled_sum( v_s2, uu, -s2, vv );
1017 
1018  if ( neg_cnstr && v_s2[ 0 ] < 0.1 )
1019  {
1020 DbgLv(DL) << Phd << " NEG-CNSTR:06: v_s2[0]" << v_s2[0];
1021  v_s2.assign( 0, 0.1 + u_random( 100 ) * 0.001 );
1022  }
1023 
1024 DbgLv(DL) << Phd << " xx<s3 get_fitness";
1025  g_s2 = get_fitness_v( v_s2 );
1026  }
1027  else // xx >= s3
1028  {
1029  if ( xx > ( s3 + s3 - s2 ) ) // if xx > s3 + ds/2
1030  {
1031  // v_s4 = vv - uu * xx
1032  US_Vector v_s4( vsize );
1033  vector_scaled_sum( v_s4, uu, -xx, vv );
1034 
1035  if ( neg_cnstr && v_s4[ 0 ] < 0.1 )
1036  {
1037 DbgLv(DL) << Phd << " NEG-CNSTR:07: v_s4[0]" << v_s4[0];
1038  v_s4.assign( 0, 0.1 + u_random( 100 ) * 0.001 );
1039  }
1040 
1041 DbgLv(DL) << Phd << " xx>=s3 get_fitness (A)";
1042  double g_s4 = get_fitness_v( v_s4 );
1043 
1044  if ( g_s4 > g_s2 && g_s4 > g_s3 && g_s4 > g_s1 )
1045  {
1046  xx = s3 + s3 - s2; // xx = s3 + ds/2
1047  }
1048  }
1049 
1050  // Take s2, s3, xx
1051  v_s1 = v_s2;
1052  g_s1 = g_s2; // 1 <- 2
1053  s1 = s2;
1054  v_s2 = v_s3;
1055  g_s2 = g_s3;
1056  s2 = s3; // 2 <- 3
1057  s3 = xx; // 3 <- xx
1058 
1059  // v_s3 = vv - uu * s3
1060  vector_scaled_sum( v_s3, uu, -s3, vv );
1061 
1062  if ( neg_cnstr && v_s3[ 0 ] < 0.1 )
1063  {
1064 DbgLv(DL) << Phd << " NEG-CNSTR:01: v_s2[0]" << v_s2[0];
1065  v_s3.assign( 0, 0.1 + u_random( 100 ) * 0.001 );
1066  }
1067 
1068 DbgLv(DL) << Phd << " xx>=s3 get_fitness";
1069  g_s3 = get_fitness_v( v_s3 );
1070  }
1071 
1072 DbgLv(DL) << Phd << " p_g_s2 g_s2" << prev_g_s2 << g_s2;
1073  if ( qAbs( prev_g_s2 - g_s2 ) < epsilon ) break;
1074  } // end of inner loop
1075 
1076  US_Vector v_p( vsize );
1077 
1078  if ( g_s2 < g_s3 && g_s2 < g_s1 )
1079  {
1080  v_p = v_s2;
1081  fitness = g_s2;
1082  }
1083  else if ( g_s1 < g_s3 )
1084  {
1085  v_p = v_s1;
1086  fitness = g_s1;
1087  }
1088  else
1089  {
1090  v_p = v_s3;
1091  fitness = g_s3;
1092  }
1093 DbgLv(DL) << Phd << " g_s? fitness" << fitness;
1094 
1095 #ifdef TIMING_MZ
1096 QDateTime clcSt4=QDateTime::currentDateTime();
1097 totT4+=clcSt3.msecsTo(clcSt4);
1098 #endif
1099 DbgLv(DL) << Phd << " call lamm_gsm_df";
1100  US_Vector v_g( vsize ); // Vector of derivatives
1101  lamm_gsm_df( v_p, v_g ); // New gradient in v_g (old in uu)
1102 DbgLv(DL) << Phd << " retn fr lamm_gsm_df";
1103 #ifdef TIMING_MZ
1104 QDateTime clcSt5=QDateTime::currentDateTime();
1105 totT5+=clcSt4.msecsTo(clcSt5);
1106 #endif
1107 
1108  US_Vector v_dx( vsize );
1109  // v_dx = v_p - vv
1110  vector_scaled_sum( v_dx, vv, -1.0, v_p );
1111 
1112 
1113  vv = v_p; // vv = v_p
1114 
1115  // dgradient v_dg = v_g - uu
1116  US_Vector v_dg( vsize );
1117  vector_scaled_sum( v_dg, uu, -1.0, v_g );
1118 
1119  US_Vector v_hdg( vsize );
1120 
1121  // v_hdg = hessian * v_dg ( matrix * vector )
1122  for ( int ii = 0; ii < vsize; ii++ )
1123  {
1124  double dotprod = 0.0;
1125 
1126  for ( int jj = 0; jj < vsize; jj++ )
1127  dotprod += ( hessian[ ii ][ jj ] * v_dg[ jj ] );
1128 
1129  v_hdg.assign( ii, dotprod );
1130  }
1131 
1132  double fac = v_dg.dot( v_dx );
1133  double fae = v_dg.dot( v_hdg );
1134  double sumdg = v_dg.dot( v_dg );
1135  double sumxi = v_dx.dot( v_dx );
1136 DbgLv(DL) << Phd << " fac sumdg sumxi" << fac << sumdg << sumxi;
1137 
1138  if ( fac > sqrt( epsilon * sumdg * sumxi ) )
1139  {
1140  fac = 1.0 / fac;
1141  double fad = 1.0 / fae;
1142 
1143  for ( int ii = 0; ii < vsize; ii++ )
1144  {
1145  v_dg.assign( ii, fac * v_dx[ ii ] - fad * v_hdg[ ii ] );
1146  }
1147 
1148  for ( int ii = 0; ii < vsize; ii++ )
1149  {
1150  for ( int jj = ii; jj < vsize; jj++ )
1151  {
1152  hessian[ ii ][ jj ] +=
1153  fac * v_dx [ ii ] * v_dx [ jj ] -
1154  fad * v_hdg[ ii ] * v_hdg[ jj ] +
1155  fae * v_dg [ ii ] * v_dg [ jj ];
1156 
1157  // It's a symmetrical matrix
1158  hessian[ jj ][ ii ] = hessian[ ii ][ jj ];
1159  }
1160  }
1161  }
1162 
1163  // uu = hessian * v_g ( matrix * vector )
1164  for ( int ii = 0; ii < vsize; ii++ )
1165  {
1166  double dotprod = 0.0;
1167 
1168  for ( int jj = 0; jj < vsize; jj++ )
1169  dotprod += ( hessian[ ii ][ jj ] * v_g[ jj ] );
1170 
1171  uu.assign( ii, dotprod );
1172  }
1173 
1174 #ifdef TIMING_MZ
1175 QDateTime clcSt6=QDateTime::currentDateTime();
1176 totT6+=clcSt5.msecsTo(clcSt6);
1177 #endif
1178  } // end while ( uu.L2norm() > epsilon )
1179 
1180  index = 0;
1181 
1182  // Restore gene from array of doubles
1183  for ( int ii = 0; ii < gsize; ii++ )
1184  {
1185  gene[ ii ].s = vv[ index++ ];
1186  gene[ ii ].k = vv[ index++ ];
1187  }
1188 DbgLv(DL) << Phd << "OUT:fitness" << fitness << "gene0.s" << gene[0].s;
1189 
1190 #ifdef TIMING_MZ
1191 insms=(long)clcSt0.msecsTo(QDateTime::currentDateTime());
1192 totms+=insms;
1193 ncalls++;
1194 DRTiming << "MINIMIZE: msecs" << insms << "totmsecs calls" << totms << ncalls;
1195 DRTiming << " MMIZE: t1 t2 t3 t4 t5 t6"
1196  << totT1 << totT2 << totT3 << totT4 << totT5 << totT6;
1197 #endif
1198  return fitness;
1199 }
1200 
1201 // Set vector values to the scaled sums of two vectors
1203  US_Vector& aa, double sa, US_Vector& bb, double sb )
1204 {
1205  int sizec = qMin( cc.size(), qMin( aa.size(), bb.size() ) );
1206 
1207  if ( sb == 1.0 )
1208  for ( int ii = 0; ii < sizec; ii++ )
1209  cc.assign( ii, aa[ ii ] * sa + bb[ ii ] );
1210  else
1211  for ( int ii = 0; ii < sizec; ii++ )
1212  cc.assign( ii, aa[ ii ] * sa + bb[ ii ] * sb );
1213 
1214  return;
1215 }
1216 
1217 // Update the fitness value for a set of solutes.
1218 // Initially (index<0), the A and B matrices are populated.
1219 // Thereafter, each call updates a single column of the A matrix
1220 // and recalculates the x vector, with fitness update.
1222 {
1223  double fitness = 0.0;
1224  static QVector< double > nnls_a;
1225  static QVector< double > nnls_b;
1226  QVector< double > nnls_x;
1227  QVector< double > nnls_c;
1228  QVector< double > w_nnls_a;
1229  QVector< double > w_nnls_b;
1230  US_SolveSim::DataSet* dset = data_sets[ 0 ];
1231  US_DataIO::EditedData* edata = &dset->run_data;
1232  US_DataIO::RawData simdat;
1233  int nscans = edata->scanCount();
1234  int npoints = edata->pointCount();
1235  int ntotal = nscans * npoints;
1236  int vsize = v.size();
1237  int nsols = vsize / 2;
1238  int navals = nsols * ntotal;
1239  double fixval = parameters[ "bucket_fixed" ].toDouble();
1240  fixval = ( attr_z == ATTR_V ) ? dset->vbar20 : fixval;
1241 //qDebug() << "UF: vsize nsols ntotal navals" << vsize << nsols << ntotal << navals;
1242 
1244  sd.density = dset->density;
1245  sd.viscosity = dset->viscosity;
1246  sd.manual = dset->manual;
1247  sd.vbar20 = dset->vbar20;
1248  sd.vbar = US_Math2::adjust_vbar20( sd.vbar20, dset->temperature );
1250 
1251  US_Model::SimulationComponent zcomponent;
1252  zcomponent.s = 0.0;
1253  zcomponent.f_f0 = 0.0;
1254  zcomponent.mw = 0.0;
1255  zcomponent.vbar20 = dset->vbar20;
1256  zcomponent.D = 0.0;
1257  zcomponent.f = 0.0;
1258 
1259  set_comp_attrib( zcomponent, fixval, attr_z );
1260 //DbgLv(1) << "UF: xyz" << attr_x << attr_y << attr_z << "fixval" << fixval;
1261 
1262  if ( index < 0 )
1263  { // Do the initial population of A and B matrices
1264 
1265  nnls_a.resize( navals ); // Prepare the NNLS A,B matrices
1266  nnls_b.resize( ntotal );
1267 
1268  int kk = 0;
1269 
1270  for ( int vv = 0; vv < vsize; vv += 2 )
1271  { // Fit each solute and populate the A matrix with simulations
1272  double xval = v[ vv ];
1273  double yval = v[ vv + 1 ];
1274  US_Model model;
1275  model.components.resize( 1 );
1276  model.components[ 0 ] = zcomponent;
1277 
1278  build_component( model.components[ 0 ], sd, xval, yval );
1279 
1280  US_AstfemMath::initSimData( simdat, *edata, 0.0 );
1281  US_Astfem_RSA astfem_rsa( model, dset->simparams );
1282  astfem_rsa.set_debug_flag( dbg_level );
1283  astfem_rsa.calculate( simdat );
1284 
1285  for ( int ss = 0; ss < nscans; ss++ )
1286  for ( int rr = 0; rr < npoints; rr++ )
1287  nnls_a[ kk++ ] = simdat.value( ss, rr );
1288  }
1289 
1290  kk = 0;
1291 
1292  // Populate the B matrix with experiment data
1293  for ( int ss = 0; ss < nscans; ss++ )
1294  for ( int rr = 0; rr < npoints; rr++ )
1295  nnls_b[ kk++ ] = edata->value( ss, rr );
1296 
1297  }
1298 
1299  else
1300  { // Calculate one column of the A matrix and re-do NNLS
1301  nnls_x.resize( nsols );
1302  nnls_c.resize( ntotal );
1303  US_Model model;
1304  model.components.resize( 1 );
1305 
1306  int vv = index * 2;
1307  model.components[ 0 ] = zcomponent;
1308 
1309  build_component( model.components[ 0 ], sd, v[ vv ], v[ vv + 1 ] );
1310 
1311 //qDebug() << "UF: index vv" << index << vv << "s,D"
1312 // << model.components[0].s << model.components[0].D;
1313 
1314  US_AstfemMath::initSimData( simdat, *edata, 0.0 );
1315  US_Astfem_RSA astfem_rsa( model, dset->simparams );
1316  astfem_rsa.set_debug_flag( dbg_level );
1317  astfem_rsa.calculate( simdat );
1318 
1319  int kk = index * ntotal;
1320  int jj = 0;
1321 
1322  for ( int ss = 0; ss < nscans; ss++ )
1323  {
1324  for ( int rr = 0; rr < npoints; rr++ )
1325  { // Save the column, then replace it with simulation values
1326  nnls_c[ jj++ ] = nnls_a[ kk ];
1327  nnls_a[ kk++ ] = simdat.value( ss, rr );
1328  }
1329  }
1330 //qDebug() << "UF: kk jj" << kk << jj;
1331 
1332  // Put A and B in work vectors since NNLS corrupts them
1333  w_nnls_a = nnls_a;
1334  w_nnls_b = nnls_b;
1335 
1336  // Re-do NNLS to get X concentrations using replaced A
1337  US_Math2::nnls( w_nnls_a.data(), ntotal, ntotal, nsols,
1338  w_nnls_b.data(), nnls_x.data() );
1339 
1340  kk = index * ntotal;
1341  jj = 0;
1342 
1343  // Restore the A column with its previous values
1344  for ( int ss = 0; ss < nscans; ss++ )
1345  for ( int rr = 0; rr < npoints; rr++ )
1346  nnls_a[ kk++ ] = nnls_c[ jj++ ];
1347 
1348  model.components.clear();
1349  kk = 0;
1350  int ksol = 0;
1351 
1352  for ( int cc = 0; cc < nsols; cc++ )
1353  { // Build a model using solutes where the concentration is positive
1354  double soluval = nnls_x[ cc ];
1355 
1356  if ( soluval > 0.0 )
1357  {
1358  model.components << zcomponent;
1359  int vv = cc * 2;
1360 
1361  build_component( model.components[ kk ], sd, v[ vv ], v[ vv + 1 ] );
1362 
1363  model.components[ kk++ ].signal_concentration = soluval;
1364 
1365  if ( soluval >= concentration_threshold ) ksol++;
1366  }
1367  }
1368 
1369  // Calculate the simulation using a model of all live solutes
1370  US_AstfemMath::initSimData( simdat, *edata, 0.0 );
1371  US_Astfem_RSA astfem_rsa2( model, dset->simparams );
1372  astfem_rsa2.set_debug_flag( dbg_level );
1373  astfem_rsa2.calculate( simdat );
1374 
1375  // Calculate the fitness (variance) == average of residuals-squared
1376  fitness = 0.0;
1377 
1378  for ( int ss = 0; ss < nscans; ss++ )
1379  {
1380  for ( int rr = 0; rr < npoints; rr++ )
1381  {
1382  double resid = edata->value( ss, rr ) - simdat.value( ss, rr );
1383  fitness += sq( resid );
1384 //int ms=nscans/2;int mr=npoints/2;
1385 //if((ss>ms-3)&&(ss<ms+3)&&(rr>mr-3)&&(rr<mr+3))
1386 // qDebug() << "UF: index" << index << "ss rr edat sdat resd"
1387 // << ss << rr << edata->value(ss,rr) << simdat.value(ss,rr) << resid;
1388  }
1389  }
1390 
1391  fitness /= (double)( ntotal );
1392  fitness *= ( 1.0 + sq( regularization * ksol ) );
1393  }
1394 
1395  return fitness;
1396 }
1397 
1399 {
1400  static const double hh = 0.01;
1401  static const double h2_recip = 0.5 / hh;
1402 
1403  // Work with a temporary vector
1404  US_Vector tt = vv;
1405 
1406 #ifdef UPDATE_FIT
1407  if ( current_dataset == 0 && data_sets.size() == 1 )
1408  {
1409  update_fitness( -1, tt );
1410 
1411  for ( int ii = 0; ii < tt.size(); ii++ )
1412  {
1413  double save = tt[ ii ];
1414  int cc = ii / 2;
1415 
1416  tt.assign( ii, save - hh );
1417  double y0 = update_fitness( cc, tt ); // Calc fitness value -h
1418 
1419  tt.assign( ii, save + hh );
1420  double y2 = update_fitness( cc, tt ); // Calc fitness value +h
1421 
1422  vd.assign( ii, ( y2 - y0 ) * h2_recip ); // The derivative
1423  tt.assign( ii, save );
1424  }
1425  }
1426 
1427  else
1428 #endif
1429  {
1430  for ( int ii = 0; ii < tt.size(); ii++ )
1431  {
1432  double save = tt[ ii ];
1433 
1434  tt.assign( ii, save - hh );
1435  double y0 = get_fitness_v( tt ); // Calc fitness value -h
1436 
1437  tt.assign( ii, save + hh );
1438  double y2 = get_fitness_v( tt ); // Calc fitness value +h
1439 
1440  vd.assign( ii, ( y2 - y0 ) * h2_recip ); // The derivative
1441  tt.assign( ii, save );
1442  }
1443  }
1444 int nn=tt.size()-1;
1445 DbgLv(DL) << "GDF: vd0..." << vd[0] << vd[1] << vd[2] << vd[3];
1446 DbgLv(DL) << "GDF: ...vdn" << vd[nn-3] << vd[nn-2] << vd[nn-1] << vd[nn];
1447 
1448 }
1449 
1452 {
1453  if ( my_rank != 1 ) return;
1454  DbgLv(1) << "Buckets:";
1455 
1456  for ( int b = 0; b < buckets.size(); b++ )
1457  {
1458  DbgLv(1) << buckets[ b ].x_min
1459  << buckets[ b ].x_max
1460  << buckets[ b ].y_min
1461  << buckets[ b ].y_max
1462  << buckets[ b ].ds
1463  << buckets[ b ].dk;
1464  }
1465 }
1466 
1468 {
1469  if ( my_rank != 1 ) return;
1470 
1471  if ( gene > -1 )
1472  {
1473  QString s = "Gene " + QString::number( gene ) + ": ";
1474 
1475  for ( int b = 0; b < genes[ gene ].size(); b++ )
1476  {
1477  s += QString( "(%1, %2); " )
1478  .arg( genes[ gene ][ b ]. s )
1479  .arg( genes[ gene ][ b ]. k );
1480 
1481  }
1482 
1483  DbgLv(1) << s;
1484  }
1485  else
1486  {
1487  DbgLv(1) << "Genes:";
1488 
1489  for ( int g = 0; g < genes.size(); g++ )
1490  {
1491  QString s;
1492 
1493  for ( int b = 0; b < genes[ g ].size(); b++ )
1494  {
1495  s += QString( "(%1, %2)\n" )
1496  .arg( genes[ g ][ b ]. s )
1497  .arg( genes[ g ][ b ]. k );
1498  }
1499 
1500  DbgLv(1) << s;
1501  }
1502  }
1503 }
1504 
1505 void US_MPI_Analysis::dump_fitness( const QList< Fitness >& fitness )
1506 {
1507  if ( my_rank != 1 ) return;
1508 
1509  QString s = "Fitness:\n";
1510 
1511  for ( int f = 0; f < fitness.size(); f++ )
1512  {
1513 
1514  s += QString().sprintf( "i, index, fitness: %3i, %3i, %.6e\n",
1515  f, fitness[ f ].index, fitness[ f ].fitness );
1516  }
1517 
1518  DbgLv(1) << s;
1519 }
1520 
1521 // Set a coefficient value for a specified model component attribute
1523  double covalue, int attribx )
1524 {
1525  switch ( attribx )
1526  { // Set the coefficient value indicated by the attribute index
1527  default:
1528  case ATTR_S: // Set sedimentation coefficient at scale
1529  mcomp.s = covalue * 1.e-13;
1530  break;
1531  case ATTR_K: // Set frictional ratio
1532  mcomp.f_f0 = covalue;
1533  break;
1534  case ATTR_W: // Set molecular weight
1535  mcomp.mw = covalue;
1536  break;
1537  case ATTR_V: // Set partial specific volume (vbar)
1538  mcomp.vbar20 = covalue;
1539  break;
1540  case ATTR_D: // Set diffusion coefficient
1541  mcomp.D = covalue;
1542  break;
1543  case ATTR_F: // Set frictional coefficient
1544  mcomp.f = covalue;
1545  break;
1546  }
1547 }
1548 
1549 // Build a model component from sparse attribute values
1551  US_Math2::SolutionData& sd, double xval, double yval )
1552 {
1553  US_SolveSim::DataSet* dset = data_sets[ 0 ];
1554 
1555  // Set the X,Y attributes of the model component
1556  set_comp_attrib( mcomp, xval, attr_x );
1557  set_comp_attrib( mcomp, yval, attr_y );
1558 
1559  // Compute all the remaining coefficients
1560  US_Model::calc_coefficients( mcomp );
1561 
1562  // If vbar is not fixed, recompute solution-based corrections
1563  if ( attr_z != ATTR_V )
1564  {
1565  sd.vbar20 = mcomp.vbar20;
1566  sd.vbar = US_Math2::adjust_vbar20( sd.vbar20, dset->temperature );
1567 
1569  }
1570 
1571  // Do corrections to sedimentation and diffusion coefficients
1572  mcomp.s /= sd.s20w_correction;
1573  mcomp.D /= sd.D20w_correction;
1574 }
1575 
1576 void US_MPI_Analysis::solutes_from_gene( Gene& solutes, int nsols )
1577 {
1578  double fixval = ( attr_z == ATTR_V )
1579  ? data_sets[ 0 ]->vbar20
1580  : parameters[ "bucket_fixed" ].toDouble();
1581 
1582  for ( int cc = 0; cc < nsols; cc++ )
1583  {
1584  double sval = solutes[ cc ].s;
1585  double kval = solutes[ cc ].k;
1586  solutes[ cc ].s = ( attr_z == ATTR_S ) ? fixval : 0.0;
1587  solutes[ cc ].k = ( attr_z == ATTR_K ) ? fixval : 0.0;
1588  solutes[ cc ].v = ( attr_z == ATTR_V ) ? fixval : 0.0;
1589  solutes[ cc ].d = ( attr_z == ATTR_W || attr_z == ATTR_D ||
1590  attr_z == ATTR_F ) ? fixval : 0.0;
1591 
1592  switch( attr_x )
1593  {
1594  case ATTR_S:
1595  solutes[ cc ].s = sval * 1.0e-13;
1596  break;
1597  case ATTR_K:
1598  solutes[ cc ].k = sval;
1599  break;
1600  case ATTR_W:
1601  case ATTR_D:
1602  case ATTR_F:
1603  solutes[ cc ].d = sval;
1604  break;
1605  case ATTR_V:
1606  solutes[ cc ].v = sval;
1607  default:
1608  break;
1609  }
1610  switch( attr_y )
1611  {
1612  case ATTR_S:
1613  solutes[ cc ].s = kval * 1.0e-13;
1614  break;
1615  case ATTR_K:
1616  solutes[ cc ].k = kval;
1617  break;
1618  case ATTR_W:
1619  case ATTR_D:
1620  case ATTR_F:
1621  solutes[ cc ].d = kval;
1622  break;
1623  case ATTR_V:
1624  solutes[ cc ].v = kval;
1625  default:
1626  break;
1627  }
1628  }
1629 }
1630