UltraScan III
dmga_worker.cpp
Go to the documentation of this file.
1 #include "us_mpi_analysis.h"
2 #include "us_lamm_astfvm.h"
3 #include "us_math2.h"
4 
5 QDateTime elapsedd = QDateTime::currentDateTime();
6 #define ELAPSEDNOW (elapsedd.msecsTo(QDateTime::currentDateTime()))
7 #define ELAPSEDSEC (elapsedd.secsTo(QDateTime::currentDateTime()))
8 #define DbTimMsg(a) DbTiming << my_rank << generation << ELAPSEDNOW << a;
9 #define DRTiming DbTiming << my_rank
10 
11 #if 1
12 #define UPDATE_FIT
13 #endif
14 
15 // Minimize prints debug level
16 //#define DL 0
17 #define DL 1
18 
20 {
21  static const double BASE_SIG = 0.2;
22  static const int GR_INCR = 10;
23  current_dataset = 0;
25  QStringList keys = parameters.keys();
26  p_grid = ( keys.contains( "p_grid" ) )
27  ? parameters[ "p_grid" ].toInt() : 100;
28  QString dbgtext = parameters[ "debug_text" ];
29  QString s_bsig = par_key_value( dbgtext, "base_sig" );
30  QString s_grinc = par_key_value( dbgtext, "g_redo_inc" );
31  base_sig = s_bsig .isEmpty() ? BASE_SIG : s_bsig .toDouble();
32  g_redo_inc = s_grinc.isEmpty() ? GR_INCR : s_grinc.toInt();
33 if(my_rank==1)
34 DbgLv(0) << my_rank << "dmga_worker: base_sig" << base_sig
35  << "g_redo_inc" << g_redo_inc;
36 
37  buckets.clear();
38 
39  simulation_values.noisflag = 0;
40  simulation_values.dbg_level = dbg_level;
41  simulation_values.dbg_timing = dbg_timing;
42 
43  if ( data_sets.size() == 1 )
44  {
46  data_sets[ 0 ]->run_data, 0.0 );
48  data_sets[ 0 ]->run_data, 0.0 );
49  }
50  else
51  {
52  int ntscan = data_sets[ 0 ]->run_data.scanCount();
53  for ( int ii = 1; ii < data_sets.size(); ii++ )
54  ntscan += data_sets[ ii ]->run_data.scanCount();
55  simulation_values.sim_data .scanData.resize( ntscan );
56  simulation_values.residuals.scanData.resize( ntscan );
57  }
58 //if(my_rank>=0)
59 //abort( "DMGA_WORKER not yet implemented" );
60 
61  // Read in the constraints model and build constraints
62  QString cmfname = "../" + parameters[ "DC_model" ];
63  wmodel.load( cmfname ); // Load the constraints model
64  constraints.load_constraints( &wmodel ); // Build the constraints object
65  constraints.get_work_model ( &wmodel ); // Get the base work model
66 DbgLv(1) << my_rank << "dmga_worker: cmfname" << cmfname;
67 DbgLv(1) << my_rank << "dmga_worker: wmodel #comps" << wmodel.components.size();
68 
69  MPI_GA_MSG msg;
70  MPI_Status status;
71  MPI_Job job;
72  bool finished = false;
73  int grp_nbr = ( my_rank / gcores_count );
74  int deme_nbr = my_rank - grp_nbr * gcores_count;
75  // Get the number of floating attributes
77  // Compute the number of possible float combinations
78  nfvari = ( 1 << nfloatc ) - 1;
79  lfvari.fill( 0, nfvari );
80  // Set up a working marker vector for a single gene.
81  // Note: the "marker" is a simple vector of doubles, consisting of the
82  // attribute values that are unique to each gene. That is, only the
83  // float attributes differ from gene to gene and the specific values
84  // of these floats form a unique signature for each gene. Each gene
85  // itself can be formed by initializing with the base model (wmodel)
86  // and then setting the floating attribute values from the marker vector.
87  dgmarker.resize( nfloatc );
88  do_astfem = ( wmodel.components[ 0 ].sigma == 0.0 &&
89  wmodel.components[ 0 ].delta == 0.0 &&
90  wmodel.coSedSolute < 0 &&
91  data_sets[ 0 ]->compress == 0.0 );
92 DbgLv(1) << my_rank << "dmga_worker: nfloatc nfvari do_astfem"
93  << nfloatc << nfvari << do_astfem;
94 //DbgLv(1) << my_rank << "dmga_worker: wmodel DUMP";
95 //wmodel.debug();
96 
97  while ( ! finished )
98  {
100 
101  msg.size = (int)max_rss();
102  DbgLv(0) << "Deme" << grp_nbr << deme_nbr
103  << ": Generations finished, second" << ELAPSEDSEC;
104 
105  MPI_Send( &msg, // This iteration is finished
106  sizeof( msg ), // to MPI #1
107  MPI_BYTE,
109  FINISHED,
110  my_communicator );
111 DbgLv(1) << my_rank << "dmgw: FIN sent";
112 if(my_rank<2) {
113 DbgLv(1) << my_rank << "lfvari n" << nfvari << lfvari.size();
114 for (int ii=0; ii<nfvari; ii++ )
115  DbgLv(1) << my_rank << " smask" << ii + 1 << "count" << lfvari[ii]; }
116 
117  MPI_Recv( &job, // Find out what to do next
118  sizeof( job ), // from MPI #0, MPI #7, MPI #9
119  MPI_BYTE,
120  MPI_ANY_SOURCE,
121  MPI_ANY_TAG,
123  &status );
124 
125  int dataset = job.dataset_offset;
126  int count = job.dataset_count;
127  int length = job.length;
128 DbgLv(1) << my_rank << "dmgw: job offs count len" << dataset << count << length
129  << "tag" << status.MPI_TAG;
130  int index = 0;
131 
132  switch ( status.MPI_TAG )
133  {
134  case FINISHED:
135  finished = true;
136  DbgLv(0) << " Deme" << grp_nbr << deme_nbr << ":"
137  << fitness_hits << "fitness hits of" << fitness_count
138  << "fitness checks maxrss" << maxrss;
139  break;
140 
141  case UPDATE:
142  // Update data for global fit or Monte Carlo
143  // Global fit comes before MC (if necessary), one dataset at a time
144  // Monte Carlo always comes as a sequence of all datasets
145 
146  mc_data.resize( length );
147 
148  MPI_Barrier( my_communicator );
149 
150  // This is a receive
151  MPI_Bcast( mc_data.data(), // from MPI #8, #10
152  length,
153  MPI_DOUBLE,
155  my_communicator );
156 
157  for ( int e = dataset; e < dataset + count; e++ )
158  {
159  US_DataIO::EditedData* data = &data_sets[ e ]->run_data;
160 
161  int scan_count = data->scanCount();
162  int radius_points = data->pointCount();
163 
164  for ( int s = 0; s < scan_count; s++ )
165  {
166  US_DataIO::Scan* scan = &data->scanData[ s ];
167 
168  for ( int r = 0; r < radius_points; r++ )
169  {
170  scan->rvalues[ r ] = mc_data[ index++ ];
171  }
172  }
173  }
174 
175  if ( count == data_sets.size() ) // Next iteration will be global
176  {
177  current_dataset = 0;
179  }
180  else // Next dataset is a part of global fit
181  {
182  current_dataset = dataset;
183  // datasets_to_process will stay at 1
184  }
185 
186  break;
187 
188  case GENERATION:
189  break;
190 
191  default:
192  abort( "Unknown message at end of GA worker loop" );
193  break;
194 
195  } // end switch
196  } // end while
197 }
198 
200 {
201  // Initialize genes
202  dgenes.clear();
203  population = parameters[ "population" ].toInt();
204 
205  // Build the first generation of genes
206  for ( int ii = 0; ii < population; ii++ )
207  {
208  dgenes << new_dmga_gene();
209  }
210 
211  fitness.reserve( population );
212 
213  Fitness empty_fitness;
214  empty_fitness.fitness = LARGE;
215 
216  for ( int ii = 0; ii < population; ii++ )
217  {
218  empty_fitness.index = ii;
219  fitness << empty_fitness;
220  }
221 
222  int generations = parameters[ "generations" ].toInt();
223  int crossover = parameters[ "crossover" ].toInt();
224  int mutation = parameters[ "mutation" ].toInt();
225  int plague = parameters[ "plague" ].toInt();
226  int elitism = parameters[ "elitism" ].toInt();
227 
228  int p_mutate = mutation;
229  int p_crossover = p_mutate + crossover;
230  int p_plague = p_crossover + plague;
231  int grp_nbr = my_rank / gcores_count;
232  int deme_nbr = my_rank - grp_nbr * gcores_count;
233 
234  fitness_map.clear();
235  fitness_count = 0;
236  fitness_hits = 0;
237 
238  max_rss();
239 
240  QDateTime start = QDateTime::currentDateTime();
241  MPI_GA_MSG msg;
242 
243  for ( generation = 0; generation < generations; generation++ )
244  {
245  max_rss();
246 
247 DbTimMsg("Worker start rank/generation/elapsed-secs");
248  // Calculate fitness
249  for ( int ii = 0; ii < population; ii++ )
250  {
251  fitness[ ii ].index = ii;
252  fitness[ ii ].fitness = get_fitness_dmga( dgenes[ ii ] );
253 DbgLv(1) << my_rank << "dg:getf: ii" << ii << " fitness"
254  << fitness[ii].fitness;
255  }
256 
257  // Sort fitness
258  qSort( fitness );
259 DbTimMsg("Worker after get_fitness loop + sort");
260 
261  // Refine with gradient search method (gsm) on last generation
262  if ( generation == generations - 1 )
263  {
264 DbgLv(DL) << "Deme" << grp_nbr << deme_nbr
265  << ": At last generation minimize.";
266 DbTimMsg("Worker before gsm rank/generation/elapsed");
267  in_gsm = TRUE;
268 
269  fitness[ 0 ].fitness = minimize_dmga( dgenes[ fitness[ 0 ].index ],
270  fitness[ 0 ].fitness );
271 DbgLv(DL) << "Deme" << grp_nbr << deme_nbr
272  << ": last generation minimize fitness=" << fitness[0].fitness;
273 DbTimMsg("Worker after gsm rank/generation/elapsed");
274  in_gsm = FALSE;
275  }
276 
277  max_rss();
278 
279  // Send best gene to master
280 if((deme_nbr%10)==1) {
281  DbgLv(1) << "Best gene to master: gen" << generation << "worker" << deme_nbr
282  << "fitness" << fitness[0].fitness;
283 dump_fitness( fitness ); }
284  msg.generation = generation;
285  dgene = dgenes[ fitness[ 0 ].index ];
287  msg.size = nfloatc;
288  msg.fitness = fitness[ 0 ].fitness;
289 
290  MPI_Send( &msg, // to MPI #1
291  sizeof( msg ),
292  MPI_BYTE,
294  GENERATION,
295  my_communicator );
296 
297  MPI_Send( dgmarker.data(), // to MPI #2
298  nfloatc,
299  MPI_DOUBLE,
301  GENE,
302  my_communicator );
303 
304 DbTimMsg("Worker after send fitness,genes");
305  // Receive instructions from master (continue or finish)
306  MPI_Status status;
307 
308  MPI_Recv( &msg, // from MPI #3
309  sizeof( msg ),
310  MPI_BYTE,
311  MPI_ANY_SOURCE,
312  MPI_ANY_TAG,
314  &status );
315 DbTimMsg("Worker after receive instructions");
316 
317  max_rss();
318 
319  if ( status.MPI_TAG == FINISHED )
320  {
321  DbgLv(0) << "Deme" << grp_nbr << deme_nbr
322  << ": Finish signalled at deme generation" << generation + 1;
323  break;
324  }
325 
326  // See if we are really done
327  if ( generation == generations - 1 )
328  {
329  DbgLv(0) << "Deme" << grp_nbr << deme_nbr
330  << ": At last generation";
331  continue;
332  }
333 
334  // Mark duplicate genes
335  int f0 = 0; // An index into the fitness array
336  int f1 = 1; // A second index
337  // The value of 1.0e-8 for close fitness is arbitrary. Parameterize?
338  const double NEAR_MATCH = 1.0e-8;
339  const double EPSF_SCALE = 1.0e-3;
340  double fitpwr = (double)qRound( log10( fitness[ 0 ].fitness ) );
341  double epsilon_f = pow( 10.0, fitpwr ) * EPSF_SCALE;
342 DbgLv(1) << "gw:" << my_rank << ": Dup best-gene clean: fitness0 fitpwr epsilon_f"
343  << fitness[0].fitness << fitpwr << epsilon_f;
344 
345  while ( f1 < population )
346  {
347  double fitdiff = qAbs( fitness[ f0 ].fitness - fitness[ f1 ].fitness );
348 
349  if ( fitdiff < epsilon_f )
350  {
351  bool match = true;
352  int g0 = fitness[ f0 ].index;
353  int g1 = fitness[ f1 ].index;
354  DGene gene0 = dgenes[ g0 ];
355  DGene gene1 = dgenes[ g1 ];
356 
357  for ( int ii = 0; ii < nfloatc; ii++ )
358  {
359  double val0;
360  double val1;
362  atype = cns_flt[ ii ].atype;
363  int mcompx = cns_flt[ ii ].mcompx;
364 
365  fetch_attr_value( val0, gene0, atype, mcompx );
366  fetch_attr_value( val1, gene1, atype, mcompx );
367 
368  double difv = qAbs( ( val0 - val1 ) / val0 );
369 
370  if ( difv > NEAR_MATCH )
371  {
372 DbgLv(1) << "gw:" << my_rank << ": Dup NOT cleaned: f0 f1 fit0 fit1"
373  << f0 << f1 << fitness[f0].fitness << fitness[f1].fitness << "ii g0 g1 g0v g1v"
374  << ii << g0 << g1 << val0 << val1;
375  match = false;
376  f0 = f1;
377  break;
378  }
379  }
380 
381  if ( match )
382  {
383 DbgLv(1) << "gw:" << my_rank << ": Dup cleaned: f0 f1 fit0 fit1"
384  << f0 << f1 << fitness[f0].fitness << fitness[f1].fitness;
385  fitness[ f1 ].fitness = LARGE; // Invalidate gene/sim_values
386  }
387  }
388  else
389  f0 = f1;
390 
391  f1++;
392  }
393 
394  // Re-sort
395  qSort( fitness );
396 DbgLv(1) << "gw:" << my_rank << ": fitness sorted";
397 
398  QList< DGene > old_genes = dgenes;
399  int gener_save = generation;
400 
401  if ( generation > 0 && ( generation % g_redo_inc ) == 0 )
402  { // Set up to re-do initial genes after minimization
403  double fit0 = fitness[ 0 ].fitness;
404  fitness[ 0 ].fitness = minimize_dmga( dgenes[ fitness[ 0 ].index ],
405  fitness[ 0 ].fitness );
406  double fit1 = fitness[ 0 ].fitness;
407  dgene = dgenes[ fitness[ 0 ].index ];
408  old_genes[0] = dgene;
409  double aval;
410 
411  for ( int ii = 0; ii < nfloatc; ii++ )
412  { // Insure all the new gene attribute values are inside range
414  atype = cns_flt[ ii ].atype;
415  int mcompx = cns_flt[ ii ].mcompx;
416  double vmin = cns_flt[ ii ].low;
417  double vmax = cns_flt[ ii ].high;
418  fetch_attr_value( aval, dgene, atype, mcompx );
419 
420  if ( aval < vmin || aval > vmax )
421  {
422 if((my_rank%10)==1)
423 DbgLv(0) << "gw:" << my_rank << ": re-do : ii" << ii
424  << "aval vmin vmax" << aval << vmin << vmax;
425  aval = qMax( vmin, qMin( vmax, aval ) );
426  store_attr_value( aval, dgene, atype, mcompx );
427  }
428  }
429 
430  p_mutate = p_plague;
431 
432  for ( int gg = 0; gg < population; gg++ )
433  old_genes[ gg ] = dgene;
434 if((my_rank%10)==1)
435 DbgLv(0) << "gw:" << my_rank << ": re-do genes at gen" << generation
436  << "gen fitness" << fit0 << "post-min fitness" << fit1;
437 
438  generation = 0;
439  }
440 
441  // Create new generation from old
442  // First copy elite genes
443  for ( int gg = 0; gg < elitism; gg++ )
444  dgenes[ gg ] = old_genes[ fitness[ gg ].index ];
445 DbgLv(1) << "gw:" << my_rank << ": elites copied";
446 
447  int immigr_count = migrate_dgenes();
448 DbgLv(1) << "gw:" << my_rank << ": immigr_count" << immigr_count
449  << "dgenes,old sizes" << dgenes.size() << old_genes.size() << population;
450 
451 DbTimMsg("Worker before elitism loop");
452 
453  for ( int gg = elitism + immigr_count; gg < population; gg++ )
454  {
455  // Select a random gene from old population using exponential
456  // distribution
457  int gene_index = e_random();
458  int probability = u_random( p_plague );
459 DbgLv(1) << "gw:" << my_rank << ": gg" << gg << "gene_index" << gene_index
460  << "probability" << probability;
461  dgene = old_genes[ gene_index ];
462 
463  // Do a self-mutate on some genes
464  if ( probability < p_mutate ) mutate_dgene( dgene );
465 
466  // Do cross-mutation on some other genes
467  else if ( probability < p_crossover ) cross_dgene ( dgene, old_genes );
468 
469  // Purge the remainder (replace with new gene)
470  else dgene = new_dmga_gene();
471 
472  dgenes[ gg ] = dgene;
473  }
474 DbTimMsg("Worker after elitism loop");
475 
476  generation = gener_save;
477  p_mutate = mutation;
478  max_rss();
479 
480  } // End of generation loop
481 DbTimMsg(" +++Worker after generation loop");
482 }
483 
484 // Create a new discrete GA Gene
486 {
487  DGene gene_new = wmodel; // Initialize gene to base model
488  double extn_p = (double)( p_grid - 1 ); // Par grid extent
489 
490  // Loop thru float attributes, getting random values for each
491  for ( int ii = 0; ii < nfloatc; ii++ )
492  {
493  // Get the current attribute characteristics
495  atype = cns_flt[ ii ].atype;
496  int mcompx = cns_flt[ ii ].mcompx;
497  bool logscl = cns_flt[ ii ].logscl;
498  double vmin = cns_flt[ ii ].low;
499  double vmax = cns_flt[ ii ].high;
500 DbgLv(1) << my_rank << "dg:newg: ii" << ii
501  << "vmin vmax" << vmin << vmax;
502 
503  if ( logscl )
504  { // Convert low,high to log scale
505  vmin = log( vmin );
506  vmax = log( vmax );
507  }
508 
509  // Get a value somewhere in the range
510  double delta = (double)u_random( p_grid );
511  double aval = vmin + ( ( vmax - vmin ) * delta / extn_p );
512  aval = qMax( vmin, qMin( vmax, aval ) );
513  // If need be, convert value back from log
514  aval = logscl ? exp( aval ) : aval;
515 DbgLv(1) << my_rank << "dg:newg: ii" << ii << "p_grid" << p_grid
516  << "delta" << delta << "aval" << aval;
517 
518  // Update the new attribute value
519  store_attr_value( aval, gene_new, atype, mcompx );
520  }
521 
522  return gene_new; // Then return the new gene
523 }
524 
525 // Mutate a discrete GA Gene
527 {
528  const double extn_p = (double)( p_grid - 1 );
529  const double sigfact = pow( 10.0, ( mutate_sigma * 0.1 ) );
530  // Get a random mask that selects which float attributes to vary
531  int smask = u_random( nfvari ) + 1;
532  // Compute the sigma based on parameter grid and generation
533  double genterm = log2( (double)generation * sigfact + 2.0 );
534  double sigma = base_sig * extn_p / genterm;
535  // Bump the count of hits for each unique smask (floats combination)
536  lfvari[ smask - 1 ]++;
537 DbgLv(1) << my_rank << "dg:mutg: smask p_grid sigma" << smask << p_grid << sigma;
538 //if(my_rank==11) DbgLv(0) << my_rank << "dg:mutg: smask p_grid sigma" << smask << p_grid << sigma;
539 
540  // Loop thru float attributes, varying the selected ones
541  for ( int ii = 0; ii < nfloatc; ii++ )
542  {
543  // Skip if this float attribute is not selected for this mutation
544  if ( ( ( 1 << ii ) & smask ) == 0 )
545  continue;
546 
547  // Get the current value of the attribute
548  double vv;
550  atype = cns_flt[ ii ].atype;
551  int mcompx = cns_flt[ ii ].mcompx;
552  bool logscl = cns_flt[ ii ].logscl;
553  fetch_attr_value( vv, dgene, atype, mcompx );
554 
555  // Get float range and, if need be, convert to log scale
556  double vl = cns_flt[ ii ].low;
557  double vh = cns_flt[ ii ].high;
558 DbgLv(1) << my_rank << "dg:mutg: ii" << ii
559  << "vl vh" << vl << vh << "vv" << vv;
560 
561  if ( logscl )
562  {
563  vv = log( vv );
564  vl = log( vl );
565  vh = log( vh );
566  }
567 
568  // Mutate the value
569  double xx = US_Math2::box_muller( 0.0, sigma );
570  double delta = qRound( xx );
571  vv += ( delta * ( vh - vl ) / extn_p );
572  vv = qMax( vv, vl );
573  vv = qMin( vv, vh );
574  vv = logscl ? exp( vv ) : vv;
575 DbgLv(1) << my_rank << "dg:mutg: ii" << ii << " xx" << xx
576  << "delta" << delta << "vv" << vv;
577 
578  // Update the mutated value
579  store_attr_value( vv, dgene, atype, mcompx );
580  }
581 //DbgLv(1) << "dg:mutgene dgene comps" << dgene.components.size();
582 }
583 
584 // Cross-mutate a pair of discrete GA Genes
585 void US_MPI_Analysis::cross_dgene( DGene& dgene, QList< DGene > dgenes )
586 {
587  // Get the crossing gene according to an exponential distribution
588  int gndx = e_random();
589  DGene dgene2 = dgenes[ fitness[ gndx ].index ];
590 
591  // Get a random mask that selects float attributes from the second gene
592  int smask = u_random( nfvari ) + 1;
593 DbgLv(1) << "dg:crsg: gndx" << gndx << "smask" << smask;
594 
595  for ( int ii = 0; ii < nfloatc; ii++ )
596  {
597  // Skip if float attribute is not selected for cross mutation
598  if ( ( ( 1 << ii ) & smask ) == 0 )
599  continue;
600 
601  // Get the current value of the attribute from the second gene
602  double vv;
604  atype = cns_flt[ ii ].atype;
605  int mcompx = cns_flt[ ii ].mcompx;
606  fetch_attr_value( vv, dgene2, atype, mcompx );
607 
608  // Store this value in the first gene
609  store_attr_value( vv, dgene, atype, mcompx );
610  }
611 }
612 
613 // Migrate a list of discrete GA Genes
615 {
616  static const int migrate_pcent = parameters[ "migration" ].toInt();
617  static const int elitism_count = parameters[ "elitism" ].toInt();
618 
619  QList < DGene > emmigres;
620  int migrate_count = ( migrate_pcent * population + 50 ) / 100;
621  int doubles_count = migrate_count * nfloatc;
622  QVector< double > emmigrants( doubles_count );
623 DbgLv(1) << my_rank << "dg:migrdg: migrate_count doubles_count" << migrate_count << doubles_count;
624 
625  // Send genes to master
626 
627  for ( int ii = 0; ii < migrate_count; ii++ )
628  { // Build the current list of emmigrating genes
629  int gene_index = e_random();
630  emmigres << dgenes[ fitness[ gene_index ].index ];
631  }
632 
633  // Represent migrating genes as marker vector
634  dgenes_to_marker( emmigrants, emmigres, 0, migrate_count );
635 DbgLv(1) << my_rank << "dg:migrdg: emmigres size" << emmigres.size() << "emmigrants size" << emmigrants.size();
636 
637  // MPI send msg type
638  MPI_GA_MSG msg;
639  msg.size = migrate_count;
640 
641  MPI_Send( &msg, // to MPI #1
642  sizeof( msg ),
643  MPI_BYTE,
645  EMMIGRATE,
646  my_communicator );
647 
648  // MPI send emmigrants
649  MPI_Send( emmigrants.data(), // to MPI #1
650  doubles_count,
651  MPI_DOUBLE,
653  EMMIGRATE,
654  my_communicator );
655 
656  // Get genes from master as marker vector
657  QVector< double > immigres( doubles_count );
658  MPI_Status status;
659 
660  MPI_Recv( immigres.data(), // to MPI #1
661  doubles_count,
662  MPI_DOUBLE,
664  IMMIGRATE,
666  &status );
667 
668  int doubles_sent;
669  MPI_Get_count( &status, MPI_DOUBLE, &doubles_sent );
670  int mgenes_count = doubles_sent / nfloatc;
671 DbgLv(1) << my_rank << "dg:migrdg: immigres size" << immigres.size() << "doubles_sent" << doubles_sent;
672 
673  // The number returned equals the number sent or zero
674  if ( mgenes_count > 0 )
675  {
676  marker_to_dgenes( immigres, dgenes, elitism_count, mgenes_count );
677  }
678 
679  return mgenes_count;
680 }
681 
682 // Get fitness from a vector (used in inverse hessian)
684 {
685  int vsize = vv.size();
686  dgmarker.resize( vsize );
687 
688  // Convert from US_Vector to Gene
689 
690  for ( int ii = 0; ii < vsize; ii++ )
691  { // De-normalize values and put into marker QVector<double>
692  dgmarker[ ii ] = vv[ ii ] / zz[ ii ];
693  }
694 
696 
697  // Compute and return the fitness
698 
699  return get_fitness_dmga( dgene );
700 }
701 
702 // Compute a derivatives vector (an inverse hessian minimization step)
704  US_Vector& zz )
705 {
706  static const double hh = 0.01;
707  static const double h2_recip = 0.5 / hh;
708 
709  // Work with a temporary vector
710  US_Vector tt = vv;
711 
712  for ( int ii = 0; ii < tt.size(); ii++ )
713  {
714  double save = tt[ ii ];
715 
716  tt.assign( ii, save - hh );
717  double y0 = get_fitness_v_dmga( tt, zz ); // Calc fitness value -h
718 
719  tt.assign( ii, save + hh );
720  double y2 = get_fitness_v_dmga( tt, zz ); // Calc fitness value +h
721 
722  vd.assign( ii, ( y2 - y0 ) * h2_recip ); // The derivative
723  tt.assign( ii, save );
724  }
725 }
726 
727 // Find the minimum fitness value close to a discrete GA gene using
728 // inverse hessian minimization
729 double US_MPI_Analysis::minimize_dmga( DGene& dgene, double fitness )
730 {
731 DbgLv(1) << my_rank << "dg:IHM:minimize dgene comps" << dgene.components.size() << fitness;
732  int vsize = nfloatc;
733  US_Vector vv( vsize ); // Input values
734  US_Vector uu( vsize ); // Vector of derivatives
735  US_Vector zz( vsize ); // Vector of normalizing factors
736 
737  // Create hessian as identity matrix
738  QVector< QVector< double > > hessian( vsize );
739 
740  for ( int ii = 0; ii < vsize; ii++ )
741  {
742  hessian[ ii ] = QVector< double >( vsize, 0.0 );
743  hessian[ ii ][ ii ] = 1.0;
744  }
745 
746  dgmarker.resize( vsize );
747  marker_from_dgene( dgmarker, dgene );
748 
749  // Convert gene to array of normalized doubles and save normalizing factors
750  for ( int ii = 0; ii < vsize; ii++ )
751  {
752  double vval = dgmarker[ ii ];
753  double vpwr = (double)qFloor( log10( vval ) );
754  double vnorm = pow( 10.0, -vpwr );
755  vv.assign( ii, vval * vnorm );
756  zz.assign( ii, vnorm );
757 DbgLv(1) << my_rank << "dg:IHM: ii" << ii << "vval vnorm" << vval << vnorm
758  << "vpwr" << vpwr << "vvi" << vv[ii];
759  }
760 
761  lamm_gsm_df_dmga( vv, uu, zz ); // uu is vector of derivatives
762 
763  static const double epsilon_f = 1.0e-7;
764  static const int max_iterations = 20;
765  int iteration = 0;
766  double epsilon = epsilon_f * fitness * 4.0;
767  bool neg_cnstr = ( vv[ 0 ] < 0.1 ); // Negative constraint?
768 
769  while ( uu.L2norm() >= epsilon_f && iteration < max_iterations )
770  {
771  iteration++;
772  if ( fitness == 0.0 ) break;
773 
774  US_Vector v_s1 = vv;
775  double g_s1 = fitness;
776  double s1 = 0.0;
777  double s2 = 0.5;
778  double s3 = 1.0;
779 DbgLv(1) << my_rank << "dg:IHM: iteration" << iteration << "fitness" << fitness;
780 
781  // v_s2 = vv - uu * s2
782  US_Vector v_s2( vsize );
783  vector_scaled_sum( v_s2, uu, -s2, vv );
784 
785  if ( neg_cnstr && v_s2[ 0 ] < 0.1 )
786  {
787  v_s2.assign( 0, 0.1 + u_random( 100 ) * 0.001 );
788  }
789 
790  double g_s2 = get_fitness_v_dmga( v_s2, zz );
791 DbgLv(1) << my_rank << "dg:IHM: g_s2" << g_s2 << "s2" << s2 << "epsilon" << epsilon;
792 
793  // Cut down until we have a decrease
794  while ( s2 > epsilon && g_s2 > g_s1 )
795  {
796  s3 = s2;
797  s2 *= 0.5;
798  // v_s2 = vv - uu * s2
799  vector_scaled_sum( v_s2, uu, -s2, vv );
800 
801  if ( neg_cnstr && v_s2[ 0 ] < 0.1 )
802  {
803  v_s2.assign( 0, 0.1 + u_random( 100 ) * 0.001 );
804  }
805 
806  g_s2 = get_fitness_v_dmga( v_s2, zz );
807  }
808 DbgLv(1) << my_rank << "dg:IHM: g_s2" << g_s2;
809 
810  // Test for initial decrease
811  if ( s2 <= epsilon || ( s3 - s2 ) < epsilon ) break;
812 
813  US_Vector v_s3( vsize );
814 
815  // v_s3 = vv - uu * s3
816  vector_scaled_sum( v_s3, uu, -s3, vv );
817 
818  if ( neg_cnstr && v_s3[ 0 ] < 0.1 )
819  {
820  v_s3.assign( 0, 0.1 + u_random( 100 ) * 0.001 );
821  }
822 
823  double g_s3 = get_fitness_v_dmga( v_s3, zz );
824 
825  int reps = 0;
826  static const int max_reps = 100;
827 
828  while ( ( ( s2 - s1 ) > epsilon ) &&
829  ( ( s3 - s2 ) > epsilon ) &&
830  ( reps++ < max_reps ) )
831  {
832  double s1_s2 = 1.0 / ( s1 - s2 );
833  double s1_s3 = 1.0 / ( s1 - s3 );
834  double s2_s3 = 1.0 / ( s2 - s3 );
835 
836  double s1_2 = sq( s1 );
837  double s2_2 = sq( s2 );
838  double s3_2 = sq( s3 );
839 
840  double aa = ( ( g_s1 - g_s3 ) * s1_s3 -
841  ( g_s2 - g_s3 ) * s2_s3
842  ) * s1_s2;
843 
844  double bb = ( g_s3 * ( s2_2 - s1_2 ) +
845  g_s2 * ( s1_2 - s3_2 ) +
846  g_s1 * ( s3_2 - s2_2 )
847  ) *
848  s1_s2 * s1_s3 * s2_s3;
849 
850  static const double max_a = 1.0e-25;
851 
852  if ( qAbs( aa ) < max_a )
853  {
854  // Restore gene from array of normalized doubles
855  for ( int ii = 0; ii < vsize; ii++ )
856  {
857  dgmarker[ ii ] = vv[ ii ] / zz[ ii ];
858  }
859 
860  dgene_from_marker( dgmarker, dgene );
861 
862  return fitness;
863  }
864 
865  double xx = -bb / ( 2.0 * aa );
866  double prev_g_s2 = g_s2;
867 
868  if ( xx < s1 )
869  {
870  if ( xx < ( s1 + s1 - s2 ) ) // Keep it close
871  {
872  xx = s1 + s1 - s2; // xx <- s1 + ds
873  if ( xx < 0 ) xx = s1 / 2.0;
874  }
875 
876  if ( xx < 0 ) // Wrong direction!
877  {
878  if ( s1 < 0 ) s1 = 0.0;
879  xx = 0;
880  }
881 
882  // OK, take xx, s1, s2
883  v_s3 = v_s2;
884  g_s3 = g_s2; // 3 <- 2
885  s3 = s2;
886  v_s2 = v_s1;
887  g_s2 = g_s1;
888  s2 = s1; // 2 <- 1
889  s1 = xx; // 1 <- xx
890 
891  // v_s1 = vv - uu * s1
892  vector_scaled_sum( v_s1, uu, -s1, vv );
893 
894  if ( neg_cnstr && v_s1[ 0 ] < 0.1 )
895  {
896  v_s1.assign( 0, 0.1 + u_random( 100 ) * 0.001 );
897  }
898 
899  g_s1 = get_fitness_v_dmga( v_s1, zz );
900  }
901  else if ( xx < s2 ) // Take s1, xx, s2
902  {
903  v_s3 = v_s2;
904  g_s3 = g_s2; // 3 <- 2
905  s3 = s2;
906  s2 = xx; // 2 <- xx
907 
908  // v_s2 = vv - uu * s2
909  vector_scaled_sum( v_s2, uu, -s2, vv );
910 
911  if ( neg_cnstr && v_s2[ 0 ] < 0.1 )
912  {
913  v_s2.assign( 0, 0.1 + u_random( 100 ) * 0.001 );
914  }
915 
916  g_s2 = get_fitness_v_dmga( v_s2, zz );
917  }
918  else if ( xx < s3 ) // Take s2, xx, s3
919  {
920  v_s1 = v_s2;
921  g_s1 = g_s2;
922  s1 = s2; // 2 <- 1
923  s2 = xx; // 2 <- xx
924 
925  // v_s2 = vv - uu * s2
926  vector_scaled_sum( v_s2, uu, -s2, vv );
927 
928  if ( neg_cnstr && v_s2[ 0 ] < 0.1 )
929  {
930  v_s2.assign( 0, 0.1 + u_random( 100 ) * 0.001 );
931  }
932 
933  g_s2 = get_fitness_v_dmga( v_s2, zz );
934  }
935  else // xx >= s3
936  {
937  if ( xx > ( s3 + s3 - s2 ) ) // if xx > s3 + ds/2
938  {
939  // v_s4 = vv - uu * xx
940  US_Vector v_s4( vsize );
941  vector_scaled_sum( v_s4, uu, -xx, vv );
942 
943  if ( neg_cnstr && v_s4[ 0 ] < 0.1 )
944  {
945  v_s4.assign( 0, 0.1 + u_random( 100 ) * 0.001 );
946  }
947 
948  double g_s4 = get_fitness_v_dmga( v_s4, zz );
949 
950  if ( g_s4 > g_s2 && g_s4 > g_s3 && g_s4 > g_s1 )
951  {
952  xx = s3 + s3 - s2; // xx = s3 + ds/2
953  }
954  }
955 
956  // Take s2, s3, xx
957  v_s1 = v_s2;
958  g_s1 = g_s2; // 1 <- 2
959  s1 = s2;
960  v_s2 = v_s3;
961  g_s2 = g_s3;
962  s2 = s3; // 2 <- 3
963  s3 = xx; // 3 <- xx
964 
965  // v_s3 = vv - uu * s3
966  vector_scaled_sum( v_s3, uu, -s3, vv );
967 
968  if ( neg_cnstr && v_s3[ 0 ] < 0.1 )
969  {
970  v_s3.assign( 0, 0.1 + u_random( 100 ) * 0.001 );
971  }
972 
973  g_s3 = get_fitness_v_dmga( v_s3, zz );
974  }
975 
976  if ( qAbs( prev_g_s2 - g_s2 ) < epsilon ) break;
977  } // end of inner loop
978 
979  US_Vector v_p( vsize );
980 
981  if ( g_s2 < g_s3 && g_s2 < g_s1 )
982  {
983  v_p = v_s2;
984  fitness = g_s2;
985  }
986  else if ( g_s1 < g_s3 )
987  {
988  v_p = v_s1;
989  fitness = g_s1;
990  }
991  else
992  {
993  v_p = v_s3;
994  fitness = g_s3;
995  }
996 
997  US_Vector v_g( vsize ); // Vector of derivatives
998  lamm_gsm_df_dmga( v_p, v_g, zz ); // New gradient in v_g (old in uu)
999 
1000  US_Vector v_dx( vsize );
1001  // v_dx = v_p - vv
1002  vector_scaled_sum( v_dx, vv, -1.0, v_p );
1003 
1004 
1005  vv = v_p; // vv = v_p
1006 
1007  // dgradient v_dg = v_g - uu
1008  US_Vector v_dg( vsize );
1009  vector_scaled_sum( v_dg, uu, -1.0, v_g );
1010 
1011  US_Vector v_hdg( vsize );
1012 
1013  // v_hdg = hessian * v_dg ( matrix * vector )
1014  for ( int ii = 0; ii < vsize; ii++ )
1015  {
1016  double dotprod = 0.0;
1017 
1018  for ( int jj = 0; jj < vsize; jj++ )
1019  dotprod += ( hessian[ ii ][ jj ] * v_dg[ jj ] );
1020 
1021  v_hdg.assign( ii, dotprod );
1022  }
1023 
1024  double fac = v_dg.dot( v_dx );
1025  double fae = v_dg.dot( v_hdg );
1026  double sumdg = v_dg.dot( v_dg );
1027  double sumxi = v_dx.dot( v_dx );
1028 
1029  if ( fac > sqrt( epsilon * sumdg * sumxi ) )
1030  {
1031  fac = 1.0 / fac;
1032  double fad = 1.0 / fae;
1033 
1034  for ( int ii = 0; ii < vsize; ii++ )
1035  {
1036  v_dg.assign( ii, fac * v_dx[ ii ] - fad * v_hdg[ ii ] );
1037  }
1038 
1039  for ( int ii = 0; ii < vsize; ii++ )
1040  {
1041  for ( int jj = ii; jj < vsize; jj++ )
1042  {
1043  hessian[ ii ][ jj ] +=
1044  fac * v_dx [ ii ] * v_dx [ jj ] -
1045  fad * v_hdg[ ii ] * v_hdg[ jj ] +
1046  fae * v_dg [ ii ] * v_dg [ jj ];
1047 
1048  // It's a symmetrical matrix
1049  hessian[ jj ][ ii ] = hessian[ ii ][ jj ];
1050  }
1051  }
1052  }
1053 
1054  // uu = hessian * v_g ( matrix * vector )
1055  for ( int ii = 0; ii < vsize; ii++ )
1056  {
1057  double dotprod = 0.0;
1058 
1059  for ( int jj = 0; jj < vsize; jj++ )
1060  dotprod += ( hessian[ ii ][ jj ] * v_g[ jj ] );
1061 
1062  uu.assign( ii, dotprod );
1063  }
1064 
1065  } // end while ( uu.L2norm() > epsilon )
1066 
1067  // Restore gene from array of normalized doubles
1068  for ( int ii = 0; ii < vsize; ii++ )
1069  {
1070  dgmarker[ ii ] = vv[ ii ] / zz[ ii ];
1071 DbgLv(1) << my_rank << "dg:IHM: ii" << ii << "vvi zzi dgmi"
1072  << vv[ii] << zz[ii] << dgmarker[ii];
1073  }
1074 
1075  dgene_from_marker( dgmarker, dgene );
1076 DbgLv(1) << my_rank << "dg:IHM: FITNESS" << fitness;
1077 
1078  return fitness;
1079 }
1080 
1081 // Get the fitness value for a discrete GA Gene
1083 {
1084  QString fkey = dgene_key( dgene ); // Get an identifying key string
1085  fitness_count++;
1086 
1087  if ( fitness_map.contains( fkey ) )
1088  { // We already have a match to this key, so use its fitness value
1089  fitness_hits++;
1090  return fitness_map.value( fkey );
1091  }
1092 
1094 
1095  // Compute the simulation and residuals for this model
1097 
1098  double fitness = sim.variance; // Get the computed fitness
1099  fitness_map.insert( fkey, fitness ); // Add it the fitness map
1100 
1101 //DbgLv(1) << "dg:get_fit fitness" << fitness << "count hits"
1102 // << fitness_count << fitness_hits;
1103  return fitness;
1104 }
1105 
1106 // Compose an identifying key string for a discrete GA Gene
1108 {
1109  // Get the marker for this gene
1110  marker_from_dgene( dgmarker, dgene );
1111 
1112  // Compose a key string that is a concatenation of marker value strings
1113  QString str;
1114  QString fkey = str.sprintf( "%.5e", dgmarker[ 0 ] );
1115 
1116  for ( int ii = 1; ii < nfloatc; ii++ )
1117  fkey += str.sprintf( " %.5e", dgmarker[ ii ] );
1118 
1119 DbgLv(1) << my_rank << "dg:dgkey" << fkey << "mv0 mvn" << dgmarker[0] << dgmarker[nfloatc-1];
1120  return fkey;
1121 }
1122 
1123 // Calculate residuals for a given discrete GA gene
1124 void US_MPI_Analysis::calc_residuals_dmga( int offset, int dset_count,
1125  US_SolveSim::Simulation& sim_vals, DGene& dgene )
1126 {
1127  int lim_offs = offset + dset_count;
1128 
1129  US_DataIO::RawData* sdata = &sim_vals.sim_data;
1130  US_DataIO::RawData* resid = &sim_vals.residuals;
1131  int jscan = 0;
1132 
1133  // Compute simulations for all data sets with the given model
1134  for ( int ee = offset; ee < lim_offs; ee++ )
1135  {
1136  US_Model model;
1138  US_SolveSim::DataSet* dset = data_sets[ ee ];
1139  US_DataIO::EditedData* edata = &dset->run_data;
1140  US_DataIO::RawData simdat;
1141  int nscans = edata->scanCount();
1142 
1143  // Initialize simulation data with experiment's grid
1144  US_AstfemMath::initSimData( simdat, *edata, 0.0 );
1145 
1146  // Create experimental-space model from DGene for the data set
1147 
1148  sd.viscosity = dset->viscosity; // Set up data corrections
1149  sd.density = dset->density;
1150  sd.manual = dset->manual;
1151 
1152  model_from_dgene( model, dgene ); // Compute apply model from base
1153 
1154  if ( ee == offset )
1155  data_sets[ 0 ]->model = model; // Save the s20w model
1156 
1157  for ( int cc = 0; cc < model.components.size(); cc++ )
1158  { // Loop to perform data corrections to s and D (experimental space)
1159  sd.vbar20 = model.components[ cc ].vbar20;
1160  sd.vbar = US_Math2::adjust_vbar20( sd.vbar20, dset->temperature );
1162 
1163  model.components[ cc ].s /= sd.s20w_correction;
1164  model.components[ cc ].D /= sd.D20w_correction;
1165  }
1166 if(my_rank<2 && dbg_level>0) {
1167 DbgLv(1) << my_rank << "CR: SIMPARAMS DUMP";
1168 dset->simparams.debug();
1169 DbgLv(1) << my_rank << "CR: dens visc scorr dcorr"
1170  << sd.density << sd.viscosity << sd.s20w_correction << sd.D20w_correction;
1171 DbgLv(1) << my_rank << "CR: MODEL DUMP";
1172 model.debug(); }
1173 
1174  // Compute the simulation
1175 
1176  if ( do_astfem )
1177  { // Calculate simulation for FEM (ideal) case
1178  US_Astfem_RSA astfem_rsa( model, dset->simparams );
1179  astfem_rsa.set_debug_flag( my_rank < 2 ? dbg_level : (-1) );
1180 //*DEBUG
1181 if(my_rank<1) {
1182 int dbg_flg=in_gsm?(-1):dbg_level;
1183 //DbgLv(0) << my_rank << "CR: call ASTFEM CALC";
1184 astfem_rsa.set_debug_flag(dbg_flg);
1185 }
1186 //*DEBUG
1187 
1188  astfem_rsa.calculate( simdat );
1189  }
1190 
1191  else
1192  { // Calculate simulation for FVM (non-ideal) case
1193  US_Buffer buffer;
1194  buffer.viscosity = dset->viscosity;
1195  buffer.density = dset->density;
1196  buffer.compressibility = dset->compress;
1197 
1198  US_LammAstfvm astfvm( model, dset->simparams );
1199  astfvm.set_buffer( buffer );
1200  astfvm.calculate( simdat );
1201  }
1202 
1203  // Save simulation for this data set
1204  if ( ee == offset )
1205  { // Initialize simulation data with (first) sim data
1206  *sdata = simdat;
1207  jscan += nscans;
1208  }
1209  else
1210  { // Append sim_data for second and subsequent data set
1211  for ( int ss = 0; ss < nscans; ss++ )
1212  {
1213  sdata->scanData[ jscan++ ] = simdat.scanData[ ss ];
1214  }
1215  }
1216  }
1217 
1218  int ks = 0;
1219  double variance = 0.0;
1220 
1221  // Compute overall residual and variance
1222  for ( int ee = 0; ee < lim_offs; ee++ )
1223  {
1224  US_SolveSim::DataSet* dset = data_sets[ ee ];
1225  US_DataIO::EditedData* edata = &dset->run_data;
1226  US_DataIO::RawData simdat;
1227  int npoints = dset->run_data.pointCount();
1228  int nscans = dset->run_data.scanCount();
1229 
1230  for ( int ss = 0; ss < nscans; ss++, ks++ )
1231  {
1232  resid->scanData[ ks ] = sdata->scanData[ ks ];
1233 
1234  for ( int rr = 0; rr < npoints; rr++ )
1235  {
1236  double resval = edata->value( ss, rr )
1237  - sdata->value( ks, rr );
1238  variance += sq( resval );
1239 
1240  resid->setValue( ks, rr, resval );
1241  }
1242  }
1243  }
1244 
1245  sim_vals.variance = variance / (double)total_points;
1246 if(my_rank==0)
1247 DbgLv(1) << my_rank << "CR: variance" << sim_vals.variance << variance << total_points;
1248 }
1249 
1250 // Get the value part of a "key value " substring
1251 QString US_MPI_Analysis::par_key_value( const QString kvtext, const QString key )
1252 {
1253  QString value( "" );
1254  int keyx = kvtext.indexOf( key );
1255  value = ( keyx < 0 ) ? value
1256  : QString( kvtext ).mid( keyx ).section( " ", 1, 1 );
1257 if(my_rank<2)
1258 DbgLv(0) << my_rank << ": p_k_v key" << key << "value" << value << "kvtext" << kvtext;
1259 
1260  return value;
1261 }
1262