UltraScan III
us_dmga_mc_stats.cpp
Go to the documentation of this file.
1 
3 #include "us_dmga_mc_stats.h"
4 #include "us_math2.h"
5 
6 // Class of static functions for DMGA-MC statistics
7 
8 // Build a vector of iteration models from a DMGA-MC model
10  QVector< US_Model >& imodels )
11 {
12  imodels.clear();
13  QStringList xmls;
14  int niters = 0;
15  int ncomp = 0;
16  int nasso = 0;
17  int kcomp = 0;
18  int kasso = 0;
19  int nxmls = model.mc_iter_xmls( xmls );
20 
21  for ( int ii = 0; ii < nxmls; ii++ )
22  { // Build the individual iteration models
23  QString mxml = xmls[ ii ];
24  int jj = mxml.indexOf( "description=" );
25  QString mdesc = QString( mxml ).mid( jj, 100 )
26  .section( '"', 1, 1 );
27 
28  US_Model imodel;
29  imodel.load_string( mxml );
30  kcomp = imodel.components.size();
31  kasso = imodel.associations.size();;
32 qDebug() << "DmS:AdjMd: ii" << ii << "xml.desc" << mdesc;
33 qDebug() << "DmS:AdjMd: ncomp" << kcomp << "nassoc" << kasso;
34 
35  if ( ii == 0 )
36  {
37  ncomp = kcomp;
38  nasso = kasso;
39  }
40 
41  if ( kcomp == ncomp || kasso == nasso )
42  {
43  niters++;
44  imodels << imodel;
45  }
46  else
47 qDebug() << "DmS:AdjMd: ***ii=" << ii << "kcomp" << kcomp << "kassoc" << kasso;
48 
49  }
50 
51  return niters;
52 }
53 
54 // Build RMSD statistics from iteration models
56  QVector< US_Model >& imodels, QVector< double >& rstats )
57 {
58  const int stsiz = 23;
59  QVector< double > rmsds;
60  QVector< double > rconcs;
61 
62  rstats.fill( 0.0, stsiz );
63 
64  // Get statistics for RMSDs
65  for ( int jj = 0; jj < niters; jj++ )
66  {
67  double rmsd = sqrt( imodels[ jj ].variance );
68  rmsds << rmsd;
69  rconcs << 1.0;
70  }
71 
72  compute_statistics( niters, rmsds, rconcs, rstats );
73 qDebug() << "Dst:BRs: iters" << niters << "RMSD min max mean median"
74  << rstats[0] << rstats[1] << rstats[2] << rstats[3];
75 }
76 
77 // Build Model attribute statistics from iteration models
79  QVector< US_Model >& imodels, QVector< QVector< double > >& astats )
80 {
81  QVector< double > concs;
82  QVector< double > vbars;
83  QVector< double > mwts;
84  QVector< double > scos;
85  QVector< double > dcos;
86  QVector< double > ff0s;
87  QVector< double > kds;
88  QVector< double > koffs;
89  const int ncatt = 6;
90  const int naatt = 2;
91  int ncomp = imodels[ 0 ].components.size();
92  int nasso = imodels[ 0 ].associations.size();
93  int ntatts = ncomp * ncatt + nasso * naatt;
94  astats.resize( ntatts );
95 
96  int kt = 0;
97 qDebug() << "DmS:BMs: ncomp" << ncomp << "nasso" << nasso
98  << "ntatts" << ntatts << "niters" << niters;
99 
100  // Get statistics for each component
101  for ( int ii = 0; ii < ncomp; ii++ )
102  {
103  concs.fill( 0.0, niters );
104  vbars.fill( 0.0, niters );
105  mwts .fill( 0.0, niters );
106  scos .fill( 0.0, niters );
107  dcos .fill( 0.0, niters );
108  ff0s .fill( 0.0, niters );
109  int i0 = qMax( 0, ii - 1 );
110 
111  for ( int jj = 0; jj < niters; jj++ )
112  {
113  US_Model::SimulationComponent* sc = &imodels[ jj ].components[ ii ];
114  US_Model::SimulationComponent* s0 = &imodels[ jj ].components[ i0 ];
115  concs[ jj ] = sc->signal_concentration > 0.0 ?
118  vbars[ jj ] = sc->vbar20;
119  mwts [ jj ] = sc->mw;
120  scos [ jj ] = sc->s;
121  dcos [ jj ] = sc->D;
122  ff0s [ jj ] = sc->f_f0;
123  }
124 
125  compute_statistics( niters, concs, concs, astats[ kt++ ] );
126  compute_statistics( niters, vbars, concs, astats[ kt++ ] );
127  compute_statistics( niters, mwts , concs, astats[ kt++ ] );
128  compute_statistics( niters, scos , concs, astats[ kt++ ] );
129  compute_statistics( niters, dcos , concs, astats[ kt++ ] );
130  compute_statistics( niters, ff0s , concs, astats[ kt++ ] );
131 qDebug() << "DmS:BMs: ii" << ii << "mean c v w s d k"
132  << astats[kt-6][2] << astats[kt-5][2] << astats[kt-4][2]
133  << astats[kt-3][2] << astats[kt-2][2] << astats[kt-1][2];
134  }
135 
136  // Get statistics for reactions
137  for ( int ii = 0; ii < nasso; ii++ )
138  {
139  US_Model::Association* as = &imodels[ 0 ].associations[ ii ];
140  int nrcs = as->rcomps.size();
141  int rc1 = as->rcomps[ 0 ];
142  int rc2 = as->rcomps[ 1 ];
143  //int stoi1 = as->stoichs[ 0 ];
144  //int stoi2 = as->stoichs[ 1 ];
145  //int stoi3 = ( nrcs == 2 ) ? stoi2 : as->stoichs[ 2 ];
146  concs.fill( 0.0, niters );
147  kds .fill( 0.0, niters );
148  koffs.fill( 0.0, niters );
149 
150  if ( nrcs == 2 )
151  { // Single reactant and a product
152  for ( int jj = 0; jj < niters; jj++ )
153  {
154  US_Model::Association* as = &imodels[ jj ].associations[ ii ];
155  US_Model::SimulationComponent* sc = &imodels[ jj ].components[rc1];
156  concs[ jj ] = sc->signal_concentration;
157  kds [ jj ] = as->k_d;
158  koffs[ jj ] = as->k_off;
159  }
160 
161  compute_statistics( niters, kds, concs, astats[ kt++ ] );
162  compute_statistics( niters, koffs, concs, astats[ kt++ ] );
163  }
164 
165  else
166  { // Two reactants and a product (sum concentrations)
167  for ( int jj = 0; jj < niters; jj++ )
168  {
169  US_Model::Association* as = &imodels[ jj ].associations[ ii ];
170  US_Model::SimulationComponent* sc = &imodels[ jj ].components[rc1];
171  US_Model::SimulationComponent* c2 = &imodels[ jj ].components[rc2];
172  concs[ jj ] = sc->signal_concentration
173  + c2->signal_concentration;
174  kds [ jj ] = as->k_d;
175  koffs[ jj ] = as->k_off;
176  }
177 
178  compute_statistics( niters, kds, concs, astats[ kt++ ] );
179  compute_statistics( niters, koffs, concs, astats[ kt++ ] );
180 qDebug() << "DmS:BMs: ii" << ii << "mean kd ko"
181  << astats[kt-2][2] << astats[kt-1][2];
182  }
183  }
184 
185 qDebug() << "DmS:BMs: RETURN w ntatts" << ntatts;
186  return ntatts;
187 }
188 
189 // Compute the statistical values for a vector of values
191  QVector< double >& vals, QVector< double >& concs,
192  QVector< double >& stats )
193 {
194  bool is_fixed = false;
195  const int stsiz = 23;
196  int nbins = 50;
197  double vsiz = (double)nvals;
198  double binsz = 50.0;
199  double vlo = 9.9e30;
200  double vhi = -9.9e30;
201 
202  QVector< double > xpvec( qMax( nvals, nbins ) );
203  QVector< double > ypvec( qMax( nvals, nbins ) );
204 
205  double *xplot = xpvec.data();
206  double *yplot = ypvec.data();
207  double vsum = 0.0;
208  double vm2 = 0.0;
209  double vm3 = 0.0;
210  double vm4 = 0.0;
211  double vmean, vmedi;
212  double mode_cen, mode_lo, mode_hi;
213  double conf99lo, conf99hi, conf95lo, conf95hi;
214  double clim99lo, clim99hi, clim95lo, clim95hi;
215  double skew, kurto, slope, vicep, sigma;
216  double corr, sdevi, sderr, vari, area;
217  double bininc, val, conc;
218  double vctot = 0.0;
219 
220  stats.resize( stsiz );
221 
222  // Get basic min,max,mean information
223 
224  for ( int jj = 0; jj < nvals; jj++ )
225  {
226  val = vals.at( jj );
227  conc = concs.at( jj );
228  vsum += ( val * conc );
229  vctot += conc;
230  vlo = qMin( vlo, val );
231  vhi = qMax( vhi, val );
232  xplot[jj] = (double)jj;
233  yplot[jj] = val;
234  }
235 
236  vmean = vsum / vctot;
237  is_fixed = ( vlo == vhi );
238 
239  if ( is_fixed )
240  { // Values are the same, special statistics for fixed attribute
241  vmedi = vmean;
242  mode_cen = vmean;
243  skew = 0.0;
244  kurto = 0.0;
245  mode_lo = 0.0;
246  mode_hi = 0.0;
247  conf95lo = vmean;
248  conf95hi = vmean;
249  conf99lo = vmean;
250  conf99hi = vmean;
251  clim95lo = vmean;
252  clim95hi = vmean;
253  clim99lo = vmean;
254  clim99hi = vmean;
255  vari = 0.0;
256  corr = 0.0;
257  sdevi = 0.0;
258  sderr = 0.0;
259  area = 0.0;
260  }
261 
262  else
263  { // Values are not the same, so statistics need to be computed
264 
265  for ( int jj = 0; jj < nvals; jj++ )
266  { // Get difference information
267  val = vals.at( jj );
268  double dif = val - vmean;
269  double dsq = dif * dif;
270  vm2 += dsq; // diff squared
271  vm3 += ( dsq * dif ); // cubed
272  vm4 += ( dsq * dsq ); // to the 4th
273  }
274 
275  vm2 /= vsiz;
276  vm3 /= vsiz;
277  vm4 /= vsiz;
278  skew = vm3 / pow( vm2, 1.5 );
279  kurto = vm4 / pow( vm2, 2.0 ) - 3.0;
280 
281  // Do line fit (mainly for corr value)
282  US_Math2::linefit( &xplot, &yplot, &slope, &vicep, &sigma, &corr, nvals );
283 
284  // Sort Y values and determine median
285  qSort( ypvec );
286  int hx = nvals / 2;
287  vmedi = ypvec[ hx ];
288  if ( ( hx * 2 ) == nvals )
289  vmedi = ( vmedi + ypvec[ hx - 1 ] ) * 0.5;
290 
291  // Standard deviation and error
292  sdevi = pow( vm2, 0.5 );
293  sderr = sdevi / pow( vsiz, 0.5 );
294  vari = vm2;
295  area = 0.0;
296 
297  bininc = ( vhi - vlo ) / binsz;
298 
299  // Mode and confidence
300  for ( int ii = 0; ii < nbins; ii++ )
301  {
302  xplot[ii] = vlo + bininc * (double)ii;
303  yplot[ii] = 0.0;
304 
305  for ( int jj = 0; jj < nvals; jj++ )
306  {
307  val = vals.at( jj );
308 
309  if ( val >= xplot[ ii ] && val < ( xplot[ ii ] + bininc ) )
310  {
311  yplot[ii] += ( concs.at( jj ) );
312  }
313  }
314 
315  area += yplot[ ii ] * bininc;
316  }
317 
318  //double fvdif = qAbs( ( vhi - vlo ) / vlo );
319  val = -1.0;
320  int thisb = 0;
321 
322  for ( int ii = 0; ii < nbins; ii++ )
323  {
324  if ( yplot[ii] > val )
325  {
326  val = yplot[ ii ];
327  thisb = ii;
328  }
329  }
330 
331  mode_lo = xplot[ thisb ];
332  mode_hi = mode_lo + bininc;
333  mode_cen = ( mode_lo + mode_hi ) * 0.5;
334  conf99lo = vmean - 2.576 * sdevi;
335  conf99hi = vmean + 2.576 * sdevi;
336  conf95lo = vmean - 1.960 * sdevi;
337  conf95hi = vmean + 1.960 * sdevi;
338  clim95lo = conf95hi - mode_cen;
339  clim95hi = mode_cen - conf95lo;
340  clim99lo = conf99hi - mode_cen;
341  clim99hi = mode_cen - conf99lo;
342  }
343 
344  stats[ 0 ] = vlo; // Minimum
345  stats[ 1 ] = vhi; // Minimum
346  stats[ 2 ] = vmean; // Mean
347  stats[ 3 ] = vmedi; // Median
348  stats[ 4 ] = skew; // Skew
349  stats[ 5 ] = kurto; // Kurtosis
350  stats[ 6 ] = mode_lo; // Lower Mode
351  stats[ 7 ] = mode_hi; // Upper Mode
352  stats[ 8 ] = mode_cen; // Mode Center
353  stats[ 9 ] = conf95lo; // 95% Confidence Interval Low
354  stats[ 10 ] = conf95hi; // 95% Confidence Interval High
355  stats[ 11 ] = conf99lo; // 99% Confidence Interval Low
356  stats[ 12 ] = conf99hi; // 99% Confidence Interval High
357  stats[ 13 ] = sdevi; // Standard Deviation
358  stats[ 14 ] = sderr; // Standard Error
359  stats[ 15 ] = vari; // Variance
360  stats[ 16 ] = corr; // Correlation Coefficient
361  stats[ 17 ] = binsz; // Number of Bins
362  stats[ 18 ] = area; // Distribution Area
363  stats[ 19 ] = clim95lo; // 95% Confidence Limit Low
364  stats[ 20 ] = clim95hi; // 95% Confidence Limit High
365  stats[ 21 ] = clim99lo; // 99% Confidence Limit Low
366  stats[ 22 ] = clim99hi; // 99% Confidence Limit High
367 
368  return is_fixed;
369 }
370 
371 // Build Model attribute statistics from iteration models
372 int US_DmgaMcStats::build_used_model( const QString smtype, const int iter,
373  QVector< US_Model >& imodels, US_Model& umodel )
374 {
375 
376  int stx = -iter; // Iteration index
377  stx = ( smtype == "mean" ) ? 2 : stx; // Mean stats index
378  stx = ( smtype == "median" ) ? 3 : stx; // Median stats index
379  stx = ( smtype == "mode" ) ? 8 : stx; // Mode stats index
380 qDebug() << "DmS:AdjMd: stx" << stx;
381 
382  if ( stx < 0 )
383  { // Used model is one of the iterations
384  umodel = imodels[ iter - 1 ];
385  }
386 
387  else
388  { // Used model uses mean|median|mode of each attribute
389  umodel = imodels[ 0 ];
390  int niters = imodels.size();
391  QVector< QVector< double > > mstats;
392 
393  // Build statistics across iterations
394  US_DmgaMcStats::build_model_stats( niters, imodels, mstats );
395 
396  int ncomp = umodel.components.size();
397  int nasso = umodel.associations.size();
398  int ks = 0;
399 
400  for ( int ii = 0; ii < ncomp; ii++ )
401  { // Pick up mean|median|mode of any floating attributes
402  bool fixs = ( mstats[ ks + 3 ][ 0 ] == mstats[ ks + 3 ][ 1 ] );
403  bool fixd = ( mstats[ ks + 4 ][ 0 ] == mstats[ ks + 4 ][ 1 ] );
404  bool fixk = ( mstats[ ks + 5 ][ 0 ] == mstats[ ks + 5 ][ 1 ] );
405  double conc = mstats[ ks++ ][ stx ];
406  double vbar = mstats[ ks++ ][ stx ];
407  double mw = mstats[ ks++ ][ stx ];
408  double sedc = mstats[ ks++ ][ stx ];
409  double difc = mstats[ ks++ ][ stx ];
410  double ff0 = mstats[ ks++ ][ stx ];
411  conc = umodel.is_product( ii ) ? 0.0 : conc;
412 
413  umodel.components[ ii ].signal_concentration = conc;
414  umodel.components[ ii ].vbar20 = vbar;
415  umodel.components[ ii ].mw = mw;
416  umodel.components[ ii ].s = 0.0;
417  umodel.components[ ii ].D = 0.0;
418  umodel.components[ ii ].f_f0 = 0.0;
419  umodel.components[ ii ].f = 0.0;
420 
421  if ( fixk )
422  umodel.components[ ii ].f_f0 = ff0; // Set fixed f/f0
423  else if ( fixs )
424  umodel.components[ ii ].s = sedc; // Set fixed s
425  else if ( fixd )
426  umodel.components[ ii ].D = difc; // Set fixed D
427  else
428  umodel.components[ ii ].f_f0 = ff0; // Set f/f0 if none fixed
429 //*DEBUG*
430 qDebug() << "DmS:UajMd: Cii=" << ii << "c v w s d k"
431  << umodel.components[ii].signal_concentration
432  << umodel.components[ii].vbar20
433  << umodel.components[ii].mw
434  << umodel.components[ii].s
435  << umodel.components[ii].D
436  << umodel.components[ii].f_f0 << "fixs,d,k" << fixs << fixd << fixk;
437  umodel.calc_coefficients( umodel.components[ ii ] );
438 qDebug() << "DmS:AdjMd: Cii=" << ii << "c v w s d k"
439  << umodel.components[ii].signal_concentration
440  << umodel.components[ii].vbar20
441  << umodel.components[ii].mw
442  << umodel.components[ii].s
443  << umodel.components[ii].D
444  << umodel.components[ii].f_f0;
445 umodel.components[ii].D =0.0;
446 umodel.components[ii].f_f0=0.0;
447 umodel.components[ii].f =0.0;
448 umodel.calc_coefficients(umodel.components[ii]);
449 qDebug() << "DmS:AdjMd: fixS s d k" << umodel.components[ii].s
450  << umodel.components[ii].D << umodel.components[ii].f_f0;
451 umodel.components[ii].s =0.0;
452 umodel.components[ii].f_f0=0.0;
453 umodel.components[ii].f =0.0;
454 umodel.calc_coefficients(umodel.components[ii]);
455 qDebug() << "DmS:AdjMd: fixD s d k" << umodel.components[ii].s
456  << umodel.components[ii].D << umodel.components[ii].f_f0;
457 umodel.components[ii].s =0.0;
458 umodel.components[ii].D =0.0;
459 umodel.components[ii].f =0.0;
460 umodel.calc_coefficients(umodel.components[ii]);
461 qDebug() << "DmS:AdjMd: fixK s d k" << umodel.components[ii].s
462  << umodel.components[ii].D << umodel.components[ii].f_f0;
463 umodel.components[ii].mw =0.0;
464 umodel.components[ii].D =0.0;
465 umodel.components[ii].f =0.0;
466 umodel.calc_coefficients(umodel.components[ii]);
467 qDebug() << "DmS:AdjMd: fixD w d k" << umodel.components[ii].mw
468  << umodel.components[ii].D << umodel.components[ii].f_f0;
469 umodel.components[ii].s =0.0;
470 umodel.components[ii].mw =0.0;
471 umodel.components[ii].f =0.0;
472 umodel.calc_coefficients(umodel.components[ii]);
473 qDebug() << "DmS:AdjMd: fixK s d w" << umodel.components[ii].s
474  << umodel.components[ii].D << umodel.components[ii].mw;
475 //*DEBUG*
476  } // END: components loop
477 
478  for ( int ii = 0; ii < nasso; ii++ )
479  {
480  bool fltd = ( mstats[ ks ][ 0 ] != mstats[ ks ][ 1 ] );
481  bool flto = ( mstats[ ks + 1 ][ 0 ] != mstats[ ks + 1 ][ 1 ] );
482  double k_d = fltd ? mstats[ ks++ ][ stx ] : mstats[ ks++ ][ 0 ];
483  double k_off = flto ? mstats[ ks++ ][ stx ] : mstats[ ks++ ][ 0 ];
484  umodel.associations[ ii ].k_d = k_d;
485  umodel.associations[ ii ].k_off = k_off;
486 qDebug() << "DmS:AdjMd: Aii=" << ii << "d off"
487  << umodel.associations[ii].k_d << umodel.associations[ii].k_off;
488  } // END: associations loop
489  } // END: smtype==mean|median|mode
490 
491  return stx;
492 }
493