diff --git a/App/inc/TestMiscellaneous.h b/App/inc/TestMiscellaneous.h index 8f3da69359654490858232debf2dd786cbceb71c..a186a1997a5819d0f8f2e8b4beb99f5b1a78b289 100644 --- a/App/inc/TestMiscellaneous.h +++ b/App/inc/TestMiscellaneous.h @@ -31,6 +31,7 @@ public: void test_DoubleToComplexInterpolatingFunction(); void test_FormFactor(); void test_DrawMesocrystal(); + void test_FastSin(); }; diff --git a/App/src/TestMesoCrystal.cpp b/App/src/TestMesoCrystal.cpp index 1ad73e65d2efb555d61a94a3cd9ae7cefb39075b..aa7cf2fc3a340ad54c99dcc18c7d594a8e82bd03 100644 --- a/App/src/TestMesoCrystal.cpp +++ b/App/src/TestMesoCrystal.cpp @@ -146,7 +146,7 @@ void TestMesoCrystal::initializeSample() mp_sample = p_multi_layer; - mp_sample->walk_and_print(); + //mp_sample->walk_and_print(); std::cout << "Average layer index: " << n_avg << std::endl; std::cout << "Adapted particle index: " << n_particle_adapted << std::endl; diff --git a/App/src/TestMiscellaneous.cpp b/App/src/TestMiscellaneous.cpp index fc4caa92d32319d5875bfe42cc2d80ec455b02f4..d349974063e9deb93f858277ad1b6b7dba2cb190 100644 --- a/App/src/TestMiscellaneous.cpp +++ b/App/src/TestMiscellaneous.cpp @@ -13,12 +13,15 @@ #include "MesoCrystal.h" #include "Crystal.h" #include "LatticeBasis.h" +#include "MathFunctions.h" #include "TGraph.h" #include "TH2D.h" #include "TH3D.h" #include "TCanvas.h" #include "TGraphPolar.h" +#include "TRandom.h" +#include "TBenchmark.h" TestMiscellaneous::TestMiscellaneous() { @@ -26,16 +29,63 @@ TestMiscellaneous::TestMiscellaneous() - void TestMiscellaneous::execute() { - test_DoubleToComplexInterpolatingFunction(); + test_FastSin(); + //test_DoubleToComplexInterpolatingFunction(); //test_FormFactor(); //test_DrawMesocrystal(); } +/* ************************************************************************* */ +// test of fast sin function approximation +/* ************************************************************************* */ +void TestMiscellaneous::test_FastSin() +{ + double xmin = -2*M_PI; + double xmax = 2*M_PI; + int npx(100); + double dx = (xmax-xmin)/double(npx-1); + for(int i=0; i<npx; ++i){ + double x = (xmin + dx*i); +// std::cout << x << " " << std::sin(x) << " " << MathFunctions::FastSin<double, true>(x) << std::endl; +// std::cout << "x:" << x << " std::sin " << std::sin(x) << " sine:" << sine(y) << " FastSin:" << MathFunctions::FastSin<double, true>(y) << std::endl; + //std::cout << "xx:" << x << " std::sin " << std::sin(x) << " sine:" << sine1(mod_pi(x)) << std::endl; +// double s1 = std::sin(x); +// double s2 = sine2(x); +// double s2 = MathFunctions::FastSin(x); +// std::cout << "xx:" << x << " std::sin " << s1 << " sine:" << s2 << " diff:" << s1-s2 << std::endl; + + complex_t cx(x, x/2.); + complex_t cs1 = std::sin(cx); + complex_t cs2 = MathFunctions::FastSin(cx); + std::cout << "xx:" << cx << " std::sin " << cs1 << " sine:" << cs2 << " diff:" << cs1-cs2 << std::endl; + } + +// const int nevents = 1000000000; +//// TRandom mr; +//// mr.SetSeed(1); + +// TBenchmark mb; +// mb.Start("sin"); +// for(int i=0; i<nevents; i++){ +//// double x = mr.Rndm(); +// //double y = std::sin(1.0) + std::sin(2.0); +// //double y = MathFunctions::FastSin(1.0)+MathFunctions::FastSin(2.0); +// complex_t x(1.0,-.5); +// //complex_t y = std::sin(x); +// complex_t y = MathFunctions::FastComplexSin(x); +// (void)y; +// } +// mb.Stop("sin"); + +// mb.Show("sin"); + +} + + /* ************************************************************************* */ // test double to complex interpolating function /* ************************************************************************* */ @@ -287,3 +337,4 @@ void TestMiscellaneous::test_DoubleToComplexInterpolatingFunction() gr3_diff->Draw("apl"); } + diff --git a/App/src/main.cpp b/App/src/main.cpp index 8dcd54c3766b5000446685f5fc62af421ca33bde..4f5fc52866986af37134f2c0e3f23ea935a7e413 100644 --- a/App/src/main.cpp +++ b/App/src/main.cpp @@ -17,9 +17,13 @@ int main(int argc, char **argv) ProgramOptions::instance().parseCommandLine(argc, argv); // setting graphics environment - TApplication theApp("theApp", 0, 0); - DrawHelper::SetStyle(); - if( ProgramOptions::instance().find("batch") ) gROOT->SetBatch(true); + TApplication *theApp(0); + if( ProgramOptions::instance().find("batch") ) { + gROOT->SetBatch(true); + } else { + theApp = new TApplication("theApp", 0, 0); + DrawHelper::SetStyle(); + } // running functional tests if( ProgramOptions::instance().find("all") ) { diff --git a/Core/Algorithms/src/MultiLayerDWBASimulation.cpp b/Core/Algorithms/src/MultiLayerDWBASimulation.cpp index 2f61e7a783928e80b033f23e486340e86335a835..f5f078b59fd8d95326d49e99db49619b8adab114 100644 --- a/Core/Algorithms/src/MultiLayerDWBASimulation.cpp +++ b/Core/Algorithms/src/MultiLayerDWBASimulation.cpp @@ -40,6 +40,7 @@ void MultiLayerDWBASimulation::init(const Experiment& experiment) } } + void MultiLayerDWBASimulation::run() { OpticalFresnel fresnelCalculator; @@ -51,14 +52,19 @@ void MultiLayerDWBASimulation::run() const NamedVector<double> *p_alpha_axis = dynamic_cast<const NamedVector<double> *>(getDWBAIntensity().getAxis("alpha_f")); double lambda = 2.0*M_PI/m_ki_real.mag(); std::map<double, OpticalFresnel::MultiLayerCoeff_t> fresnel_coeff_map; +// double x =0; for (size_t i=0; i<p_alpha_axis->getSize(); ++i) { double angle = (*p_alpha_axis)[i]; +// std::cout << "mmm " << angle << " " << angle - x << std::endl; +// x = angle; kvector_t kvec; kvec.setLambdaAlphaPhi(lambda, -angle, 0.0); OpticalFresnel::MultiLayerCoeff_t coeffs; fresnelCalculator.execute(*mp_multi_layer, kvec, coeffs); fresnel_coeff_map[angle] = coeffs; } +// std::cout << m_alpha_i << std::endl; +// throw 1; // Also add input angle OpticalFresnel::MultiLayerCoeff_t coeffs; fresnelCalculator.execute(*mp_multi_layer, m_ki_real, coeffs); @@ -69,6 +75,7 @@ void MultiLayerDWBASimulation::run() std::map<double, complex_t> kz_map; std::map<double, complex_t> T_map; std::map<double, complex_t> R_map; + for (std::map<double, OpticalFresnel::MultiLayerCoeff_t>::const_iterator it=fresnel_coeff_map.begin(); it!=fresnel_coeff_map.end(); ++it) { double angle = (*it).first; diff --git a/Core/FormFactors/src/FormFactorFullSphere.cpp b/Core/FormFactors/src/FormFactorFullSphere.cpp index 12823d1312e3c4a99fde49576a6622430962ed2d..150168d558d4446cb7e38f1c036b491d177d60f1 100644 --- a/Core/FormFactors/src/FormFactorFullSphere.cpp +++ b/Core/FormFactors/src/FormFactorFullSphere.cpp @@ -1,6 +1,7 @@ #include "FormFactorFullSphere.h" #include "StochasticDiracDelta.h" #include "Numeric.h" +#include "MathFunctions.h" #include <cmath> @@ -54,7 +55,16 @@ complex_t FormFactorFullSphere::evaluate_for_q(const cvector_t &q) const radial = volume; } else { - radial = 3*volume*(std::sin(qR) - qR*std::cos(qR))/(qR*qR*qR); + // way1 (standard) + //radial = 3*volume*(std::sin(qR) - qR*std::cos(qR))/(qR*qR*qR); + + // way2 (fast) + radial = 3*volume*(MathFunctions::FastSin(qR) - qR*MathFunctions::FastCos(qR))/(qR*qR*qR); + + // way3 (experimental) + // complex_t xsin, xcos; + // MathFunctions::FastSinCos(qR, xsin, xcos); + // radial = 3*volume*(xsin - qR*xcos)/(qR*qR*qR); } return radial*z_part; diff --git a/Core/Tools/inc/MathFunctions.h b/Core/Tools/inc/MathFunctions.h index 777315e9574adc399fb5c040874720f5b34a63c9..238c14791c0e977bf9ee7b07ade7c20aaa33d2b1 100644 --- a/Core/Tools/inc/MathFunctions.h +++ b/Core/Tools/inc/MathFunctions.h @@ -19,6 +19,7 @@ #include "gsl/gsl_sf_bessel.h" #include "gsl/gsl_sf_trig.h" #include "Types.h" +#include "Units.h" #include "Numeric.h" @@ -53,6 +54,55 @@ std::vector<complex_t > FastFourierTransform(const std::vector<double > &data, T std::vector<complex_t> ConvolveFFT(const std::vector<double> &signal, const std::vector<double> &resfunc); +//! fast sine calculations (not actually fast) +inline double FastSin(const double& x) { + const double P = 0.225f; + const double A = 16 * std::sqrt(P); + const double B = (1 - P) / std::sqrt(P); + double y = x / (2 * M_PI); + y = y - std::floor(y + 0.5); // y in range -0.5..0.5 + y = A * y * (0.5 - std::abs(y)); + return y * (B + std::abs(y)); +} + +//! fast cosine calculation (not actually fast) +inline double FastCos(const double& x) { + const double P = 0.225f; + const double A = 16 * std::sqrt(P); + const double B = (1 - P) / std::sqrt(P); + double y = x / (2 * M_PI)+0.25; // pi/2. shift + y = y - std::floor(y + 0.5); // y in range -0.5..0.5 + y = A * y * (0.5 - std::abs(y)); + return y * (B + std::abs(y)); +} + +//! fast complex sine calculation +inline complex_t FastSin(const complex_t &x) { + // sin(a+bi) = sin(a)cosh(b) + i*cos(a)*sinh(b); + //return complex_t( FastSin(x.real())*std::cosh(x.imag()), FastCos(x.real())*std::sinh(x.imag())); + return complex_t( std::sin(x.real())*std::cosh(x.imag()), std::cos(x.real())*std::sinh(x.imag())); +} + +//! fast complex cosine calculation +inline complex_t FastCos(const complex_t &x) { + // cos(a+bi) = cos(a)cosh(b) - i*sin(a)*sinh(b); + //return complex_t( FastCos(x.real())*std::cosh(x.imag()), -1*FastSin(x.real())*std::sinh(x.imag())); + return complex_t( std::cos(x.real())*std::cosh(x.imag()), -1*std::sin(x.real())*std::sinh(x.imag())); +} + +//! simultaneous complex sine and cosine calculations +inline void FastSinCos(const complex_t &x, complex_t &xsin, complex_t &xcos) +{ + double sina = FastSin(x.real()); + double cosa = std::sqrt(1-sina*sina); + double sinhb = std::sinh(x.imag()); + double coshb = std::sqrt(1-sinhb*sinhb); + xsin.real() =sina*coshb; + xsin.imag() =cosa*sinhb; + xcos.real() =cosa*coshb; + xcos.imag() =-sina*sinhb; +} + } // Namespace MathFunctions inline double MathFunctions::GenerateNormalRandom(double average, double std_dev) @@ -95,4 +145,7 @@ inline complex_t MathFunctions::Laue(complex_t value, size_t N) // Exp(iNx/2)*Si return std::exp(complex_t(0.0, 1.0)*value*(double)N/2.0)*std::sin(value*(N+1.0)/2.0)/std::sin(value/2.0); } + + + #endif // MATHFUNCTIONS_H diff --git a/Core/Tools/inc/Units.h b/Core/Tools/inc/Units.h index b0220d494a0ca7ba1af500cac69d807118ccebc9..989de1c6dab69aef0a317059b841d3f79671d27a 100644 --- a/Core/Tools/inc/Units.h +++ b/Core/Tools/inc/Units.h @@ -43,6 +43,11 @@ static const double mrad = milliradian; static const double sr = steradian; static const double deg = degree; +// definitions of Pi for fast sine calculations +static const double PI = 3.14159265358979323846264338327950288f; +static const double PI2 = 6.28318530717958647692528676655900577f; +static const double PID2 = 1.57079632679489661923132169163975144f; +static const double PI_SQR = 9.86960440108935861883449099987615114f; } diff --git a/Examples/Performance/perf_history.txt b/Examples/Performance/perf_history.txt index ed5cf33c9f57635a19136eed3470fb1b874659ef..c7d4cf9a2e061286a718891e00853aa95d1e5dd3 100644 --- a/Examples/Performance/perf_history.txt +++ b/Examples/Performance/perf_history.txt @@ -82,3 +82,39 @@ 2012-09-18 13:54:59 | jcnsopc73 | macosx64, 2800 MHz | 80321.3 | 4.70588 | 4.04858 | 0.90497 | 2012-09-20 17:29:44 | jcnsopc73 | macosx64, 2800 MHz | 82644.6 | 4.77327 | 4.29185 | 0.06997 | + +# state of the art (coherence, incoherence), threads=-1 +# debug=off +2012-09-25 14:55:19 | surfer-30- | macosx64, 2800 MHz | 263158 | 15.0376 | 14.7059 | 0.33444 | + +# playing with compilere keys +# debug=off -ffast-math +2012-09-25 16:50:37 | surfer-30- | macosx64, 2800 MHz | 270270 | 18.3486 | 18.1818 | 0.36231 | +# debug=off -ffast-math -o3 -msse3 +2012-09-25 16:54:40 | surfer-30- | macosx64, 2800 MHz | 270270 | 18.5185 | 18.3486 | 0.36363 | +# debug=off -ffast-math -o3 (that's out choice) +2012-09-25 16:57:30 | surfer-30- | macosx64, 2800 MHz | 256410 | 18.3486 | 18.018 | 0.36496 | +# debug=off -o3 +2012-09-25 17:00:26 | surfer-30- | macosx64, 2800 MHz | 270270 | 15.1515 | 14.7059 | 0.33898 | + +# current status of the art (debug=off); +2012-09-26 12:07:01 | jcnsops73 | macosx64, 2800 MHz | 273973 | 18.6916 | 18.3486 | 0.36429 | + +# Fast complex sin and cos in FullSphere ff +2012-09-27 10:40:50 | jcnsopc73 | macosx64, 2800 MHz | 270270 | 18.018 | 18.1818 | 0.47281 | +2012-09-27 10:41:25 | jcnsopc73 | macosx64, 2800 MHz | 256410 | 17.6991 | 16.6667 | 0.42105 | +2012-09-27 10:41:41 | jcnsopc73 | macosx64, 2800 MHz | 263158 | 18.018 | 17.8571 | 0.46511 | + +# normal std sin and cos +2012-09-27 10:42:17 | jcnsopc73 | macosx64, 2800 MHz | 263158 | 17.094 | 17.2414 | 0.33613 | +2012-09-27 10:42:29 | jcnsopc73 | macosx64, 2800 MHz | 256410 | 17.5439 | 17.5439 | 0.34305 | + +# simultaneous calculation of sin and cos +2012-09-27 10:59:06 | jcnsopc73 | macosx64, 2800 MHz | 277778 | 18.1818 | 17.5439 | 0.48309 | +2012-09-27 10:59:25 | jcnsopc73 | macosx64, 2800 MHz | 277778 | 18.6916 | 18.3486 | 0.48661 | + +# Fast complex sin and cos which use std::sin +2012-09-27 11:07:29 | jcnsopc73 | macosx64, 2800 MHz | 273973 | 18.1818 | 18.018 | 0.4914 | +2012-09-27 11:07:52 | jcnsopc73 | macosx64, 2800 MHz | 273973 | 18.5185 | 18.1818 | 0.49019 | +2012-09-27 11:08:49 | jcnsopc73 | macosx64, 2800 MHz | 256410 | 18.018 | 18.1818 | 0.35778 | +2012-09-27 11:08:59 | jcnsopc73 | macosx64, 2800 MHz | 273973 | 18.3486 | 18.018 | 0.35906 | diff --git a/Macros/GitUtils/gisasfw_loc.png b/Macros/GitUtils/gisasfw_loc.png index be444d5c123b046c5904f0c818f68fbf52df73fc..22ca4636170a52471e96e72c5132f39a6b40c7e1 100644 Binary files a/Macros/GitUtils/gisasfw_loc.png and b/Macros/GitUtils/gisasfw_loc.png differ diff --git a/shared.pri b/shared.pri index 036bd62047cf9f5f1df1d7f413a447b4c7400176..0e764cbd38111cc52f933063e12e82721aa0c3b9 100644 --- a/shared.pri +++ b/shared.pri @@ -87,10 +87,10 @@ NumberOfSuchFiles=$$system(ls $${GENERAL_EXTERNALS_DIR}/lib/libboost_thread-mt* # ----------------------------------------------------------------------------- # optimization flag used in release builds (the -O2 is the default used by qmake) -# QMAKE_CXXFLAGS_RELEASE -= -O2 -# QMAKE_CXXFLAGS_RELEASE += -O3 -QMAKE_CXXFLAGS_RELEASE += -fdiagnostics-show-option # to find out in gcc which option control warning - +QMAKE_CXXFLAGS_DEBUG += -fdiagnostics-show-option # to find out in gcc which option control warning +#QMAKE_CXXFLAGS_RELEASE += -O3 -ffast-math -msse3 +QMAKE_CXXFLAGS_RELEASE -= -O2 +QMAKE_CXXFLAGS_RELEASE += -O3 -ffast-math # uncommenting line below produces non-stripped (very large) libraries #QMAKE_STRIP=: