Index: lib/mra/convolution1d.h =================================================================== --- lib/mra/convolution1d.h (revision 1177) +++ lib/mra/convolution1d.h (working copy) @@ -8,6 +8,8 @@ #include #include + + namespace madness { void aligned_add(long n, double* restrict a, const double* restrict b); @@ -424,9 +426,17 @@ const double expnt; const Level natlev; +#ifdef PDT_PARSER +#define log(x) x +#endif + + GaussianConvolution1D(int k, Q coeff, double expnt, double sign=1.0) - : Convolution1D(k,k+11,sign), coeff(coeff), expnt(expnt), natlev(0.5*log(expnt)/log(2)+1) {} + : Convolution1D(k,k+11,sign), coeff(coeff), expnt(expnt), natlev(0.5*log(expnt)/log(2)+1) {} +#ifdef PDT_PARSER +#undef log +#endif virtual ~GaussianConvolution1D() {} Level natural_level() const { Index: lib/mra/startup.cc =================================================================== --- lib/mra/startup.cc (revision 1177) +++ lib/mra/startup.cc (working copy) @@ -102,8 +102,8 @@ load_quadrature(world, data_dir); // This to init static data while single threaded - double djunk; - legendre_scaling_functions(0.0,0,&djunk); + double djunk[2]; + legendre_scaling_functions(0.0,0,djunk); if (world.rank() == 0) print("testing coeffs, etc."); Index: lib/mra/key.h =================================================================== --- lib/mra/key.h (revision 1177) +++ lib/mra/key.h (working copy) @@ -284,10 +284,16 @@ /// Assumes key and this are at the same level bool is_neighbor_of(const Key& key) const { - for (int i=0; i 1) return false; - } - return true; + for (int i=0; i 1) { +#endif + return false; + } + } + return true; } }; Index: lib/world/worldhashmap.h =================================================================== --- lib/world/worldhashmap.h (revision 1177) +++ lib/world/worldhashmap.h (working copy) @@ -228,6 +228,10 @@ class HashAccessor : NO_DEFAULTS { template friend class ConcurrentHashMap; private: + /* TAU */ +#ifdef PDT_PARSER + public: +#endif entryT* entry; bool gotlock; Index: lib/world/worldprofile.h =================================================================== --- lib/world/worldprofile.h (revision 1177) +++ lib/world/worldprofile.h (working copy) @@ -13,6 +13,12 @@ #define __thread #endif +/* TAU */ +#ifdef PDT_PARSER +#define __thread +#endif +/* TAU */ + namespace madness { /// Simple container for parallel profile statistic Index: lib/world/worldthread.h =================================================================== --- lib/world/worldthread.h (revision 1177) +++ lib/world/worldthread.h (working copy) @@ -1,6 +1,10 @@ #ifndef MAD_WORLDTHREAD_H #define MAD_WORLDTHREAD_H +//#ifdef TAU_ENABLED +#include +//#endif + /// \file worldthread.h /// \brief Implements Dqueue, Thread, ThreadBase and ThreadPool @@ -274,6 +278,10 @@ static int cpuhi[3]; static void* main(void* self) { + printf ("ThreadBase::main!!!\n"); +#ifdef TAU_ENABLED + TAU_START("ThreadBase::main"); +#endif #ifdef HAVE_PAPI begin_papi_measurement(); #endif @@ -312,6 +320,9 @@ #ifdef HAVE_PAPI end_papi_measurement(); #endif +#ifdef TAU_ENABLED + TAU_STOP("ThreadBase::main"); +#endif return 0; } Index: lib/world/madatomic.h =================================================================== --- lib/world/madatomic.h (revision 1177) +++ lib/world/madatomic.h (working copy) @@ -67,13 +67,13 @@ #define GCC_VERSION (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__) #if GCC_VERSION < 30402 -error GCC older than 3.4.3 does not seem to have working atomic operations +//error GCC older than 3.4.3 does not seem to have working atomic operations #endif // version 4.* up seems to have switched to ext directory -//#include -#include +#include +//#include typedef volatile int MADATOMIC_INT; #define MADATOMIC_FENCE Index: lib/world/worldtime.h =================================================================== --- lib/world/worldtime.h (revision 1177) +++ lib/world/worldtime.h (working copy) @@ -75,7 +75,10 @@ __asm__ volatile(".byte 0x0f, 0x31" : "=A"(x)); #elif defined(X86_64) unsigned int a,d; + +#ifndef PDT_PARSER /* TAU */ __asm__ volatile("rdtsc" : "=a"(a), "=d"(d)); +#endif /* TAU */ x = ((uint64_t)a) | (((uint64_t)d)<<32); #else x = wall_time()*1e9; Index: lib/tensor/mtxmq.cc =================================================================== --- lib/tensor/mtxmq.cc (revision 1177) +++ lib/tensor/mtxmq.cc (working copy) @@ -336,6 +336,8 @@ #define NEXT(loop) "sub $1,%%r11; jnz "#loop";" #define INCB "add $16,%%r8;\n" +#ifndef PDT_PARSER + const long jtile = 12; const double_complex* asave = a; for (long jlo=0; jlo>2)<<2; long rem = n-n4; +#ifndef PDT_PARSER if (n4) { //std::cout << "entering asm " << (void *) a << " " << n4 << std::endl; __asm__ __volatile__( @@ -24,6 +25,7 @@ //std::cout << "leaving asm " << (void *) a << " " << n4 << std::endl; a+=n4; } +#endif for (long i=0; i>2)<<2; long rem = n-n4; if (n4) { +#ifndef PDT_PARSER #if ( (!defined(ON_A_MAC)) && (defined(X86_32) || defined(X86_64)) ) // On core-2 this will give 2 cycles/element - optimal is 1.5 __asm__ __volatile__( @@ -52,6 +55,7 @@ a[3] += b[3]; } #endif +#endif /* PDT_PARSER */ } for (long i=0; i>2)<<2; long rem = n-n4; if (n4) { +#ifndef PDT_PARSER #if ( (!defined(ON_A_MAC)) && (defined(X86_32) || defined(X86_64)) ) // On core-2 this will give 2 cycles/element - optimal is 1.5 __asm__ __volatile__( @@ -110,6 +115,7 @@ a[3] -= b[3]; } #endif +#endif /* PDT_PARSER */ } for (long i=0; i Tensor& Tensor::screen(double x) { T zero = 0; +#ifndef PDT_PARSER UNARY_OPTIMIZED_ITERATOR(T,(*this), if (std::abs(*_p0) typename Tensor::scalar_type Tensor::absmin(long* ind) const { +#ifdef PDT_PARSER + scalar_type result = 0; +#else scalar_type result = std::abs(*(this->pointer)); +#endif if (ind) { for (long i=0; i absval) { @@ -642,19 +650,28 @@ ind[nd] = _j; } ); +#endif + } else { +#ifndef PDT_PARSER UNARY_OPTIMIZED_ITERATOR(T,(*this),result=std::min(result,std::abs(*_p0))); +#endif } return result; } template typename Tensor::scalar_type Tensor::absmax(long* ind) const { +#ifdef PDT_PARSER + scalar_type result = 0; +#else scalar_type result = std::abs(*(this->pointer)); +#endif if (ind) { for (long i=0; i(result,std::abs(*_p0))); +#endif } return result; } @@ -1188,7 +1208,9 @@ Tensor< typename Tensor::scalar_type > abs(const Tensor& t) { typedef typename Tensor::scalar_type scalar_type; Tensor result(t.ndim,t.dim,false); +#ifndef PDT_PARSER BINARY_OPTIMIZED_ITERATOR(scalar_type,result,T,t,*_p0 = std::abs(*_p1)); +#endif return result; }