he following is a prototype for the main class.
class
Poly {
public:
typedef
CoeffType::type real;
private:
class
Impl;
boost::shared_ptr<Impl>
theImpl;
public:
Poly();
Poly(
const Poly& x );
Poly&
operator=( const Poly& x );
Poly(
const boost::python::object& params, const boost::python::object&
coeffs );
Poly(
const Piece& spt, int degree );
Poly(
int d, int a, int b, int degree );
Poly(
const Piece& spt, int degree, ots::cuda::memory::Host2D<real> coeffs
);
Poly(
int d, int a, int b, int degree, ots::cuda::memory::Host2D<real> coeffs
);
real
operator() ( real x ) const;
boost::python::object
batch( const boost::python::object& x ) const;
std::string
toString() const;
Piece
support() const;
int
degree() const;
Poly
upscale( unsigned deltaD ) const;
Poly
segment( const Piece& p ) const;
std::size_t
len() const;
Poly
mult( real x ) const;
Poly
mult( const Poly& x ) const;
Poly
add( const Poly& x ) const;
Poly
sub( const Poly& x ) const;
real
integral() const;
real
integral( const Piece& p ) const;
Poly
pow( unsigned n ) const;
Poly
T( int k ) const;
Poly
S( unsigned d ) const;
Poly
Op( unsigned d, int k ) const;
Poly
convolution( const Poly& x ) const;
Poly
D() const;
boost::python::object
description() const;
Poly
Cr() const;
};
This class is functional in the Regular compiler scope and was exported in
Python shell. The function "batch" performs parallel calculation of several
values given by a Python list. The function "upscale" returns the same
function but on a finer mesh. The function "segment" restricts to a given
interval. The T,S are operations from the section
(
Wavelet analysis
) and the Op is the
combination of the two. The D is the differentiation. The Cr is the
crudification operator, see the definition
(
Crudification operator
). The rest is
self-explanatory.
Implementation of every function is similar. We consider the function "add" as
an example. The following is the part of the poly.cpp (Regular compiler
scope).
class
Poly::Impl : boost::noncopyable
{
ots::cuda::memory::Device<int>
theParams;
ots::cuda::memory::Host<int>
theParams2; //kept synchronized with theParams
ots::cuda::memory::Device2D<real>
theCoeffs;
static
const int theParamSize=5;
int
getD() const { return theParams2.get(0); }
int
getA() const { return theParams2.get(1); }
int
getB() const { return theParams2.get(2); }
int
getDegree() const { return theParams2.get(3); }
int
getScale() const { return theParams2.get(4); }
...
Poly
add( const Poly& x ) const
{
Piece
spt=support();
Piece
spt2=x.support();
if(
spt2.d()>spt.d() ) return upscale(spt2.d()-spt.d()).add(x);
Poly
xx;
if(
spt2.d()<spt.d() ) xx=x.upscale(spt.d()-spt2.d()); else xx=x;
Poly
r(support().Union(xx.support()),std::max(getDegree(),x.degree()));
impl::add::act(
theParams.h(),
theCoeffs.h(),
xx.theImpl->theParams.h(),
xx.theImpl->theCoeffs.h(),
r.theImpl->theParams.h(),
r.theImpl->theCoeffs.h()
);
return
r;
}
...
};
Poly
Poly::add( const Poly& x ) const { return theImpl->add(x); }
The function impl::add::act is implemented in the Host compiler scope as
follows.
void
act(
indexes
params1, reals2 coeffs1,
indexes
params2, reals2 coeffs2,
indexes
paramsR, reals2 coeffsR
)
{
if(
coeffsR.rows()>0 )
{
checkPolyData::act(params1,coeffs1);
checkPolyData::act(params2,coeffs2);
checkPolyData::act(paramsR,coeffsR);
check(
de::isOk(), "prior device error" );
sy::deviceWait();
act_<<<
kernel::first(coeffsR.rows()),
kernel::second(),
kernel::third()
>>>
(params1,coeffs1,params2,coeffs2,paramsR,coeffsR);
sy::deviceWait();
check(
de::isOk(), "device error generated" );
}
}
Finally, the function impl::add::act_ does the actual work (Device compiler
scope).
__global__
void act_(
indexes
params1_, reals2 coeffs1_,
indexes
params2_, reals2 coeffs2_,
indexes
paramsR_, reals2 coeffsR_
)
{
Indexes
params1(params1_);
Reals2
coeffs1(coeffs1_);
Indexes
params2(params2_);
Reals2
coeffs2(coeffs2_);
Indexes
paramsR(paramsR_);
Reals2
coeffsR(coeffsR_);
index
th=kernel::threadId();
if(
!coeffsR.isValidRowIndex(th) ) return;
Reals
_R=coeffsR.getRow(th);
_R.assign(0);
index
a1=polyParams::a(params1);
index
a2=polyParams::a(params2);
index
aR=polyParams::a(paramsR);
index
loc=aR+th;
index
th1=loc-a1;
index
th2=loc-a2;
if(
coeffs1.isValidRowIndex(th1) )
{
Reals
src=coeffs1.getRow(th1);
index
sz=min(_R.size(),src.size());
for(
index p=0; p<sz; ++p )
_R.put(p,_R.get(p)+src.get(p));
}
if(
coeffs2.isValidRowIndex(th2) )
{
Reals
src=coeffs2.getRow(th2);
index
sz=min(_R.size(),src.size());
for(
index p=0; p<sz; ++p )
_R.put(p,_R.get(p)+src.get(p));
}
}
|