thrust copy_if with const source - cuda

My problem is in the following code:
The filter function compiles, and runs as it should when the source is not constant (the iterators are adjusted accordingly). However when I change the source to const, the compiler gives me the following error for the first two variables of the copy_if statement:
"the object has type qualifiers that are not compatible with the member function".
I believe there is a const to not const conversion error somewhere but frankly I have no idea where. Any help would be appreciated.
#include "thrust\device_vector.h"
#include "thrust\copy.h"
// Element iterators over mutable device vectors.
typedef thrust::device_vector<int>::iterator D_IntIterator;
typedef thrust::device_vector<float>::iterator D_FloatIterator;
// Read-only counterparts of the element iterators.
typedef thrust::device_vector<int>::const_iterator Dc_IntIterator;
typedef thrust::device_vector<float>::const_iterator Dc_FloatIterator;

// Mutable zipped view over (int, int, float) columns: the type returned by Foo::begin()/end().
typedef thrust::tuple< D_IntIterator, D_IntIterator, D_FloatIterator > D_ListIteratorTuple;
typedef thrust::zip_iterator< D_ListIteratorTuple > D_ListIterator;
// Read-only zipped view over the same columns: the type returned by Foo::cbegin()/cend().
typedef thrust::tuple< Dc_IntIterator, Dc_IntIterator, Dc_FloatIterator > Dc_ListIteratorTuple;
typedef thrust::zip_iterator< Dc_ListIteratorTuple > Dc_ListIterator;
// Unary predicate for the thrust::copy_if calls: true for values equal to a
// fixed target chosen at construction time.
struct selector {
    const int val;  // value every candidate is compared against

    // Builds a predicate that matches `_val`.
    selector(int _val) : val(_val) {}

    // Returns true when `x` equals the stored value.
    // const-qualified: the call never modifies the functor, so it can also be
    // invoked through a const object or a const reference.
    __host__ __device__
    bool operator()(const int& x) const {
        return x == val;
    }
};
// Holds three parallel device vectors (two int columns and one float column)
// and exposes them as one zipped sequence of (int, int, float) tuples.
class Foo{
public:
thrust::device_vector<int> ivec1;
thrust::device_vector<int> ivec2;
thrust::device_vector<float> fvec1;
Foo(){;}
~Foo(){;}
// Mutable zip iterator built from the begin() iterators of the three vectors.
D_ListIterator begin(){//cast of begin iterator
return D_ListIterator(D_ListIteratorTuple( ivec1.begin(), ivec2.begin(), fvec1.begin() ));
}
// Mutable zip iterator built from the end() iterators of the three vectors.
D_ListIterator end(){//cast of end iterator
return D_ListIterator(D_ListIteratorTuple( ivec1.end(), ivec2.end(), fvec1.end() ));
}
// Read-only zip iterator built from the cbegin() iterators of the three vectors.
// NOTE: this member function is not const-qualified, so it cannot be called
// through a const Foo& such as `TheOther` in const_filter().
Dc_ListIterator cbegin(){//cast of const begin iterator
return Dc_ListIterator(Dc_ListIteratorTuple( ivec1.cbegin(), ivec2.cbegin(), fvec1.cbegin() ));
}
// Read-only zip iterator built from the cend() iterators of the three vectors.
// NOTE: not const-qualified either; the same restriction as cbegin() applies.
Dc_ListIterator cend(){//cast of const end iterator
return Dc_ListIterator(Dc_ListIteratorTuple( ivec1.cend(), ivec2.cend(), fvec1.cend() ));
}
// Const-source variant of filter(): reads TheOther through cbegin()/cend(),
// uses TheOther.ivec2 as the stencil and writes through this->begin().
// NOTE(review): the output is written through this->begin(); nothing in this
// function resizes the destination vectors - confirm they are large enough.
void const_filter( const Foo& TheOther, const int& target ){//doesnt work
//This function should copy those member of the vectors where
//the ivec2[i] == target is true
thrust::copy_if(
TheOther.cbegin(),
TheOther.cend(),
TheOther.ivec2.cbegin(),
this->begin(),
selector(target) );
}
// Same operation as const_filter(), but takes a mutable source and reads it
// through begin()/end().
void filter( Foo& TheOther, const int& target ){//works
//This function should copy those member of the vectors where
//the ivec2[i] == target is true
thrust::copy_if(
TheOther.begin(),
TheOther.end(),
TheOther.ivec2.cbegin(),
this->begin(),
selector(target) );
}
// Appends one value to each of the three vectors, keeping them the same length.
void insert(const int& one, const int& two,const float& three ){
ivec1.push_back(one);
ivec2.push_back(two);
fvec1.push_back(three);
}
// Number of elements in ivec1, converted to int.
int size(){
return ivec1.size();
}
};
bool CheckIfSublistIsConnected(const Foo& list,const int& sublist_num){
Foo tmp;
tmp.const_filter( list, sublist_num );
return (bool)tmp.size();//for symplicity, othervise here is a function that check if
//the edge list represents a connected graph
}
// Builds a five-entry list, checks sublist 2, and maps the result to the
// process exit code (0 when connected, -1 otherwise).
int main(void){
    Foo list;
    list.insert(10,2,1.0);
    list.insert(11,2,1.0);
    list.insert(12,2,1.0);
    list.insert(10,3,1.0);
    list.insert(10,3,1.0);

    const bool connected = CheckIfSublistIsConnected(list,2);
    return connected ? 0 : -1;
}
I've found that if I replace TheOther.cbegin() / .cend() with the following, the compiler accepts it. This means I messed up somewhere in the typedef section, but where?
thrust::make_zip_iterator(
thrust::make_tuple(
TheOther.ivec1.cbegin(),
TheOther.ivec2.cbegin(),
TheOther.fvec1.cbegin() ))

As it turns out, I had forgotten to add the const qualifier to the definitions of cbegin/cend:
// Read-only zip iterator at the first element of the three vectors; callable on a const Foo.
Dc_ListIterator cbegin() const {
    const Dc_ListIteratorTuple firsts( ivec1.cbegin(), ivec2.cbegin(), fvec1.cbegin() );
    return Dc_ListIterator( firsts );
}
// Read-only zip iterator one past the last element of the three vectors; callable on a const Foo.
Dc_ListIterator cend() const {
    const Dc_ListIteratorTuple lasts( ivec1.cend(), ivec2.cend(), fvec1.cend() );
    return Dc_ListIterator( lasts );
}

Related

Use member function as template argument to create a static wrapper

I'm trying to write a C++11 wrapper around a C API. The API lets me register notifications with a static function pointer, and it also passes me back an "opaque" pointer that is provided at a later point; in my case that is a pointer to an instance of a class I create (the class foo in this example). I'm trying to create a static function `helper<..>::call` that has the API's signature but generates code to call my member function on the instance that the C++ wrapper created, which is passed in through the "opaque" pointer. This static function also converts the arguments when finally calling the member function.
I seem to have this almost working, but I'm having trouble creating a "nicer" public function register_handler in this example, which hides the "uglier" internals. This is the error I'm getting:
test.cpp:154:37: error: no matching function for call to ‘register_handler<&foo::bar>(const char [6])’
154 | register_handler<&foo::bar>("test2"); // <-- trying to wrap it into a function so I can use only one template argument
| ^
test.cpp:137:6: note: candidate: ‘template<class T, class R, class ... Args, R (T::* Func)(Args ...)> void register_handler(const char*)’
137 | void register_handler(const char* name)
| ^~~~~~~~~~~~~~~~
This is my test code:
#include <cassert>
#include <cstddef>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
// inspired by https://stackoverflow.com/a/7943765/2129246
// Compile-time description of a callable's signature: arity, result type and
// argument types.
//
// Primary template: for functors and lambdas, forwards to the traits of the
// type's operator().
template <typename T>
struct func_traits:
    public func_traits<decltype(&T::operator())>
{
};

// Plain function pointers.
template <typename R, typename... Args>
struct func_traits<R(*)(Args...)>
{
    enum { arity = sizeof...(Args) };
    typedef R result_type;
    using all_args = std::tuple<Args...>;
    // Type of the i-th argument.
    template <std::size_t i>
    struct arg
    {
        typedef typename std::tuple_element<i, std::tuple<Args...>>::type type;
    };
};

// Pointers to const member functions (this is what a non-mutable lambda's
// operator() looks like).
template <typename C, typename R, typename... Args>
struct func_traits<R(C::*)(Args...) const>
{
    enum { arity = sizeof...(Args) };
    typedef C class_type;
    typedef R result_type;
    using all_args = std::tuple<Args...>;
    // Type of the i-th argument.
    template <std::size_t i>
    struct arg
    {
        typedef typename std::tuple_element<i, std::tuple<Args...>>::type type;
    };
};

// Pointers to non-const member functions. Without this specialization a
// mutable lambda or a functor with a non-const operator() falls back to the
// primary template, which then tries to take &T::operator() of a
// member-pointer type and fails to compile.
template <typename C, typename R, typename... Args>
struct func_traits<R(C::*)(Args...)>
{
    enum { arity = sizeof...(Args) };
    typedef C class_type;
    typedef R result_type;
    using all_args = std::tuple<Args...>;
    // Type of the i-th argument.
    template <std::size_t i>
    struct arg
    {
        typedef typename std::tuple_element<i, std::tuple<Args...>>::type type;
    };
};
// Compile-time list of indices, used to expand a vector of arguments into a
// function call.
template< std::size_t... Ns >
struct indices {
    // The same list with one more index (the current length) appended.
    using next = indices< Ns..., sizeof...( Ns ) >;
};

// make_indices<N>::type is indices<0, 1, ..., N-1>, built by extending the
// list for N-1.
template< std::size_t N >
struct make_indices {
    using type = typename make_indices< N - 1 >::type::next;
};

// End of the recursion: the empty index list.
template<>
struct make_indices< 0 > {
    using type = indices<>;
};
// String-backed argument holder: every argument is stored as text and
// converted back on demand to the type the handler expects. The converting
// constructors and conversion operators are intentionally implicit so that
// argument lists can be written as {1, "2", 3.45}.
struct value
{
    std::string str_;  // textual form of the stored argument

    // Stores any value accepted by std::to_string (arithmetic types).
    template <typename T>
    value(T val):
        str_(std::to_string(val))
    {
    }
    // Stores a copy of a C string.
    value(const char* str):
        str_(str)
    {
    }
    // Stores a copy of a std::string.
    value(const std::string& str):
        str_(str)
    {
    }
    // Parses the stored text as an int (std::stoi throws on invalid input).
    operator int() const
    {
        return std::stoi(str_);
    }
    // Parses the stored text as a double. std::stod is used rather than
    // std::stof so the result is not rounded through float first.
    operator double() const
    {
        return std::stod(str_);
    }
    // Returns a copy of the stored text.
    operator std::string() const
    {
        return str_;
    }
};
// Global registry: handler name -> plain function pointer taking the opaque
// object pointer and the argument list.
std::map<std::string, void(*)(void*, const std::vector<value>&)> g_handlers;
template <typename T, T>
struct helper;
template <typename T, typename R, typename... Args, R(T::*Func)(Args...)>
struct helper<R(T::*)(Args...), Func>
{
template <size_t... Is>
static void expand(T* obj, const std::vector<value>& args, indices<Is...>)
{
assert(sizeof...(Is) <= args.size());
(obj->*Func)((args[Is])...);
}
static void call(void *p, const std::vector<value>& args)
{
T* obj = reinterpret_cast<T*>(p);
expand(obj, args, typename make_indices<sizeof...(Args)>::type());
}
static void reg_handler(const char* name)
{
g_handlers.insert(std::make_pair(name, call));
};
};
// Looks up `name` in g_handlers and, when found, invokes the handler with the
// address of `obj` as the opaque pointer; otherwise prints a diagnostic.
template <typename Obj>
void call_handler(Obj& obj, const char* name, const std::vector<value>& args)
{
    const auto entry = g_handlers.find(name);
    if (entry == g_handlers.end())
    {
        std::cout << "handler not registered: " << name << std::endl;
        return;
    }
    entry->second(reinterpret_cast<void*>(&obj), args);
}
// The code below somehow doesn't ever match this template
// NOTE: the call register_handler<&foo::bar>("test2") supplies one explicit
// template argument, which is matched against the first parameter T (a type
// parameter), while &foo::bar is a value. T, R and Args appear only in the
// type of Func and cannot be deduced from the function argument `name`.
template <typename T, typename R, typename... Args, R(T::*Func)(Args...)>
void register_handler(const char* name)
{
// Forwards to the helper specialization for this member function.
helper<R(T::*)(Args...), Func>::reg_handler(name);
}
// Example class whose member function is registered as a handler.
struct foo
{
    // Prints the three arguments on one line.
    void bar(int v, const std::string& str, double f)
    {
        std::cout << "bar: v=" << v;
        std::cout << " str=" << str;
        std::cout << " f=" << f << std::endl;
    }
};
// Registers foo::bar under two names and then invokes both handlers on an
// instance created afterwards.
int main()
{
// register member function handlers before we have any instances
helper<decltype(&foo::bar), &foo::bar>::reg_handler("test"); // <-- works, but "ugly" and exposes internal implementation
// NOTE: this line is the one rejected by the compiler ("no matching function for call").
register_handler<&foo::bar>("test2"); // <-- trying to wrap it into a function so I can use only one template argument
// now we have an instance
foo f;
// call the previously registered handler
call_handler(f, "test", {1, "2", 3.45});
call_handler(f, "test2", {1, "2", 3.45});
return 0;
}
The simple answer for C++11 is: You can't!
Since C++17 you can also use auto for non-type template parameters. That is what is needed here: a function pointer or member function pointer is a value, not a type, and before C++17 there is no syntax to declare such a template parameter without first naming its type.
In C++17 you can use it like this:
// Minimal class with one member function to register.
struct foo
{
    void bar() {}
};
// Primary template: takes a type and a value of that type; only declared.
template <typename T, T>
struct helper;
// Partial specialization for a pointer to a non-const member function `Func`
// of class T.
template <typename T, typename R, typename... Args, R(T::*Func)(Args...)>
struct helper<R(T::*)(Args...), Func>
{
// Placeholder for the registration code from the question.
static void reg_handler(const char* name)
{
// ... here your code continues
}
};
template < auto T >
struct X
{
};
template <typename T, typename R, typename... Args, R(T::*Func)(Args...)>
struct X<Func>
{
static void register_handler( const char* name )
{
helper<R(T::*)(Args...), Func>::reg_handler(name);
}
};
// Registers foo::bar using a single template argument.
int main()
{
    X<&foo::bar>::register_handler("check");
    return 0;
}

Passing Host Function as a function pointer in __global__ OR __device__ function in CUDA

I am currently developing a GPU version of a CPU function
(e.g. function Calc(int a, int b, double* c, double* d, CalcInvFunction GetInv )), in which a host function is passed as a function pointer (e.g. in the above example GetInv is a host function of type CalcInvFunction). My question is: if I have to put the Calc() function entirely on the GPU, I have to pass the GetInv function as a function pointer argument to a device function/kernel function. Is that possible?
Yes, for a GPU implementation of Calc, you should pass the GetInv as a __device__ function pointer.
It is possible, here are some worked examples:
Ex. 1
Ex. 2
Ex. 3
Most of the above examples demonstrate bringing the device function pointer all the way back to the host code. This may not be necessary for your particular case. But it should be fairly obvious from above how to grab a __device__ function pointer (in device code) and use it in a kernel.
Finally, I have been able to pass a host function as a function pointer to a CUDA kernel function (__global__ function). Thanks to Robert Crovella and njuffa for the answer. I have been able to pass a class member function (CPU function) as a function pointer to a CUDA kernel. But the main problem is that I can only pass a static class member function. I am not able to pass a function that is not declared as static.
For Example:
/**/
__host__ __device__
static int
CellfunPtr(
void*ptr, int a
);
/**/
The above function works because this member function is declared as a static member function. If I do not declare this member function as static, as in
/**/
__host__ __device__
int
CellfunPtr(
void*ptr, int a
);
/**/
then it doesn't work.
The complete code has four files.
First file
/*start of fundef.h file*/
/* Pointer to a function taking an opaque pointer and an int, returning int. */
typedef int (*pFunc_t)(void* ptr, int N);
/*end of fundef.h file*/
Second file
/*start of solver.h file*/
/**
 * Solver state: four size counters plus two heap-allocated double arrays
 * (cellVel and cellPre) that the constructor allocates and the destructor
 * frees.
 */
class CalcVars {
    int eqnCount;
    int numCell;
    int numTri;
    int numTet;

    /* The destructor frees cellVel and cellPre, so a copied object would free
     * the same buffers a second time. Copying is therefore disabled: both
     * operations are declared private and intentionally left undefined. */
    CalcVars(const CalcVars&);
    CalcVars& operator=(const CalcVars&);

public:
    double* cellVel;
    double* cellPre;

    /** Constructor */
    CalcVars(
        const int eqnCount_,
        const int numCell_,
        const int numTri_,
        const int numTet_
    );

    /** Destructor */
    ~CalcVars(void);

public:
    /** Runs the test kernel through the device function pointer. */
    void
    CalcAdv();

    /** Static callback matching pFunc_t; callable from host and device code. */
    __host__ __device__
    static int
    CellfunPtr(
        void*ptr, int a
    );
};
/*end of solver.h file*/
Third file
/*start of solver.cu file*/
#include "solver.h"
/* Device-side variable initialised with the address of CalcVars::CellfunPtr. */
__device__ pFunc_t pF1_d = CalcVars::CellfunPtr;
/* Host-side copy of that pointer value; filled by cudaMemcpyFromSymbol in CalcAdv(). */
pFunc_t pF1_h ;
/* Each thread replaces a[threadIdx.x] with func(thisPtr_, a[threadIdx.x]). */
__global__ void kernel(int*a, pFunc_t func, void* thisPtr_){
    const int idx = threadIdx.x;
    const int updated = func(thisPtr_, a[idx]);
    a[idx] = updated;
}
/* Constructor */
/* Stores the four size counters and allocates the two zero-initialised work
 * arrays, each holding eqnCount doubles. */
CalcVars::CalcVars(
    const int eqnCount_,
    const int numCell_,
    const int numTri_,
    const int numTet_
)
{
    this->eqnCount = eqnCount_;
    this->numCell = numCell_;
    this->numTri = numTri_;
    /* numTet_ must be stored too; otherwise the member stays uninitialised. */
    this->numTet = numTet_;
    this->cellVel = (double*) calloc((size_t) eqnCount, sizeof(double));
    this->cellPre = (double*) calloc((size_t) eqnCount, sizeof(double));
}
/* Destructor */
/* Releases the two arrays allocated by the constructor. */
CalcVars::~CalcVars(void)
{
    free(cellVel);
    free(cellPre);
}
void
CalcVars::CalcAdv(
){
/*int b1 = 0;
b1 = CellfunPtr(this, 1);*/
int Num = 50;
int *a1, *a1_dev;
a1 = (int *)malloc(Num*sizeof(int));
cudaMalloc((void**)&a1_dev, Num*sizeof(int));
for(int i = 0; i <Num; i++){
a1[i] = i;
}
cudaMemcpy(a1_dev, a1, Num*sizeof(int), cudaMemcpyHostToDevice);
//copy addresses of device functions to host
cudaMemcpyFromSymbol(&pF1_h, pF1_d, sizeof(pFunc_t));
kernel<<<1,42>>>(a1_dev, pF1_h, this);
cudaDeviceSynchronize();
cudaMemcpy(a1, a1_dev, Num*sizeof(int), cudaMemcpyDeviceToHost);
};
/* Callback used through pFunc_t: prints a message and returns a - 1.
 * `ptr` is currently unused; the code that would read it is disabled. */
int
CalcVars::CellfunPtr(
    void* ptr, int a
){
    //CalcVars* ClsPtr = (CalcVars*)ptr;
    printf("Printing from CPU function\n");
    //int eqn_size = ClsPtr->eqnCount;
    //printf("The number is %d",eqn_size);
    const int decremented = a-1;
    return decremented;
}
/*end of solver.cu file*/
Fourth file
/*start of main.cpp file*/
#include "solver.h"
/* Creates a CalcVars with fixed sizes, runs CalcAdv() once and waits for a
 * key press. */
int main(){
    const int n_Eqn = 100;
    const int n_cell = 200;
    const int n_tri = 300;
    const int n_tetra = 400;

    /* Automatic storage: the destructor runs when main returns, so the
     * object's buffers are released without a manual delete. */
    CalcVars calcvars(n_Eqn, n_cell, n_tri, n_tetra );
    calcvars.CalcAdv();

    system("pause");
    return 0;
}
/*end of main.cpp file*/

Using functors in CUDA

I have the following class functor in CUDA
// Comparison functor over dereferenced zip-iterator tuples; `toExclude` is
// set from device code through setToExclude().
class forSecondMax{
private:
    int toExclude;
public:
    __device__ void setToExclude(int val){
        toExclude = val;
    }
    // Compares element 1 of the two tuples when element 0 of lhs equals
    // element 2 of lhs and differs from toExclude; returns true otherwise.
    // (The equivalent checks on rhs are deliberately not performed.)
    __device__ bool operator ()
    (const DereferencedIteratorTuple& lhs, const DereferencedIteratorTuple& rhs)
    {
        using thrust::get;
        const bool lhs_qualifies =
            get<0>(lhs)== get<2>(lhs) && get<0>(lhs) != toExclude;
        if (!lhs_qualifies) {
            return true ;
        }
        //if you do <=, returns last occurence of largest element. < returns first
        return get<1>(lhs) < get<1>(rhs);
    }
};
is there a way to set the value of toExclude from the host?
All you need to do to achieve this is to define a constructor for the functor which sets the data member from an argument. So your class would look something like this:
// Comparison functor over dereferenced zip-iterator tuples. The value to
// exclude is fixed at construction, so it can be chosen on the host before
// the functor is handed to a thrust algorithm.
class forSecondMax{
private:
    int toExclude;  // tuples whose element 0 equals this value are not compared
public:
    // Stores the value to exclude.
    __device__ __host__ forSecondMax(int x) : toExclude(x) {}

    // Compares element 1 of the two tuples when element 0 of lhs equals
    // element 2 of lhs and differs from toExclude; returns true otherwise.
    // const-qualified because the call never modifies the functor, so it can
    // also be invoked through a const object or reference.
    __device__ __host__ bool operator ()
    (const DereferencedIteratorTuple& lhs,
     const DereferencedIteratorTuple& rhs) const
    {
        using thrust::get;
        if (get<0>(lhs)== get<2>(lhs) && get<0>(lhs) != toExclude)
            return get<1>(lhs) < get<1>(rhs);
        else
            return true ;
    }
};
[disclaimer: written in browser, never tested or compiled, use at own risk]
To set the value prior to passing the functor to a thrust algorithm, create an instance of the functor and pass it to the thrust call, for example:
forSecondMax op(10);
thrust::remove_if(A.begin(), A.end(), op);
which would set the data member toExclude to a value of 10 in a new instance of the class, and use the instance in the stream compaction call.

use host function on device

How can I use a host function in a device one ?
For example in below function ,I want to return a value
// Squared magnitude: r*r + i*i.
__device__ float magnitude2() {
    const float squared = r * r + i * i;
    return squared;
}
But this function is a device function and I received this error :
calling a host function from a __device__/__global__ function is not allowed
What's the best approach for this problem ?
for extra comment on the code :
I want to define this struct :
// Complex number with float real part `r` and imaginary part `i`.
struct cuComplex {
float r;
float i;
// NOTE: this constructor carries no __device__ qualifier, although the
// __device__ operators below (operator* and operator+) call it.
cuComplex( float a, float b ) : r(a), i(b) {}
// Returns r*r + i*i.
__device__ float magnitude2( void ) {
return r * r + i * i;
}
// Complex product of *this and `a`, returned as a new cuComplex.
__device__ cuComplex operator*(const cuComplex& a) {
return cuComplex(r*a.r - i*a.i, i*a.r + r*a.i);
}
// Component-wise sum of *this and `a`, returned as a new cuComplex.
__device__ cuComplex operator+(const cuComplex& a) {
return cuComplex(r+a.r, i+a.i);
}
};
Now that we know the question involves a C++ structure, the answer is obvious - the constructor of the class must also be available as a __device__ function in order to be able to instantiate the class inside a kernel. In your example, the structure should be defined like this:
// Complex number with float real part `r` and imaginary part `i`.
struct cuComplex {
    float r;
    float i;

    // Usable from host and device code, so the __device__ operators below can
    // construct their results inside a kernel.
    __device__ __host__
    cuComplex( float a, float b ) : r(a), i(b) {}

    // Returns r*r + i*i. const-qualified: it does not modify the object and
    // can therefore be called on a const cuComplex.
    __device__
    float magnitude2( void ) const {
        return r * r + i * i;
    }

    // Complex product of *this and `a`; neither operand is modified.
    __device__
    cuComplex operator*(const cuComplex& a) const {
        return cuComplex(r*a.r - i*a.i, i*a.r + r*a.i);
    }

    // Component-wise sum of *this and `a`; neither operand is modified.
    __device__
    cuComplex operator+(const cuComplex& a) const {
        return cuComplex(r+a.r, i+a.i);
    }
};
The error you are seeing arises because the constructor needs to be called whenever the class is instantiated. In your original code, the constructor is declared only as a host function, leading to a compilation error.

change SWIG wrapper-function return value

I'm using SWIG to make C# bindings that are compatible with the compact framework (WinCE). I've got most of the immediate issues worked through, but my next blocker is that some of the functions return a double. Wrappers are generated but they fail at run time because the CF framework will not marshal non-integral datatypes (http://msdn.microsoft.com/en-us/library/aa446536.aspx)
My example failure is an attempt to wrap this function:
// Returns the stored m_maxMag value by value (a double).
double getMaxMagnification() const
{
return m_maxMag;
}
SWIG generates this wrapper:
// SWIG-generated C wrapper: casts the opaque handle to an LTIImageFilter,
// calls getMaxMagnification() on it and returns the double by value.
SWIGEXPORT double SWIGSTDCALL CSharp_LTIImageFilter_getMaxMagnification(void * jarg1) {
double jresult ;
LizardTech::LTIImageFilter *arg1 = (LizardTech::LTIImageFilter *) 0 ;
double result;
// jarg1 is the object handle passed in from the C# side.
arg1 = (LizardTech::LTIImageFilter *)jarg1;
result = (double)((LizardTech::LTIImageFilter const *)arg1)->getMaxMagnification();
jresult = result;
return jresult;
}
which is NG because it requires marshalling a double return value.
I manually changed this to return the double via a passed-in pointer:
// Hand-modified wrapper: returns nothing and instead writes the double result
// through the pointer passed as jarg2.
SWIGEXPORT void SWIGSTDCALL CSharp_LTIImageFilter_getMaxMagnification(void * jarg1, void *jarg2) {
// Trace output written to stderr on every call.
fprintf(stderr, "CSharp_LTIImageFilter_getMaxMagnification\n");
//double jresult ;
LizardTech::LTIImageFilter *arg1 = (LizardTech::LTIImageFilter *) 0 ;
double result;
arg1 = (LizardTech::LTIImageFilter *)jarg1;
result = (double)((LizardTech::LTIImageFilter const *)arg1)->getMaxMagnification();
// jarg2 is treated as a double* and receives the result.
*((double*)jarg2) = result;
//jresult = result ;
//return jresult;
}
After making the corresponding changes in the C# declaration file and implementation class, this works as expected.
That is,
Interop Declaration
NG:
// P/Invoke declaration in which the native function returns the double directly.
[DllImport("LizardTech_SdkInterop.dll", EntryPoint="CSharp_LTIImageFilter_getMaxMagnification")]
public static extern double LTIImageFilter_getMaxMagnification(IntPtr jarg1);
OK:
// P/Invoke declaration in which the native function returns void and the
// double is passed back through the `ref double` second argument.
[DllImport("LizardTech_SdkInterop.dll", EntryPoint="CSharp_LTIImageFilter_getMaxMagnification")]
public static extern void LTIImageFilter_getMaxMagnification(IntPtr jarg1, ref double jarg2);
Implementation class
NG:
// Returns the value produced by the native call directly.
public override double getMaxMagnification() {
    return RasterSDKPINVOKE.LTIImageFilter_getMaxMagnification(swigCPtr);
}
OK:
// Lets the native call fill a local double passed by reference, then returns it.
public override double getMaxMagnification() {
    double maxMagnification = 0;
    RasterSDKPINVOKE.LTIImageFilter_getMaxMagnification(swigCPtr, ref maxMagnification);
    return maxMagnification;
}
How can I get SWIG to do this for me? I think the tasks are:
(a) change the return type of the wrapper function (only) from double to void
(b) add an argument (pointer to double) to the argument list so that the wrapper can send back the value that way
(c) make the interop declaration reflect the above two changes
(d) make the C# wrapper invoke the new wrapper function.
As always big-picture re-orientation is appreciated.
I'm indebted to David Piepgrass for this. It's not perfect but it's good enough for me.
http://sourceforge.net/mailarchive/message.php?msg_id=26952332
////////////////////////////////////////////////////////////////////////////////
// Floating-point value marshalling for .NET Compact Framework:
// All floating-point values must be passed by reference. MULTITHREADING DANGER:
// For return values a pointer to a static variable is returned.
//
// %cs_compact_framework_float(FLOAT) installs the typemaps below for one
// floating-point type and its pointer/reference forms:
//   ctype         - the C wrapper uses FLOAT* for both arguments and results.
//   imtype        - the intermediary (P/Invoke) layer uses "ref FLOAT" for
//                   arguments and IntPtr for results.
//   cstype        - the public C# type stays FLOAT for values and const
//                   references, and is "ref FLOAT" for pointers and non-const
//                   references.
//   in            - value arguments are dereferenced from the incoming pointer;
//                   pointer/reference arguments are passed through.
//   out           - the result is copied into a function-local static and its
//                   address is returned (hence the thread-safety warning).
//   csin          - C# arguments are forwarded with "ref".
//   csout/csvarout - the returned IntPtr is read back with
//                   Marshal.PtrToStructure and cast to FLOAT.
// The macro is instantiated for float and double after its definition.
%define %cs_compact_framework_float(FLOAT)
%typemap(ctype, out="FLOAT*") FLOAT "FLOAT*"
%typemap(ctype, out="FLOAT*") FLOAT*, FLOAT&, const FLOAT& "FLOAT*"
%typemap(imtype, out="IntPtr") FLOAT, FLOAT*, FLOAT&, const FLOAT& "ref FLOAT"
%typemap(cstype, out="FLOAT") FLOAT, const FLOAT& "FLOAT"
%typemap(cstype, out="FLOAT") FLOAT*, FLOAT& "ref FLOAT"
%typemap(in) FLOAT %{ $1 = *$input; %}
%typemap(in) FLOAT*, FLOAT&, const FLOAT& %{ $1 = $input; %}
%typemap(out, null="NULL") FLOAT, FLOAT*, FLOAT&, const FLOAT& %{
// Not thread safe! FLOAT must be returned as a pointer in Compact Framework
static FLOAT out_temp;
out_temp = $1;
$result = &out_temp;
%}
%typemap(csin) FLOAT, const FLOAT& "ref $csinput"
%typemap(csin) FLOAT*, FLOAT& "ref $csinput"
%typemap(csout, excode=SWIGEXCODE) FLOAT, FLOAT*, FLOAT&, const FLOAT& {
IntPtr ptr = $imcall;$excode
FLOAT ret = (FLOAT)Marshal.PtrToStructure(ptr, typeof(FLOAT));
return ret;
}
%typemap(csvarout, excode=SWIGEXCODE2) FLOAT, FLOAT*, FLOAT&, const FLOAT&
%{
get {
IntPtr ptr = $imcall;$excode
FLOAT ret = (FLOAT)Marshal.PtrToStructure(ptr, typeof(FLOAT));
return ret;
}
%}
%enddef
%cs_compact_framework_float(float)
%cs_compact_framework_float(double)