A consistent example for using the C++ API of Pyarrow - pyarrow

I am trying to use the C++ API of Pyarrow. There is currently no example for it on the official documentation, and this is the best I am able to come up with for a simple thing:
#include <arrow/python/pyarrow.h>
#include <arrow/python/platform.h>
#include "arrow/python/init.h"
#include "arrow/python/datetime.h"
#include <iostream>
void MyFunction(PyObject * obj)
{
    Py_Initialize();
    std::cout << Py_IsInitialized() << std::endl;

    int ret = arrow_init_numpy();
    std::cout << ret << std::endl;
    if (ret != 0) {
        throw 0;
    }

    ::arrow::py::internal::InitDatetime();

    if (arrow::py::import_pyarrow() != 0)
    {
        std::cout << "problem initializing pyarrow" << std::endl;
        throw 0;
    }

    std::cout << "test" << std::endl;
    Py_Finalize();
    //return arrow::py::is_array(obj);
}
I am trying to compile it with
gcc -pthread -B /home/ziheng/anaconda3/envs/da/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O0 -Wall -Wstrict-prototypes -fPIC -D_GLIBCXX_USE_CXX11_ABI=0 -I/home/ziheng/anaconda3/envs/da/lib/python3.7/site-packages/numpy/core/include -I/home/ziheng/anaconda3/envs/da/lib/python3.7/site-packages/pyarrow/include -I/home/ziheng/anaconda3/envs/da/include/python3.7m -c example.cpp -o example.o -std=c++11
g++ -pthread -shared -fPIC -B /home/ziheng/anaconda3/envs/da/compiler_compat -L/home/ziheng/anaconda3/envs/da/lib -Wl,-rpath=/home/ziheng/anaconda3/envs/da/lib -Wl,--no-as-needed -Wl,--sysroot=/ example.o -L/home/ziheng/anaconda3/envs/da/lib/python3.7/site-packages/pyarrow -l:libarrow.so.600 -l:libarrow_python.so.600 -l:libpython3.7m.so -o example.cpython-37m-x86_64-linux-gnu.so
The compilation works with no problems. However, when I try to use ctypes to call the compiled .so file, like this:
from ctypes import *
lib = CDLL('example.cpython-37m-x86_64-linux-gnu.so')
lib._Z10MyFunctionP7_object(1)
I get a segmentation fault at arrow_init_numpy, after Py_IsInitialized() prints 1.
When I run it through gdb, I get /tmp/build/80754af9/python_1614362349910/work/Python/ceval.c: No such file or directory.
If I compile my C++ code as a standalone executable, however, it works with no problems.
Can someone please help? Thank you.

First, the call to Py_Initialize() is superfluous. You are calling your code from within Python, so presumably the interpreter has already been initialized; Py_Initialize() would only be needed if you were writing your own main rather than a plugin-type library. Correspondingly, the call to Py_Finalize() is probably a bad idea.
Second, and more significant for the error at hand, is that you are using ctypes.CDLL (and not, for example, ctypes.PyDLL). The ctypes documentation states (emphasis mine):
The returned function prototype creates functions that use the standard C calling convention. The function will release the GIL during the call. If use_errno is set to true, the ctypes private copy of the system errno variable is exchanged with the real errno value before and after the call; use_last_error does the same for the Windows error code.
And, finally, the Arrow initialization routines assume you are holding the GIL (this should probably be added to the documentation). So the easiest way to fix your program is probably to change CDLL to PyDLL:
from ctypes import *
lib = PyDLL('example.cpython-37m-x86_64-linux-gnu.so')
lib._Z10MyFunctionP7_object(1)
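Alternatively, if you need to keep CDLL (or cannot control how the library is loaded), you can re-acquire the GIL inside the entry point yourself. A minimal sketch under that assumption (MyFunctionGilSafe is a hypothetical name, and extern "C" is only there so ctypes can find the symbol without the C++ mangling):
#include <arrow/python/platform.h>   // pulls in Python.h
#include <arrow/python/pyarrow.h>
#include "arrow/python/init.h"

extern "C" void MyFunctionGilSafe(PyObject* obj)
{
    // ctypes.CDLL released the GIL before calling us; take it back.
    PyGILState_STATE gstate = PyGILState_Ensure();

    arrow_init_numpy();
    arrow::py::import_pyarrow();
    // ... work with obj ...

    // Release the GIL again before returning to ctypes.
    PyGILState_Release(gstate);
}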

Related

How do you get the stream associated with a thrust execution policy?

I want to be able to get the stream-id which is associated with an execution policy in thrust. I am trying to access this function.
I have tried this :
cudaStream_t stream = 0;
auto policy = thrust::cuda::par.on(stream);
cudaStream_t str = stream(policy);
but I am getting a compilation error :
stream.cu(7): error: expression preceding parentheses of apparent call must have (pointer-to-) function type
Could I get some ideas on how to do this?
"I am trying to access this function." Trying to directly use e.g. things in detail are part of the implementation and may change from one version to the next. To wit: the file you are referring to does not even exist in the the current thrust distributed with CUDA 10.
However, this seems to work for me:
$ cat t354.cu
#include <thrust/execution_policy.h>
#include <iostream>
#include <cstring>
int main(){
  cudaStream_t mystream;
  cudaStreamCreate(&mystream);
  auto policy = thrust::cuda::par.on(mystream);
  cudaStream_t str = stream(policy);
  for (int i = 0; i < sizeof(cudaStream_t); i++)
    if (*(reinterpret_cast<unsigned char *>(&mystream)+i) != *(reinterpret_cast<unsigned char *>(&str)+i)) {
      std::cout << "mismatch" << std::endl;
      return -1;
    }
  std::cout << "match" << std::endl;
}
$ nvcc -std=c++11 -o t354 t354.cu
$ cuda-memcheck ./t354
========= CUDA-MEMCHECK
match
========= ERROR SUMMARY: 0 errors
$
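For completeness, here is a hedged sketch of how the extracted stream might actually be used, relying on the same unqualified stream(policy) helper as above (which, again, is not a documented public API and may change between Thrust versions):
#include <thrust/execution_policy.h>
#include <thrust/device_vector.h>
#include <thrust/sequence.h>

int main(){
  cudaStream_t mystream;
  cudaStreamCreate(&mystream);
  auto policy = thrust::cuda::par.on(mystream);

  // run an algorithm on the chosen stream
  thrust::device_vector<int> v(1000);
  thrust::sequence(policy, v.begin(), v.end());

  // recover the stream from the policy and wait on it explicitly
  cudaStream_t str = stream(policy);
  cudaStreamSynchronize(str);

  cudaStreamDestroy(mystream);
}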

SWIG tcl : undefined symbol error for log4cpp wrapper

I am new to log4cpp and SWIG wrappers. I am trying to write an interface for simple logging using log4cpp.
I have installed log4cpp and swig in my Ubuntu machine.
log4cpp.cpp:
#include "log4cpp/Category.hh"
#include "log4cpp/Appender.hh"
#include "log4cpp/FileAppender.hh"
#include "log4cpp/OstreamAppender.hh"
#include "log4cpp/Layout.hh"
#include "log4cpp/BasicLayout.hh"
#include "log4cpp/Priority.hh"
#include "log4cpp.h"
void writeLog() {
    log4cpp::Appender *appender1 = new log4cpp::OstreamAppender("console", &std::cout);
    appender1->setLayout(new log4cpp::BasicLayout());

    log4cpp::Appender *appender2 = new log4cpp::FileAppender("default", "program.log");
    appender2->setLayout(new log4cpp::BasicLayout());

    log4cpp::Category& root = log4cpp::Category::getRoot();
    root.setPriority(log4cpp::Priority::WARN);
    root.addAppender(appender1);

    log4cpp::Category& sub1 = log4cpp::Category::getInstance(std::string("sub1"));
    sub1.addAppender(appender2);

    // use of functions for logging messages
    root.error("root error");
    root.info("root info");
    sub1.error("sub1 error");
    sub1.warn("sub1 warn");

    // printf-style for logging variables
    root.warn("%d + %d == %s ?", 1, 1, "two");

    // use of streams for logging messages
    root << log4cpp::Priority::ERROR << "Streamed root error";
    root << log4cpp::Priority::INFO << "Streamed root info";
    sub1 << log4cpp::Priority::ERROR << "Streamed sub1 error";
    sub1 << log4cpp::Priority::WARN << "Streamed sub1 warn";

    // or this way:
    root.errorStream() << "Another streamed error";
}
log4cpp.h:
void writeLog(void);
log4cpp.i:
%module log4cpp
%{
#include "log4cpp.h"
%}
%inline %{
extern void writeLog(void);
%}
I have done the following steps to generate the log4cpp.so file:
swig -tcl -c++ log4cpp.i
g++ -c -fPIC log4cpp.cpp log4cpp_wrap.cxx -I/usr/include/tcl8.5
g++ -shared log4cpp.o log4cpp_wrap.o -o log4cpp.so
It generates the log4cpp_wrap.cxx, log4cpp.o, log4cpp_wrap.o and log4cpp.so files without any warnings or errors.
However, whenever I run the command below in tcl:
load ./log4cpp.so
It generates an undefined symbol error:
% load ./log4cpp.so
couldn't load file "./log4cpp.so": ./log4cpp.so: undefined symbol: _ZN7log4cpp8Appender29AppenderMapStorageInitializerD1Ev
What should I do to remove this error?
You need to link your SWIG shared library against log4cpp, just as you would for any other C++ application that uses the library. So when you call
g++ -shared log4cpp.o log4cpp_wrap.o -o log4cpp.so
it really needs to be something like this (adapted to your real library name and search path):
g++ -shared log4cpp.o log4cpp_wrap.o -L/path/to/your/install/of/log4cpp -llog4cpp -o log4cpp.so
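For reference, the full build sequence then becomes something like the following (assuming log4cpp was installed into a default prefix such as /usr/lib; add -L and -I options for a non-standard install):
swig -tcl -c++ log4cpp.i
g++ -c -fPIC log4cpp.cpp log4cpp_wrap.cxx -I/usr/include/tcl8.5
g++ -shared log4cpp.o log4cpp_wrap.o -llog4cpp -o log4cpp.so
Running ldd ./log4cpp.so afterwards should list liblog4cpp among the dependencies, which is a quick way to confirm the link actually happened.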

Determining which gencode (compute_, arch_) values I need for nvcc - within CMake

I'm using CMake as a build system for my code, which involves CUDA. I was thinking of automating the task of deciding which compute_XX and arch_XX values I need to pass to nvcc in order to compile for the GPU(s) on my current machine.
Is there a way to do this:
With the NVIDIA GPU deployment kit?
Without the NVIDIA GPU deployment kit?
Does CMake's FindCUDA help you in determining the values for these switches?
My strategy has been to compile and run a bash script that probes the card and returns the gencode string for CMake. Inspiration came from University of Chicago's SLURM documentation. Modify as necessary to handle errors, multiple GPUs, or other circumstances.
In your project folder create a file cudaComputeVersion.bash and ensure it is executable from the shell. Into this file put:
#!/bin/bash
# create a 'here document' that is code we compile and use to probe the card
cat << EOF > /tmp/cudaComputeVersion.cu
#include <stdio.h>
int main()
{
    cudaDeviceProp prop;
    cudaGetDeviceProperties(&prop,0);
    int v = prop.major * 10 + prop.minor;
    printf("-gencode arch=compute_%d,code=sm_%d\n",v,v);
}
EOF
# probe the card and cleanup
/usr/local/cuda/bin/nvcc /tmp/cudaComputeVersion.cu -o /tmp/cudaComputeVersion
/tmp/cudaComputeVersion
rm /tmp/cudaComputeVersion.cu
rm /tmp/cudaComputeVersion
And in your CMakeLists.txt put:
# at cmake-build-time, probe the card and set a cmake variable
execute_process(COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/cudaComputeVersion.bash OUTPUT_VARIABLE GENCODE)
# at project-compile-time, include the gencode into the compile options
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}; "${GENCODE}")
# this makes CMake all chatty and allows you to see that GENCODE was set correctly
set(CMAKE_VERBOSE_MAKEFILE TRUE)
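One small refinement to the snippet above (my suggestion, not part of the original script): the probe prints a trailing newline, which execute_process keeps in GENCODE by default, so it may be worth adding OUTPUT_STRIP_TRAILING_WHITESPACE:
execute_process(COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/cudaComputeVersion.bash
                OUTPUT_VARIABLE GENCODE
                OUTPUT_STRIP_TRAILING_WHITESPACE)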
cheers
You can use the cuda_select_nvcc_arch_flags() macro in the FindCUDA module for this without any additional scripts when using CMake 3.7 or newer.
include(FindCUDA)
set(CUDA_ARCH_LIST Auto CACHE STRING
"List of CUDA architectures (e.g. Pascal, Volta, etc) or \
compute capability versions (6.1, 7.0, etc) to generate code for. \
Set to Auto for automatic detection (default)."
)
cuda_select_nvcc_arch_flags(CUDA_ARCH_FLAGS ${CUDA_ARCH_LIST})
list(APPEND CUDA_NVCC_FLAGS ${CUDA_ARCH_FLAGS})
The above sets CUDA_ARCH_FLAGS to -gencode arch=compute_61,code=sm_61 on my machine, for example.
The CUDA_ARCH_LIST cache variable can be configured by the user to generate code for specific compute capabilities instead of automatic detection.
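For example, to pin the build to a specific compute capability instead of auto-detection, the cache variable can be set at configure time (the value shown is just an illustration):
cmake -DCUDA_ARCH_LIST="7.0" ..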
Note: the FindCUDA module has been deprecated since CMake 3.10. However, no equivalent alternative to the cuda_select_nvcc_arch_flags() macro appears to be provided yet in the latest CMake release (v3.14). See this relevant issue at the CMake issue tracker for further details.
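If you can require a newer CMake than this answer assumes, first-class CUDA language support offers an alternative that avoids FindCUDA entirely: CMAKE_CUDA_ARCHITECTURES (CMake 3.18+) and, from CMake 3.24, the special value native for probing the local GPUs. A minimal sketch:
cmake_minimum_required(VERSION 3.24)   # CMAKE_CUDA_ARCHITECTURES itself needs only 3.18
project(example LANGUAGES CXX CUDA)

# "native" (3.24+) targets the GPUs present at configure time;
# on 3.18-3.23 list explicit values such as 61 or 70 instead.
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
  set(CMAKE_CUDA_ARCHITECTURES native)
endif()

add_executable(t354 t354.cu)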
A slight improvement over @orthopteroid's answer above: this version pretty much ensures a unique temporary file is generated, and only requires one temporary file instead of two.
The following goes into scripts/get_cuda_sm.sh:
#!/bin/bash
#
# Prints the compute capability of the first CUDA device installed
# on the system, or alternatively the device whose index is the
# first command-line argument
device_index=${1:-0}
timestamp=$(date +%s.%N)
gcc_binary=$(which g++)
gcc_binary=${gcc_binary:-g++}
cuda_root=${CUDA_DIR:-/usr/local/cuda}
CUDA_INCLUDE_DIRS=${CUDA_INCLUDE_DIRS:-${cuda_root}/include}
CUDA_CUDART_LIBRARY=${CUDA_CUDART_LIBRARY:-${cuda_root}/lib64/libcudart.so}
generated_binary="/tmp/cuda-compute-version-helper-$$-$timestamp"
# create a 'here document' that is code we compile and use to probe the card
source_code="$(cat << EOF
#include <stdio.h>
#include <cuda_runtime_api.h>
int main()
{
    cudaDeviceProp prop;
    cudaError_t status;
    int device_count;
    status = cudaGetDeviceCount(&device_count);
    if (status != cudaSuccess) {
        fprintf(stderr,"cudaGetDeviceCount() failed: %s\n", cudaGetErrorString(status));
        return -1;
    }
    if (${device_index} >= device_count) {
        fprintf(stderr, "Specified device index %d exceeds the maximum (the device count on this system is %d)\n", ${device_index}, device_count);
        return -1;
    }
    status = cudaGetDeviceProperties(&prop, ${device_index});
    if (status != cudaSuccess) {
        fprintf(stderr,"cudaGetDeviceProperties() for device ${device_index} failed: %s\n", cudaGetErrorString(status));
        return -1;
    }
    int v = prop.major * 10 + prop.minor;
    printf("%d\\n", v);
}
EOF
)"
echo "$source_code" | $gcc_binary -x c++ -I"$CUDA_INCLUDE_DIRS" -o "$generated_binary" - -x none "$CUDA_CUDART_LIBRARY"
# probe the card and cleanup
$generated_binary
rm $generated_binary
and the following goes into CMakeLists.txt or a CMake module:
if (NOT CUDA_TARGET_COMPUTE_CAPABILITY)
    if("$ENV{CUDA_SM}" STREQUAL "")
        set(ENV{CUDA_INCLUDE_DIRS} "${CUDA_INCLUDE_DIRS}")
        set(ENV{CUDA_CUDART_LIBRARY} "${CUDA_CUDART_LIBRARY}")
        set(ENV{CMAKE_CXX_COMPILER} "${CMAKE_CXX_COMPILER}")
        execute_process(COMMAND
            bash -c "${CMAKE_CURRENT_SOURCE_DIR}/scripts/get_cuda_sm.sh"
            OUTPUT_VARIABLE CUDA_TARGET_COMPUTE_CAPABILITY_)
    else()
        set(CUDA_TARGET_COMPUTE_CAPABILITY_ $ENV{CUDA_SM})
    endif()
    set(CUDA_TARGET_COMPUTE_CAPABILITY "${CUDA_TARGET_COMPUTE_CAPABILITY_}"
        CACHE STRING "CUDA compute capability of the (first) CUDA device on \
the system, in XY format (like the X.Y format but no dot); see table \
of features and capabilities by capability X.Y value at \
https://en.wikipedia.org/wiki/CUDA#Version_features_and_specifications")
    execute_process(COMMAND
        bash -c "echo -n $(echo ${CUDA_TARGET_COMPUTE_CAPABILITY})"
        OUTPUT_VARIABLE CUDA_TARGET_COMPUTE_CAPABILITY)
    execute_process(COMMAND
        bash -c "echo ${CUDA_TARGET_COMPUTE_CAPABILITY} | sed 's/^\\([0-9]\\)\\([0-9]\\)/\\1.\\2/;' | xargs echo -n"
        OUTPUT_VARIABLE FORMATTED_COMPUTE_CAPABILITY)
    message(STATUS
        "CUDA device-side code will assume compute capability \
${FORMATTED_COMPUTE_CAPABILITY}")
endif()
set(CUDA_GENCODE
    "arch=compute_${CUDA_TARGET_COMPUTE_CAPABILITY},code=compute_${CUDA_TARGET_COMPUTE_CAPABILITY}")
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -gencode ${CUDA_GENCODE})

C code can't run SELECT query in cron

We have C code, shown below, which we compile with gcc -o get1Receive $(mysql_config --cflags) get1ReceiveSource.c $(mysql_config --libs) -lrt. It works fine when we run it from the terminal. Then we tried to run it as a cron job and reviewed these two lines: printf("\nNumf of fields : %d",num_fields); and printf("\nNof of row : %lu",mysql_num_rows(localRes1));. The first line prints 4 as the value, but the second never gives any value and is always 0. We took the same SELECT query, ran it directly on the database, and confirmed there is data, but it is just not delivered when running via the cron job. The script has been given executable permission too.
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#include <stdio.h>
#include <time.h>
#include <signal.h>
#include <mysql.h>
#include <string.h>
int flag = 0;
int main () {
    MYSQL *localConn;
    MYSQL_RES *localRes1;
    MYSQL_ROW localRow1;

    char *server = "localhost";
    char *user = "user1";
    char *password = "*****";
    char *database = "test1";

    localConn = mysql_init(NULL);

    if (!mysql_real_connect(localConn, server,
                            user, password, database, 0, NULL, 0)) {
        fprintf(stderr, "%s\n", mysql_error(localConn));
        exit(1);
    }

    struct timeval tv;
    char queryBuf1[500], queryBuf2[500];
    char buff1[20] = {0};
    char buff2[20] = {0};

    gettimeofday(&tv, NULL);
    //fprintf (stderr, "[%d.%06d] Flag set to 1 on ", tv.tv_sec, tv.tv_usec);
    //tv.tv_sec -= 5;
    strftime(buff1, 20, "%Y-%m-%d %H:%M:00", localtime(&tv.tv_sec));
    strftime(buff2, 20, "%Y-%m-%d %H:%M:59", localtime(&tv.tv_sec));
    printf("\nTime from %s", buff1);
    printf("\nTime to %s", buff2);

    sprintf(queryBuf1, "SELECT ipDest, macDest,portDest, sum(totalBits) FROM dataReceive WHERE timeStampID between '%s' And '%s' GROUP BY ipDest, macDest, portDest ", buff1, buff2);
    printf("\nQuery receive %s", queryBuf1);

    if (mysql_query(localConn, queryBuf1))
    {
        printf("Error in first query of select %s\n", mysql_error(localConn));
        exit(1);
    }

    localRes1 = mysql_store_result(localConn);
    int num_fields = mysql_num_fields(localRes1);
    printf("\nNumf of fields : %d", num_fields);
    printf("\nNof of row : %lu", mysql_num_rows(localRes1));

    while ((localRow1 = mysql_fetch_row(localRes1)) != NULL)
    {
        int totalBits = atoi(localRow1[3]);
        printf("totalBits %d\n", totalBits);
        printf("RECEIVE %s,%s\n", localRow1[0], localRow1[1]);

        if (totalBits > 5000)
        {
            sprintf(queryBuf1, "INSERT INTO alertReceive1 (timeStampID,ipDest, macDest, portDest, totalBits)VALUES ('%s','%s','%s','%s',%s)", buff1, localRow1[0], localRow1[1], localRow1[2], localRow1[3]);
            printf("Query 1 before executing %s\n", queryBuf1);

            if (mysql_real_query(localConn, queryBuf1, strlen(queryBuf1))) {
                printf("Error in first insert %s\n", mysql_error(localConn));
                fprintf(stderr, "%s\n", mysql_error(localConn));
                exit(1);
            }
            //printf("Query 1 after executing %s\n",queryBuf1);*/
        }
    }

    mysql_free_result(localRes1);
    mysql_close(localConn);
}
We have run the following command, and this is the result:
file get1Receive
get1Receive.c: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), dynamically linked (uses shared libs), for GNU/Linux 2.6.18, not stripped
We have also run the cron entry * * * * * set > /tmp/myvars, and below are the results:
GROUPS=()
HOME=/root
HOSTNAME=capture
HOSTTYPE=x86_64
IFS='
'
LOGNAME=root
MACHTYPE=x86_64-redhat-linux-gnu
OPTERR=1
OPTIND=1
OSTYPE=linux-gnu
PATH=/usr/bin:/bin
POSIXLY_CORRECT=y
PPID=11086
PS4='+ '
PWD=/root
SHELL=/bin/sh
SHELLOPTS=braceexpand:hashall:interactive-comments:posix
SHLVL=1
TERM=dumb
UID=0
USER=root
_=/bin/sh
Generic hints (see also my comments):
Take time to read the documentation, notably Advanced Linux Programming, the man pages (which you can also get by typing man man or man 2 intro in the terminal, etc.), and the MySQL 5.5 reference. Be sure to understand what GIYF or STFW means.
Put the \n at the end of printf format strings, not the beginning.
Also, call fflush(NULL) where appropriate, notably before any MySQL queries (e.g. before your mysql_real_query calls) and at the end of your while loops.
Compile with gcc -Wall -g e.g. with the following command in your terminal
gcc -Wall -g $(mysql_config --cflags) get1ReceiveSource.c \
$(mysql_config --libs) -lrt -o get1Receive
Improve the code until no warnings are given (you may even want -Wall -Wextra instead of just -Wall). Don't forget to use a version control system like git.
use the gdb debugger (you need to learn how to use it).
(only once you are sure there are no more bugs in your code, replace -g with -O2 -g in your compilation command)
Use sizeof; most occurrences of 20 should be a sizeof, or at the very least #define SMALLSIZE 20 and then use SMALLSIZE, not 20.
Use snprintf not sprintf (and test its result size, which should fit!). snprintf(3) takes an extra size argument, e.g.
if (snprintf(querybuf, sizeof querybuf,
             "SELECT ipDest, macDest, portDest, sum(totalBits)"
             " FROM dataReceive"
             " WHERE timeStampID between '%s' And '%s' "
             " GROUP BY ipDest, macDest, portDest ",
             buff1, buff2) >= (int) (sizeof querybuf))
    abort();
consider using syslog(3) with openlog, and look into your system logs.
I don't see where queryBuf1 is declared. (Your code, as posted, probably doesn't even compile!) You might want something like char querybuf[512]; ...
And most importantly, calling mysql_real_query inside a mysql_fetch_row loop is wrong: you should fetch all the rows before issuing the next MySQL query (see the sketch after this list). Read more about the MySQL C API.
You also forgot to test the result localRes1 of mysql_store_result(localConn); report somehow (perhaps through syslog) the mysql_error(localConn) when localRes1 is NULL.
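A minimal sketch of those last two points, reusing the variables from the question's main() (the pendingInserts buffer and its size are my own illustrative choices, and <syslog.h> would be needed for the logging calls): test the result of mysql_store_result, consume the whole result set first, and only then issue the INSERTs.
localRes1 = mysql_store_result(localConn);
if (localRes1 == NULL) {   /* store_result failed or the statement returned no result set */
    syslog(LOG_ERR, "mysql_store_result failed: %s", mysql_error(localConn));
    exit(1);
}

/* first pass: fetch every row, remembering the INSERTs we will need */
char pendingInserts[64][512];
int pendingCount = 0;
while ((localRow1 = mysql_fetch_row(localRes1)) != NULL) {
    int totalBits = atoi(localRow1[3]);
    if (totalBits > 5000 && pendingCount < 64) {
        if (snprintf(pendingInserts[pendingCount], sizeof pendingInserts[pendingCount],
                     "INSERT INTO alertReceive1 (timeStampID, ipDest, macDest, portDest, totalBits)"
                     " VALUES ('%s','%s','%s','%s',%s)",
                     buff1, localRow1[0], localRow1[1], localRow1[2], localRow1[3])
            >= (int) sizeof pendingInserts[pendingCount])
            abort();
        pendingCount++;
    }
}
mysql_free_result(localRes1);

/* second pass: the result set is fully consumed, so new queries are safe */
for (int i = 0; i < pendingCount; i++) {
    if (mysql_real_query(localConn, pendingInserts[i], strlen(pendingInserts[i]))) {
        syslog(LOG_ERR, "insert failed: %s", mysql_error(localConn));
        exit(1);
    }
}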

CUDA with C/C++ Compilation fails

I am trying to integrate CUDA code with my existing C++ application. As instructed on some web site, I need to have a "file.cu" in which I have a wrapper function that does the memory allocation on the GPU and launches the kernel. I followed that advice, but I am not able to compile the code now.
file.cu
#include <cuda.h>
#include <stdio.h>
void preComputeCorrelation_gpu( int * d )
{
    //I shall write the kernel later once I am confirmed that CUDA code works
    cudaDeviceProp prop;
    cudaGetDeviceProperties( &prop, 0 );
    printf( "name = %s\n", prop.name );
}
main.cpp
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <cuda.h>
#define __CUDA_SUPPORT__
#ifdef __CUDA_SUPPORT__
// Definition to be found in "cudaWrap.cu"
extern void preComputeCorrelation_gpu( int * d );
#endif
int main()
{
    //code to read d from the file and other initialization
    int * d;
    .
    .
#ifdef __CUDA_SUPPORT__
    fprintf( stderr, "GPU Computation starts" );
    // Definition to be found in "cudaWrap.cu"
    preComputeCorrelation_gpu( d );
#else
    fprintf( stderr, "CPU Computation starts" );
    preComputeCorrelation( d );
#endif
    .
    .
    //more code
    return 0 ;
}
Now, I use the following commands to compile the code:
$ nvcc -c cudaWrap.cu
$ g++ -I /usr/local/cuda-5.0/include -L /usr/local/cuda-5.0/lib -o GA_omp GA_dev_omp.cpp main_omp.cpp data_stats.cpp cudaWrap.o
The first command works, but compilation fails with the following message after the second command:
cudaWrap.o: In function `preComputeCorrelation_gpu(DataSet*)':
tmpxft_00001061_00000000-3_cudaWrap.cudafe1.cpp:(.text+0x2f): undefined reference to `cudaGetDeviceProperties'
cudaWrap.o: In function `__cudaUnregisterBinaryUtil()':
tmpxft_00001061_00000000-3_cudaWrap.cudafe1.cpp:(.text+0x6b): undefined reference to `__cudaUnregisterFatBinary'
cudaWrap.o: In function `__sti____cudaRegisterAll_43_tmpxft_00001061_00000000_6_cudaWrap_cpp1_ii_f8a043c5()':
tmpxft_00001061_00000000-3_cudaWrap.cudafe1.cpp:(.text+0x8c): undefined reference to `__cudaRegisterFatBinary'
collect2: ld returned 1 exit status
How do I sort this out?
The solution to this problem is that the ordinary C++ code and the CUDA code need to be linked against libcudart.so.
Compilation should look something like:
$ nvcc -c cudaWrap.cu
$ g++ -lcudart -I /usr/local/cuda-5.0/include -L /usr/local/cuda-5.0/lib -o GA_omp GA_dev_omp.cpp main_omp.cpp data_stats.cpp cudaWrap.o
In this case, cudaWrap.cu contains the CUDA code, main_omp.cpp contains main(), and there are some other application files that need compiling too.
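One extra detail that sometimes bites here (general linker behaviour, not specific to this answer): with GNU ld the position of -l options matters, and some distributions enable --as-needed by default, so it is more robust to put -lcudart after the object and source files, e.g.:
$ g++ -I /usr/local/cuda-5.0/include -o GA_omp GA_dev_omp.cpp main_omp.cpp data_stats.cpp cudaWrap.o -L /usr/local/cuda-5.0/lib -lcudart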