Faster RCNN make fail - deep-learning

I have been trying for some days to make faster RCNN work in a laptop with GPU(Quadro K5100M).
I was able to run the demo.py in a laptop with only CPU.
I am using CUDA 8 and CuDnn 4 and caffe build works but the build process of caffe in faster rcnn does not. I have CuDnn commented out now because of some other errors.
Can anyone give me some suggestions.
I get this error during make -j8 && make pycaffe.
.build_release/tools/extract_features.o: In functionint feature_extraction_pipeline(int, char**)':
extract_features.cpp:(.text._Z27feature_extraction_pipelineIfEiiPPc[_Z27feature_extraction_pipelineIfEiiPPc]+0x162): undefined reference to caffe::Net<float>::Net(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, caffe::Phase, caffe::Net<float> const*)'
collect2: error: ld returned 1 exit status
CXX/LD -o .build_release/examples/mnist/convert_mnist_data.bin
Makefile:607: recipe for target '.build_release/tools/extract_features.bin' failed
make: *** [.build_release/tools/extract_features.bin] Error 1
make: *** Waiting for unfinished jobs....
.build_release/tools/caffe.o: In functiontest()':
caffe.cpp:(.text+0x1157): undefined reference to caffe::Net<float>::Net(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, caffe::Phase, caffe::Net<float> const*)'
.build_release/tools/caffe.o: In functiontrain()':
caffe.cpp:(.text+0x2a3e): undefined reference to caffe::P2PSync<float>::P2PSync(boost::shared_ptr<caffe::Solver<float> >, caffe::P2PSync<float>*, caffe::SolverParameter const&)'
caffe.cpp:(.text+0x2a6b): undefined reference tocaffe::P2PSync::run(std::vector > const&)'
caffe.cpp:(.text+0x2a73): undefined reference to caffe::P2PSync<float>::~P2PSync()'
caffe.cpp:(.text+0x3c53): undefined reference tocaffe::P2PSync::~P2PSync()'
caffe.cpp:(.text+0x3f45): undefined reference to caffe::P2PSync<float>::~P2PSync()'
.build_release/tools/caffe.o: In functiontime()':
caffe.cpp:(.text+0x4136): undefined reference to caffe::Net<float>::Net(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, caffe::Phase, caffe::Net<float> const*)'
caffe.cpp:(.text+0x44ef): undefined reference tocaffe::Layer::Lock()'
caffe.cpp:(.text+0x45ed): undefined reference to caffe::Layer<float>::Unlock()'
collect2: error: ld returned 1 exit status
Makefile:607: recipe for target '.build_release/tools/caffe.bin' failed
This is my makefile
# USE_CUDNN := 1
# CPU-only switch (uncomment to build without GPU support).
# CPU_ONLY := 1
# Uncomment if you're using OpenCV 3
OPENCV_VERSION := 3
# CUDA directory contains bin/ and lib/ directories that we need.
CUDA_DIR := /usr/local/cuda-8.0
# On Ubuntu 14.04, if cuda tools are installed via
# "sudo apt-get install nvidia-cuda-toolkit" then use this instead:
# CUDA_DIR := /usr
CUDA_ARCH := -gencode arch=compute_20,code=sm_20 \
-gencode arch=compute_20,code=sm_21 \
-gencode arch=compute_30,code=sm_30 \
-gencode arch=compute_35,code=sm_35 \
-gencode arch=compute_50,code=sm_50 \
-gencode arch=compute_50,code=compute_50
BLAS := atlas
#BLAS := open
# NOTE: this is required only if you will compile the python interface.
# We need to be able to find Python.h and numpy/arrayobject.h.
PYTHON_INCLUDE := /usr/include/python2.7 \
/usr/local/lib/python2.7/dist-packages/numpy/core/include \
/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy
# We need to be able to find libpythonX.X.so or .dylib.
PYTHON_LIB := /usr/lib
# PYTHON_LIB := $(ANACONDA_HOME)/lib
# Uncomment to support layers written in Python (will link against Python libs)
WITH_PYTHON_LAYER := 1
# Whatever else you find you need goes here.
INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include /usr/include/hdf5/serial /usr/include/opencv /usr/local/cuda-8.0/include
LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib /usr/lib/x86_64-linux-gnu /usr/lib/x86_64-linux-gnu/hdf5/serial /usr/local/share/OpenCV /usr/local/cuda-8.0/lib64
# N.B. both build and distribute dirs are cleared on `make clean`
BUILD_DIR := build
DISTRIBUTE_DIR := distribute
# Uncomment for debugging. Does not work on OSX due to https://github.com/BVLC/caffe/issues/171
# DEBUG := 1
# The ID of the GPU that 'make runtest' will use to run unit tests.
TEST_GPUID := 0
# enable pretty build (comment to see full commands)
Q ?= #

I had the same error because my library path (/usr/lib/) contained a previous build of libcaffe.so. Removing it fixed the problem.

Related

Github Action - Can't build gtest using ccache, mingw, cmake and ninja

I am using this workflow: cmake_build.yaml
Here is my toplevel CMakeLists.txt:
cmake_minimum_required(VERSION 3.15)
project(
container
VERSION 0.1.0
DESCRIPTION "An extension to the standard container library in c++"
HOMEPAGE_URL ""
LANGUAGES CXX
)
add_executable(${PROJECT_NAME} src/main.cpp)
set_target_properties(
${PROJECT_NAME}
PROPERTIES
LINKER_LANGUAGE CXX
ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib/${CMAKE_BUILD_TYPE}"
LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib/${CMAKE_BUILD_TYPE}"
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin/${CMAKE_BUILD_TYPE}"
)
target_compile_features(${PROJECT_NAME} PUBLIC cxx_std_17)
target_include_directories(
${PROJECT_NAME}
PUBLIC
$<INSTALL_INTERFACE:include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/src
)
install(
DIRECTORY
include/${PROJECT_NAME_LOWERCASE}
DESTINATION
include
)
enable_testing()
add_subdirectory(test)
and test/CMakeLists.txt
cmake_minimum_required(VERSION 3.14)
project(my_project)
# GoogleTest requires at least C++14
set(CMAKE_CXX_STANDARD 17)
include(FetchContent)
FetchContent_Declare(
googletest
URL https://github.com/google/googletest/archive/03597a01ee50ed33e9dfd640b249b4be3799d395.zip
)
# For Windows: Prevent overriding the parent project's compiler/linker settings
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googletest)
add_executable(
hello_test
hello_test.cc
)
target_link_libraries(
hello_test
GTest::gtest_main
)
include(GoogleTest)
gtest_discover_tests(hello_test)
I am using the example from gtest docs:
test/hello_test.cc:
#include <gtest/gtest.h>
TEST(HelloTest, BasicAssertions) {
// Expect two strings not to be equal.
EXPECT_STRNE("hello", "world");
// Expect equality.
EXPECT_EQ(7 * 6, 42);
}
int main(int argc, char **argv)
{
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
It builds fine on Windows MSVC, Macos Clang, Linux GCC, but fails using Mingw GCC, here is error shown:
FAILED: test/hello_test.exe test/hello_test[1]_tests.cmake D:/a/tsst/tsst/build/test/hello_test[1]_tests.cmake
cmd.exe /C "cd . && C:\ProgramData\chocolatey\bin\g++.exe -O3 -DNDEBUG test/CMakeFiles/hello_test.dir/hello_test.cc.obj -o test\hello_test.exe -Wl,--out-implib,test\libhello_test.dll.a -Wl,--major-image-version,0,--minor-image-version,0 lib/libgtest_main.a lib/libgtest.a -lkernel32 -luser32 -lgdi32 -lwinspool -lshell32 -lole32 -loleaut32 -luuid -lcomdlg32 -ladvapi32 && cmd.exe /C "cd /D D:\a\tsst\tsst\build\test && D:\a\tsst\tsst\cmake-3.24.3-windows-x86_64\bin\cmake.exe -D TEST_TARGET=hello_test -D TEST_EXECUTABLE=D:/a/tsst/tsst/build/test/hello_test.exe -D TEST_EXECUTOR= -D TEST_WORKING_DIR=D:/a/tsst/tsst/build/test -D TEST_EXTRA_ARGS= -D TEST_PROPERTIES= -D TEST_PREFIX= -D TEST_SUFFIX= -D TEST_FILTER= -D NO_PRETTY_TYPES=FALSE -D NO_PRETTY_VALUES=FALSE -D TEST_LIST=hello_test_TESTS -D CTEST_FILE=D:/a/tsst/tsst/build/test/hello_test[1]_tests.cmake -D TEST_DISCOVERY_TIMEOUT=5 -D TEST_XML_OUTPUT_DIR= -P D:/a/tsst/tsst/cmake-3.24.3-windows-x86_64/share/cmake-3.24/Modules/GoogleTestAddTests.cmake""
CMake Error at D:/a/tsst/tsst/cmake-3.24.3-windows-x86_64/share/cmake-3.24/Modules/GoogleTestAddTests.cmake:112 (message):
Error running test executable.
Path: 'D:/a/tsst/tsst/build/test/hello_test.exe'
Result: Exit code 0xc0000139
Output:
Call Stack (most recent call first):
D:/a/tsst/tsst/cmake-3.24.3-windows-x86_64/share/cmake-3.24/Modules/GoogleTestAddTests.cmake:225 (gtest_discover_tests_impl)
ninja: build stopped: subcommand failed.
Build works fine without building gtest, and it fails when building test executable with gtest

Command to run callback_profiling sample from CUPTI

I am running the sample code available for Nvidia CUDA CUPTI in /usr/local/cuda-11.8/extras/CUPTI/samples/callback_profiling. There is a Makefile, but I want to run it using single command (without the Makefile) because it is giving me permission errors with the Makefile. Based on the Makefile, this is the command I am writing:
nvcc --generate-line-info callback_profiling.cu -o callback_profiling -lnvperf_host -lnvperf_target -lcuda -lcupti -I/usr/local/cuda-11.8/extras/CUPTI/samples/callback_profiling/../extensions/include/profilerhost_util -I/usr/local/cuda-11.8/extras/CUPTI/samples/callback_profiling/../extensions/include/c_util -I/usr/local/cuda-11.8/extras/CUPTI/samples/callback_profiling/../../include -L /usr/local/cuda-11.8/extras/CUPTI/samples/callback_profiling/../extensions/src/profilerhost_util
I am getting the error
/usr/bin/ld: /tmp/tmpxft_00005e71_00000000-11_callback_profiling.o: in function `setupProfiling(ProfilingData_t*)':
tmpxft_00005e71_00000000-6_callback_profiling.cudafe1.cpp:(.text+0xe48): undefined reference to `NV::Metric::Config::GetConfigImage(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > const&, std::vector<unsigned char, std::allocator<unsigned char> >&, unsigned char const*)'
/usr/bin/ld: tmpxft_00005e71_00000000-6_callback_profiling.cudafe1.cpp:(.text+0xed3): undefined reference to `NV::Metric::Config::GetCounterDataPrefixImage(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > const&, std::vector<unsigned char, std::allocator<unsigned char> >&, unsigned char const*)'
/usr/bin/ld: /tmp/tmpxft_00005e71_00000000-11_callback_profiling.o: in function `main':
tmpxft_00005e71_00000000-6_callback_profiling.cudafe1.cpp:(.text+0x2751): undefined reference to `NV::Metric::Eval::PrintMetricValues(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::vector<unsigned char, std::allocator<unsigned char> > const&, std::vector<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::allocator<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > > const&, unsigned char const*)'
collect2: error: ld returned 1 exit status
Can someone help me with what will be the right command to run the application?
I have an Ubuntu machine with CUDA 11.8
There is a library (libprofilerHostUtil.a) that doesn't get built as part of the samples build process that you will need to build manually.
On a typical linux install, the Makefile to build that library is in /usr/local/cuda/extras/CUPTI/samples/extensions/src/profilerhost_util. As a root user you should be able to go into that directory, and type make, and then the necessary library will get built.
Once you have done that, as an ordinary user, you should be able to build the code you are asking about like this:
nvcc callback_profiling.cu -o callback_profiling -I/usr/local/cuda/extras/CUPTI/samples/callback_profiling/../extensions/include/profilerhost_util -I/usr/local/cuda/extras/CUPTI/samples/callback_profiling/../extensions/include/c_util -I/usr/local/cuda/extras/CUPTI/samples/callback_profiling/../../include -L /usr/local/cuda/extras/CUPTI/samples/callback_profiling/../extensions/src/profilerhost_util -L/usr/local/cuda/extras/CUPTI/lib64 -lcupti -lnvperf_host -lnvperf_target -lprofilerHostUtil -lcuda
wherever you see /cuda/ in the paths above, you may need to change that to /cuda-11.8/ or whatever is needed to match your install, if you have not set up the symbolic link that the CUDA installer asks to set up.
If you want to build the library I mentioned somewhere else, manually, you will need to do something like this. First, change to the indicated directory (cd /usr/local/cuda/extras/CUPTI/samples/extensions/src/profilerhost_util), then use these commands, replacing /my/build/dir/ with the path to a location you have write access to:
nvcc -c --std=c++11 -Xcompiler -fPIC -I../../../../include -I../../../../../../include -I../../include/profilerhost_util -I../../include/c_util List.cpp -o /my/build/dir/List.o
nvcc -c --std=c++11 -Xcompiler -fPIC -I../../../../include -I../../../../../../include -I../../include/profilerhost_util -I../../include/c_util Metric.cpp -o /my/build/dir/Metric.o
nvcc -c --std=c++11 -Xcompiler -fPIC -I../../../../include -I../../../../../../include -I../../include/profilerhost_util -I../../include/c_util Eval.cpp -o /my/build/dir/Eval.o
nvcc -o /my/build/dir/libprofilerHostUtil.a -lib /my/build/dir/List.o /my/build/dir/Metric.o /my/build/dir/Eval.o -lcuda -L ../../../../../../lib64 -lnvperf_host -lnvperf_target
And if you use that route, you will need to add -L. to the command to build the executable.

Runtime crash using fstream exceptions in c++

I am trying to use fstream and exceptions but alway I run a program using exception it crash, no erros reported from compiller.
I think is a bug in MinGw compiler or compatibility with my windows system.
could you help me solve my problem. Thank you!
Code::Blocks 17.12! rev 11256 Win32 / gcc version 3.4.5 (mingw-vista special r3) / Windows 7 ServicePack1
Example code:
#include <iostream>
#include <fstream>
int main() {
int ivalue;
try {
std::ifstream in("in.txt");
in.exceptions(std::ifstream::failbit);
in >> ivalue;
} catch (std::ios_base::failure &fail) {
// handle exception here
}
}
```
Link:https://es.cppreference.com/w/cpp/io/basic_ios/exceptions
Compiller:
-------------- Build: Release in Exceptions (compiler: GNU GCC Compiler)---------------
mingw32-g++.exe -Wall -fexceptions -O2 -IC:\C++\TestExecLibs\Include -c C:\Programing\C&C++\Exceptions\main.cpp -o obj\Release\main.o
mingw32-g++.exe -o bin\Release\Exceptions.exe obj\Release\main.o -s "C:\Program Files\CodeBlocks\MinGW\lib\libcomctl32.a"
Output file is bin\Release\Exceptions.exe with size 784.50 KB
Process terminated with status 0 (0 minute(s), 4 second(s))
0 error(s), 0 warning(s) (0 minute(s), 4 second(s))-------------- Build: Release in Exceptions (compiler: GNU GCC Compiler)---------------
mingw32-g++.exe -Wall -fexceptions -O2 -IC:\C++\TestExecLibs\Include -c C:\Programing\C&C++\Exceptions\main.cpp -o obj\Release\main.o
mingw32-g++.exe -o bin\Release\Exceptions.exe obj\Release\main.o -s "C:\Program Files\CodeBlocks\MinGW\lib\libcomctl32.a"
Output file is bin\Release\Exceptions.exe with size 784.50 KB
Process terminated with status 0 (0 minute(s), 4 second(s))
0 error(s), 0 warning(s) (0 minute(s), 4 second(s))
After Run CMD:
terminate called after throwing an instance of 'std::ios_base::failure'
what(): basic_ios::clear
This application has requested the Runtime to terminate it in an unusual way.
Please contact the application's support team for more information.
Process returned 3 (0x3) execution time : 2.948 s
Press any key to continue.
[CMD Window Result][1]
[1]: https://i.stack.imgur.com/vCudt.jpg

Can not link libmysqlclient.a?

I'm using Ubuntu 14.04, and I installed libmysqlclient-dev package already. But I always get link error because mysql_init symbol missing.
My source code, Makefile, run-time result and symbol info are as follows:
igsrd#naivechou/~/project/m01/uuid_sign>cat main.cpp
#include <cstdio>
#include <cstdlib>
#include "mysql/mysql.h"
int main(int argc,char** argv)
{
auto con = mysql_init(nullptr);
//printf("mysql client version : %s\n",mysql_get_client_info());
exit(EXIT_SUCCESS);
}
igsrd#naivechou/~/project/m01/uuid_sign>LANG= && make -B
g++ -o main.o -c main.cpp -std=c++11 -ggdb
g++ -o uuid_sign -L/usr/lib/x86_64-linux-gnu -lmysqlclient -lpthread -lz -lm -ldl main.o
main.o: In function `main':
/home/igsrd/project/m01/uuid_sign/main.cpp:6: undefined reference to `mysql_init'
collect2: error: ld returned 1 exit status
make: *** [uuid_sign] Error 1
igsrd#naivechou/~/project/m01/uuid_sign>nm -C /usr/lib/x86_64-linux-gnu/libmysqlclient.a | grep mysql_init
0000000000002b10 T mysql_init
0000000000002d30 T mysql_init_character_set
U mysql_init_character_set
Source is very simple, just one line to call mysql_init().
Making process shows every options for compiler and linker, I think there are no missing options.
Error message is undefined reference of linking error, so I dump libmysqlclient.a to grep the mysql_init, and it is not on undefined state.
Now I really have no idea. What's wrong with this ?
Change your second command, try this (change the position of main.o):
g++ -o uuid_sign main.o -L/usr/lib/x86_64-linux-gnu -lmysqlclient -lpthread -lz -lm -ldl

Why do I get a segmentation fault when calling a CUDA kernel directly from a shared library?

The way I tried it (see question title) it compiled, but I get a segmentation fault. So is it me, CMake or CUDA which doesn't support direct kernel calls from a shared library? The solution doesn't have to be with CMake
Further details:
I have the following file structure:
testKernel.hpp
__global__ void kernelTest( float x );
void callKernel( float x );
testKernel.cu
#include "testKernel.hpp"
__global__ void kernelTest( float x ) {}
void callKernel( float x ) { kernelTest<<<1,1>>>( x ); }
useKernel.cu
#include <cstdio>
#include "testKernel.hpp"
int main( void )
{
kernelTest<<<1,1>>>( 3.0f );
//callKernel( 3.0f );
printf("OK\n");
return 0;
}
CMakeLists.txt
cmake_minimum_required(VERSION 3.3.1)
project(testKernelCall)
find_package(CUDA REQUIRED)
cuda_add_library( ${PROJECT_NAME} SHARED testKernel.cu testKernel.hpp )
target_link_libraries( ${PROJECT_NAME} ${CUDA_LIBRARIES} )
cuda_add_executable("useKernel" useKernel.cu)
target_link_libraries("useKernel" ${PROJECT_NAME})
Compiling and running this with:
cmake .; make && ./useKernel
results in a segmentation fault. The backtrace with gdb is:
Program received signal SIGSEGV, Segmentation fault.
0x00007ffff75726bd in cudart::configData::addArgument(void const*, unsigned long, unsigned long) ()
from ./libtestKernelCall.so
(gdb) bt
#0 0x00007ffff75726bd in cudart::configData::addArgument(void const*, unsigned long, unsigned long) ()
from ./libtestKernelCall.so
#1 0x00007ffff7562eb7 in cudart::cudaApiSetupArgument(void const*, unsigned long, unsigned long) ()
from ./libtestKernelCall.so
#2 0x00007ffff7591ca2 in cudaSetupArgument ()
from ./libtestKernelCall.so
#3 0x00007ffff7556125 in __device_stub__Z10kernelTestf (__par0=3)
at /tmp/tmpxft_00003900_00000000-4_testKernel.cudafe1.stub.c:7
#4 0x00007ffff755616c in kernelTest (__cuda_0=3) at ./testKernel.cu:2
#5 0x000000000040280e in main () at ./useKernel.cu:6
Tested with (which means the segfault appears in those setups):
Setup 1
cmake 3.4.1
CUDA 7.0.27
g++ 4.9.2
Debian
Setup 2
cmake 3.3.1
CUDA 6.5.14
g++ 4.7.1
There are two ways to solve this error:
change SHARED to STATIC in CMakeList.txt
use the wrapper function callKernel instead of calling the kernel directly
I don't really know how to build a CUDA shared library without CMake. I know how to build a CUDA static library, but that case seems to work with CMake, so I didn't test it without CMake.
Here are the relevant CMake commands I got with make VERBOSE=1. I changed absolute paths to relative paths, where possible, but I wasn't sure about all these library paths. Putting these commands in a file and sourcing that file compiles the shared library and the program correctly and "correctly" leads to the segmentation fault. I also added command because for me nvcc is aliased with the `-ccbin`` option.
make.sh
command nvcc "./testKernel.cu" -c -o "./testKernel.cu.o" -ccbin /usr/bin/cc -m64 -DtestKernelCall_EXPORTS -Xcompiler ,\"-fPIC\",\"-g\" -DNVCC -I/opt/cuda-7.0/include -I/opt/cuda-7.0/include
/usr/bin/c++ -fPIC -shared -Wl,-soname,libtestKernelCall.so -o libtestKernelCall.so ./testKernel.cu.o /opt/cuda-7.0/lib64/libcudart_static.a -lpthread /usr/lib/x86_64-linux-gnu/librt.so /usr/lib/x86_64-linux-gnu/libdl.so /opt/cuda-7.0/lib64/libcudart_static.a -lpthread /usr/lib/x86_64-linux-gnu/librt.so /usr/lib/x86_64-linux-gnu/libdl.so
command nvcc "./useKernel.cu" -c -o "./useKernel.cu.o" -ccbin /usr/bin/cc -m64 -Xcompiler ,\"-g\" -DNVCC -I/opt/cuda-7.0/include -I/opt/cuda-7.0/include
/usr/bin/c++ ./useKernel.cu.o -o useKernel -rdynamic /opt/cuda-7.0/lib64/libcudart_static.a -lpthread /usr/lib/x86_64-linux-gnu/librt.so /usr/lib/x86_64-linux-gnu/libdl.so libtestKernelCall.so /opt/cuda-7.0/lib64/libcudart_static.a -lpthread /usr/lib/x86_64-linux-gnu/librt.so /usr/lib/x86_64-linux-gnu/libdl.so -Wl,-rpath,"."
Your code compiles and runs correctly for me using ordinary nvcc commands (not CMake) if I add the -cudart shared switch to each nvcc command. Here's a fully-worked sequence:
$ cat testKernel.hpp
__global__ void kernelTest( float x );
void callKernel( float x );
$ cat testKernel.cu
#include "testKernel.hpp"
__global__ void kernelTest( float x ) {}
void callKernel( float x ) { kernelTest<<<1,1>>>( x ); }
$ cat useKernel.cu
#include <cstdio>
#include "testKernel.hpp"
int main( void )
{
kernelTest<<<1,1>>>( 3.0f );
//callKernel( 3.0f );
cudaDeviceSynchronize();
printf("OK\n");
return 0;
}
$ nvcc -shared -cudart shared -o test.so -Xcompiler -fPIC testKernel.cu
$ nvcc -cudart shared -o test test.so useKernel.cu
$ cuda-memcheck ./test
========= CUDA-MEMCHECK
OK
========= ERROR SUMMARY: 0 errors
$
If I omit -cudart shared on either of the above nvcc commands, then the compile will still proceed, but on execution I will witness the aforementioned seg fault. Tested with CUDA 7.5 on Fedora 20.
Regarding your CMake setup, it's necessary to link against the shared cudart, according to my testing. Therefore it's insufficient to add -cudart shared to the -c commands (which are compile commands. Sorry if I was unclear. My "compile" commands above are doing both compiling and linking, at each step.)
When linking with nvcc, the correct switch is -cudart shared. However, your make.sh indicates final link is being done by the host c++ compiler:
command nvcc "./testKernel.cu" -c -o "./testKernel.cu.o" -ccbin /usr/bin/cc -m64 -DtestKernelCall_EXPORTS -Xcompiler ,\"-fPIC\",\"-g\" -DNVCC -I/opt/cuda-7.0/include -I/opt/cuda-7.0/include
/usr/bin/c++ -fPIC -shared -Wl,-soname,libtestKernelCall.so -o libtestKernelCall.so ./testKernel.cu.o /opt/cuda-7.0/lib64/libcudart_static.a -lpthread /usr/lib/x86_64-linux-gnu/librt.so /usr/lib/x86_64-linux-gnu/libdl.so /opt/cuda-7.0/lib64/libcudart_static.a -lpthread /usr/lib/x86_64-linux-gnu/librt.so /usr/lib/x86_64-linux-gnu/libdl.so
command nvcc "./useKernel.cu" -c -o "./useKernel.cu.o" -ccbin /usr/bin/cc -m64 -Xcompiler ,\"-g\" -DNVCC -I/opt/cuda-7.0/include -I/opt/cuda-7.0/include
/usr/bin/c++ ./useKernel.cu.o -o useKernel -rdynamic /opt/cuda-7.0/lib64/libcudart_static.a -lpthread /usr/lib/x86_64-linux-gnu/librt.so /usr/lib/x86_64-linux-gnu/libdl.so libtestKernelCall.so /opt/cuda-7.0/lib64/libcudart_static.a -lpthread /usr/lib/x86_64-linux-gnu/librt.so /usr/lib/x86_64-linux-gnu/libdl.so -Wl,-rpath,"."
In that case, you don't want to link against:
/opt/cuda-7.0/lib64/libcudart_static.a
but instead against libcudart.so:
/opt/cuda-7.0/lib64/libcudart.so
If you were editing your make.sh directly, you would want to make that change in both of the /usr/bin/c++ command lines you have shown. For example, if I were to modify my compile sequence already presented to reflect your usage of the host c++ compiler to do the linking, it would look like this:
$ nvcc -c -Xcompiler -fPIC testKernel.cu
$ g++ -fPIC -shared -o test.so -L/usr/local/cuda/lib64 -lcudart testKernel.o
$ nvcc -c useKernel.cu
$ g++ -o test -L/usr/local/cuda/lib64 -lcudart test.so useKernel.o
$ cuda-memcheck ./test
========= CUDA-MEMCHECK
OK
========= ERROR SUMMARY: 0 errors
$
put set(CUDA_USE_STATIC_CUDA_RUNTIME OFF) before find_package(CUDA REQUIRED) will do the job equivalent to set(CUDA_LIBRARIES "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcudart.so")
This is an extension to Robert Crovella's answer.
I use the following CMakeLists.txt and it works well.
cmake_minimum_required(VERSION 3.8)
project(cmake_and_cuda LANGUAGES CXX CUDA)
add_library(my_cu SHARED testKernel.cu testKernel.h)
target_link_libraries(my_cu PRIVATE cudart) #MUST!!
set(CMAKE_CUDA_FLAGS "-shared -cudart shared -Xcompiler -fPIC"
CACHE STRING "Use libcudart.dylib" FORCE)
set(CMAKE_MACOSX_RPATH FALSE)
add_executable(app useKernel.cu)
target_link_libraries(app PRIVATE cudart) #MUST!!
target_link_libraries(app PRIVATE my_cu)
I'm using CMake 3.10 and my OS is OS X EI Capitan 10.11.6.
For me, if I don't set CMAKE_MACOSX_RPATH to FALSE, I will get a Library not loaded error. Maybe it is not necessary for you.
Note that since CMake 3.8, the FindCUDA is superseded, so setting CUDA_USE_STATIC_CUDA_RUNTIME affects nothing.
You can check this post and this document for details.
In addition, this post provides a good example about how to deal with CUDA after CMake 3.8.