Unable to use CMSIS library in Atollic - fft

I am trying to use CFFT function for my STM32 microcontroller in Atollic TrueStudio. But I am unable to use any of the DSP functions. I get the error -
undefined reference to `arm_cfft_f32' and undefined reference to 'arm_cfft_sR_f32_len16'. I don't know what the problem is because it works on Keil. What am I doing wrong?
#include "stm32f4xx.h"
#include "arm_math.h"
#include "arm_const_structs.h"
#include "core_cm4.h"
#include "math.h"
/* Number of float32 values in the FFT input buffer. */
#define TEST_LENGTH_SAMPLES 32
/* Zero-initialised buffer handed to arm_cfft_f32() in main(). */
float32_t ffttestip[TEST_LENGTH_SAMPLES]={0};
/* Half-length buffer; not referenced by the code shown here. */
static float32_t ffttestop[TEST_LENGTH_SAMPLES/2];
/* Private macro */
/* Private variables */
/* Private function prototypes */
/* Private functions */
/* Not referenced by the code shown here; presumably the FFT length that
 * matches arm_cfft_sR_f32_len16 -- TODO confirm. */
uint32_t fftSize = 16;
/* Passed as the third argument of the arm_cfft_f32() call in main(). */
uint8_t ifftFlag = 0;
/* Passed as the fourth argument of the arm_cfft_f32() call in main(). */
uint8_t doBitReverse = 1;
/*
 * Entry point: performs a small pow() computation, runs arm_cfft_f32() once
 * on ffttestip with the 16-point configuration structure, then spins forever.
 */
int main(void)
{
    /* Scratch arithmetic: 15 raised to the power 2 through the math library. */
    int i = 15;
    i = pow(i, 2);

    /*
     * IMPORTANT NOTE!
     * VECT_TAB_SRAM has to be defined for the build when the code is placed
     * in RAM and interrupts are used; without it the interrupt table in
     * flash is used. See the <system_*.c> file and how SystemInit() updates
     * the SCB->VTOR register, e.g. SCB->VTOR = 0x20000000;
     */

    /* Application code: one FFT call on the global input buffer. */
    arm_cfft_f32(&arm_cfft_sR_f32_len16, ffttestip, ifftFlag, doBitReverse);

    /* Never return from main on the target. */
    for (;;)
    {
    }
}
EDIT - additionally, I get the following error:
Info: Internal Builder is used for build
arm-atollic-eabi-g++ -o fftreal.elf Libraries\STM32F4xx_StdPeriph_Driver\src\misc.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_adc.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_can.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_crc.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_cryp.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_cryp_aes.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_cryp_des.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_cryp_tdes.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_dac.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_dbgmcu.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_dcmi.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_dma.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_exti.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_flash.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_fsmc.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_gpio.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_hash.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_hash_md5.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_hash_sha1.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_i2c.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_iwdg.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_pwr.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_rcc.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_rng.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_rtc.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_sdio.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_spi.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_syscfg.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_tim.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_usart.o Libraries\STM32F4xx_StdPeriph_Driver\src\stm32f4xx_wwdg.o src\main.o src\startup_stm32f40xx.o src\stm32f4xx_it.o src\system_stm32f4xx.o src\tiny_printf.o -mthumb -mcpu=cortex-m4 -mfloat-abi=hard -mfpu=fpv4-sp-d16 
-T../stm32f4_flash.ld -specs=nosys.specs -static -Wl,-cref,-u,Reset_Handler -Wl,-Map=fftreal.map -Wl,--gc-sections -Wl,--defsym=malloc_getpagesize_P=0x1000 -Wl,--start-group -lc -lm -lstdc++ -lsupc++ -Wl,--end-group -specs=nano.specs
src\main.o: In function `main':
E:\truestudio workspace\fftreal\Debug/..\src/main.cpp:70: undefined reference to `arm_rfft_f32'
collect2.exe: error: ld returned 1 exit status

First of all drop this old unsupported SPL.
Secondly you need to add the .c files containing the CMSIS DSP functions to your project.
Finally - as far as I can see, your program has nothing to do with C++, so why is your project set up as a C++ project?

Related

Is there any way to print from Google Chrome to thermal printer (escpos) in local network without using apps like QZ tray?

Can PWA functionality (a service worker) help, or is there no way to do that?
This document contains a sample program that prints easily using the socket interface in C language.
UB-E04 Technical Reference Guide
It seems that the equivalent can be implemented using JavaScript WebSocket.
This article is available in both Japanese and English, and both provide examples of using WebSocket easily from vanilla JavaScript.
5分で動かせるwebsocketのサンプル3つ / WebSocket Tutorials
Introducing WebSockets: Bringing Sockets to the Web
The following is a sample program in Linux C language described in the document.
Sending to a printer can be done with such a simple program.
The data to be sent must be created in the format described in the ESC/POS command reference.
/* TCP9100 programming sample for LINUX
* HOW TO BUILD
* cc ltcp.c
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>
/*
 * Sends one short text line to a printer listening on raw TCP port 9100.
 *
 * argv[1] is the printer's IPv4 address in dotted-decimal form.
 * Returns 0 when the line was handed to the socket; exits with status 1 on a
 * usage error, an unparsable address, or any socket/connect/send failure.
 */
int main(int argc, char* argv[])
{
    static const char payload[] = "EPSON UB-E04\n";
    int sock;
    struct sockaddr_in addr;

    if (argc != 2) {
        printf("usage: ltcp <ip address>\n");
        exit(1);
    }

    /* initialize the parameter */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = htons(9100);
    addr.sin_addr.s_addr = inet_addr(argv[1]);
    /* inet_addr() signals a malformed address with INADDR_NONE. */
    if (addr.sin_addr.s_addr == INADDR_NONE) {
        fprintf(stderr, "invalid ip address: %s\n", argv[1]);
        exit(1);
    }

    /* create socket */
    sock = socket(AF_INET, SOCK_STREAM, 0);
    if (sock < 0) {
        perror("socket()");
        exit(1);
    }

    /* connect -- stop here on failure instead of reporting "connected" */
    if (connect(sock, (struct sockaddr*)&addr, sizeof(addr)) < 0) {
        perror("connect()");
        close(sock);
        exit(1);
    }
    printf("connected\n");

    /* send data; the length excludes the string's terminating NUL */
    if (send(sock, payload, sizeof(payload) - 1, 0) < 0) {
        perror("send()");
        close(sock);
        exit(1);
    }

    /* close socket */
    close(sock);
    return 0;
}

Load device function from shared library with dlopen

I'm relatively new to cuda programming and can't find a solution to my problem.
I'm trying to have a shared library, let's call it func.so, that defines a device function
// Device function that prints "hello" with the device-side printf.
__device__ void hello(){ printf("hello"); }
I then want to be able to access that library via dlopen, and use that function in my program. I tried something along the following lines:
func.cu
#include <stdio.h>
// Pointer-to-function type matching dhello's signature.
typedef void(*pFCN)();
// Device function that prints "hello" followed by a newline.
__device__ void dhello(){
    printf("hello\n");
}
// Device-side variable initialised with the address of dhello.
__device__ pFCN ptest = dhello;
// Host-side copy of ptest, filled in by getpointer().
pFCN h_pFCN;
// Copies the device-side function pointer ptest into h_pFCN and returns it.
// Exported with C linkage so it can be looked up by name with dlsym().
// Returns NULL (and reports the CUDA error on stderr) when the copy fails.
extern "C" pFCN getpointer(){
    cudaError_t err = cudaMemcpyFromSymbol(&h_pFCN, ptest, sizeof(pFCN));
    if (err != cudaSuccess) {
        fprintf(stderr, "getpointer: cudaMemcpyFromSymbol failed: %s\n",
                cudaGetErrorString(err));
        h_pFCN = NULL;
    }
    return h_pFCN;
}
main.cu
#include <dlfcn.h>
#include <stdio.h>
// Signature of the function that the kernel calls through a pointer.
typedef void (*fcn)();
// Signature of the host accessor looked up with dlsym(); it returns an fcn.
typedef fcn (*retpt)();
// Host accessor resolved from the shared library in main().
retpt hfcnpt;
// Host copy of the function pointer returned by the accessor.
fcn hfcn;
// Device-side copy of that pointer; main() fills it with cudaMemcpyToSymbol().
__device__ fcn dfcn;

// Kernel: each launched thread calls whatever dfcn currently points to.
__global__ void foo(){
    dfcn();
}
// Loads gputest.so, asks its getpointer() for a device function pointer,
// stores that pointer in the device variable dfcn and launches foo<<<1,1>>>.
//
// Returns 0 on success and 1 when loading the library, resolving the symbol,
// or any CUDA call fails; the reason is printed on stderr.
//
// NOTE(review): the accompanying answer states that device code cannot call a
// __device__ function that lives in a separately built shared library, so the
// kernel may still fault even when every check here passes.
int main() {
    void * m_handle = dlopen("gputest.so", RTLD_NOW);
    if (m_handle == NULL) {
        fprintf(stderr, "dlopen failed: %s\n", dlerror());
        return 1;
    }

    hfcnpt = (retpt) dlsym( m_handle, "getpointer");
    if (hfcnpt == NULL) {
        fprintf(stderr, "dlsym failed: %s\n", dlerror());
        dlclose(m_handle);
        return 1;
    }

    hfcn = (*hfcnpt)();
    if (hfcn == NULL) {
        fprintf(stderr, "getpointer returned a null function pointer\n");
        dlclose(m_handle);
        return 1;
    }

    cudaError_t err = cudaMemcpyToSymbol(dfcn, &hfcn, sizeof(fcn), 0, cudaMemcpyHostToDevice);
    if (err == cudaSuccess) {
        foo<<<1,1>>>();
        // Launch-configuration errors are only visible through cudaGetLastError().
        err = cudaGetLastError();
    }
    if (err == cudaSuccess) {
        // Replaces the deprecated cudaThreadSynchronize(); also surfaces
        // faults raised while the kernel was running.
        err = cudaDeviceSynchronize();
    }
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
    }

    dlclose(m_handle);
    return err == cudaSuccess ? 0 : 1;
}
But this way I get the following error when debugging with cuda-gdb:
CUDA Exception: Warp Illegal Instruction
Program received signal CUDA_EXCEPTION_4, Warp Illegal Instruction.
0x0000000000806b30 in dtest () at func.cu:5
I appreciate any help you all can give me! :)
Calling a __device__ function in one compilation unit from device code in another compilation unit requires separate compilation with device linking usage of nvcc.
However, such usage with libraries only works with static libraries.
Therefore if the target __device__ function is in the .so library, and the calling code is outside of the .so library, your approach cannot work, with the current nvcc toolchain.
The only "workarounds" I can suggest would be to put the desired target function in a static library, or else put both caller and target inside the same .so library. There are a number of questions/answers on the cuda tag which give examples of these alternate approaches.

Debugging CUFFTW interface plan creation

I am beginning to port an existing fftw3 application to make use of the cuda fftw libraries. The initial stage is to simply replace the fftw3.h header with the cufft.h header and link the cufft libraries instead of the fftw3 libraries.
That is simple enough, and the code compiles with nvcc. However when I execute the code the application is unable to create a plan using the fftw_plan_guru_dft command (it just returns 0 instead of a valid plan).
Since there are no errors reported I am at a loss as to how I might debug this issue. cuda-gdb and gdb do not provide any further insight. They simply report
Error: Internal error reported by CUDA debugger API (error=7). The application cannot be further debugged.
UPDATE: So here is the minimum working example. As mentioned in my comment to Talonmies, this code is autogenerated by a scientific differential equation solver. So please excuse the function names etc.
#define real Re
#define imag Im
#include <complex>
#undef real
#undef imag
#include <cufftw.h>
#include <stdio.h>
// Tries to create a forward, in-place, 1-D complex plan of length 128 through
// the guru interface and reports whether plan creation succeeded.
// Returns 0 when a plan was created, 1 otherwise.
//
// NOTE(review): the accompanying discussion says cufftw supports
// fftw_plan_guru_dft only partially, so plan creation may still fail here
// even with valid strides.
int main(void) {
    int _transform_sizes_index = 1, _loop_sizes_index = 0;
    // _loop_sizes is passed with a rank of 0 (_loop_sizes_index), so its
    // contents are left unset.
    fftw_iodim _transform_sizes[1], _loop_sizes[2];
    _transform_sizes[0].n = 128;
    // Input/output strides in elements: _data_in is a contiguous array, so
    // consecutive samples are 1 element apart (the original used 0).
    _transform_sizes[0].is = 1;
    _transform_sizes[0].os = 1;
    fftw_complex _data_in[128] = {0.};
    static fftw_plan _fftw_forward_plan = NULL;
    _fftw_forward_plan = fftw_plan_guru_dft(
        _transform_sizes_index, _transform_sizes,
        _loop_sizes_index, _loop_sizes,
        reinterpret_cast<fftw_complex*>(_data_in),
        reinterpret_cast<fftw_complex*>(_data_in),
        FFTW_FORWARD, FFTW_PATIENT);
    if (!_fftw_forward_plan) {
        printf("Error: Unable to create forward plan\n");
        return 1;
    }
    // Release the plan's resources before exiting.
    fftw_destroy_plan(_fftw_forward_plan);
    return 0;
}
Unless anyone else knows what I am doing wrong, it looks like this particular functionality of fftw3 may not be supported by cufftw.
As talonmies pointed out, the fftw_plan_guru_dft only has partial support in the cufftw library. The above example will run if you instead make use of the basic level fftw_plan_dft. More concretely
#define real Re
#define imag Im
#include <complex>
#undef real
#undef imag
#include <cufftw.h>
#include <stdio.h>
// Creates a forward, in-place, 1-D complex plan of length 128 through the
// basic fftw_plan_dft interface and reports whether plan creation succeeded.
// Returns 0 when a plan was created, 1 otherwise.
int main(void) {
    // Rank of the transform (number of dimensions listed in _transform_sizes).
    int _transform_sizes_index = 1;
    int _transform_sizes[1] = {128};
    fftw_complex _data_in[128] = {0.};
    static fftw_plan _fftw_forward_plan = NULL;
    _fftw_forward_plan = fftw_plan_dft(
        _transform_sizes_index, _transform_sizes,
        reinterpret_cast<fftw_complex*>(_data_in),
        reinterpret_cast<fftw_complex*>(_data_in),
        FFTW_FORWARD, FFTW_PATIENT);
    if (!_fftw_forward_plan) {
        printf("Error: Unable to create forward plan\n");
        return 1;
    }
    // Release the plan's resources before exiting.
    fftw_destroy_plan(_fftw_forward_plan);
    return 0;
}

invalid resource in a windows cuda project

I've ported a cuda project from linux to windows (basically just added few defines and typedefs in the header file). I'm using visual studio 2008, and the cuda runtime api custom build rules from the SDK. The code is c, not c++ (and I'm compiling /TC not /TP)
I'm having scope issues that I didn't have in linux. Global variables in my header file aren't shared between the .c files and .cu files.
I've created a simplified project, and here is all of the code:
main.h:
#ifndef MAIN_H
#define MAIN_H

#include <stdio.h>
#include <cuda.h>
#include <cuda_runtime.h>

/*
 * Event handle used by main.c and kernel.cu.
 * NOTE(review): this is a definition placed in a header, not an `extern`
 * declaration. The accompanying discussion reports that main.c and kernel.cu
 * each end up with their own instance -- confirm and consider declaring it
 * `extern` here with a single definition in one source file.
 */
cudaEvent_t cudaEventStart;

/* func() is declared with C linkage when this header is read by a C++ compiler. */
#ifdef __cplusplus
extern "C" {
#endif
void func(void);
#ifdef __cplusplus
}
#endif

#endif /* MAIN_H */
main.c:
#include "main.h"
int main(void)
{
int iDevice = 0;
cudaSetDevice(iDevice);
cudaFree(0);
cudaGetDevice(&iDevice);
printf("device: %d\n", iDevice);
cudaEventCreate(&cudaEventStart);
printf("create event: %d\n", (int) cudaEventStart);
func();
cudaEventDestroy(cudaEventStart);
printf("destroy event: %d\n", (int) cudaEventStart);
return cudaThreadExit();
}
kernel.cu:
#include "main.h"
/* Prints the value of the global event handle cudaEventStart as seen from
 * this compilation unit. */
void func(void)
{
    const int handleValue = (int) cudaEventStart;

    printf("event in cu: %d\n", handleValue);
}
output:
device: 0
create event: 44199920
event in cu: 0
destroy event: 44199920
Any ideas about what I am doing wrong here? How do I need to change my setup so that it works in visual studio? Ideally, I'd like a setup that works multi-platform.
CUDA 3.2, GTX 480, 64-bit Win7, 263.06 general
What you are trying to do:
1. Would not work even without CUDA -- try renaming kernel.cu to kernel.c and recompile. You will get a linker error because cudaEventStart will be multiply defined -- in each compilation unit (.c file) that includes it. You would need to make the variable static, and initialize it in only one compilation unit.
2. Compiles in CUDA because CUDA does not have a linker, and therefore code in compilation units compiled by nvcc (.cu files) cannot reference symbols in other compilation units. CUDA doesn't support static global variables currently. In the future CUDA will have a linker, but currently it does not.
What is happening is that each compilation unit is getting its own, non-conflicting instance of cudaEventStart.
What you can do is get rid of the global variable (make it a local variable in main()), add cudaEvent_t parameters to the functions that need to use the event, and then pass the event variable around.
BTW, in your second post, you have circular #includes...
I modified my simplified example (with success) by including the .cu file in the header and removing the forward declarations of the .cu file function.
main.h:
/* Shared header for main.c and kernel.cu (second version of the example). */
#include <stdio.h>
#include <cuda.h>
#include <cuda_runtime.h>
/* Pulls the body of func() directly into every file that includes main.h.
 * kernel.cu includes main.h back; kernel.cu's own KERNEL_CU guard is what
 * stops the recursion, since this header has no include guard. */
#include "kernel.cu"
/* Global event handle; it is defined after kernel.cu has been included. */
cudaEvent_t cudaEventStart;
main.c:
#include "main.h"
int main(void)
{
int iDevice = 0;
cudaSetDevice(iDevice);
cudaFree(0);
cudaGetDevice(&iDevice);
printf("device: %d\n", iDevice);
cudaEventCreate(&cudaEventStart);
printf("create event: %d\n", (int) cudaEventStart);
func();
cudaEventDestroy(cudaEventStart);
printf("destroy event: %d\n", (int) cudaEventStart);
return cudaThreadExit();
}
kernel.cu:
#ifndef KERNEL_CU
#define KERNEL_CU

#include "main.h"

/* Prototype for func(), declared ahead of its definition. */
void func(void);

/* Prints the value of the global event handle cudaEventStart. */
void func(void)
{
    const int handleValue = (int) cudaEventStart;

    printf("event in cu: %d\n", handleValue);
}

#endif /* KERNEL_CU */
output:
device: 0
create event: 42784024
event in cu: 42784024
destroy event: 42784024
About to see if it works in my real project, and whether the solution is portable back to linux.

Using assert within kernel invocation

Is there a convenient way to use asserts inside kernels running on the device?
CUDA now has a native assert function. Use assert(...). If its argument is zero, it will stop kernel execution and return an error. (or trigger a breakpoint if in CUDA debugging.)
Make sure to include "assert.h". Also, this requires compute capability 2.x or higher, and is not supported on MacOS. For more details see CUDA C Programming Guide, Section B.16.
The programming guide also includes this example:
#include <assert.h>
// Kernel demonstrating device-side assert(): the first assertion passes and
// the second one fails on purpose.
__global__ void testAssert(void)
{
int is_one = 1;
int should_be_one = 0;
// This will have no effect: the argument is non-zero, so the assertion holds
assert(is_one);
// This will halt kernel execution: the argument is zero
assert(should_be_one);
}
// Launches testAssert on a single thread and waits for it to finish.
// Returns 0 when the launch and the kernel both completed without error, and
// 1 otherwise (for example when a device-side assertion fired).
int main(int argc, char* argv[])
{
    testAssert<<<1,1>>>();
    // Launch-configuration errors are only reported by cudaGetLastError().
    cudaError_t err = cudaGetLastError();
    if (err == cudaSuccess) {
        // Errors raised while the kernel runs surface at the next sync call.
        err = cudaDeviceSynchronize();
    }
    return err == cudaSuccess ? 0 : 1;
}
// Returns from the enclosing void function when `condition` is false.
// Wrapped in do { } while (0) so that `MYASSERT(x);` behaves as one statement
// and can be used safely as the body of an if/else without braces.
#define MYASSERT(condition) \
do { if (!(condition)) { return; } } while (0)
MYASSERT(condition);
If you need something fancier, you can use cuPrintf(), which is available from the CUDA site for registered developers.