I am trying to perform a sum reduction using CUB and 2D arrays of type float/double.
Although it works for certain combinations of rows+columns, for relatively larger arrays, I get an illegal memory access error during the last transfer.
A minimal example is the following:
#include <stdio.h>
#include <stdlib.h>
#include <cub/device/device_reduce.cuh>
#include "cuda_runtime.h"
#ifdef DP
#define real double
#else
#define real float
#endif
void generatedata(const int num, real* vec, real start, real finish) {
real rrange = finish - start;
for (auto i = 0; i < num; ++i)
vec[i] = rand() / float(RAND_MAX) * rrange + start;
}
// Host-side reference reduction: returns the sum of vec[0 .. num).
//   num - number of elements to add up
//   vec - input array (host memory), at least num elements long
// The running total is kept in double precision even when `real` is float.
// A float accumulator stops absorbing small addends once the total grows
// large, which would make this "reference" value less accurate than the GPU
// result it is compared against.
real reduce_to_sum(const int num, const real* vec) {
    double total = 0.0;
    for (int i = 0; i < num; ++i)
        total += vec[i];
    return static_cast<real>(total);
}
int main() {
int rows = 2001;
int cols = 3145;
size_t msize = rows * cols;
real* data = (real*)malloc(msize * sizeof(real));
if (!data)
return -999;
generatedata(msize, data, 0., 50.);
real ref_sum = reduce_to_sum(msize, data);
real* d_data_in = nullptr;
real* d_data_out = nullptr;
size_t pitch_in, pitch_out;
cudaError_t err = cudaMallocPitch(&d_data_in, &pitch_in, cols * sizeof(real), rows);
if (err != cudaSuccess) {
printf("data_in :: %s \n", cudaGetErrorString(err));
return -999;
}
err = cudaMallocPitch(&d_data_out, &pitch_out, cols * sizeof(real), rows);
if (err != cudaSuccess) {
printf("data_out :: %s \n", cudaGetErrorString(err));
return -999;
}
err = cudaMemset(d_data_in, 0, rows * pitch_in);
if (err != cudaSuccess) {
printf("set data_in :: %s \n", cudaGetErrorString(err));
return -999;
}
err = cudaMemcpy2D(d_data_in, pitch_in, data, cols * sizeof(real), cols * sizeof(real), rows, cudaMemcpyHostToDevice);
if (err != cudaSuccess) {
printf("copy data :: %s \n", cudaGetErrorString(err));
return -999;
}
void* d_temp = nullptr;
size_t temp_bytes = 0;
cub::DeviceReduce::Sum(d_temp, temp_bytes, d_data_in, d_data_out, rows * pitch_out);
err = cudaMalloc(&d_temp, temp_bytes);
if (err != cudaSuccess) {
printf("temp :: %s \n", cudaGetErrorString(err));
return -999;
}
err = cudaMemset(d_data_out, 0, rows * pitch_out);
if (err != cudaSuccess) {
printf("set temp :: %s \n", cudaGetErrorString(err));
return -999;
}
// Run sum-reduction
cub::DeviceReduce::Sum(d_temp, temp_bytes, d_data_in, d_data_out, rows * pitch_out);
err = cudaGetLastError();
if (err != cudaSuccess) {
printf("reduction :: %s \n", cudaGetErrorString(err));
return -999;
}
real gpu_sum = real(0.0);
err = cudaMemcpy(&gpu_sum, d_data_out, sizeof(real), cudaMemcpyDeviceToHost);
if (err != cudaSuccess) {
printf("copy final :: %s \n", cudaGetErrorString(err));
return -999;
}
printf("Difference in sum (h)%f - (d)%f = %f \n", ref_sum, gpu_sum, ref_sum - gpu_sum);
if (data) free(data);
if (d_data_in) cudaFree(d_data_in);
if (d_data_out) cudaFree(d_data_out);
if (d_temp) cudaFree(d_temp);
cudaDeviceReset();
return 0;
}
The error is thrown at "copy final ::". I am a bit confused as to why certain rows x columns combinations work and others don't. I did notice that it is the larger values that cause it, but I can't get my head around why.
Any suggestions would be much appreciated.
The 5th parameter of cub::DeviceReduce::Sum should be the number of input elements. However, rows * pitch_out is the size of the output buffer in bytes.
Assuming pitch_in % sizeof(real) == 0, the following call may work.
cub::DeviceReduce::Sum(d_temp, temp_bytes, d_data_in, d_data_out, rows * (pitch_in / sizeof(real)));
Also note that cub::DeviceReduce::Sum may return before the reduction is complete. In this case, if any error happened during execution, this error will be reported by cudaMemcpy.
I am using MySQL in my C application. I have created a function
GetInDB(char *query, cJSON **json){...
that will allocate the json as a json array, query the db, and store the records in the array. The problem is that it seems difficult to quickly get the field name of each column in a table. Since I don't know the string length of each name in advance, I have to iterate through the results and dynamically allocate (!) a string that is large enough to hold each one. Then, I have to iterate through each record. Is there a better way?
/*
 * Runs `query` on the global connection `mysqldb` and stores the result set
 * in *json as a cJSON array holding one object per row. Each object maps
 * column name -> column value as a string; SQL NULL is stored as the text
 * "NULL".
 *
 *   query - SQL statement to execute
 *   json  - in/out: any array already stored in *json is deleted and
 *           replaced by a freshly created one
 *
 * Returns 0 on success, -1 on failure (details are printed to stderr).
 */
int GetInDB(const char *query, cJSON **json)
{
    if (*json != NULL) {
        cJSON_Delete(*json);
    }
    *json = cJSON_CreateArray();
    if (*json == NULL) {
        fprintf(stderr, "GetInDB: could not allocate the JSON array\n");
        return -1;
    }
    if (mysql_query(mysqldb, query)) {
        fprintf(stderr, "%s\n", mysql_error(mysqldb));
        return -1;
    }
    MYSQL_RES *result = mysql_store_result(mysqldb);
    if (result == NULL) {
        fprintf(stderr, "%s\n", mysql_error(mysqldb));
        return -1;
    }

    /*
     * mysql_fetch_fields() hands back the metadata of every column as one
     * array that stays valid until mysql_free_result(), so the column names
     * can be used in place; no per-name copy or allocation is needed.
     */
    unsigned int num_fields = mysql_num_fields(result);
    MYSQL_FIELD *fields = mysql_fetch_fields(result);
    MYSQL_ROW row;
    unsigned int i;
    int status = 0;

    while ((row = mysql_fetch_row(result))) {
        cJSON *obj = cJSON_CreateObject();
        if (obj == NULL) {
            fprintf(stderr, "GetInDB: could not allocate a JSON row object\n");
            status = -1;
            break;
        }
        for (i = 0; i < num_fields; i++) {
            cJSON_AddStringToObject(obj, fields[i].name, row[i] ? row[i] : "NULL");
        }
        cJSON_AddItemToArray(*json, obj);
    }

    mysql_free_result(result);
    return status;
}
I spent the last few days trying to figure out how to download a file from a URL.
This is my first challenge with sockets, and I'm using it to gain an understanding of protocols, so I would like to do it without the cURL library and only in C!
I searched a lot. I am now able to printf the source code of a page, but I think it is different with a file: it is not enough to just write the received data from the buffer to a file, right?
any tips?
#include <sys/socket.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <netdb.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <errno.h>
#include <arpa/inet.h>
/*
 * Fetches http://<domain>/<path> over a plain TCP socket. The response
 * headers are echoed to stdout and the response body is stored, byte for
 * byte, in "received_file".
 *
 * Limitation: a body sent with "Transfer-Encoding: chunked" is written as
 * received (the chunk framing is not decoded).
 *
 * Returns 0 on success, 1 if receiving failed; exits with 1 on setup errors.
 */
int main(void)
{
    char domain[] = "www.sstatic.net", path[]="stackexchange/img/logos/so/so-logo-med.png"; //example
    static const char terminator[] = "\r\n\r\n"; /* blank line ending the headers */
    int sock, bytes_received;
    int matched = 0;      /* how many terminator bytes have been seen in a row */
    int header_done = 0;  /* set once the whole header block has been consumed */
    int rc = 0;
    char send_data[1024], recv_data[9999];
    struct sockaddr_in server_addr;
    struct hostent *he;
    FILE *fp;

    he = gethostbyname(domain);
    if (he == NULL){
        herror("gethostbyname");
        exit(1);
    }
    if ((sock = socket(AF_INET, SOCK_STREAM, 0)) == -1){
        perror("Socket");
        exit(1);
    }

    memset(&server_addr, 0, sizeof(server_addr));
    server_addr.sin_family = AF_INET;
    server_addr.sin_port = htons(80);
    memcpy(&server_addr.sin_addr, he->h_addr_list[0], sizeof(server_addr.sin_addr));

    if (connect(sock, (struct sockaddr *)&server_addr, sizeof(server_addr)) == -1){
        perror("Connect");
        close(sock);
        exit(1);
    }

    /*
     * The Host header carries the bare host name (no leading '/').
     * "Connection: close" makes the server close the socket after the body,
     * which is what ends the receive loop below.
     */
    snprintf(send_data, sizeof(send_data),
             "GET /%s HTTP/1.1\r\nHost: %s\r\nConnection: close\r\n\r\n", path, domain);
    //printf("%s\n", send_data);
    if (send(sock, send_data, strlen(send_data), 0) == -1){
        perror("send");
        close(sock);
        exit(1);
    }
    printf("Data sended.\n");

    fp = fopen("received_file","wb");
    if (fp == NULL){
        perror("fopen");
        close(sock);
        exit(1);
    }

    /* One recv() returns only part of the response; read until the peer closes. */
    while ((bytes_received = recv(sock, recv_data, sizeof(recv_data), 0)) > 0){
        int body_start = 0;

        if (!header_done){
            /* Scan for "\r\n\r\n"; the match state survives across recv() calls. */
            while (body_start < bytes_received && !header_done){
                char c = recv_data[body_start++];
                if (c == terminator[matched])
                    matched++;
                else
                    matched = (c == '\r') ? 1 : 0;
                header_done = (matched == 4);
            }
            fwrite(recv_data, 1, (size_t)body_start, stdout);
        }

        if (body_start < bytes_received){
            /* The body may contain NUL bytes, so write by length, not strlen(). */
            size_t body_len = (size_t)(bytes_received - body_start);
            if (fwrite(recv_data + body_start, 1, body_len, fp) != body_len){
                perror("fwrite");
                fclose(fp);
                close(sock);
                exit(1);
            }
        }
    }
    if (bytes_received == -1){
        perror("recv");
        rc = 1;
    }
    printf("Data receieved.\n");

    close(sock);
    if (fclose(fp) != 0){
        perror("fclose");
        rc = 1;
    }
    return rc;
}
UPDATE 1: thanks to milevyo for some improvements!
It works well with a txt file, but it doesn't with other kinds of files (a png in this case).
#include <sys/socket.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <netdb.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <errno.h>
#include <arpa/inet.h>
/*
 * Sends an HTTP GET for http://<domain>/<path> and dumps the complete raw
 * response (status line, headers and body) both to "received_file" and to
 * stdout. The headers are NOT stripped, so for a binary resource the output
 * file is the HTTP response, not the resource itself.
 *
 * Exit codes: 1 setup/connect failure, 2 send failure, 3 receive/write failure.
 */
int main(void){
    //char domain[] = "www.gnu.org", path[]="/licenses/gpl.txt"; //example
    char domain[] = "sstatic.net", path[]="stackexchange/img/logos/so/so-logo-med.png"; //example
    int sock, bytes_received;
    char send_data[1024], recv_data[9999];
    struct sockaddr_in server_addr;
    struct hostent *he;
    FILE *fp;

    he = gethostbyname(domain);
    if (he == NULL){
        herror("gethostbyname");
        exit(1);
    }
    if ((sock = socket(AF_INET, SOCK_STREAM, 0)) == -1){
        perror("Socket");
        exit(1);
    }

    memset(&server_addr, 0, sizeof(server_addr));
    server_addr.sin_family = AF_INET;
    server_addr.sin_port = htons(80);
    memcpy(&server_addr.sin_addr, he->h_addr_list[0], sizeof(server_addr.sin_addr));

    printf("Connecting ...\n");
    if (connect(sock, (struct sockaddr *)&server_addr, sizeof(server_addr)) == -1){
        perror("Connect");
        close(sock);
        exit(1);
    }

    printf("Sending data ...\n");
    /*
     * "Host: /name" is not a valid host and is what servers answer with
     * "400 Bad Request"; the header must carry the bare host name.
     * "Connection: close" lets the receive loop end when the body is done.
     */
    snprintf(send_data, sizeof(send_data),
             "GET /%s HTTP/1.1\r\nHost: %s\r\nConnection: close\r\n\r\n", path, domain);
    if (send(sock, send_data, strlen(send_data), 0) == -1){
        perror("send");
        close(sock);
        exit(2);
    }
    printf("Data sent.\n");

    fp = fopen("received_file","wb");
    if (fp == NULL){
        perror("fopen");
        close(sock);
        exit(3);
    }

    printf("Recieving data...\n\n");
    while ((bytes_received = recv(sock, recv_data, sizeof(recv_data), 0)) > 0){
        size_t chunk = (size_t)bytes_received;
        if (fwrite(recv_data, 1, chunk, fp) != chunk){
            perror("fwrite");
            fclose(fp);
            close(sock);
            exit(3);
        }
        /* Echo by length: the data may be binary and is not NUL-terminated. */
        fwrite(recv_data, 1, chunk, stdout);
    }
    /* A receive error ends the loop too, so it has to be checked afterwards. */
    if (bytes_received == -1){
        perror("recieve");
        fclose(fp);
        close(sock);
        exit(3);
    }

    close(sock);
    fclose(fp);
    printf("\n\nDone.\n\n");
    return 0;
}
This code produces a 334-byte file (instead of the 12.4 kB of the original file) with this inside:
HTTP/1.1 400 Bad Request
Date: Sat, 28 Nov 2015 16:20:45 GMT
Content-Type: text/html
Content-Length: 177
Connection: close
Server: -nginx
CF-RAY: -
<html>
<head><title>400 Bad Request</title></head>
<body bgcolor="white">
<center><h1>400 Bad Request</h1></center>
<hr><center>cloudflare-nginx</center>
</body>
</html>
Does anybody know how to fix this "400 Bad Request"?
This is an update to the previously posted code. The HTTP protocol is far too large to be fully implemented in just a small example.
Reformatting the code, or suggesting a modification to it, is more than welcome.
#include <sys/socket.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <netdb.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <errno.h>
#include <arpa/inet.h>
#include <string.h>
/*
 * Reads the HTTP status line from `sock` one byte at a time, up to and
 * including its terminating "\r\n", prints it, and returns the numeric
 * status code.
 *
 * Returns the status code, or 0 when the peer closed the connection before
 * the line was complete or when no status code could be parsed.
 * Calls exit(1) if recv() fails.
 */
int ReadHttpStatus(int sock){
    char line[1024];
    size_t len = 0;
    char prev = 0, cur = 0;
    int bytes_received, status = 0;

    printf("Begin Response ..\n");
    while ((bytes_received = recv(sock, &cur, 1, 0)) != 0){
        if (bytes_received == -1){
            perror("ReadHttpStatus");
            exit(1);
        }
        if ((prev == '\r') && (cur == '\n')) break;
        /* Keep what fits; an over-long line is truncated, never overflowed. */
        if (len < sizeof(line) - 1)
            line[len++] = cur;
        prev = cur;
    }
    line[len] = 0;

    /* Skip the protocol token ("HTTP/1.1") and read the code that follows. */
    sscanf(line, "%*s %d ", &status);
    printf("%s\n", line);
    printf("status=%d\n", status);
    printf("End Response ..\n");
    return (bytes_received > 0) ? status : 0;
}
/* Returns 1 when `text` starts with `name`, comparing ASCII letters case-insensitively. */
static int HeaderNameMatches(const char *text, const char *name){
    while (*name){
        char a = *text++, b = *name++;
        if (a >= 'A' && a <= 'Z') a = (char)(a - 'A' + 'a');
        if (b >= 'A' && b <= 'Z') b = (char)(b - 'A' + 'a');
        if (a != b) return 0;
    }
    return 1;
}

/*
 * Reads the HTTP header block from `sock` one byte at a time, up to and
 * including the blank line ("\r\n\r\n") that ends it.
 * The only field that is parsed is 'Content-Length'.
 *
 * Returns the Content-Length value, -1 when the header is absent or not
 * numeric (unknown size), or 0 when the peer closed the connection before
 * the header block ended. Calls exit(1) if recv() fails.
 */
int ParseHeader(int sock){
    char buff[8192];
    size_t len = 0;
    /*
     * Sliding window over the last four bytes read. It starts out as if the
     * "\r\n" that ended the status line had just been seen, so that a
     * response with no header fields at all (a bare "\r\n") is recognised.
     */
    char tail[4] = {0, 0, '\r', '\n'};
    char cur = 0;
    int bytes_received;
    int content_length = -1; /* unknown size until the field is found */

    printf("Begin HEADER ..\n");
    while ((bytes_received = recv(sock, &cur, 1, 0)) != 0){
        if (bytes_received == -1){
            perror("Parse Header");
            exit(1);
        }
        tail[0] = tail[1];
        tail[1] = tail[2];
        tail[2] = tail[3];
        tail[3] = cur;
        /* Store what fits; keep consuming up to the blank line regardless. */
        if (len < sizeof(buff) - 1)
            buff[len++] = cur;
        if ((tail[0] == '\r') && (tail[1] == '\n') &&
            (tail[2] == '\r') && (tail[3] == '\n'))
            break;
    }
    buff[len] = 0;
    //printf("%s",buff);

    if (bytes_received){
        /* Field names are case-insensitive and start at the beginning of a line. */
        const char *line = buff;
        while (line && *line){
            if (HeaderNameMatches(line, "content-length:")){
                int value = -1;
                if (sscanf(line + 15, "%d", &value) == 1 && value >= 0)
                    content_length = value;
                break;
            }
            line = strchr(line, '\n');
            if (line) line++;
        }
        printf("Content-Length: %d\n", content_length);
    }
    printf("End HEADER ..\n");
    return bytes_received ? content_length : 0;
}
/*
 * Downloads http://<domain>/<path> over a plain TCP socket and saves the
 * response body to "test.png". The status line and headers are consumed by
 * ReadHttpStatus() and ParseHeader(); the body is saved only when the
 * server answers 200.
 *
 * Returns 0 on success and 1 when nothing was saved.
 * Exit codes: 1 setup/connect failure, 2 send failure, 3 receive/write failure.
 */
int main(void){
    char domain[] = "sstatic.net", path[]="stackexchange/img/logos/so/so-logo-med.png";
    int sock, bytes_received;
    int rc = 1;
    char send_data[1024], recv_data[1024];
    struct sockaddr_in server_addr;
    struct hostent *he;

    he = gethostbyname(domain);
    if (he == NULL){
        herror("gethostbyname");
        exit(1);
    }
    if ((sock = socket(AF_INET, SOCK_STREAM, 0)) == -1){
        perror("Socket");
        exit(1);
    }

    memset(&server_addr, 0, sizeof(server_addr));
    server_addr.sin_family = AF_INET;
    server_addr.sin_port = htons(80);
    memcpy(&server_addr.sin_addr, he->h_addr_list[0], sizeof(server_addr.sin_addr));

    printf("Connecting ...\n");
    if (connect(sock, (struct sockaddr *)&server_addr, sizeof(server_addr)) == -1){
        perror("Connect");
        close(sock);
        exit(1);
    }

    printf("Sending data ...\n");
    /*
     * "Connection: close" makes the server close the socket after the body,
     * so the receive loop also ends when no Content-Length was sent.
     */
    snprintf(send_data, sizeof(send_data),
             "GET /%s HTTP/1.1\r\nHost: %s\r\nConnection: close\r\n\r\n", path, domain);
    if (send(sock, send_data, strlen(send_data), 0) == -1){
        perror("send");
        close(sock);
        exit(2);
    }
    printf("Data sent.\n");

    printf("Recieving data...\n\n");
    int status = ReadHttpStatus(sock);
    int contentlengh = 0;
    if (status == 200){
        contentlengh = ParseHeader(sock);
    }else{
        /* Do not store an error page or a redirect body as the image. */
        fprintf(stderr, "Unexpected HTTP status: %d\n", status);
    }

    if (contentlengh){
        int bytes = 0;
        FILE* fd = fopen("test.png","wb");
        if (fd == NULL){
            perror("fopen");
            close(sock);
            exit(3);
        }
        printf("Saving data...\n\n");
        while ((bytes_received = recv(sock, recv_data, sizeof(recv_data), 0)) != 0){
            if (bytes_received == -1){
                perror("recieve");
                fclose(fd);
                close(sock);
                exit(3);
            }
            if (fwrite(recv_data, 1, (size_t)bytes_received, fd) != (size_t)bytes_received){
                perror("fwrite");
                fclose(fd);
                close(sock);
                exit(3);
            }
            bytes += bytes_received;
            printf("Bytes recieved: %d from %d\n", bytes, contentlengh);
            /* contentlengh is -1 when the size is unknown: then read until close. */
            if (contentlengh > 0 && bytes >= contentlengh)
                break;
        }
        fclose(fd);
        rc = 0;
    }

    close(sock);
    printf("\n\nDone.\n\n");
    return rc;
}
Try something like the code below:
#include <sys/socket.h>
#include <sys/errno.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <unistd.h>
#include <string.h>
#include <strings.h>
#include <stdlib.h>
#include <stdio.h>
#include <openssl/ssl.h>
#include <openssl/err.h>
#define BUFLEN 4096
#define HOST "www.t.edu.pk"
#define PORT 443
int main()
{
int sock, iResult;
char *cmd, *ip;
char recvbuf[BUFLEN];
//
struct sockaddr_in sin;
struct hostent* hent;
//
hent = gethostbyname(HOST);
if(hent == NULL)
{
printf("gethostbyname failed: %d\n", errno);
return -1;
}
printf("gethostbyname succeeded\n");
ip = inet_ntoa(*((struct in_addr*)hent->h_addr_list[0]));
printf("Host IP: %s\n", ip);
//
sock = socket(AF_INET, SOCK_STREAM, 0);
if(sock == -1)
{
printf("socket failed: %d\n", errno);
return -1;
}
printf("socket created\n");
//
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = inet_addr(ip);
sin.sin_port = htons(PORT);
iResult = connect(sock, (struct sockaddr*)&sin, sizeof(sin));
if(iResult < 0)
{
printf("connect failed: %d\n", errno);
return -1;
}
printf("connect succeeded\n");
//
iResult = SSL_library_init();
if(iResult < 0)
{
printf("SSL failed\n");
return -1;
}
printf("SSL library initialised\n");
OpenSSL_add_all_algorithms();
ERR_load_crypto_strings();
SSL_load_error_strings();
SSL_CTX* ctx = SSL_CTX_new(TLSv1_2_client_method());
if(ctx == NULL)
{
printf("ctx failed\n");
ERR_print_errors_fp(stderr);
return -1;
}
printf("ctx loaded\n");
SSL* ssl = SSL_new(ctx);
if(ssl == NULL)
{
printf("ssl failed\n");
ERR_print_errors_fp(stderr);
return -1;
}
printf("ssl loaded\n");
SSL_set_fd(ssl, sock);
SSL_connect(ssl);
//
cmd = "GET / HTTP/1.1\r\nHost: www.t.edu.pk\r\n\r\n";
iResult = SSL_write(ssl, cmd, strlen(cmd));
if(iResult <= 0)
{
printf("SSL write failed\n");
ERR_print_errors_fp(stderr);
return -1;
}
printf("Byte(s) sent: %d\n", iResult);
bzero(recvbuf, BUFLEN);
do
{
iResult = SSL_read(ssl, recvbuf, BUFLEN - 1);
if(iResult < 0)
{
printf("error receiving data\n");
break;
}
if(iResult == 0)
{
printf("host closed connection\n");
break;
}
printf("%s\n", recvbuf);
}while(iResult > 0);
//
iResult = SSL_shutdown(ssl);
if(iResult == 0)
{
printf("SSL shutdown in progress...\n");
}
iResult = SSL_shutdown(ssl);
if(iResult == 1)
{
printf("SSL shutdown complete!\n");
}
if(iResult == -1)
{
printf("SSL shutdown unsuccessful!\n");
}
SSL_CTX_free(ctx);
//
iResult = shutdown(sock, SHUT_RDWR);
if(iResult == -1)
{
printf("Socket shutdown failed: %d\n", errno);
return -1;
}
printf("Socket shutdown succeeded\n");
iResult = close(sock);
if(iResult != 0)
{
printf("error closing socket: %d\n", errno);
return -1;
}
printf("Socket closed\n");
//
return 0;
}
This works for C/C++ in a Linux environment. You can build it with a makefile, or just add the -lcurl option to g++.
Note that you need to have libcurl installed.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <curl/curl.h>
/* Log file, opened in text mode at program start-up; WriteMemoryCallback
   writes one line with the chunk sizes into it per call and main() closes it.
   NOTE(review): calling fopen() in a file-scope initializer needs a C++
   compiler, and the result is not checked for NULL here. */
FILE *fp = fopen("file.txt", "w");
/* Name of the file that receives a copy of the downloaded bytes. */
char outfilename[FILENAME_MAX] = "file_downloaded.txt";
/* Download target, opened in binary mode; WriteMemoryCallback writes every
   received chunk to it and main() closes it. Not checked for NULL here. */
FILE *fp1 = fopen(outfilename,"wb");
/* Growable byte buffer that accumulates the downloaded data. */
struct MemoryStruct {
    char *memory;   /* heap block holding `size` bytes plus a trailing 0 byte */
    size_t size;    /* number of payload bytes currently stored */
};

/*
 * libcurl write callback (installed with CURLOPT_WRITEFUNCTION).
 *
 *   contents - chunk of received data (not NUL-terminated)
 *   size     - size of one item
 *   nmemb    - number of items; the chunk is size * nmemb bytes long
 *   userp    - the struct MemoryStruct registered with CURLOPT_WRITEDATA
 *
 * Appends the chunk to the in-memory buffer, mirrors it to the file `fp1`
 * and logs the chunk sizes to `fp`. The log and the file mirror are best
 * effort: a missing stream is skipped and a short write is reported on
 * stderr, without aborting the in-memory download.
 *
 * Returns the number of bytes handled; returning 0 (out of memory) makes
 * libcurl abort the transfer.
 */
static size_t
WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp)
{
    size_t realsize = size * nmemb;
    struct MemoryStruct *mem = (struct MemoryStruct *)userp;
    char *ptr = (char*)realloc(mem->memory, mem->size + realsize + 1);
    if(!ptr) {
        /* out of memory! */
        printf("not enough memory (realloc returned NULL)\n");
        return 0;
    }
    if(fp) {
        /* size_t must be printed with %zu; %ld is wrong where long != size_t */
        fprintf(fp, "%zu - %zu - %zu\n", realsize, size, nmemb);
    }
    if(fp1) {
        size_t written = fwrite(contents, size, nmemb, fp1);
        if(written != nmemb) {
            fprintf(stderr, "short write to output file (%zu of %zu items)\n",
                    written, nmemb);
        }
    }
    mem->memory = ptr;
    memcpy(&(mem->memory[mem->size]), contents, realsize);
    mem->size += realsize;
    /* keep the buffer NUL-terminated so it can be used as a C string */
    mem->memory[mem->size] = 0;
    return realsize;
}
/*
 * Downloads `link_download` with libcurl. Every received chunk is handed to
 * WriteMemoryCallback, which collects it in `chunk`; on success the total
 * number of bytes retrieved is printed.
 *
 * Returns 0 on success and 1 when initialisation or the transfer failed.
 */
int main(void)
{
    char link_download[] = "https://www.example.com/";
    CURL *curl_handle;
    CURLcode res;
    int rc = 0;
    struct MemoryStruct chunk;
    chunk.memory = (char*)malloc(1); /* will be grown as needed by the realloc above */
    chunk.size = 0; /* no data at this point */
    if(chunk.memory == NULL) {
        fprintf(stderr, "not enough memory (malloc returned NULL)\n");
        return 1;
    }
    res = curl_global_init(CURL_GLOBAL_ALL);
    if(res != CURLE_OK) {
        fprintf(stderr, "curl_global_init() failed: %s\n",
                curl_easy_strerror(res));
        free(chunk.memory);
        return 1;
    }
    /* init the curl session */
    curl_handle = curl_easy_init();
    if(curl_handle == NULL) {
        fprintf(stderr, "curl_easy_init() failed\n");
        rc = 1;
    }
    else {
        /* specify URL to get */
        curl_easy_setopt(curl_handle, CURLOPT_URL, link_download);
        /* send all data to this function */
        curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
        /* we pass our 'chunk' struct to the callback function */
        curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk);
        /* some servers do not like requests that are made without a user-agent
           field, so we provide one */
        curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "libcurl-agent/1.0");
        /* get it! */
        res = curl_easy_perform(curl_handle);
        /* check for errors */
        if(res != CURLE_OK) {
            fprintf(stderr, "curl_easy_perform() failed: %s\n",
                    curl_easy_strerror(res));
            rc = 1;
        }
        else {
            /*
             * Now, our chunk.memory points to a memory block that is chunk.size
             * bytes big and contains the remote file.
             *
             * Do something nice with it!
             */
            printf("%lu bytes retrieved\n", (unsigned long)chunk.size);
        }
        /* cleanup curl stuff */
        curl_easy_cleanup(curl_handle);
    }
    free(chunk.memory);
    /* we are done with libcurl, so clean it up */
    curl_global_cleanup();
    /* either stream is NULL if its fopen() failed; fclose(NULL) is undefined */
    if(fp) fclose(fp);
    if(fp1) fclose(fp1);
    return rc;
}
The function nppiDotProd_8u64f_C1R causes a cudaErrorUnknown. I'm able to compile and run properly boxFilterNPP and histEqualizationNPP so I assume my system is healthy. I'm running with a GTX470 (compute capability 2.0), CUDA 5.5 and VS2012 x64 on Windows7. I've also run many variations of it on two systems and having the same problem. Here is the code:
NppGpuComputeCapability capability = nppGetGpuComputeCapability();
NppiSize sizeROI;
sizeROI.width = 640;
sizeROI.height = 480;
int nBufferSize = 0;
NppStatus status = nppiDotProdGetBufferHostSize_8u64f_C1R(sizeROI,&nBufferSize);
if(status != NPP_SUCCESS) return status;
unsigned char *pDeviceBuffer;
cudaError_t err = cudaMalloc((void**)&pDeviceBuffer,nBufferSize);
if(err != cudaSuccess) return err;
int stepByte1 = 0;
Npp8u * buf1 = nppiMalloc_8u_C1(sizeROI.width, sizeROI.height, &stepByte1);
status = nppiSet_8u_C1R(1,buf1,stepByte1,sizeROI);
if(status != NPP_SUCCESS) return status;
int stepByte2 = 0;
Npp8u * buf2 = nppiMalloc_8u_C1(sizeROI.width, sizeROI.height, &stepByte2);
status = nppiSet_8u_C1R(1,buf2,stepByte2,sizeROI);
if(status != NPP_SUCCESS) return status;
err = cudaDeviceSynchronize();
if(err != cudaSuccess) return err;
double dp = 0;
status = nppiDotProd_8u64f_C1R(buf1,stepByte1,buf2,stepByte2,sizeROI,&dp,pDeviceBuffer);
if(status != NPP_SUCCESS) return status;
err = cudaDeviceSynchronize(); // return cudaErrorUnknown
// CUDA memchecker gives me "OutOfRangeStore" exception
if(err != cudaSuccess) return err;
printf("result: %f\n", dp);
nppiFree(buf1);
nppiFree(buf2);
cudaFree(pDeviceBuffer);
Any idea about my problem?
Thank you very much!!
The result argument in that nppiDotProd call must be a device pointer, not a host pointer. You can fix it by allocating memory for dp on the device, something like :
double * dp ;
cudaMalloc((void **)(&dp), sizeof(Npp64f) * 1);
status = nppiDotProd_8u64f_C1R(buf1,stepByte1,buf2,stepByte2,sizeROI,dp,pDeviceBuffer);
if(status != NPP_SUCCESS) return status;
[disclaimer: written in browser, not compiled or tested, use at your own risk]
You will obviously need to copy the result of the dot product back to the host if you need it.