<Priority queue>Why values are compared twice? - stl

struct St {
int e;
bool operator < (const St &s) const {
cout << "e" << e << endl;
cout << "s" << s.e << endl;
return e > s.e;
}
};
int main() {
priority_queue<St> q;
St s1, s2, s3, s4;
s1.e = 5;
s2.e = 3;
s3.e = 7;
s4.e = 2;
q.push(s1);
cout << "ssssss1" << endl;
q.push(s2);
cout << "ssssss2" << endl;
q.push(s3);
cout << "ssssss3" << endl;
q.push(s4);
cout << "ssssss4" << endl;
cout << q.top().e;
return 0;
}
The result will be like this.
ssssss1
e5
s3
e3
s5
ssssss2
e3
s7
ssssss3
e5
s2
e2
s5
e3
s2
e2
s3
ssssss4
2
Why the values are compared twice when pushed s2 and s4?

Related

I'm trying to extract data from a csv file but I can't exactly get it right

This is what my code should do:
Take a reading with a barcode scanner, it consists of 16-17 string depending on the color palette type.
I segment what i read into smaller parts to process, A, B1, B2, C (the substr works perfectly fine)
I take those substrings and process them into a readable format.
The problem is that I can't (I don't know how to) extract the data correctly so that B2 can be compared to its corresponding colour code and for colorname to be displayed correctly.
Here's a sample of the csv file I'm trying to extract the data from, it consists of multiple rows and two columns:
RAL7016,NEGRO
RAL7017,BLANCO
RAL7018,AZUL
RAL7019,VERDE
YW207F,AZULOSCURO
Y2208I,BEIGE
Y4304I,AMARILLO
YX353F,CIELO
Y2212I,TURQUESA
First row can contain 6 or 7 characters which is why I ask in the code if RAL is present.
If anyone can help me I would really appreciate it. Many thanks.
#include <iostream>
#include <fstream>
#include <string>
#include <sstream>
using namespace std;
main()
{
string scan, A, B1, B2, C;
Init:
int x=0, y=0;
cout << "\nEsperando a scan...";
cin >> scan;
cout << "Codigo:" << scan;
A = scan.substr (0,3);
B1 = scan.substr (3,3);
B2 = scan.substr (6,3);
if (B2 == "RAL") B2 = scan.substr(5,7), C = scan.substr (13,4);
else B2 = scan.substr(5,6), C = scan.substr (12,4);
//Tipo de producto
if (A == "PUE") A = "PUERTA", x=1;
if (A == "PAN") A = "PANEL", x=1;
if (A == "LAC") A = "LACADO", x=1;
if (x=0) A = "PRODUCTO NO AGREGADO AL SISTEMA";
//Tipo de acabado
if (B1 == "BRI") B1 = "BRILLO", x=1;
if (B1 == "TEX") B1 = "TEXTURADO", x=1;
if (B1 == "MAT") B1 = "MATE", x=1;
if (x=0) B1 = "TIPO DE ACABADO NO AGREGADO AL SISTEMA";
//Leer colour desde archivo csv
string colourcode = "blank", colourname = "blank";
ifstream myfile("colourtable.csv");
if(myfile.is_open()){
while(getline(myfile, colourcode, ',') && getline(myfile, colourname)){
if(B2 == colourcode){
if(B2.substr (0,3) == "RAL"){
B2 = colourname.substr(8,20);
}
else B2 = colourname.substr(7,20);
}
}
myfile.close();
}
else B2 = "Falta archivo colourtable.csv";
//Output en pantalla
cout << "\n Producto:" << A << "\n Acabado:" << B1 << "\n colour:" << B2 << "\n N Pedido:" << C << "\n" << "Y = "<< y << "\n";
//colour debug
cout << "\n colourcode:" << colourcode << "\n colourname:" << colourname;
//Output en archivo a pedidos.csv
fstream file;
file.open ("pedidos.csv");
if (file.is_open()){
file << C << "," << B2 << "," << B1;
file.close();
}
else cout << "\n Falta archivo pedidos.csv";
//Reiniciar ciclo
goto Init;
}

Is there a way to convert huge decimal numbers(x10^8) to binary?

I have a code to convert my decimal number to a binary number but since the number is huge 1,4e8, it either always gives me a round number or it uses too much memory and crashes here is my code
double n = 1466333969;
double e = 10487;
double facteurs = 0;
long d = 0;
facteurs = factorisation(n);
d = calculerD(facteurs, e);
long dTemp = d / 2;
int dBin[70] = {};
int i = 0;
cout << dTemp << endl;
do
{
if (dTemp == floor(d / 2))
{
//cout << D << " " << dTemp << endl;
dBin[i] = 0;
}
else dBin[i] = 1;
d = floor(d / 2);
dTemp = d / 2;
//cout << D << " " << dBin[i] << endl;
i++;
} while (d != 0);
I checked and the calculerD function works fine and return the right d but afterwards, it just seems to stay in longs.

Recursion and functions

Thanks for helping everyone. I will continue looking at it so I can better understand! I am still struggling with recursion but I will study it more. Thanks again for all your time and effort for trying to help me
- /
int countEven(int arr[i]){
//I'm not sure what to do here... how to fix it...
int evens = 0;
if(arr[i] <= 0) return 0; //base case
while(arr[i] > 0){
int digit = arr[i]%10; //get the last digit
if(digit%2 == 0){
evens = evens+1;
}
arr[i] = arr[i]/10;
}
cout << evens;
}
}
}
int main(){
cout << "Part A:\n";
int arr[3] = { 5050155, 5, 707070 };
for (int i = 0; i < 3; i++){
cout << "countEven(" << arr[i] << ") = " << countEven(arr[i]) << endl;
cout << "removeEven(" << arr[i] << ") = " << removeEven(arr[i]) << endl;
cout << "hasEven(" << arr[i] << ") = ";
if (hasEven(arr[i])) cout << "Yes" << endl;
else cout << "No" << endl;
printStarDigit(arr[i]);
cout << endl << endl;
}
cout << "Part B:\n";
int a[4] = { 7, 2, 8, 3 };
int b[5] = { 3, 4, 5, 6, 7 };
cout << "The range of array a is " << range(a, 4) << endl;
cout << "The range of array b is " << range(b, 5) << endl;
reverse(a, 4);
reverse(b, 5);
cout << "Array a reversed: ";
for (int i = 0; i < 4; ++i)
cout << a[i] << " ";
cout << endl;
cout << "Array b reversed: ";
for (int i = 0; i < 5; ++i)
cout << b[i] << " ";
cout << endl;
return 0;
}
int countEven(int arr[i]){
Parameters must have simple names, and do not need to be identical to the expressions passed in. arr[i] is not a valid name. A simple name to use here is n.
//I'm not sure what to do here... how to fix it...
int evens = 0;
if(arr[i] <= 0) return 0; //base case
This base case is wrong for two reasons. Firstly, you treat all negative integers as the base case, but -202020 has six even digits. Secondly, you return the wrong value: 0 has one even digit, but you return zero.
A possible base case could be n > -10 && n < 10 (single digit number). I'll let you figure out the expression to return for that base case.
while(arr[i] > 0){
If your task is to write a recursive function, then you shouldn't use a loop here. Instead, see below.
int digit = arr[i]%10; //get the last digit
...
arr[i] = arr[i]/10;
This is a correct way of obtaining the last digit, and everything other than the last digit.
if(digit%2 == 0)
This is a correct way of determining whether the last digit is even.
Now, you need to combine what you have, by observing that the count of even digits is equal to "1 if the last digit is even, else 0", plus the count of even non-last digits. The goal of the exercise is to get you to write "the count of even non-last digits" as countEven(n / 10).
See these code snippets.
int countEven(int n, bool first_time = true){
static int total ;
int num =n;
if (first_time)
{
total = 0;
}
if (num <= 9) {
return num % 2 == 0 ? 1 : 0;
}
int temp = num - (num / 10) * 10;
num = num / 10;
total += temp % 2 == 0 ? 1 : 0;
countEven(num,false);
return total;
}
int removeEven(int n){
int result = 0;
/*TODO*/
return result;
}
bool hasEven(int n){
return countEven(n);
}
void printStarDigit(int* arr){
/*TODO*/
}
int range(int* arr, int n){
int result = *arr;
for (int i = 0; i < n; i++) {
if (result < *(arr + i)) result = *(arr + i);
}
return result;
}
void reverse(int* arr, int n){
int* temp = new int[n];
for (int i = 0; i < n; i++) {
*(temp + i) = *(arr + i);
}
for (int i = 0; i < n; i++) {
*(arr + i) = *(temp + n - 1 - i);
}
delete[] temp;
}
Main implementation :
int main() {
cout << "Part A:\n";
int arr[3] = { 5050155, 5, 707070 };
for (int i = 0; i < 3; i++) {
cout << "countEven(" << arr[i] << ") = " << countEven(arr[i]) << endl;
cout << "removeEven(" << arr[i] << ") = " << removeEven(arr[i]) << endl;
cout << "hasEven(" << arr[i] << ") = ";
if (hasEven(arr[i])) cout << "Yes" << endl;
else cout << "No" << endl;
printStarDigit(arr + i);
cout << endl << endl;
}
cout << "Part B:\n";
int a[4] = { 7, 2, 8, 3 };
int b[5] = { 3, 4, 5, 6, 7 };
cout << "The range of array a is " << range(a, 4) << endl;
cout << "The range of array b is " << range(b, 5) << endl;
reverse(a, 4);
reverse(b, 5);
cout << "Array a reversed: ";
for (int i = 0; i < 4; ++i)
cout << a[i] << " ";
cout << endl;
cout << "Array b reversed: ";
for (int i = 0; i < 5; ++i)
cout << b[i] << " ";
cout << endl;
return 0;
}
Output:

Char Function, number to letter grades

I am relatively new to c++ programming and I am struggling with my code. The objective of this code is to take scores input by the user and calculate the mean, the standard deviation and converting it to a letter grade using the calculations under char gradeFunction. When i try to debug this program using visual studios 2013, i am having a couple problems with the the gradefunction. Again i am new to programming so troubleshooting errors is very hard for me and I would appreciate any help or advice! The program looks like this so far.
#include <iostream>
#include <iomanip>
#include <cmath>
#include <string.h>
#include <string>
using namespace std;
void printArray(int Array[], int count);
double average(double scoreTotal, int count);
double stddev(int Array[], int count, double mean);
char gradeFunction(int scores, double stddev, double mean);
int main()
{
int scores[8];
int count;
double scoreTotal = 0;
int standarddev[8];
double mean;
cout << "Enter scores seperated by blanks:" " ";
for (count = 0; count <= 7; count++)
{
cin >> scores[count];
scoreTotal += scores[count];
mean = scoreTotal / 8;
}
cout << endl;
cout << "Grade Scores by Student" << endl;
cout << "Score" "\t" "Grade" << endl;
cout << "----------------------------------" << endl;
printArray(scores, 8);
cout << gradeFunction(scores, stddev, mean);
cout << endl;
cout << "The mean is" " "<< fixed << setprecision(1) << average(scoreTotal, count) << endl;
cout << "The standard deviation is" " " << stddev(scores, count, mean) << endl;
cout << endl;
system("pause");
return 0;
}
void printArray(int Array[], int count)
{
for (int x = 0; x < count; x++)
{
cout << fixed << setprecision(1) << Array[x] << endl;
}
}
char gradeFunction(int scores, double stddev, double mean)
{
char F, D, C, B, A;
if (scores <= (mean - (1.5 * stddev)))
return 'F';
else if (scores <= (mean - (.5 * stddev)))
return 'D';
else if (scores <= (mean + (.5 * stddev)))
return 'C';
else if (scores <= (mean + (1.5 * stddev)))
return 'B';
else return 'A';
}
double average(double scoreTotal, int count)
{
return scoreTotal / count;
}
double stddev(int Array[], int count , double mean)
{
double stddev;
double sum2 = 0;
for (int i = 0; i < count; i++)
{
sum2 += pow((Array[i] - mean), 2);
}
stddev = sqrt(sum2 / (count - 1));
return stddev;
}
The error messages this leaves me with are...
3 IntelliSense: argument of type "double (*)(int *Array, int count, double mean)" is incompatible with parameter of type "double"
Error 1 error C2664: 'char gradeFunction(int [],double,double)' : cannot convert argument 2 from 'double (__cdecl *)(int [],int,double)' to 'double'

Advice on string search with Cuda

I am to write a cuda code which searches set of keyword strings inside set of data strings and returns an array of boolean for keyword-data string pairs. Data strings: at the moment, 10000(may vary) strings and each of them has max 250 chars.
Keyword strings: at the moment, 100(may vary) strings and each of them has max 100 chars.
Length of each string is known.
My question is which of the following approaches might be more suitable in this case.
1st:
gridDim.x => # of keyword strings
gridDim.y => # of data strings
blockDim => (max string size(250 in this case),1,1)
Naive algorithm will be used for search
Each thread will load the chars of keyword and data to shared mem from global mem.
Each thread will be responsible for one of the windows in naive search algorithm.
Result will be written to the boolean array.
So, each block will be responsible for keyword-data pair.
2nd:
gridDim => (# of data strings,1,1)
blockDim => (# of keyword strings,1,1)
In each block, data string will be loaded to shared mem.
In this case, each thread will be responsible for keyword-data pair instead of block.
Each thread will search corresponding keyword inside the data string.
Naive algorithm is not necessary in this case, Boyer-Moore might be used.
For searches inside huge files, since length of the data is much bigger than the length of the keyword, 1st approach is used. But in this case, I am not sure if the 1st appraoch is better. On the other hand, for 2nd approach, coalescing the keywords might be a problem, since the lengths are not fixed. There is an upper boundry for the size of the keywords. So, padding might ease the coalescing but it would consume more memory.
Anyhow, if you have worked on a similar case or know about a better approach than those I described above, please help me out.
Thank you in advance.
So, I've implemented both of the cases. Code for approach 1:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "stdio.h"
#include "iostream"
#include "chrono"
#include "cstdlib"
#define SEARCHTERMSIZE 100
#define SEARCHITEMSIZE 65000
#define MAXDATASTRINGSIZE 250
#define MAXKEYWORDSTRINGSSIZE 50
using namespace std;
__global__ void searchKeywordKernel(bool* resultPtr, const char * dataPtr, const short* dataLengths, const char * keywordPtr, const short* keywordLengths)
{
int dataIndex = blockIdx.x;
int keywordIndex = blockIdx.y;
int dataLength = dataLengths[dataIndex];
int keywordLength = keywordLengths[keywordIndex];
__shared__ char sData[MAXDATASTRINGSIZE];
__shared__ char sKeyword[MAXKEYWORDSTRINGSSIZE];
__shared__ bool isFound;
if (dataIndex < SEARCHITEMSIZE && keywordIndex < SEARCHTERMSIZE)
{
if (dataLength < keywordLength)
{
resultPtr[keywordIndex*SEARCHITEMSIZE + dataIndex] = false;
}
else
{
isFound = false;
sData[threadIdx.x] = dataPtr[dataIndex*MAXDATASTRINGSIZE + threadIdx.x];
if (threadIdx.x < keywordLength)
sKeyword[threadIdx.x] = keywordPtr[keywordIndex*MAXKEYWORDSTRINGSSIZE + threadIdx.x];
__syncthreads();
if (threadIdx.x <= dataLength - keywordLength)
{
for (int i = 0; i < keywordLength && !isFound; i++)
{
if (sData[threadIdx.x + i] != sKeyword[i])
break;
if (i == keywordLength - 1)
isFound = true;
}
}
resultPtr[keywordIndex*SEARCHITEMSIZE + dataIndex] = isFound;
}
}
}
int main()
{
chrono::steady_clock::time_point startTime;
chrono::steady_clock::time_point endTime;
typedef chrono::duration<int, milli> millisecs_t;
//////////Search Data Init/////////////////
cout << "Before Search Data Init" << endl;
startTime = chrono::steady_clock::now();
char* dataPtr = (char*)malloc(sizeof(char)*MAXDATASTRINGSIZE*SEARCHITEMSIZE);
short* dataLengths = new short[SEARCHITEMSIZE];
short temp;
short tempChar;
for (int i = 0; i < SEARCHITEMSIZE; i++)
{
temp = rand() % (MAXDATASTRINGSIZE - 20) + 20;
for (int k = 0; k < temp; k++)
{
tempChar = rand() % 26;
dataPtr[i*MAXDATASTRINGSIZE + k] = 97 + tempChar; //97->a, 98->b, 122->z
}
dataLengths[i] = temp;
}
endTime = chrono::steady_clock::now();
millisecs_t duration(chrono::duration_cast<millisecs_t>(endTime - startTime));
cout << "After Search Data Init: " << duration.count() << "ms" << endl;
//////////Search Data Init/////////////////
//////////Search Keyword Init/////////////////
cout << "Before Search Keyword Init" << endl;
startTime = chrono::steady_clock::now();
char* keywordPtr = (char*)malloc(sizeof(char)*MAXKEYWORDSTRINGSSIZE*SEARCHTERMSIZE);
short* keywordLengths = new short[SEARCHTERMSIZE]; //lenghts, not the start positions
for (int i = 0; i < SEARCHTERMSIZE; i++)
{
temp = rand() % (MAXKEYWORDSTRINGSSIZE - 10) + 10;
for (int k = 0; k < temp; k++)
{
tempChar = rand() % 26;
keywordPtr[i*MAXKEYWORDSTRINGSSIZE + k] = 97 + tempChar; //97->a, 98->b, 122->z
}
keywordLengths[i] = temp;
}
endTime = chrono::steady_clock::now();
millisecs_t duration1(chrono::duration_cast<millisecs_t>(endTime - startTime));
cout << "After Search Keyword Init: " << duration1.count() << "ms" << endl;
//////////Search Keyword Init/////////////////
char* d_dataPtr;
short* d_dataLengths;
char* d_keywordPtr;
short* d_keywordLengths;
bool* d_resultPtr;
/////////////////////////CudaMalloc/////////////////////////////////
cout << "Before Malloc" << endl;
startTime = chrono::steady_clock::now();
cudaMalloc(&d_dataPtr, sizeof(char) * SEARCHITEMSIZE * MAXDATASTRINGSIZE);
cudaMalloc(&d_dataLengths, sizeof(short) * SEARCHITEMSIZE);
cudaMalloc(&d_keywordPtr, sizeof(char) * SEARCHTERMSIZE*MAXKEYWORDSTRINGSSIZE);
cudaMalloc(&d_keywordLengths, sizeof(short) * SEARCHTERMSIZE);
cudaMalloc(&d_resultPtr, sizeof(bool)*SEARCHITEMSIZE * SEARCHTERMSIZE);
endTime = chrono::steady_clock::now();
millisecs_t duration2(chrono::duration_cast<millisecs_t>(endTime - startTime));
cout << "After Malloc: " << duration2.count() << "ms" << endl;
/////////////////////////CudaMalloc/////////////////////////////////
cudaEvent_t start, stop;
float elapsedTime;
/////////////////////////CudaMemCpy///////////////////////////////////
cout << "Before Memcpy" << endl;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
cudaMemcpy(d_dataPtr, dataPtr, sizeof(char) * SEARCHITEMSIZE * MAXDATASTRINGSIZE, cudaMemcpyHostToDevice);
cudaMemcpy(d_dataLengths, dataLengths, sizeof(short) * SEARCHITEMSIZE, cudaMemcpyHostToDevice);
cudaMemcpy(d_keywordPtr, keywordPtr, sizeof(char) * SEARCHTERMSIZE*MAXKEYWORDSTRINGSSIZE, cudaMemcpyHostToDevice);
cudaMemcpy(d_keywordLengths, keywordLengths, sizeof(short) * SEARCHTERMSIZE, cudaMemcpyHostToDevice);
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&elapsedTime, start, stop);
cudaEventDestroy(start);
cudaEventDestroy(stop);
cout << "After Memcpy: " << elapsedTime << "ms" << endl;
/////////////////////////CudaMemCpy///////////////////////////////////
////////////////////////Kernel//////////////////////////////////////////
cout << "Before Kernel" << endl;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
dim3 dimGrid(SEARCHITEMSIZE,SEARCHTERMSIZE);
searchKeywordKernel << < dimGrid, MAXDATASTRINGSIZE >> >(d_resultPtr, d_dataPtr, d_dataLengths, d_keywordPtr, d_keywordLengths);
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&elapsedTime, start, stop);
cudaEventDestroy(start);
cudaEventDestroy(stop);
cout << "After Kernel: " << elapsedTime << "ms" << endl;
////////////////////////Kernel//////////////////////////////////////////
bool* result = new bool[SEARCHTERMSIZE*SEARCHITEMSIZE];
cudaMemcpy(result, d_resultPtr, sizeof(bool) * SEARCHITEMSIZE * SEARCHTERMSIZE, cudaMemcpyDeviceToHost);
/////////////////////////////////// CPU code //////////////////////////////////////////
bool* cpuResult = new bool[SEARCHTERMSIZE*SEARCHITEMSIZE];
cout << "CPU code starts" << endl;
startTime = chrono::steady_clock::now();
for (int i = 0; i < SEARCHTERMSIZE; i++)
{
for (int j = 0; j < SEARCHITEMSIZE; j++)
{
if (dataLengths[j] < keywordLengths[i])
{
cpuResult[i*SEARCHITEMSIZE + j] = false;
break;
}
else
{
for (int k = 0; k <= dataLengths[j] - keywordLengths[i]; k++)
{
cpuResult[i*SEARCHITEMSIZE + j] = true;
for (int l = 0; l < keywordLengths[i]; l++)
{
if (dataPtr[j*MAXDATASTRINGSIZE + k + l] != keywordPtr[i*MAXKEYWORDSTRINGSSIZE + l])
{
cpuResult[i*SEARCHITEMSIZE + j] = false;
break;
}
}
if (cpuResult[i*SEARCHTERMSIZE + j])
break;
}
}
}
}
endTime = chrono::steady_clock::now();
millisecs_t duration3(chrono::duration_cast<millisecs_t>(endTime - startTime));
cout << "CPU code ends: " << duration3.count() << "ms" << endl;
/////////////////////////////////// CPU code //////////////////////////////////////////
////////////////////////////////////Result Comparison////////////////////////////////////////
bool kernelRes = true;
for (int i = 0; i < SEARCHITEMSIZE*SEARCHTERMSIZE; i++)
{
if (cpuResult[i] != result[i])
{
kernelRes = false;
break;
}
}
////////////////////////////////////Result Comparison////////////////////////////////////////
cout << boolalpha << "Kernel computation: " << kernelRes << endl;
cout << "Before Deleting arrays" << endl;
delete[] dataPtr;
delete[] keywordPtr;
delete[] dataLengths;
delete[] keywordLengths;
delete[] result;
delete[] cpuResult;
cout << "After Deleting arrays" << endl;
cout << "Before Freeing device memory" << endl;
cudaFree(d_dataPtr);
cudaFree(d_keywordPtr);
cudaFree(d_dataLengths);
cudaFree(d_keywordLengths);
cudaFree(d_resultPtr);
cout << "After Freeing device memory" << endl;
cudaDeviceReset();
system("pause");
return 0;
}
Code for approach 2:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <iostream>
#include <chrono>
#include <cstdlib>
#define SEARCHTERMSIZE 198
#define SEARCHITEMSIZE 65000
#define MAXDATASTRINGSIZE 250
#define MAXKEYWORDSTRINGSSIZE 50
using namespace std;
__global__ void searchKeywordKernel(bool* resultPtr, const char * __restrict__ dataPtr, const short* dataLengths, const char * keywordPtr, const short* keywordLengths)
{
int dataIndex = blockIdx.x;
int keywordIndex = threadIdx.x;
int dataLength = dataLengths[dataIndex];
int keywordLength = keywordLengths[keywordIndex];
__shared__ char sData[MAXDATASTRINGSIZE];
if (dataIndex < SEARCHITEMSIZE)
{
int my_tid = keywordIndex;
while (my_tid < dataLength)
{
sData[my_tid] = dataPtr[dataIndex*MAXDATASTRINGSIZE + my_tid];
my_tid += blockDim.x;
}
__syncthreads();
if (keywordIndex < SEARCHTERMSIZE)
{
if (dataLength < keywordLength)
{
resultPtr[dataIndex*SEARCHTERMSIZE + keywordIndex] = false;
}
else
{
bool isFound = true;
for (int i = 0; i <= dataLength - keywordLength; i++)
{
for (int j = 0; j < keywordLength; j++)
{
if (sData[i + j] != keywordPtr[j*SEARCHTERMSIZE + keywordIndex])
{
isFound = false;
break;
}
}
if (isFound)
break;
}
resultPtr[dataIndex*SEARCHTERMSIZE + keywordIndex] = isFound;
}
}
}
}
int main()
{
chrono::steady_clock::time_point startTime;
chrono::steady_clock::time_point endTime;
typedef chrono::duration<int, milli> millisecs_t;
//////////Search Data Init/////////////////
cout << "Before Search Data Init" << endl;
startTime = chrono::steady_clock::now();
char* dataPtr = (char*)malloc(sizeof(char)*MAXDATASTRINGSIZE*SEARCHITEMSIZE);
short* dataLengths = new short[SEARCHITEMSIZE];
short temp;
short tempChar;
for (int i = 0; i < SEARCHITEMSIZE; i++)
{
temp = rand() % (MAXDATASTRINGSIZE - 20) + 20;
for (int k = 0; k < temp; k++)
{
tempChar = rand() % 26;
dataPtr[i*MAXDATASTRINGSIZE + k] = 97 + tempChar; //97->a, 98->b, 122->z
}
dataLengths[i] = temp;
}
endTime = chrono::steady_clock::now();
millisecs_t duration(chrono::duration_cast<millisecs_t>(endTime - startTime));
cout << "After Search Data Init: " << duration.count() << "ms" << endl;
//////////Search Data Init/////////////////
//////////Search Keyword Init/////////////////
cout << "Before Search Keyword Init" << endl;
startTime = chrono::steady_clock::now();
char* keywordPtr = (char*)malloc(sizeof(char)*MAXKEYWORDSTRINGSSIZE*SEARCHTERMSIZE);
short* keywordLengths = new short[SEARCHTERMSIZE]; //lenghts, not the start positions
for (int i = 0; i < SEARCHTERMSIZE; i++)
{
temp = rand() % (MAXKEYWORDSTRINGSSIZE - 10) + 10;
for (int k = 0; k < temp; k++)
{
tempChar = rand() % 26;
keywordPtr[i*MAXKEYWORDSTRINGSSIZE + k] = 97 + tempChar; //97->a, 98->b, 122->z
}
keywordLengths[i] = temp;
}
endTime = chrono::steady_clock::now();
millisecs_t duration1(chrono::duration_cast<millisecs_t>(endTime - startTime));
cout << "After Search Keyword Init: " << duration1.count() << "ms" << endl;
//////////Search Keyword Init/////////////////
////////////////////Traverse Keyword Array////////////////////////////
char* keywordPtr_T = new char[SEARCHTERMSIZE*MAXKEYWORDSTRINGSSIZE];
for (int i = 0; i < SEARCHTERMSIZE; i++)
for (int j = 0; j < MAXKEYWORDSTRINGSSIZE; j++)
keywordPtr_T[j*SEARCHTERMSIZE + i] = keywordPtr[i*MAXKEYWORDSTRINGSSIZE + j];
////////////////////Traverse Keyword Array////////////////////////////
char* d_dataPtr;
short* d_dataLengths;
char* d_keywordPtr;
short* d_keywordLengths;
bool* d_resultPtr;
/////////////////////////CudaMalloc/////////////////////////////////
cout << "Before Malloc" << endl;
startTime = chrono::steady_clock::now();
cudaMalloc(&d_dataPtr, sizeof(char) * SEARCHITEMSIZE * MAXDATASTRINGSIZE);
cudaMalloc(&d_dataLengths, sizeof(short) * SEARCHITEMSIZE);
cudaMalloc(&d_keywordPtr, sizeof(char) * SEARCHTERMSIZE*MAXKEYWORDSTRINGSSIZE);
cudaMalloc(&d_keywordLengths, sizeof(short) * SEARCHTERMSIZE);
cudaMalloc(&d_resultPtr, sizeof(bool)*SEARCHITEMSIZE * SEARCHTERMSIZE);
endTime = chrono::steady_clock::now();
millisecs_t duration2(chrono::duration_cast<millisecs_t>(endTime - startTime));
cout << "After Malloc: " << duration2.count() << "ms" << endl;
/////////////////////////CudaMalloc/////////////////////////////////
cudaEvent_t start, stop;
float elapsedTime;
/////////////////////////CudaMemCpy///////////////////////////////////
cout << "Before Memcpy" << endl;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
cudaMemcpy(d_dataPtr, dataPtr, sizeof(char) * SEARCHITEMSIZE * MAXDATASTRINGSIZE, cudaMemcpyHostToDevice);
cudaMemcpy(d_dataLengths, dataLengths, sizeof(short) * SEARCHITEMSIZE, cudaMemcpyHostToDevice);
cudaMemcpy(d_keywordPtr, keywordPtr_T, sizeof(char) * SEARCHTERMSIZE*MAXKEYWORDSTRINGSSIZE, cudaMemcpyHostToDevice);
cudaMemcpy(d_keywordLengths, keywordLengths, sizeof(short) * SEARCHTERMSIZE, cudaMemcpyHostToDevice);
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&elapsedTime, start, stop);
cudaEventDestroy(start);
cudaEventDestroy(stop);
cout << "After Memcpy: " << elapsedTime << "ms" << endl;
/////////////////////////CudaMemCpy///////////////////////////////////
////////////////////////Kernel//////////////////////////////////////////
cout << "Before Kernel" << endl;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
searchKeywordKernel << < SEARCHITEMSIZE, SEARCHTERMSIZE >> >(d_resultPtr, d_dataPtr, d_dataLengths, d_keywordPtr, d_keywordLengths);
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&elapsedTime, start, stop);
cudaEventDestroy(start);
cudaEventDestroy(stop);
cout << "After Kernel: " << elapsedTime << "ms" << endl;
////////////////////////Kernel//////////////////////////////////////////
bool* result_T = new bool[SEARCHTERMSIZE*SEARCHITEMSIZE];
bool* result = new bool[SEARCHTERMSIZE*SEARCHITEMSIZE];
cudaMemcpy(result_T, d_resultPtr, sizeof(bool) * SEARCHITEMSIZE * SEARCHTERMSIZE, cudaMemcpyDeviceToHost);
for (int i = 0; i < SEARCHTERMSIZE; i++)
for (int j = 0; j < SEARCHITEMSIZE; j++)
result[j*SEARCHTERMSIZE + i] = result_T[i*SEARCHITEMSIZE + j];
/////////////////////////////////// CPU code //////////////////////////////////////////
bool* cpuResult = new bool[SEARCHTERMSIZE*SEARCHITEMSIZE];
cout << "CPU code starts" << endl;
startTime = chrono::steady_clock::now();
for (int i = 0; i < SEARCHTERMSIZE; i++)
{
for (int j = 0; j < SEARCHITEMSIZE; j++)
{
if (dataLengths[j] < keywordLengths[i])
{
cpuResult[i*SEARCHITEMSIZE + j] = false;
break;
}
else
{
for (int k = 0; k <= dataLengths[j] - keywordLengths[i]; k++)
{
cpuResult[i*SEARCHITEMSIZE + j] = true;
for (int l = 0; l < keywordLengths[i]; l++)
{
if (dataPtr[j*MAXDATASTRINGSIZE + k + l] != keywordPtr[i*MAXKEYWORDSTRINGSSIZE + l])
{
cpuResult[i*SEARCHITEMSIZE + j] = false;
break;
}
}
if (cpuResult[i*SEARCHTERMSIZE + j])
break;
}
}
}
}
endTime = chrono::steady_clock::now();
millisecs_t duration3(chrono::duration_cast<millisecs_t>(endTime - startTime));
cout << "CPU code ends: " << duration3.count() << "ms" << endl;
/////////////////////////////////// CPU code //////////////////////////////////////////
////////////////////////////////////Result Comparison////////////////////////////////////////
bool kernelRes = true;
for (int i = 0; i < SEARCHITEMSIZE*SEARCHTERMSIZE; i++)
{
if (cpuResult[i] != result[i])
{
kernelRes = false;
break;
}
}
////////////////////////////////////Result Comparison////////////////////////////////////////
cout << boolalpha << "Kernel computation: " << kernelRes << endl;
cout << "Before Deleting arrays" << endl;
delete[] dataPtr;
delete[] keywordPtr;
delete[] keywordPtr_T;
delete[] dataLengths;
delete[] keywordLengths;
delete[] result;
delete[] result_T;
delete[] cpuResult;
cout << "After Deleting arrays" << endl;
cout << "Before Freeing device memory" << endl;
cudaFree(d_dataPtr);
cudaFree(d_keywordPtr);
cudaFree(d_dataLengths);
cudaFree(d_keywordLengths);
cudaFree(d_resultPtr);
cout << "After Freeing device memory" << endl;
cudaDeviceReset();
system("pause");
return 0;
}
Second approach gave better results than the first approach. Yet the performance of the second approach depends on the number of keywords. If the number of the keywords is multiple of 192, gpu has performance than cpu (time of malloc+memcpy+kernel < time of cpu).
What should I do to overcome such dependancy? Would it be viable to increase the number of threads and to pass multiple data strings rather than one in each block?
I suggest blockDim = (16, 16, 1) and gridDim = (# of data strings / 16, # of keyword strings / 16, 1). In your case, where tens of strings can ideally fit in shared memory, such block-grid division will lead to minimum global memory access while introducing no computation overhead.
Padding is not a good choice, unless each string is expected to have its length quite close to the maximum (80% of maximum for example). If you keep a array of offset of every string (CPU is good at generating it), coalescing global memory read is just trivial.