I was trying to solve a CodeChef problem named MCHEF (http://www.codechef.com/problems/MCHEF). I keep getting an error for my solution, http://www.codechef.com/viewsolution/7488621, and I can't figure out what the error is.
You are making a mistake! Every array allocated with new[] must be freed from the heap using delete[], like this:
delete []Array;
Try this code!
#include <bits/stdc++.h>
using namespace std;
/// Returns the larger of two signed 64-bit values (y when they compare equal).
long long max(long long x, long long y)
{
    if (x > y)
    {
        return x;
    }
    return y;
}
/// Solves the 0/1 knapsack problem: picks a subset of the n items whose total
/// weight does not exceed W and whose total value is as large as possible.
///
/// @param W   knapsack capacity.
/// @param wt  weights of the items (wt[0] .. wt[n-1]).
/// @param val values of the items (val[0] .. val[n-1]); may be negative, in
///            which case the item is simply never chosen.
/// @param n   number of items.
/// @return    the best achievable total value (0 when nothing is worth taking).
///
/// The table is a single heap-allocated row of W + 1 entries instead of an
/// (n + 1) x (W + 1) variable-length array on the stack: the latter is not
/// standard C++ and overflows the stack for large n * W.
/// Items with a negative weight or a weight above W can never be packed and
/// are skipped; zero-weight items are handled like any other item.
long long knapSack(unsigned int W, long long wt[], long long val[], unsigned int n)
{
    // best[w] = best total value reachable with capacity w using the items seen so far.
    std::vector<long long> best(static_cast<std::size_t>(W) + 1, 0);

    for (unsigned int item = 0; item < n; ++item)
    {
        const long long weight = wt[item];
        if (weight < 0 || weight > static_cast<long long>(W))
        {
            continue; // cannot fit (or would index outside the table)
        }
        const std::size_t need = static_cast<std::size_t>(weight);

        // Walk capacities downwards so best[w - need] still refers to the
        // state *before* this item was considered (each item used at most once).
        for (std::size_t w = W;; --w)
        {
            const long long candidate = val[item] + best[w - need];
            if (candidate > best[w])
            {
                best[w] = candidate;
            }
            if (w == need)
            {
                break; // explicit exit avoids unsigned wrap-around when need == 0
            }
        }
    }
    return best[W];
}
/// Reads t test cases from standard input. Each test case provides n ratings,
/// a budget k and m intervals "l r cost" (1-based, inclusive). For every
/// position the cheapest interval covering it is determined, and the task is
/// then handed to knapSack: weight = that cheapest cost, value = -rating,
/// capacity = k. The printed result is the sum of all ratings plus the
/// knapsack value.
int main()
{
    unsigned int t = 0;
    std::cin >> t;
    while (t--)
    {
        unsigned int n = 0, k = 0, m = 0;
        std::cin >> n >> k >> m;

        long long sum = 0;
        std::vector<long long> value(n); // value[i] = -rating[i]
        for (unsigned int i = 0; i < n; ++i)
        {
            long long rating = 0;
            std::cin >> rating;
            sum += rating;
            value[i] = -rating;
        }

        // For each position, the indices of the intervals that start / end there.
        std::vector<long long> cost(m);
        std::vector<std::vector<unsigned int> > opensAt(n), closesAt(n);
        for (unsigned int i = 0; i < m; ++i)
        {
            unsigned int l = 0, r = 0;
            std::cin >> l >> r >> cost[i];
            if (l < 1 || r > n || l > r)
            {
                continue; // malformed interval: ignore rather than index out of range
            }
            opensAt[l - 1].push_back(i);
            closesAt[r - 1].push_back(i);
        }

        // A position no interval covers gets a weight just above the budget,
        // so the knapsack can never select it.
        const long long unreachable = static_cast<long long>(k) + 1;
        std::vector<long long> cheapest(n, unreachable);

        // Costs of the intervals covering the current position. This has to be
        // a multiset: two active intervals may share a cost, and closing one
        // of them must not remove the cost that the other still provides.
        std::multiset<long long> active;
        for (unsigned int i = 0; i < n; ++i)
        {
            for (unsigned int idx : opensAt[i])
            {
                active.insert(cost[idx]);
            }
            if (!active.empty())
            {
                cheapest[i] = *active.begin();
            }
            for (unsigned int idx : closesAt[i])
            {
                const std::multiset<long long>::iterator pos = active.find(cost[idx]);
                if (pos != active.end())
                {
                    active.erase(pos); // erase exactly one occurrence
                }
            }
        }

        // The task is now a plain 0/1 knapsack.
        std::cout << sum + knapSack(k, cheapest.data(), value.data(), n) << '\n';
    }
    return 0;
}
Related
I am attempting to make a version of std::set using a linked list. I think I have implemented it mostly correctly, but I am getting a compile error that I cannot decipher. I would appreciate anyone spotting the error in my code, and/or explaining how I would go about tracking down an error like this, meaning an error that reaches far into STL functions.
#include <iterator>
#include <cstddef>
/// One node of a singly linked list: a payload plus a non-owning pointer to
/// the next node (nullptr marks the end of the list).
template <typename Type>
struct ListNode{
    Type info;              ///< Value stored in this node.
    ListNode<Type> * next;  ///< Following node, or nullptr.

    /// Builds a node holding newInfo that links to newNext.
    ListNode(Type newInfo, ListNode<Type> * newNext) : info(newInfo), next(newNext){
    }

    /// Copies payload and link; takes a const reference so that const nodes
    /// and temporaries can be copied as well.
    ListNode(const ListNode<Type>& L): info(L.info), next(L.next){
    }

    /// Copies payload and link from L. L is a reference, so members are
    /// reached with '.', and the object itself (*this) is returned.
    ListNode<Type>& operator=(const ListNode<Type>& L){
        info = L.info;
        next = L.next;
        return *this;
    }
};
template <typename Type>
class SetList{
ListNode<Type> * head;
ListNode<Type> * tail;
public:
typedef ListNode<Type> value_type;
SetList() : head(nullptr), tail(nullptr){
}
SetList(SetList & s){
}
~SetList(){
//ListNode<Type> * cur = head;
//ListNode<Type> * next = cur;
//while(cur){
// next = cur->next;
// delete cur;
// cur = next;
// }
}
struct iterator{
//traits
typedef std::forward_iterator_tag iterator_category;
typedef iterator self_type;
typedef Type value_type;
typedef Type& reference;
typedef Type* pointer;
typedef ptrdiff_t difference_type;
private:
//rename to ihead
ListNode<Type>* ibuf;
public:
iterator(ListNode<value_type>* node) : ibuf(node){}
self_type& operator++(){ibuf = ibuf->next; return *this;}
self_type operator++(int postfix){
self_type cpy = *this;
ibuf = ibuf->next;
return cpy;
}
reference operator*(){return ibuf->info;}
pointer operator->(){return &ibuf->info;}
self_type operator=(const iterator& it){insert(*it);}
bool operator==(const self_type& rhs) const {return ibuf->info == rhs.ibuf->info;}
bool operator !=(const self_type& rhs) const {return ibuf->info != rhs.ibuf->info;}
};
iterator begin(){ return iterator(head);}
iterator end() { return iterator(nullptr);}
// const_iterator begin() { return const_iterator(head);}
// const_iterator end() { return const_iterator(tail);}
Type operator[](int index){
iterator cur(head);
for(int i = 0; i < index; ++i,++cur){
}
return *cur;
}
SetList<Type>& operator=(const SetList<Type>& s){
head = s.head;
tail = s.tail;
return this;
}
iterator find(Type toFind){
ListNode<Type> * cur = head;
while(cur){
if(cur->info == toFind)
return iterator(cur);
}
return this->end();
}
void insert(Type toInsert){
ListNode<Type>* cur = nullptr;
if(head){
cur = new ListNode<Type>(toInsert, head);
head = cur;
}else{
cur = new ListNode<Type>(toInsert, nullptr);
head = cur;
}
}
};
I am calling elsewhere copy on my set, my copy call works with std::set but not my set.
The error I am getting is as follows.
Hope this isn't too much to ask. You don't even have to read my code, even just input on how to track down large errors like this would be much appreciated.
SetList<Type> should have Type as its value_type, not ListNode<Type>.
I've been playing with Processing for a few days, but I ran into an error that I don't understand. I declared the class and a constructor with the proper arguments; maybe you can help me. This is the code:
// Square grid of cells; allocated and filled in setup().
Cell[][] grid;
// Number of rows, and equally of columns, in the grid.
int rnc = 5;
// Edge length handed to rect() for every cell, and the per-cell factor of the window size.
int side = 5;

// Sizes the window to rnc*side in both directions, then creates one Cell per
// grid slot and draws a side-by-side rectangle for it.
void setup() {
  int extent = rnc * side;
  size(extent, extent);
  grid = new Cell[rnc][rnc];
  for (int r = 0; r < rnc; r++) {
    for (int c = 0; c < rnc; c++) {
      Cell cell = new Cell(r, c);
      grid[r][c] = cell;
      rect(cell.row * side, cell.column * side, side, side);
    }
  }
}

// Intentionally empty.
void draw() {
}
// One square of the grid: its position plus a "checked" flag.
class Cell
{
  boolean isChecked;  // false until toggleCheck() is called
  int row, column;    // position of the cell inside the grid
  int side;           // never assigned in this class

  // Constructor. It must be declared WITHOUT a return type: written as
  // "void Cell(...)" it is an ordinary method named Cell, and
  // "new Cell(i, j)" fails with "constructor Cell(int,int) is undefined".
  Cell(int trow, int tcolumn) {
    row = trow;
    column = tcolumn;
  }

  // Flips the checked flag.
  void toggleCheck() {
    isChecked = !isChecked;
  }
}
The error I got when I tried to run the program is: The constructor sketch.Cell(int,int) is undefined.
Thank you in advance.
I'm assuming this is Java, although you haven't specified a language. If so, this is the problem:
// Faulty version, quoted from the question: the "void" return type makes this
// an ordinary method that merely shares the class name; it is not a constructor.
void Cell(int trow, int tcolumn){
row=trow;
column=tcolumn;
}
That's not a constructor. That's a method called Cell, with a void return type. You meant:
// Constructor (no return type): stores the given position in the row and column fields.
Cell(int trow, int tcolumn) {
  this.row = trow;
  this.column = tcolumn;
}
(Or possibly public Cell(...).)
At that point, you should be okay. Note that this would have been a compile-time error - not an execution-time error. Don't try to run your code until it compiles.
Also, it's not clear why you've made your constructor parameters trow and tcolumn - what's the t meant to be for? I'd also make your variables private, and final if possible, and simplify your toggleCheck method. For example:
/** A grid position that is fixed at construction, plus a flag that can be toggled. */
public final class Cell {
    private final int row;
    private final int column;
    private boolean checked;

    // It's not clear what side was for

    /** Creates an unchecked cell at the given row and column. */
    public Cell(int row, int column) {
        this.column = column;
        this.row = row;
    }

    /** Inverts the checked flag. */
    public void toggleChecked() {
        this.checked ^= true;
    }

    /** Reports whether the cell is currently checked. */
    public boolean isChecked() {
        return this.checked;
    }
}
The following is the rootbeer example code for Nvidia CUDA that I ran on a laptop with Ubuntu 12.04 (Precise) with bumblebee and optirun. The laptop features Nvidia Optimus, hence the optirun. The GPU happens to be a Nvidia GeForce GT 540M which the Nvidia website says has 96 cores. I get almost no throughput gain. What is the problem?
package com.random.test;
import java.util.ArrayList;
import java.util.Formatter;
import java.util.List;
import edu.syr.pcpratts.rootbeer.runtime.Kernel;
import edu.syr.pcpratts.rootbeer.runtime.Rootbeer;
/**
 * Benchmark driver: times one ArraySum job executed by calling gpuMethod()
 * directly, then times numberOfJobs such jobs submitted through
 * Rootbeer.runAll, and prints the resulting throughput gain factor.
 */
public class ArraySumApp {
    final static int numberOfJobs = 1024; // 1024 in the original example
    final static int sizeOfArray = 512; // 512 in the original example
    // Every array holds 0, 1, ..., sizeOfArray - 1, so its sum is the
    // arithmetic series below (130816 for sizeOfArray == 512). Deriving it
    // keeps the assertions valid when sizeOfArray is changed.
    final static int theAnswer = sizeOfArray * (sizeOfArray - 1) / 2;

    /**
     * Creates one ArraySum job per input array and runs them all through Rootbeer.
     *
     * @param arrays the arrays to sum
     * @return an array whose element i is written by the job for arrays.get(i)
     */
    public int[] sumArrays(List<int[]> arrays) {
        List<Kernel> jobs = new ArrayList<Kernel>();
        int[] ret = new int[arrays.size()];
        for (int i = 0; i < arrays.size(); ++i) {
            jobs.add(new ArraySum(arrays.get(i), ret, i));
        }
        Rootbeer rootbeer = new Rootbeer();
        rootbeer.runAll(jobs);
        return ret;
    }

    /**
     * Runs a single job by invoking gpuMethod() directly (Rootbeer is not
     * involved) and reports how long it took.
     *
     * @return the elapsed time in milliseconds
     */
    private static long measureOneJob() {
        int[] source = new int[ArraySumApp.sizeOfArray];
        int[] destination = new int[1];
        for (int i = 0; i < ArraySumApp.sizeOfArray; i++)
            source[i] = i;
        Kernel job = new ArraySum(source, destination, 0);
        ElapsedTimer et = new ElapsedTimer();
        job.gpuMethod();
        long timeInMs = et.stopInMilliseconds();
        System.out.println("measureOneJob " + et.stringInMilliseconds());
        assert destination[0] == ArraySumApp.theAnswer : "cosmic rays";
        return timeInMs;
    }

    /** Runs the benchmark and prints the timings and the throughput gain. */
    public static void main(String[] args) {
        Helper.assertAssertionEnabled();
        // measure the time to do one job; the first call's result is
        // discarded and only the second measurement is used below
        ArraySumApp.measureOneJob();
        long oneJob = ArraySumApp.measureOneJob();
        ArraySumApp app = new ArraySumApp();
        List<int[]> arrays = new ArrayList<int[]>();
        // you want 1000s of threads to run on the GPU all at once for speedups
        for (int i = 0; i < ArraySumApp.numberOfJobs; ++i) {
            int[] array = new int[ArraySumApp.sizeOfArray];
            for (int j = 0; j < array.length; ++j) {
                array[j] = j;
            }
            arrays.add(array);
        }
        ElapsedTimer et = new ElapsedTimer();
        int[] sums = app.sumArrays(arrays);
        long allJobs = et.stopInMilliseconds();
        System.out.println("measureAllJobs " + et.stringInMilliseconds());
        // (time numberOfJobs jobs would take one after another) / (time actually measured)
        double gainFactor = ((double) ArraySumApp.numberOfJobs) * oneJob
                / allJobs;
        System.out.println(String.format(
                "throughput gain factor %.1f\nthroughput gain %.1f\n",
                gainFactor, gainFactor - 1.0d));
        // check the number of answers is correct
        assert sums.length == ArraySumApp.numberOfJobs : "cosmic rays";
        // check they all have the answer
        for (int i = 0; i < ArraySumApp.numberOfJobs; i++)
            assert sums[i] == ArraySumApp.theAnswer : "cosmic rays";
    }
}
/**
 * Kernel that sums the elements of one int array and stores the total in a
 * given slot of a result array. The summation is repeated repetitionFactor
 * times; every repetition stores the same total.
 */
class ArraySum implements Kernel {
    final static int repetitionFactor = 100000;
    private int[] source;
    private int[] ret;
    private int index;

    /**
     * @param src array to sum
     * @param dst array that receives the total
     * @param i   position in dst that this job writes
     */
    public ArraySum(int[] src, int[] dst, int i) {
        this.source = src;
        this.ret = dst;
        this.index = i;
    }

    public void gpuMethod() {
        int remaining = ArraySum.repetitionFactor;
        while (remaining > 0) {
            int total = 0;
            for (int pos = 0; pos < source.length; ++pos) {
                total = total + source[pos];
            }
            ret[index] = total;
            remaining--;
        }
    }
}
/** Static-only helpers; the private constructor prevents instantiation. */
class Helper {
    private Helper() {
    }

    /**
     * Returns normally when Java assertions are enabled for this class;
     * otherwise fails through noteCosmicRays().
     */
    static void assertAssertionEnabled() {
        boolean enabled = false;
        try {
            assert false;
        } catch (AssertionError e) {
            enabled = true; // the assert statement was executed
        }
        if (enabled) {
            return;
        }
        Helper.noteCosmicRays();
    }

    /** Always throws: signals a programmer design or logic error. */
    static void noteCosmicRays()
    {
        throw new RuntimeException("cosmic rays");
    }
}
/**
 * Stopwatch that starts at construction. Each stop...() method records the
 * time elapsed since construction; the string...() methods without "stop" in
 * their name format the most recently recorded value (0 if stop was never
 * called).
 *
 * Elapsed time is taken from System.nanoTime(), which is meant for measuring
 * intervals and is unaffected by adjustments of the system clock; no
 * third-party date library is needed.
 */
class ElapsedTimer {
    private static final long NANOS_PER_MILLI = 1000000L;

    private final long t0Nanos;
    private long savedStopInMilliseconds;

    /** Starts the timer. */
    public ElapsedTimer() {
        this.t0Nanos = System.nanoTime();
    }

    /** Records and returns the elapsed time in milliseconds. */
    public long stopInMilliseconds() {
        return stop();
    }

    /** Formats the last recorded value as e.g. "26 ms"; relies on a saved stop. */
    public String stringInMilliseconds()
    {
        return String.format("%d ms", this.savedStopInMilliseconds);
    }

    /** Records the elapsed time and formats it in milliseconds. */
    public String stopStringInMilliseconds() {
        stop();
        return stringInMilliseconds();
    }

    /** Formats the last recorded value in seconds with three decimals; relies on a saved stop. */
    public String stringInSecondsAndMilliseconds()
    {
        return String.format("%5.3f s", this.savedStopInMilliseconds / 1000.0d);
    }

    /** Records the elapsed time and formats it in seconds with three decimals. */
    public String stopStringInSecondsAndMilliseconds() {
        stop();
        return stringInSecondsAndMilliseconds();
    }

    /** Records the elapsed time and returns it rounded to whole seconds. */
    public long stopInSeconds() {
        return (stop() + 500L) / 1000L; // rounding
    }

    /** Formats the last recorded value rounded to whole seconds; relies on a saved stop. */
    public String stringInSeconds()
    {
        long elapsed = (this.savedStopInMilliseconds + 500L) / 1000L; // rounding
        return String.format("%d s", elapsed);
    }

    /** Records the elapsed time and formats it rounded to whole seconds. */
    public String stopStringInSeconds() {
        stop();
        return stringInSeconds();
    }

    /**
     * This is private. Use the stopInMilliseconds method if this is what you
     * need.
     */
    private long stop() {
        savedStopInMilliseconds = (System.nanoTime() - this.t0Nanos) / NANOS_PER_MILLI;
        return savedStopInMilliseconds;
    }
}
This is the output:
measureOneJob 110 ms
measureOneJob 26 ms
CudaRuntime2 ctor: elapsedTimeMillis: 609
measureAllJobs 24341 ms
throughput gain factor 1.1
throughput gain 0.1
The rootbeer developer said the example code that takes the sum of array elements is not the best example and an alternative example would show throughput gains.
You can see: https://github.com/pcpratts/rootbeer1/tree/develop/gtc2013/Matrix
This is an example for the 2013 NVIDIA GTC conference. I obtained a 20x speedup over a 4-core Java Matrix Multiply that uses transpose.
The example is a tiled Matrix Multiply using shared memory on the GPU. According to the NVIDIA literature, using shared memory is one of the most important aspects of getting good speedups. To use shared memory, you have each thread in a block load values into a shared array, and then you reuse these shared values several times. This saves the time needed to fetch from global memory.
A fetch from global memory takes about 200-300 clock cycles, whereas a fetch from shared memory takes about 2-3 clock cycles on the Tesla 2.0 architecture.
I am having trouble working with JCuda. My task is to compute a 1D FFT using the CUFFT library and then multiply the result by 2, so I decided to perform a 1D FFT of type CUFFT_R2C. The class responsible for this is as follows:
/**
 * Wraps a 1D cuFFT transform of a float array via JCuda/JCufft: the
 * constructor uploads the input to the device, transform() executes the FFT,
 * getFFTResult() downloads the output and releaseGPUResources() frees both
 * device buffers.
 */
public class FFTTransformer {
// Device buffer holding the uploaded input samples.
private Pointer inputDataPointer;
// Device buffer receiving the transform output.
private Pointer outputDataPointer;
// Compared against JCufft.CUFFT_FORWARD in transform(); also passed as the last argument of cufftExecC2C.
private int fftType;
private float[] inputData;
// Host copy of the output; null until getFFTResult() is called.
private float[] outputData;
private int batchSize = 1;
/**
 * Allocates the device buffers and copies inputData to the device.
 * The input buffer holds inputData.length floats, the output buffer
 * inputData.length + 2 floats.
 */
public FFTTransformer (int type, float[] inputData) {
this.fftType = type;
this.inputData = inputData;
inputDataPointer = new CUdeviceptr();
JCuda.cudaMalloc(inputDataPointer, inputData.length * Sizeof.FLOAT);
JCuda.cudaMemcpy(inputDataPointer, Pointer.to(inputData),
inputData.length * Sizeof.FLOAT, cudaMemcpyKind.cudaMemcpyHostToDevice);
outputDataPointer = new CUdeviceptr();
// NOTE(review): length + 2 floats presumably corresponds to the N/2 + 1 complex values an R2C transform writes - confirm against the cuFFT docs.
JCuda.cudaMalloc(outputDataPointer, (inputData.length + 2) * Sizeof.FLOAT);
}
public Pointer getInputDataPointer() {
return inputDataPointer;
}
public Pointer getOutputDataPointer() {
return outputDataPointer;
}
public int getFftType() {
return fftType;
}
public void setFftType(int fftType) {
this.fftType = fftType;
}
public float[] getInputData() {
return inputData;
}
public int getBatchSize() {
return batchSize;
}
public void setBatchSize(int batchSize) {
this.batchSize = batchSize;
}
/** Returns the array filled by the last getFFTResult() call (null before that). */
public float[] getOutputData() {
return outputData;
}
// Real-to-complex transform of inputData.length samples; the plan is created and destroyed on every call.
private void R2CTransform() {
cufftHandle plan = new cufftHandle();
JCufft.cufftPlan1d(plan, inputData.length, cufftType.CUFFT_R2C, batchSize);
JCufft.cufftExecR2C(plan, inputDataPointer, outputDataPointer);
JCufft.cufftDestroy(plan);
}
// Complex-to-complex transform planned for inputData.length elements, with fftType passed as the direction.
// NOTE(review): the buffers are sized in floats (length and length + 2); a C2C plan of this size
// presumably needs 2 * length floats per buffer - confirm before using this path.
private void C2CTransform(){
cufftHandle plan = new cufftHandle();
JCufft.cufftPlan1d(plan, inputData.length, cufftType.CUFFT_C2C, batchSize);
JCufft.cufftExecC2C(plan, inputDataPointer, outputDataPointer, fftType);
JCufft.cufftDestroy(plan);
}
/** Runs the R2C transform when fftType equals JCufft.CUFFT_FORWARD, the C2C transform otherwise. */
public void transform(){
if (fftType == JCufft.CUFFT_FORWARD) {
R2CTransform();
} else {
C2CTransform();
}
}
/** Copies inputData.length + 2 floats from the device output buffer into a new host array and returns it. */
public float[] getFFTResult() {
outputData = new float[inputData.length + 2];
JCuda.cudaMemcpy(Pointer.to(outputData), outputDataPointer,
outputData.length * Sizeof.FLOAT, cudaMemcpyKind.cudaMemcpyDeviceToHost);
return outputData;
}
/** Frees both device buffers; the pointers are not reset afterwards. */
public void releaseGPUResources(){
JCuda.cudaFree(inputDataPointer);
JCuda.cudaFree(outputDataPointer);
}
// Demo: transforms 65536 samples of sin(i), then hands the device output buffer to multiplyOn2.
public static void main(String... args) {
float[] inputData = new float[65536];
for(int i = 0; i < inputData.length; i++) {
inputData[i] = (float) Math.sin(i);
}
FFTTransformer transformer = new FFTTransformer(JCufft.CUFFT_FORWARD, inputData);
transformer.transform();
// result is downloaded before multiplyOn2 runs and is not used afterwards.
float[] result = transformer.getFFTResult();
HilbertSpectrumTicksKernelInvoker.multiplyOn2(transformer.getOutputDataPointer(), inputData.length+2);
transformer.releaseGPUResources();
}
}
The method responsible for the multiplication uses a CUDA kernel function.
Java method code:
/**
 * Launches the "calcSpectrumSamples" kernel on the given device buffer and
 * copies dataSize floats back to the host afterwards.
 *
 * @param inputDataPointer device buffer to process; cast to CUdeviceptr for the copy back
 * @param dataSize number of floats in the buffer
 */
public static void multiplyOn2(Pointer inputDataPointer, int dataSize){
// Enable exceptions and omit all subsequent error checks
JCudaDriver.setExceptionsEnabled(true);
// Create the PTX file by calling the NVCC
String ptxFileName = null;
try {
ptxFileName = FileService.preparePtxFile("resources\\HilbertSpectrumTicksKernel.cu");
} catch (IOException e) {
// TODO Auto-generated catch block
// After an IOException ptxFileName is still null and is passed to cuModuleLoad below.
e.printStackTrace();
}
// Initialize the driver and create a context for the first device.
// NOTE(review): cuCtxCreate makes a NEW context, while the caller's buffer comes from
// JCuda.cudaMalloc (runtime API). A device pointer is presumably only valid in the context
// it was allocated in, which may explain the CUDA_ERROR_UNKNOWN - confirm.
cuInit(0);
CUdevice device = new CUdevice();
cuDeviceGet(device, 0);
CUcontext context = new CUcontext();
cuCtxCreate(context, 0, device);
// Load the ptx file.
CUmodule module = new CUmodule();
cuModuleLoad(module, ptxFileName);
// Obtain a function pointer to the "calcSpectrumSamples" kernel.
CUfunction function = new CUfunction();
cuModuleGetFunction(function, module, "calcSpectrumSamples");
// Set up the kernel parameters: A pointer to an array
// of pointers which point to the actual values.
int N = (dataSize + 1) / 2 + 1;
int pair = (dataSize + 1) % 2 > 0 ? 1 : -1;
Pointer kernelParameters = Pointer.to(Pointer.to(inputDataPointer),
Pointer.to(new int[] { dataSize }),
Pointer.to(new int[] { N }), Pointer.to(new int[] { pair }));
// Call the kernel function.
// Enough blocks of 128 threads to cover dataSize elements (rounded up).
int blockSizeX = 128;
int gridSizeX = (int) Math.ceil((double) dataSize / blockSizeX);
cuLaunchKernel(function, gridSizeX, 1, 1, // Grid dimension
blockSizeX, 1, 1, // Block dimension
0, null, // Shared memory size and stream
kernelParameters, null // Kernel- and extra parameters
);
cuCtxSynchronize();
// Allocate host output memory and copy the device output
// to the host.
float freq[] = new float[dataSize];
cuMemcpyDtoH(Pointer.to(freq), (CUdeviceptr)inputDataPointer, dataSize
* Sizeof.FLOAT);
And the kernel function is next:
// Doubles, in place, each of the first dataSize floats of complexData; every
// thread handles the single element selected by its global x index.
// N and pair are accepted but not used by the body.
extern "C"
__global__ void calcSpectrumSamples(float* complexData, int dataSize, int N, int pair) {
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < dataSize) {
        complexData[idx] *= 2;
    }
}
But when I'm trying to pass the pointer which points to the result of FFT (in device memory) to the multiplyOn2 method, it throws the exception on cuCtxSynchronize() call. Exception:
Exception in thread "main" jcuda.CudaException: CUDA_ERROR_UNKNOWN
at jcuda.driver.JCudaDriver.checkResult(JCudaDriver.java:263)
at jcuda.driver.JCudaDriver.cuCtxSynchronize(JCudaDriver.java:1709)
at com.ifntung.cufft.HilbertSpectrumTicksKernelInvoker.multiplyOn2(HilbertSpectrumTicksKernelInvoker.java:73)
at com.ifntung.cufft.FFTTransformer.main(FFTTransformer.java:123)
I tried to do the same using Visual Studio C++ and there were no problems with it. Could you please help me?
P.S.
I can work around this problem, but then I need to copy the data from device memory to host memory and copy it back, creating new pointers every time before calling new CUDA functions, which slows down my program's execution.
Where exactly does the error occur, at which line?
The CUDA error can also be caused by an earlier call.
Why do you use Pointer.to(inputDataPointer)? You already have that device pointer. Are you now passing a pointer to the device pointer to the device?
Pointer kernelParameters = Pointer.to(Pointer.to(inputDataPointer),
I also recommend to use "this" qualifier or any other marking to detect instance variables. I hate and refuse to look through code, especially as nested and long as your example if I cannot see which scope the variable in methods have trying to debug it by just reading it.
I don't wanna ask myself always where the hell comes this variable from.
If a complex code in a question at SO is not formatted properly I don't read it.
I know how to do it using simple recursion, but in order to complete this particular assignment I need to be able to accumulate on the stack and throw an exception that holds the answer in it.
So far I have:
/**
 * Returns the value carried by the IntegerException that fibo_aux(index, 1, 1)
 * throws, or 0 when fibo_aux returns without throwing.
 */
public static int fibo(int index) {
    try {
        fibo_aux(index, 1, 1);
        return 0;
    } catch (IntegerException me) {
        return me.getIntValue();
    }
}
fibo_aux is supposed to throw an IntegerException (which holds the value of the answer, retrieved via getIntValue) and accumulate the answer on the stack, but so far I can't figure it out. Can anyone help?
I don't know what your implementations for fibo_aux and IntegerException look like, but the following two implementations work with your existing code (I don't think there's anything wrong with the code you posted, so I assume something is awry in either fibo_aux or IntegerException). Maybe you'll find this helpful.
/**
 * Accumulating Fibonacci helper that reports its result by throwing.
 * a and b are two consecutive Fibonacci numbers; every recursion step moves
 * the pair one position forward and counts index down. Called as
 * fibo_aux(n, 1, 1) it throws 1, 1, 2, 3, 5, ... for n = 1, 2, 3, 4, 5, ...
 * (any index below 1 is treated like 1).
 *
 * The result is a, the number the countdown has reached. Throwing a + b
 * would skip two terms: fibo_aux(1, 1, 1) would yield 2 instead of 1.
 *
 * @param index 1-based position of the wanted Fibonacci number
 * @param a     Fibonacci number at the current position
 * @param b     Fibonacci number following a
 * @throws IntegerException always; it carries the result
 */
public static void fibo_aux(int index, int a, int b) throws IntegerException
{
    if (index <= 1)
        throw new IntegerException(a);
    fibo_aux(index - 1, b, a + b);
}
An implementation for IntegerException:
/** Checked exception whose only payload is an integer value supplied at construction. */
public class IntegerException extends Exception
{
    private static final long serialVersionUID = -6795044518321782305L;

    private Integer intValue;

    /** Creates the exception carrying i. */
    public IntegerException(int i)
    {
        super();
        intValue = Integer.valueOf(i);
    }

    /** Returns the value given to the constructor. */
    public Integer getIntValue()
    {
        return this.intValue;
    }
}
Here you go :
/**
 * Computes a Fibonacci-style number by recursing with two accumulators and
 * delivering the result through a thrown FiboException, then prints it.
 */
public class ExcFib {
    /**
     * Prints the result for index 10.
     *
     * @param args not used
     */
    public static void main(String[] args) {
        ExcFib app = new ExcFib();
        app.fibo(10);
    }

    /** Throwable that carries the computed number in its public field n. */
    class FiboException extends Throwable
    {
        private static final long serialVersionUID = 1L;

        public int n;

        public FiboException(int n)
        {
            this.n = n;
        }
    }

    /** Prints "F(idx) = value" using the value thrown by fibo_aux(idx - 1, 1, 1). */
    public void fibo(int idx) {
        try {
            fibo_aux(idx - 1, 1, 1);
        } catch (FiboException e) {
            System.out.println("F(" + idx + ") = " + e.n);
        }
    }

    /**
     * While i is at least 1, recurses with (i - 1, k, j + k); once i drops
     * below 1, throws a FiboException holding k.
     */
    private void fibo_aux(int i, int j, int k) throws FiboException {
        if (i >= 1) {
            fibo_aux(i - 1, k, j + k);
        } else {
            throw new FiboException(k);
        }
    }
}