LLVM MCJIT / SEH Exception handling - exception

Lately, I've been attempting to get SEH exception handling to work in LLVM (3.8.1) together with MCJIT. So far without any luck.
From what I understand from the website ( http://llvm.org/docs/ExceptionHandling.html ), this is pretty much how this should be implemented. Compiling a minimal piece of code with clang gives pretty much the same LLVM IR code. However, when I try it, the program crashes with a nasty "Stack cookie instrumentation code detected a stack-based buffer overrun" error.
To illustrate what I've been attempting to do, I've created a minimum test case (I apologise for the amount of code...):
#include <exception>
#include <iostream>
#include <stdexcept>
#include <string>
#pragma warning(push)
#pragma warning(disable: 4267)
#pragma warning(disable: 4244)
#pragma warning(disable: 4800)
#pragma warning(disable: 4996)
#pragma warning(disable: 4141)
#pragma warning(disable: 4146)
#pragma warning(disable: 4624)
#pragma warning(disable: 4291)
#define DONT_GET_PLUGIN_LOADER_OPTION
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Triple.h"
#include "llvm/PassRegistry.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/LinkAllPasses.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/ExecutionEngine/Interpreter.h"
#include "llvm/ExecutionEngine/MCJIT.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PluginLoader.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#pragma warning(pop)
/// Host-side callback that the JIT-compiled `main` invokes.
///
/// It unconditionally throws so that the generated catch-all handler has an
/// exception to intercept.
///
/// @throws std::runtime_error (derived from std::exception) with a fixed message.
static void test()
{
    // You can use this to see that function calls work fine.
    // std::cout << "Foo!" << std::endl;

    // std::exception has no standard constructor taking a message (that
    // overload is an MSVC extension); std::runtime_error carries the same
    // text portably and is still caught as std::exception.
    throw std::runtime_error("Something we should try to catch.");
}
// Minimal reproduction: builds an LLVM module whose `main` invokes the host
// function `test` with a catch-all handler around it, runs the optimisation
// pipeline, JIT-compiles the module, executes the generated `main` and prints
// the integer it returns (0 = normal return, 1 = exception caught).
int main()
{
// Initialize LLVM
std::cout << "Initializing LLVM." << std::endl;
llvm::InitializeNativeTarget();
llvm::InitializeAllTargetMCs();
llvm::InitializeNativeTargetAsmPrinter();
llvm::InitializeNativeTargetAsmParser();
llvm::PassRegistry *Registry = llvm::PassRegistry::getPassRegistry();
llvm::initializeCore(*Registry);
llvm::initializeScalarOpts(*Registry);
llvm::initializeObjCARCOpts(*Registry);
llvm::initializeVectorization(*Registry);
llvm::initializeIPO(*Registry);
llvm::initializeAnalysis(*Registry);
llvm::initializeTransformUtils(*Registry);
llvm::initializeInstCombine(*Registry);
llvm::initializeInstrumentation(*Registry);
llvm::initializeTarget(*Registry);
// For codegen passes, only passes that do IR to IR transformation are
// supported.
llvm::initializeCodeGenPreparePass(*Registry);
llvm::initializeAtomicExpandPass(*Registry);
llvm::initializeRewriteSymbolsPass(*Registry);
llvm::initializeWinEHPreparePass(*Registry);
llvm::initializeDwarfEHPreparePass(*Registry);
llvm::initializeSjLjEHPreparePass(*Registry);
// Host CPU name and process triple are used below to configure the target machine and the engine.
llvm::StringRef MCPU = llvm::sys::getHostCPUName();
std::string MTrip = llvm::sys::getProcessTriple();
// NOTE(review): `features` is filled in here but not read again in this function.
static llvm::StringMap<bool, llvm::MallocAllocator> features;
llvm::sys::getHostCPUFeatures(features);
// Initialize module & context:
auto context = std::unique_ptr<llvm::LLVMContext>(new llvm::LLVMContext());
auto module = std::unique_ptr<llvm::Module>(new llvm::Module("native", *context));
// Create 'main' method:
llvm::Type* returnType = llvm::Type::getInt32Ty(*context);
std::vector<llvm::Type*> arguments;
// MCJIT only supports main(int, char**)
arguments.push_back(llvm::Type::getInt32Ty(*context));
arguments.push_back(llvm::Type::getInt8PtrTy(*context)->getPointerTo());
llvm::Function *fcn = llvm::cast<llvm::Function>(module->getOrInsertFunction("main", llvm::FunctionType::get(returnType, arguments, false)));
// Generate exception handler info for main:
llvm::AttrBuilder argBuilder;
argBuilder.addAttribute(llvm::Attribute::UWTable);
argBuilder.addAttribute("stack-protector-buffer-size", "8");
fcn->addAttributes(llvm::AttributeSet::FunctionIndex, llvm::AttributeSet::get(*context, llvm::AttributeSet::FunctionIndex, argBuilder));
// Exception handling requires a personality function. We want to use the SEH personality handler
// (declared here as a variadic function returning i32 and attached to `main` as an i8* constant).
llvm::Function *personalityHandler = llvm::cast<llvm::Function>(module->getOrInsertFunction("__CxxFrameHandler3", llvm::FunctionType::get(llvm::Type::getInt32Ty(*context), true)));
auto personalityPtr = llvm::ConstantExpr::getBitCast(personalityHandler, llvm::Type::getInt8PtrTy(*context));
fcn->setPersonalityFn(personalityPtr);
// Create some code. Basically we want to invoke our 'test' method
auto block = llvm::BasicBlock::Create(*context, "code", fcn);
llvm::IRBuilder<> builder(block);
// all other cases might throw an exception
auto continueBlock = llvm::BasicBlock::Create(*context, "invoke.cont", fcn);
auto catchDispatch = llvm::BasicBlock::Create(*context, "catch.dispatch", fcn);
// Register 'test' as an external function:
// (only a declaration is created in the module; the host address is mapped in after the engine exists)
const void* testFunctionPtr = &test;
auto testFunctionType = llvm::FunctionType::get(builder.getVoidTy(), false);
auto testFunction = llvm::Function::Create(testFunctionType, llvm::Function::ExternalLinkage, "test", module.get());
// invoke void @test() to label %invoke.cont unwind label %catch.dispatch
auto call = builder.CreateInvoke(testFunction, continueBlock, catchDispatch);
// return [ 0 from ok, 1 from catch handler ]
builder.SetInsertPoint(continueBlock);
auto phi = builder.CreatePHI(builder.getInt32Ty(), 2, "result");
phi->addIncoming(builder.getInt32(0), block);
builder.CreateRet(phi);
// Create exception handler:
// Create default catch block. Basically handles the exception and returns '1'.
builder.SetInsertPoint(catchDispatch);
auto parentPad = llvm::ConstantTokenNone::get(*context);
// %0 = catchswitch within none [label %catch] unwind to caller
auto catchSwitch = builder.CreateCatchSwitch(parentPad, nullptr, 1);
auto catchBlock = llvm::BasicBlock::Create(*context, "catch", fcn);
builder.SetInsertPoint(catchBlock);
catchSwitch->addHandler(catchBlock);
// MSVC code:
// %1 = catchpad within %0 [i8* null, i32 64, i8* null] == "catch all"
llvm::Value *nullPtr = llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(*context));
auto catchPad = builder.CreateCatchPad(catchSwitch, { nullPtr, builder.getInt32(0x40), nullPtr });
// catchret from %1 to label %return
// NOTE(review): `const1` is never used; the phi below creates its own constant 1.
auto const1 = builder.getInt32(1);
builder.CreateCatchRet(catchPad, continueBlock);
// set 1 for the catch handler
phi->addIncoming(builder.getInt32(1), catchBlock);
// *DONE* building the code.
// Dump the LLVM IR:
module->dump();
// Let's JIT the code:
std::string error;
auto trip = llvm::Triple::normalize(MTrip);
llvm::Triple triple(trip);
const llvm::Target *target = llvm::TargetRegistry::lookupTarget("x86-64", triple, error);
if (!target)
{
// NOTE(review): this throws a pointer into the local std::string `error`;
// nothing in this function catches it.
throw error.c_str();
}
llvm::TargetOptions Options;
std::unique_ptr<llvm::TargetMachine> targetMachine(
target->createTargetMachine(trip, MCPU, "", Options, llvm::Reloc::Default, llvm::CodeModel::Default, llvm::CodeGenOpt::Aggressive));
if (!targetMachine.get())
{
throw "Could not allocate target machine!";
}
// Create the target machine; set the module data layout to the correct values.
auto DL = targetMachine->createDataLayout();
module->setDataLayout(DL);
module->setTargetTriple(trip);
// Pass manager builder:
llvm::PassManagerBuilder pmbuilder;
pmbuilder.OptLevel = 3;
pmbuilder.BBVectorize = false;
pmbuilder.SLPVectorize = true;
pmbuilder.LoopVectorize = true;
pmbuilder.Inliner = llvm::createFunctionInliningPass(3, 2);
// NOTE(review): raw `new` with no matching delete in this function; presumably
// pmbuilder takes ownership of LibraryInfo - confirm against PassManagerBuilder.
llvm::TargetLibraryInfoImpl *TLI = new llvm::TargetLibraryInfoImpl(triple);
pmbuilder.LibraryInfo = TLI;
// Generate pass managers:
// 1. Function pass manager:
llvm::legacy::FunctionPassManager FPM(module.get());
pmbuilder.populateFunctionPassManager(FPM);
// 2. Module pass manager:
llvm::legacy::PassManager PM;
PM.add(llvm::createTargetTransformInfoWrapperPass(targetMachine->getTargetIRAnalysis()));
pmbuilder.populateModulePassManager(PM);
// 3. Execute passes:
// - Per-function passes:
FPM.doInitialization();
for (llvm::Module::iterator I = module->begin(), E = module->end(); I != E; ++I)
{
// Declarations (such as `test` and the personality function) have no body to optimise.
if (!I->isDeclaration())
{
FPM.run(*I);
}
}
FPM.doFinalization();
// - Per-module passes:
PM.run(*module);
// All done, *RUN*.
// Ownership of the module moves into the engine builder here; `fcn` and
// `testFunction` are used below as handles into that module.
llvm::EngineBuilder engineBuilder(std::move(module));
engineBuilder.setEngineKind(llvm::EngineKind::JIT);
engineBuilder.setMCPU(MCPU);
engineBuilder.setMArch("x86-64");
engineBuilder.setUseOrcMCJITReplacement(false);
engineBuilder.setOptLevel(llvm::CodeGenOpt::None);
// NOTE(review): create()'s result is not checked for null and the engine is
// never deleted in this function.
llvm::ExecutionEngine* engine = engineBuilder.create();
// Register global 'test' function:
engine->addGlobalMapping(testFunction, const_cast<void*>(testFunctionPtr)); // Yuck...
// Finalize
engine->finalizeObject();
// Invoke:
// Arguments for the generated main(i32, i8**): 0 and a null pointer.
std::vector<llvm::GenericValue> args(2);
args[0].IntVal = llvm::APInt(32, static_cast<uint64_t>(0), true);
args[1].PointerVal = nullptr;
llvm::GenericValue gv = engine->runFunction(fcn, args);
auto result = int(gv.IntVal.getSExtValue());
std::cout << "Result after execution: " << result << std::endl;
// Block until a line is read from stdin before returning.
std::string s;
std::getline(std::cin, s);
}
This produces the following IR code:
; ModuleID = 'native'
; Module dump of the JIT input: @main invokes the external @test and routes any
; unwind from it into a catch-all handler.
; Function Attrs: uwtable
; Returns 0 when @test returns normally and 1 when the catch pad handled an exception.
define i32 @main(i32, i8**) #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
code:
invoke void @test()
to label %invoke.cont unwind label %catch.dispatch
invoke.cont: ; preds = %catch, %code
; 0 arrives from the normal path, 1 from the catch handler.
%result = phi i32 [ 0, %code ], [ 1, %catch ]
ret i32 %result
catch.dispatch: ; preds = %code
%2 = catchswitch within none [label %catch] unwind to caller
catch: ; preds = %catch.dispatch
; The operand list [i8* null, i32 64, i8* null] is what the generating code labels "catch all".
%3 = catchpad within %2 [i8* null, i32 64, i8* null]
catchret from %3 to label %invoke.cont
}
declare i32 @__CxxFrameHandler3(...)
declare void @test()
attributes #0 = { uwtable "stack-protector-buffer-size"="8" }
Q: What am I missing, why doesn't this work and how to fix it?

After posting this issue on the llvm developer list, I got a friendly reply explaining how this issue is related to a known bug: https://llvm.org/bugs/show_bug.cgi?id=24233 .
Basically what happens is that LLVM doesn't implement the code that Windows (more specifically: SEH and debugging) requires for handling stack frames. I'm by no means an expert on this subject, but until this is implemented, SEH won't know what to do, which means C++ exceptions basically won't work.
An obvious workaround is of course to pass the object as a pointer during the function call and perform an if-then-else. That way, exceptions are avoided. However, this is pretty nasty and will probably give a serious performance penalty. Also, this makes the flow in the compiler as well as the generated program much more complicated. In other words: let's just say I'd rather not.
I'll leave the question open; if someone happens to find a hack or figure out a workaround, I'll gladly accept it.

Related

C++ How do I turn a string into a json object? [duplicate]

I'm currently working in C++, getting an HTTP response from a request that I write into a .txt file using ostream. This happens asynchronously and I don't want to change this.
Once the data is done being written, I want to read from the file
{"data":{"request":[{"type":"City","query":"London, United Kingdom"}],"weather":[{"date":"2013-04-21","astronomy".....
~somehow~ prettify the string using either an outside library like nlohmann/json or other(?) and then
a)print it to the console and
b) save it in a different file (pretty.json)
I am having trouble understanding which method to use from:
https://github.com/nlohmann/json
Any ideas how to approach this?
I was thinking getting the file line by line until I hit EOF into a sort of "buffer" and then running _json on that and saving the solution which can be displayed on the console...
My code so far
#include <cpprest/http_client.h>
#include <cpprest/filestream.h>
#include <iostream>
#include <sstream>
#include "json.hpp"
using namespace utility; // string conversion
using namespace web; // URI
using namespace web::http; // HTTP commands
using namespace web::http::client; // HTTP Client features
using namespace concurrency::streams; // Asynch streams, like Node
using json = nlohmann::json;
// Downloads an HTTP response body into results.txt through a chain of
// asynchronous continuations, then blocks until the whole chain has finished.
// NOTE(review): as posted, the snippet is incomplete - the `http_client`
// declaration is elided and `stringBuilder` is not defined in the visible code.
int main()
{
// Shared handle to the output stream so every continuation (captured by value) sees the same stream.
auto fileStream = std::make_shared<ostream>();
// Open stream to output file.
pplx::task<void> requestTask = fstream::open_ostream(U("results.txt"))
.then([=](ostream outFile)
{
*fileStream = outFile;
// NOTE(review): the client's construction (its URL) and the terminating ';' are elided here.
http_client client //gets the info
return client.request(methods::GET, stringBuilder.to_string());
})
.then([=](http_response response) // set up response handler
{
printf("Received response status code:%u\n", response.status_code());
// Stream the whole response body into the file's stream buffer.
return response.body().read_to_end(fileStream->streambuf());
})
.then([=](size_t) // close file stream
{
return fileStream->close();
})
.then([=]()
{
// Runs after the file stream has been closed; this is where the JSON is meant
// to be read back. `j` is not used yet in this version.
// NOTE(review): std::ifstream needs <fstream>, which is not among the visible
// includes - it may arrive transitively; confirm.
nlohmann::json j;
std::ifstream i;
i.open("results.txt"); // ?? <<< === this is where my question is
});
// Wait for all the outstanding I/O to complete, handle exceptions
try
{
requestTask.wait();
}
catch (const std::exception &e)
{
printf("Error exception:%s\n", e.what());
}
return 0;
}
SOLUTION:
.then([=]()
{
// read a JSON file
std::ifstream readFromFile("results.txt");
if (readFromFile.is_open()) {
nlohmann::json j;
readFromFile >> j;
// write prettified JSON to another file
std::ofstream writeToFile("pretty.json");
writeToFile << std::setw(4) << j << std::endl;
readFromFile.close();
writeToFile.close();
}
else {
std::cout << "unable to open file";
}
});
You have two choices to prettify with nlohmann.
Use dump, which returns the prettified JSON as a string:
int indent = 4;
nlohmann::json data;
data.dump(indent);
Or use the stream output overload with field width set
std::ofstream o("pretty.json");
o << std::setw(4) << data << std::endl;

function IBuilder::buildEngineWithConfig() returns null

I am using tensorRT to build a small model as below:
#include "NvInfer.h"
#include "cuda_runtime_api.h"
#include <fstream>
#include <map>
#include <chrono>
#include <iostream>
#include "include/Utils.h"
#include <memory>
#include <vector>
#include <cassert>
#include "src/InferDeleter.cpp"
using namespace std;
using namespace nvinfer1;
// Logger passed to the TensorRT builder. It echoes every message to stdout
// except those reported at Severity::kINFO. `gLogger` is the single instance
// used by main().
class MyLogger : public ILogger {
    // Declared in the class's default (private) section, exactly as before.
    void log(Severity severity, const char *msg) override {
        const bool isInfo = (severity == Severity::kINFO);
        if (isInfo) {
            return; // informational messages are suppressed
        }
        cout << msg << endl;
    }
} gLogger;
// Builds a small MNIST-style network (scale -> conv -> pool -> conv -> pool ->
// fc -> relu -> fc -> softmax) with the TensorRT network-definition API, using
// weights loaded from a .wts file, and then asks the builder for an engine.
// NOTE(review): none of the pointers returned by the TensorRT calls below are
// checked for null, and nothing created here is released in this function.
int main() {
//load weights
map<string, Weights> mWeightMap = Utils::getInstance().loadWeights("Weights/mnistapi.wts");
//a few configuration parameters
const char *INPUT_BLOB_NAME = "input";
const char *OUTPUT_BLOB_NAME = "output";
// NOTE(review): `dataType` is not used below; the input is created with DataType::kFLOAT directly.
DataType dataType = nvinfer1::DataType::kFLOAT;
int INPUT_H = 28, INPUT_W = 28;
int batchSize = 1;
//define the network
IBuilder *builder = createInferBuilder(gLogger);
INetworkDefinition *network = builder->createNetworkV2(0U);
// Create the input tensor with dims {1, INPUT_H, INPUT_W}, i.e. {1, 28, 28}.
ITensor *data = network->addInput(
INPUT_BLOB_NAME, DataType::kFLOAT, Dims3{1, INPUT_H, INPUT_W});
// Create scale layer with default power/shift and specified scale parameter.
const float scaleParam = 0.0125f;
const Weights power{DataType::kFLOAT, nullptr, 0};
const Weights shift{DataType::kFLOAT, nullptr, 0};
const Weights scale{DataType::kFLOAT, &scaleParam, 1};
IScaleLayer *scale_1 = network->addScale(*data, ScaleMode::kUNIFORM, shift, scale, power);
// Add convolution layer with 20 outputs and a 5x5 filter.
// NOTE(review): mWeightMap[...] default-inserts an entry when the key is
// missing from the .wts file - verify every key used below exists.
IConvolutionLayer *conv1 = network->addConvolutionNd(
*scale_1->getOutput(0), 20, Dims{2, {5, 5}, {}}, mWeightMap["conv1filter"], mWeightMap["conv1bias"]);
conv1->setStride(DimsHW{1, 1});
// Add max pooling layer with stride of 2x2 and kernel size of 2x2.
IPoolingLayer *pool1 = network->addPoolingNd(*conv1->getOutput(0), PoolingType::kMAX, Dims{2, {2, 2}, {}});
pool1->setStride(DimsHW{2, 2});
// Add second convolution layer with 50 outputs and a 5x5 filter.
IConvolutionLayer *conv2 = network->addConvolutionNd(
*pool1->getOutput(0), 50, Dims{2, {5, 5}, {}}, mWeightMap["conv2filter"], mWeightMap["conv2bias"]);
conv2->setStride(DimsHW{1, 1});
// Add second max pooling layer with stride of 2x2 and kernel size of 2x2.
IPoolingLayer *pool2 = network->addPoolingNd(*conv2->getOutput(0), PoolingType::kMAX, Dims{2, {2, 2}, {}});
pool2->setStride(DimsHW{2, 2});
// Add fully connected layer with 500 outputs.
IFullyConnectedLayer *ip1
= network->addFullyConnected(*pool2->getOutput(0), 500, mWeightMap["ip1filter"], mWeightMap["ip1bias"]);
// Add activation layer using the ReLU algorithm.
IActivationLayer *relu1 = network->addActivation(*ip1->getOutput(0), ActivationType::kRELU);
// Add second fully connected layer with 10 outputs.
IFullyConnectedLayer *ip2 = network->addFullyConnected(
*relu1->getOutput(0), 10, mWeightMap["ip2filter"], mWeightMap["ip2bias"]);
// Add softmax layer to determine the probability.
ISoftMaxLayer *prob = network->addSoftMax(*ip2->getOutput(0));
prob->getOutput(0)->setName(OUTPUT_BLOB_NAME);
network->markOutput(*prob->getOutput(0));
//build engine
IBuilderConfig *builderConfig = builder->createBuilderConfig();
builder->setMaxBatchSize(batchSize);
// Workspace limit: 1<<24 bytes (16 MiB).
builderConfig->setMaxWorkspaceSize(1<<24);
// This is the call that comes back as nullptr (the problem being reported).
ICudaEngine *engine = builder->buildEngineWithConfig(*network, *builderConfig);
//later uses of engine.
return 0;
}
However, the function builder->buildEngineWithConfig(*network, *builderConfig) returns nullptr. I tried changing the maximum workspace size to other values, but it still does not work. I also visited this post, but nothing there helped. Can anyone point out the cause of the problem? Thank you!
After a few days of wrestling with this problem, I found that if the layers in the model do not match the weights passed in, no error is reported, but the TensorRT engine cannot be created, so nothing that depends on it can run. Therefore, the best thing to do in this situation is to carefully check the network definition, layer by layer, against the .wts file.

C++ Builder 10.2 Tokyo > JSON > replace JSONvalue causes "access violation" with JsonValue->Free()

My environment:
RadStudio 10.2 Tokyo
Working on Windows 10 (64bit) v1809
I was searching the way to replace JSON value.
Then, I came across the following Q and A.
Delphi XE7: How to change a JSON value using System.JSON (versus SuperObject)
In Delphi:
JoPair.JsonValue.Free;
JoPair.JsonValue := TJSONNumber.Create(123);
Following this, I thought, it would be in C++ Builder
JoPair->JsonValue->Free();
JoPair->JsonValue = new TJSONNumber(123);
However, it caused "access violation" error in ToString();
Instead, I commented out JoPair->JsonValue->Free();, then no problem.
Question:
In C++ Builder, do I need to Free JoPair->JsonValue?
Without freeing the JsonValue, it may cause a memory leak.
source code
Following is the actual code I checked
//---------------------------------------------------------------------------
#include <vcl.h>
#pragma hdrstop
#include <DBXJSON.hpp> // for JSON
#include <memory> // for unique_ptr
#include "Unit1.h"
//---------------------------------------------------------------------------
#pragma package(smart_init)
#pragma resource "*.dfm"
TForm1 *Form1;
//---------------------------------------------------------------------------
// Form constructor: forwards Owner to the TForm base class and does nothing else.
__fastcall TForm1::TForm1(TComponent* Owner)
: TForm(Owner)
{
}
//---------------------------------------------------------------------------
// Click handler: reads test.json (one JSON object per line), replaces the value
// of every pair whose key contains "hogehoge" with the string "9", and writes
// the rewritten objects, one per line, to out.json.
//
// Sender: the control that raised the event (unused).
//
// Lines that do not parse to a JSON object are skipped.
void __fastcall TForm1::Button1Click(TObject *Sender)
{
    String srcFileName = L"test.json"; // source
    String dstFileName = L"out.json"; // destination
    String targetKeySubString = L"hogehoge"; // keys containing this substring are rewritten
    String targetValue = "9";

    // 1. read JSON strings
    std::unique_ptr<TStringList> slread(new TStringList);
    slread->LoadFromFile(srcFileName);

    // 2. replace values for the key (target is checked by substring)
    std::unique_ptr<TStringList> slwrite(new TStringList);
    for (int li = 0; li < slread->Count; li++) { // file line index
        String jsonText = slread->Strings[li];
        // this is needed for avoiding error caused by the file path treating backslash
        jsonText = StringReplace(jsonText, L"\\", L"\\\\", TReplaceFlags()<<rfReplaceAll);

        // Own the parse result per line. Previously the object was freed only
        // once, after the loop, which leaked every line but the last and freed
        // an uninitialised pointer when the file was empty.
        std::unique_ptr<TJSONValue> parsed(TJSONObject::ParseJSONValue(jsonText));
        TJSONObject *jsonObj = dynamic_cast<TJSONObject*>(parsed.get());
        if (jsonObj == NULL) {
            continue; // not parseable, or not a JSON object
        }

        for (int pi = 0; pi < jsonObj->Size(); pi++) { // pair index
            TJSONPair *pairObj = jsonObj->Get(pi);
            String jsonKey = pairObj->JsonString->Value();
            if (jsonKey.Pos(targetKeySubString) == 0) {
                continue;
            }
            // replace value
            // (ref: https://stackoverflow.com/questions/33426576/delphi-xe7-how-to-change-a-json-value-using-system-json-versus-superobject)
            //
            // The old value is deliberately NOT freed by hand: doing so is what
            // produced the access violation in ToString().
            // NOTE(review): presumably the JsonValue setter releases the previous
            // value on this RTL version - confirm in the System.JSON source.
            pairObj->JsonValue = new TJSONString(targetValue);
        }
        slwrite->Add(jsonObj->ToString());
    }

    // 3. output
    slwrite->SaveToFile(dstFileName);
    ShowMessage(L"Done");
}
//---------------------------------------------------------------------------
Example
{"1_hogehoge":"3", "2_fugafuga":"1","3_hogehoge":"4", "4_fugafuga":"1", "5_hogehoge":"5", "6_fugafuga":"9"}
{"1_hogehoge":"9","2_fugafuga":"1","3_hogehoge":"9","4_fugafuga":"1","5_hogehoge":"9","6_fugafuga":"9"}
Source code (10.2 Tokyo)
I have updated the source code. Still same problem.
I also used ToJSON() instead of ToString(), with the same error (access violation).
I also tried using std::unique_ptr, which caused another error. So I gave up using std::unique_ptr on this topic now (may better be investigated separately).
//---------------------------------------------------------------------------
#include <vcl.h>
#pragma hdrstop
#include <System.JSON.hpp>
#include <memory> // for unique_ptr
#include "Unit1.h"
//---------------------------------------------------------------------------
#pragma package(smart_init)
#pragma resource "*.dfm"
TForm1 *Form1;
//---------------------------------------------------------------------------
// Form constructor: forwards Owner to the TForm base class and does nothing else.
__fastcall TForm1::TForm1(TComponent* Owner)
: TForm(Owner)
{
}
//---------------------------------------------------------------------------
// Click handler: reads test.json (one JSON object per line), replaces the value
// of every pair whose key contains "hogehoge" with the string "9", and writes
// the rewritten objects, one per line, to out.json.
//
// Sender: the control that raised the event (unused).
//
// Lines that do not parse to a JSON object are skipped.
void __fastcall TForm1::Button1Click(TObject *Sender)
{
    String srcFileName = L"test.json"; // source
    String dstFileName = L"out.json"; // destination
    String targetKeySubString = L"hogehoge"; // keys containing this substring are rewritten
    String targetValue = "9";

    // 1. read JSON strings
    std::unique_ptr<TStringList> slread(new TStringList);
    slread->LoadFromFile(srcFileName);

    // 2. replace values for the key (target is checked by substring)
    std::unique_ptr<TStringList> slwrite(new TStringList);
    for (int li = 0; li < slread->Count; li++) { // file line index
        String jsonText = slread->Strings[li];
        // this is needed for avoiding error caused by the file path treating backslash
        jsonText = StringReplace(jsonText, L"\\", L"\\\\", TReplaceFlags()<<rfReplaceAll);

        // Own whatever the parser returns, so a value that is valid JSON but not
        // an object (which the dynamic_cast rejects) is released too.
        std::unique_ptr<TJSONValue> parsed(TJSONObject::ParseJSONValue(jsonText));
        TJSONObject *jsonObj = dynamic_cast<TJSONObject*>(parsed.get());
        if (jsonObj == NULL) {
            continue;
        }

        for (int pi = 0; pi < jsonObj->Count; pi++) { // pair index
            TJSONPair *pairObj = jsonObj->Pairs[pi];
            String jsonKey = pairObj->JsonString->Value();
            if (jsonKey.Pos(targetKeySubString) == 0) {
                continue;
            }
            // replace value
            // (ref: https://stackoverflow.com/questions/33426576/delphi-xe7-how-to-change-a-json-value-using-system-json-versus-superobject)
            //
            // Do NOT release the old value by hand (neither ->Free() nor delete)
            // before assigning: that is what produced the access violation in
            // ToString()/ToJSON().
            // NOTE(review): presumably the JsonValue setter releases the previous
            // value itself on 10.2 Tokyo - confirm in the System.JSON source.
            pairObj->JsonValue = new TJSONString(targetValue);
        }

        slwrite->Add(jsonObj->ToString());
        // `parsed` releases the object (and the pairs it owns) at the end of each iteration.
    }

    // 3. output
    slwrite->SaveToFile(dstFileName);
    ShowMessage(L"Done");
}
//---------------------------------------------------------------------------

Cuda Thrust - max vec3

When I want to perform a reduction on an array of floats, I usually do the following:
float res = *thrust::max_element(thrust::device,
thrust::device_ptr<float>(dDensities),
thrust::device_ptr<float>(dDensities+numParticles)
);
However what i would like to do now is pretty much the same thing on a vec3 (the glm library type) array :
float res = *thrust::max_element(thrust::device,
thrust::device_ptr<glm::vec3>(dDensities),
thrust::device_ptr<glm::vec3>(dDensities+numParticles)
);
As you can see, this makes no sense because the '<' operator is not defined on glm::vec3. But I would like to get the maximum vec3 based on its length:
len = sqrtf(v.x*v.x + v.y*v.y + v.z*v.z);
Is that possible ?
Yes, it's possible. You may want to read the thrust quickstart guide if you're not already familiar with it.
If you review the thrust extrema documentation, you'll note that thrust::max_element comes in several different varieties (as do most thrust algorithms). One of these accepts a binary comparison functor. We can define a comparison functor which will do what you want.
Here's a trivial worked example:
$ cat t134.cu
#include <thrust/extrema.h>
#include <thrust/device_ptr.h>
#include <glm/glm.hpp>
#include <iostream>
// "Less than" comparison functor for 3-component vectors, ordering them by
// Euclidean length. Intended as the comparator argument of thrust::max_element.
//
// It compares squared lengths (x*x + y*y + z*z) rather than lengths: the square
// root is monotonic on non-negative values, so the ordering is the same and
// the sqrt is avoided.
//
// T must expose numeric members x, y and z.
struct comp
{
    // Takes its arguments by const reference and is itself const, so it also
    // accepts const elements and temporaries, and can be invoked through a
    // const copy of the functor.
    template <typename T>
    __host__ __device__
    bool operator()(const T &t1, const T &t2) const {
        return (t1.x*t1.x + t1.y*t1.y + t1.z*t1.z)
             < (t2.x*t2.x + t2.y*t2.y + t2.z*t2.z);
    }
};
int main(){
int numParticles = 3;
glm::vec3 d[numParticles];
d[0].x = 0; d[0].y = 0; d[0].z = 0;
d[1].x = 2; d[1].y = 2; d[1].z = 2;
d[2].x = 1; d[2].y = 1; d[2].z = 1;
glm::vec3 *dDensities;
cudaMalloc(&dDensities, numParticles*sizeof(glm::vec3));
cudaMemcpy(dDensities, d, numParticles*sizeof(glm::vec3), cudaMemcpyHostToDevice);
glm::vec3 res = *thrust::max_element(thrust::device,
thrust::device_ptr<glm::vec3>(dDensities),
thrust::device_ptr<glm::vec3>(dDensities+numParticles),
comp()
);
std::cout << "max element x: " << res.x << " y: " << res.y << " z: " << res.z << std::endl;
}
$ nvcc -arch=sm_61 -o t134 t134.cu
$ ./t134
max element x: 2 y: 2 z: 2
$

odeint streaming observer and related questions

I have a system of 4 coupled equations to solve and a parameter Gamma[i] to iterate over. Since I am quite new to C++, my code is very rudimentary. If it looks sophisticated and elegant in certain parts, it is only because I have adapted code from the author of odeint. :)
This question is related to (http://stackoverflow.com/questions/12060111/using-odeint-function-definition/12066958#comment16253600_12066958) but not exactly the same. Please do not delete this. :(
Questions have been inserted between the lines of code.
#include <iostream>
#include <iterator>
#include <algorithm>
#include <boost/numeric/odeint.hpp>
#include <cmath>
#include <vector>
#include <fstream>
#include <iomanip>
using namespace std;
using namespace boost::numeric::odeint;
// Right-hand side of the ODE system, parameterised by `gamma`.
//
// The state `u` and the derivative `du` both have 5 components, labelled
// u1, u2, theta, phi1, phi2 (in that order) in the comments below.
class NLI_class {
private:
    double gamma;
public:
    // r: value of the gamma parameter used for this integration run.
    NLI_class (double r) : gamma(r) {}

    // Writes du/dz for the state `u` into `du`.
    //
    // u  : current state, at least 5 elements (read-only).
    // du : output derivative, at least 5 elements; overwritten.
    // z  : independent variable; unused because the system is autonomous.
    //
    // The state is taken by const reference and the operator is const, so the
    // functor can be called with const states and through const copies.
    void operator()( const std::vector<double> &u , std::vector<double> &du , double z ) const {
        (void)z;
        // NOTE(review): the cosine argument is u[3], which the labels below call
        // phi1, while theta is u[2] - confirm this index is intended.
        const double cosTerm = std::cos(u[3]);
        const double u0Squared = u[0]*u[0];
        const double u1Squared = u[1]*u[1];

        du[0] = u[0]*u[1]*cosTerm; //u1
        du[1] = -u0Squared*cosTerm; //u2
        du[2] = gamma * (2.0/u0Squared - 1.0/u1Squared); //theta
        du[3] = gamma * (1.0/u0Squared); //phi1
        du[4] = gamma * (1.0/u1Squared); //phi2;
    }
};
Question #1:
In my original program, I had something like this to pipe the output to a csv file:
// Writes every element of `v` to the file `filename`, one value per line, with
// a stream precision of 64 significant digits.
//
// v        : values to write (not modified).
// filename : path of the file to create or truncate.
//
// If the file cannot be opened nothing is written and no error is reported,
// as before.
inline void save(const std::vector<double>& v, const std::string& filename)
{
    std::ofstream output(filename);
    // Precision is sticky, so it only needs to be set once.
    output << std::setprecision(64);
    for (const double value : v) {
        // '\n' instead of endl: same file contents without a flush per line.
        output << value << '\n';
    }
}
How do I adapt streaming_observer to do what my save() does? Basically, I want to generate .csv files for each iteration i. At this point, I am doing it the ugly way, i.e compiling everything, opening a windows command prompt and then piping the exe output to a text file. This generates one big file with all iterations thrown in there.
This becomes very painful to analyze for a large number of iterations.
// Observer that prints one tab-separated row per call to the wrapped stream:
// the time first, then every component of the state, then a newline.
struct streaming_observer {
    std::ostream &m_out; // destination stream; held by reference, not owned

    streaming_observer( std::ostream &out ) : m_out( out ) {}

    // x: state to print; t: value written in the first column.
    void operator()( const vector<double> &x , double t ) const
    {
        m_out << t;
        for( const double component : x ) {
            m_out << "\t" << component;
        }
        m_out << "\n";
    }
};
// Program entry point, first part: sets up the constants and initial
// conditions, builds the delta and Gamma tables, and saves both to .csv files.
int main(){
// State vector with 5 components; it is (re)initialised before each integration run.
vector<double> x( 5 );
vector<double> Gamma;
vector<double>delta;
const double pi=acos(-1.0);
short delta_n=5;
// delta advances from 0 in delta_n equal steps of 2*pi/delta_n.
const double delta_step=(2*pi)/delta_n;
const double dz = 0.01;
const double zeta = 3.0;
const double theta_initial=0.0;
// Initial amplitudes; by construction u10*u10 + u20*u20 == 1.
const double u20=tanh(zeta);
const double u10=sqrt(1.0-(u20*u20));
double d=0.0;
double G=0.0;
// The loop bound is inclusive, so delta_n+1 entries are stored in each table.
for(int i=0;i<=delta_n;i++){
//When i=0, the d=0.0 and G=0.0 are pushed into the vector.
delta.push_back(d);
Gamma.push_back(G);
// Compute delta and Gamma
// (these values are the ones pushed by the NEXT iteration)
d=d+delta_step;
G=-u10*u10*u20*sin(theta_initial+d);
}
save(delta,"delta.csv");
save(Gamma,"Gamma.csv");
Question#2:
The results I get here do not agree with what I get using a simple explicit Euler method. Hence, I would like to see the RK4 coefficients (preferably dump them to a file) or the intermediate steps. How can I get this information?
//Numeric Integration
// One integration run per Gamma value: the state is reset to the same initial
// conditions each time, then integrated from 0.0 to 3.0 in steps of dz.
for (unsigned i = 0; i < Gamma.size(); ++i) {
x[0] = u10;
x[1] = u20;
x[2] = 0.0;
x[3] = 0.0;
x[4] = 0.0;
NLI_class nli_obj(Gamma[i]);
// The observer writes its rows to std::cout, so the output of all runs ends up in one stream.
integrate_const( runge_kutta4< vector<double > >(), nli_obj, x , 0.0 , 3.0 , dz,streaming_observer( std::cout ) );
}
}
Thank you for all those who helped!
Edit:
Is there some way to get a running error estimate? Note that u[0]*u[0]+u[1]*u[1]=1 at all times.
Question #1 :
I do not understand exactly what kind of output you need. But if you want to write the result after each iteration you can implement an output observer like this:
// Observer that writes each observed state to its own file.
//
// The n-th call (counting from 0) creates "<filename>_NNNN.csv", where NNNN is
// n zero-padded to at least four digits, and writes one state component per line.
struct output_observer
{
    std::string filename_; // prefix shared by all generated file names
    std::size_t count_;    // number of calls made so far on this instance

    output_observer( const std::string &filename ) : filename_( filename ) , count_( 0 ) { }

    // x: state to dump; any container with size() and operator[].
    // The second argument is accepted for signature compatibility and ignored.
    //
    // Templated so the observer no longer depends on `state_type`/`time_type`
    // typedefs being declared elsewhere.
    template< class State , class Time >
    void operator()( const State &x , Time /*unused*/ )
    {
        // Build the name with std::string rather than sprintf into a fixed
        // buffer: no overflow for long prefixes, and no "%lu" mismatch on
        // platforms where size_t is not unsigned long.
        std::string index = std::to_string( count_ );
        if( index.size() < 4 ) index = std::string( 4 - index.size() , '0' ) + index;
        const std::string path = filename_ + "_" + index + ".csv";

        std::ofstream fout( path.c_str() );
        for( std::size_t i=0 ; i<x.size() ; ++i ) fout << x[i] << "\n";
        ++count_;
    }
};
You can apply this observer simply by
integrate_const( runge_kutta4< vector<double > >() , nli_obj , x ,
0.0 , 3.0 , dz , output_observer( "filename" ) );
Is this the desired functionality?
Question #2 :
It is not possible to see the intermediate steps of runge_kutta4. The coefficients are the standard ones for the classical Runge-Kutta method: http://en.wikipedia.org/wiki/Runge%E2%80%93Kutta_methods
Question #3 :
odeint has several error steppers, which estimate the error made during one step. You can use for example the Runge_Kutta Cash Karp algorithm;
runge_kutta_cash_karp54< state_type > rk;
state_type xerr;
rk.do_step( nli_obj , x , t , xerr );
which makes ONE step and estimates the error and writes the error result in xerr.