I'm writing a shader in CG language for Unity3d.
If you make a shader for transparent object you need to create two similar passes in SubShader. The first one to render only back faces (with Cull Front) and the second one to render only front faces (with Cull Back). But the code for vertex and fragment function is the same for two passes.
Is it possible to avoid duplicating the code by declaring functions that are shared between the passes?
I want to have something like in my code example:
// Illustrative pseudo-code of the desired layout: one CG section placed
// directly in the SubShader holds the shared fragment function, and the
// frag function of each pass simply forwards to it.
Shader "cool shader" {
Properties {
...
}
SubShader {
CGPROGRAM
// vertexOutput would have to be declared somewhere before this point
float4 sharedFragFoo(vertexOutput i) : COLOR // the function we would like to define only once
{
....
return float4(...);
}
ENDCG
// First pass: has its own vert, frag only forwards to the shared function.
pass {
CGPROGRAM
#pragma vertex vert
#pragma fragment frag
vertexOutput vert(vertexInput v) {
vertexOutput o;
...
return o;
}
float4 frag(vertexOutput i) : COLOR
{
return sharedFragFoo(i); // call the function shared between the passes
}
ENDCG
}
// Second pass: same vert/frag text as the first pass.
pass {
CGPROGRAM
#pragma vertex vert
#pragma fragment frag
vertexOutput vert(vertexInput v) {
vertexOutput o;
...
return o;
}
float4 frag(vertexOutput i) : COLOR
{
return sharedFragFoo(i); // call the function shared between the passes
}
ENDCG
}
}
}
UPD: Found out how to do it using includes.
But is it possible to do inside one file?
You can do it in one file using CGINCLUDE. If you look at the shader for MobileBlur ("Hidden/FastBlur") by Unity it has shared code at the top and passes below.
Here are just the key parts - note CGINCLUDE/ENDCG outside of the SubShader/Pass
Shader "YourShader"
{
    ...
    // Everything between CGINCLUDE and ENDCG is shared: it sits outside the
    // SubShader/Pass blocks, so every CGPROGRAM block below can use myVert and
    // myFrag without redefining them.
    CGINCLUDE
    #include "UnityCG.cginc"

    // Vertex-to-fragment data used by all passes.
    struct shared_v2f
    {
        float4 pos : SV_POSITION;
    };  // a struct declaration must end with ';'

    // Shared vertex function: transforms the incoming vertex by the
    // model-view-projection matrix. appdata_img comes from UnityCG.cginc.
    shared_v2f myVert( appdata_img v )
    {
        shared_v2f o;
        o.pos = mul (UNITY_MATRIX_MVP, v.vertex);
        return o;
    }

    // Shared fragment function: returns a constant colour.
    fixed4 myFrag( shared_v2f i ) : SV_Target
    {
        return fixed4( 1.0, 0.5, 0.0, 1.0 );
    }
    ENDCG

    SubShader
    {
        ...
        Pass
        {
            CGPROGRAM
            // Only the entry points are named here; the bodies live in the
            // CGINCLUDE block above.
            #pragma vertex myVert
            #pragma fragment myFrag
            ENDCG
        }
    }
}
Answering my own question. Weirdo!
Hope it will help somebody else.
You can put everything that goes between CGPROGRAM and ENDCG into a separate *.cginc file and include it inside each pass.
Important! You still need to write
#pragma vertex vert and #pragma fragment frag inside your main shader file; otherwise the shader will compile but won't work. I suppose the reason is that pragmas are processed before includes.
Here is my code sample.
Main shader definition file:
// Main shader file: both passes compile the same vert/frag pair pulled in from
// shared.cginc and differ only in their render state.
Shader "cool shader" {
Properties {
// properties
}
SubShader {
...
// Pass 1: Cull Front.
pass {
Cull Front
ZWrite Off
// NOTE(review): unlike the second pass, no Blend mode is set here - confirm this is intended.
CGPROGRAM
// The entry-point pragmas are written in this file, not in the include.
#pragma vertex vert
#pragma fragment frag
#include "shared.cginc"
ENDCG
}
// Pass 2: Cull Back, alpha-blended.
pass {
Cull Back
ZWrite Off
Blend SrcAlpha OneMinusSrcAlpha
CGPROGRAM
// The entry-point pragmas are written in this file, not in the include.
#pragma vertex vert
#pragma fragment frag
#include "shared.cginc"
ENDCG
}
}
}
Shared file shared.cginc:
// shared.cginc: declarations and the vert/frag pair included by every pass.
// The SHARED_FOO guard keeps the contents from being defined twice within a
// single compilation.
#ifndef SHARED_FOO
#define SHARED_FOO
// Uniforms read by the shader functions below.
uniform sampler2D _MainTex;
uniform float4 _MainTex_ST;
uniform float4 _Color;
// other variables....
// Per-vertex input.
struct vertexInput {
float4 vertex : POSITION;
float3 normal : NORMAL;
float4 texcoord : TEXCOORD0;
};
// Data passed from the vertex function to the fragment function.
struct vertexOutput {
float4 pos : SV_POSITION;
float4 tex : TEXCOORD0;
float4 posWorld : TEXCOORD1;
float4 posInObjectCoords : TEXCOORD2;
float3 normalDir : TEXCOORD3;
};
// Vertex entry point (named by "#pragma vertex vert" in the main shader file).
vertexOutput vert(vertexInput v) {
vertexOutput o;
// do stuff
return o;
}
// Fragment entry point (named by "#pragma fragment frag" in the main shader file).
float4 frag(vertexOutput i) : COLOR
{
// do stuff
return float4(...);
}
#endif // SHARED_FOO
Related
I am using tensorRT to build a small model as below:
#include "NvInfer.h"
#include "cuda_runtime_api.h"
#include <fstream>
#include <map>
#include <chrono>
#include <iostream>
#include "include/Utils.h"
#include <memory>
#include <vector>
#include <cassert>
#include "src/InferDeleter.cpp"
using namespace std;
using namespace nvinfer1;
// Logger instance handed to TensorRT: writes every message to stdout except
// those reported with severity kINFO.
class MyLogger : public ILogger {
    void log(Severity severity, const char *msg) override {
        const bool isInfo = (severity == Severity::kINFO);
        if (isInfo) {
            return;  // informational messages are suppressed
        }
        cout << msg << endl;
    }
} gLogger;
int main() {
//load weights
map<string, Weights> mWeightMap = Utils::getInstance().loadWeights("Weights/mnistapi.wts");
//a few configuration parameters
const char *INPUT_BLOB_NAME = "input";
const char *OUTPUT_BLOB_NAME = "output";
DataType dataType = nvinfer1::DataType::kFLOAT;
int INPUT_H = 28, INPUT_W = 28;
int batchSize = 1;
//define the network
IBuilder *builder = createInferBuilder(gLogger);
INetworkDefinition *network = builder->createNetworkV2(0U);
// Create input tensor of shape { 1, 1, 28, 28 }
ITensor *data = network->addInput(
INPUT_BLOB_NAME, DataType::kFLOAT, Dims3{1, INPUT_H, INPUT_W});
// Create scale layer with default power/shift and specified scale parameter.
const float scaleParam = 0.0125f;
const Weights power{DataType::kFLOAT, nullptr, 0};
const Weights shift{DataType::kFLOAT, nullptr, 0};
const Weights scale{DataType::kFLOAT, &scaleParam, 1};
IScaleLayer *scale_1 = network->addScale(*data, ScaleMode::kUNIFORM, shift, scale, power);
// Add convolution layer with 20 outputs and a 5x5 filter.
IConvolutionLayer *conv1 = network->addConvolutionNd(
*scale_1->getOutput(0), 20, Dims{2, {5, 5}, {}}, mWeightMap["conv1filter"], mWeightMap["conv1bias"]);
conv1->setStride(DimsHW{1, 1});
// Add max pooling layer with stride of 2x2 and kernel size of 2x2.
IPoolingLayer *pool1 = network->addPoolingNd(*conv1->getOutput(0), PoolingType::kMAX, Dims{2, {2, 2}, {}});
pool1->setStride(DimsHW{2, 2});
// Add second convolution layer with 50 outputs and a 5x5 filter.
IConvolutionLayer *conv2 = network->addConvolutionNd(
*pool1->getOutput(0), 50, Dims{2, {5, 5}, {}}, mWeightMap["conv2filter"], mWeightMap["conv2bias"]);
conv2->setStride(DimsHW{1, 1});
// Add second max pooling layer with stride of 2x2 and kernel size of 2x3>
IPoolingLayer *pool2 = network->addPoolingNd(*conv2->getOutput(0), PoolingType::kMAX, Dims{2, {2, 2}, {}});
pool2->setStride(DimsHW{2, 2});
// Add fully connected layer with 500 outputs.
IFullyConnectedLayer *ip1
= network->addFullyConnected(*pool2->getOutput(0), 500, mWeightMap["ip1filter"], mWeightMap["ip1bias"]);
// Add activation layer using the ReLU algorithm.
IActivationLayer *relu1 = network->addActivation(*ip1->getOutput(0), ActivationType::kRELU);
// Add second fully connected layer with 20 outputs.
IFullyConnectedLayer *ip2 = network->addFullyConnected(
*relu1->getOutput(0), 10, mWeightMap["ip2filter"], mWeightMap["ip2bias"]);
// Add softmax layer to determine the probability.
ISoftMaxLayer *prob = network->addSoftMax(*ip2->getOutput(0));
prob->getOutput(0)->setName(OUTPUT_BLOB_NAME);
network->markOutput(*prob->getOutput(0));
//build engine
IBuilderConfig *builderConfig = builder->createBuilderConfig();
builder->setMaxBatchSize(batchSize);
builderConfig->setMaxWorkspaceSize(1<<24);
//engine null
ICudaEngine *engine = builder->buildEngineWithConfig(*network, *builderConfig);
//later uses of engine.
return 0;
}
However, the function builder->buildEngineWithConfig(*network, *builderConfig) returns nullptr. I tried changing the max workspace size to other values, but it still does not work. I also visited this post, but nothing there helped. Can anyone point out the cause of the problem? Thank you!
After a few days of struggling with this problem, I found that if the layers in the model do not match the weights passed in, no error is reported, but the TensorRT engine cannot be created, so nothing that depends on it can run. Therefore, the best thing to do in this situation is to check the network carefully, layer by layer, against the .wts file.
I am attempting to use C++17 parallel algorithms with containers holding non-fundamental types as illustrated in the minimal example below, compiled using GCC9.2.1/Intel TBB on Ubuntu 19.10. The sequential policy is OK, but compilation fails with par since the lambda is expected to accept double as the second argument. The issue persists on icc 19.0.1.
My question is if the code is valid or if this issue is simply because of the early development stage of the parallel implementation?
#include <numeric>
#include <algorithm>
#include <execution>
#include <vector>
// Minimal non-fundamental element type; only `radius` is summed below.
struct Data {
double radius;
};
// Calls std::reduce three times to compare fundamental and non-fundamental
// element types under the sequential and parallel execution policies.
int main() {
double sum;
std::vector<double> v1;
std::vector<Data> v2;
// ok: elements and accumulator are both double, so the lambda accepts any argument order
sum = std::reduce(std::execution::par, v1.begin(), v1.end(), 0.0, [](double sum, auto i) { return sum + i; });
// ok: compiles with the sequential policy on this toolchain
sum = std::reduce(std::execution::seq, v2.begin(), v2.end(), 0.0, [](double sum, const Data &i) { return sum + i.radius; });
// compile error: with the parallel policy the lambda is also expected to accept a
// double as its second argument, and there is no double -> Data conversion
sum = std::reduce(std::execution::par, v2.begin(), v2.end(), 0.0, [](double sum, const Data &i) { return sum + i.radius; });
}
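The BinaryOp passed to std::reduce must be callable with its operands in either order, because the algorithm may regroup and reorder them arbitrarily. Both of the operations below therefore have to be supported: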
double + Data // your lambda supports only this
Data + double // this can be performed only by adding some conversions
If you want to make conversion double->Data you should add proper constructor. For conversion Data->double you should add proper conversion operator:
// Value wrapper that converts implicitly to and from double, so a Data can be
// used wherever a double is expected and vice versa.
struct Data {
    double radius;

    // double -> Data (intentionally not explicit)
    Data(double value) : radius{value} {}

    // Data -> double (intentionally not explicit)
    operator double() const { return this->radius; }
};
Live demo
My environment:
RadStudio 10.2 Tokyo
Working on Windows 10 (64bit) v1809
I was searching the way to replace JSON value.
Then, I came across the following Q and A.
Delphi XE7: How to change a JSON value using System.JSON (versus SuperObject)
In Delphi:
JoPair.JsonValue.Free;
JoPair.JsonValue := TJSONNumber.Create(123);
Following this, I thought, it would be in C++ Builder
JoPair->JsonValue->Free();
JoPair->JsonValue = new TJSONNumber(123);
However, it caused "access violation" error in ToString();
Instead, I commented out JoPair->JsonValue->Free();, then no problem.
Question:
In C++ Builder, do I need to Free JoPair->JsonValue?
But without Freeing the JsonValue, it may cause memory leak.
source code
Following is the actual code I checked
//---------------------------------------------------------------------------
#include <vcl.h>
#pragma hdrstop
#include <DBXJSON.hpp> // for JSON
#include <memory> // for unique_ptr
#include "Unit1.h"
//---------------------------------------------------------------------------
#pragma package(smart_init)
#pragma resource "*.dfm"
TForm1 *Form1;
//---------------------------------------------------------------------------
// Form constructor: forwards Owner to the TForm base class and does nothing else.
__fastcall TForm1::TForm1(TComponent* Owner)
: TForm(Owner)
{
}
//---------------------------------------------------------------------------
// Button handler: reads test.json line by line, treats each line as one JSON
// object, replaces the value of every pair whose key contains "hogehoge" with
// the string "9", and writes the resulting lines to out.json.
// Lines that do not parse to a JSON object are skipped.
void __fastcall TForm1::Button1Click(TObject *Sender)
{
    String srcFileName = L"test.json"; // source
    String dstFileName = L"out.json"; // destination
    String targetKeySubString = L"hogehoge"; // keys containing this substring are rewritten
    String targetValue = "9";

    // 1. read JSON strings
    std::unique_ptr<TStringList> slread(new TStringList);
    slread->LoadFromFile(srcFileName);

    // 2. replace values for the key (target is checked by substring)
    std::unique_ptr<TStringList> slwrite(new TStringList);
    for (int li = 0; li < slread->Count; li++) { // file line index
        String jsonText = slread->Strings[li];
        // this is needed for avoiding error caused by the file path treating backslash
        jsonText = StringReplace(jsonText, L"\\", L"\\\\", TReplaceFlags() << rfReplaceAll);

        // Keep the raw parse result so it can be released even when it is not
        // a JSON object (dynamic_cast then yields NULL).
        TJSONValue *parsed = TJSONObject::ParseJSONValue(jsonText);
        TJSONObject *jsonObj = dynamic_cast<TJSONObject*>(parsed);
        if (jsonObj == NULL) {
            delete parsed; // deleting NULL is a no-op
            continue;
        }

        for (int pi = 0; pi < jsonObj->Size(); pi++) { // pair index
            TJSONPair *pairObj = jsonObj->Get(pi);
            String jsonKey = pairObj->JsonString->Value();
            if (jsonKey.Pos(targetKeySubString) == 0) {
                continue;
            }
            // replace value
            // (ref: https://stackoverflow.com/questions/33426576/delphi-xe7-how-to-change-a-json-value-using-system-json-versus-superobject)
            //
            //pairObj->JsonValue->Free(); // commented out because this causes "access violation" in ToString()
            // NOTE(review): whether the previous value is released by the
            // property setter depends on the RTL version - confirm.
            pairObj->JsonValue = new TJSONString(targetValue);
        }
        slwrite->Add(jsonObj->ToString());

        // Release this line's object inside the loop; freeing once after the
        // loop leaked all but the last object and freed an uninitialised
        // pointer when the input file was empty.
        delete jsonObj;
    }

    // 3. output
    slwrite->SaveToFile(dstFileName);
    ShowMessage(L"Done");
}
//---------------------------------------------------------------------------
Example
{"1_hogehoge":"3", "2_fugafuga":"1","3_hogehoge":"4", "4_fugafuga":"1", "5_hogehoge":"5", "6_fugafuga":"9"}
{"1_hogehoge":"9","2_fugafuga":"1","3_hogehoge":"9","4_fugafuga":"1","5_hogehoge":"9","6_fugafuga":"9"}
Source code (10.2 Tokyo)
I have updated the source code. Still same problem.
I also used ToJSON() instead of ToString(), with the same error (access violation).
I also tried using std::unique_ptr, which caused another error. So I gave up using std::unique_ptr on this topic now (may better be investigated separately).
//---------------------------------------------------------------------------
#include <vcl.h>
#pragma hdrstop
#include <System.JSON.hpp>
#include <memory> // for unique_ptr
#include "Unit1.h"
//---------------------------------------------------------------------------
#pragma package(smart_init)
#pragma resource "*.dfm"
TForm1 *Form1;
//---------------------------------------------------------------------------
// Form constructor: forwards Owner to the TForm base class and does nothing else.
__fastcall TForm1::TForm1(TComponent* Owner)
: TForm(Owner)
{
}
//---------------------------------------------------------------------------
// Button handler: reads test.json line by line, parses each line as a JSON
// object, replaces the value of every pair whose key contains "hogehoge" with
// the string "9", and writes the resulting lines to out.json.
// Lines that do not parse to a JSON object are skipped.
void __fastcall TForm1::Button1Click(TObject *Sender)
{
String srcFileName = L"test.json"; // source
String dstFileName = L"out.json"; // destination
String targetKeySubString = L"hogehoge"; // keys containing this substring are rewritten
String targetValue = "9";
// 1. read JSON strings
std::unique_ptr<TStringList> slread(new TStringList);
slread->LoadFromFile(srcFileName);
// 2. replace values for the key (target is checked by substring)
TJSONObject *jsonObj;
//std::unique_ptr<TJSONObject> jsonObj(new TJSONObject);
String jsonKey, jsonValue;
TJSONPair *pairObj;
std::unique_ptr<TStringList> slwrite(new TStringList);
for(int li=0; li < slread->Count; li++) { // file line index
String jsonText = slread->Strings[li];
// this is needed for avoiding error caused by the file path treating backslash
jsonText = StringReplace(jsonText, L"\\", L"\\\\", TReplaceFlags()<<rfReplaceAll);
// Parse the line; the cast yields NULL when the result is not a JSON object.
jsonObj = dynamic_cast<TJSONObject*>(TJSONObject::ParseJSONValue(jsonText));
if (jsonObj == NULL) {
continue;
}
for(int pi=0; pi < jsonObj->Count; pi++) { // pair index
pairObj = jsonObj->Pairs[pi];
jsonKey = pairObj->JsonString->Value();
jsonValue = pairObj->JsonValue->Value();
// Pos() returns 0 when the substring is absent: leave such pairs untouched.
if (jsonKey.Pos(targetKeySubString) == 0) {
continue;
}
// replace value
// (ref: https://stackoverflow.com/questions/33426576/delphi-xe7-how-to-change-a-json-value-using-system-json-versus-superobject)
//
//pairObj->JsonValue->Free(); // commented out because this causes "access violation" in ToString()
// NOTE(review): this explicit delete is the statement associated with the
// access violation reported below; presumably the JsonValue setter already
// releases the previous value in this RTL version, making this a double
// free - confirm against the System.JSON source.
delete pairObj->JsonValue;
pairObj->JsonValue = new TJSONString(targetValue);
// debug
//ShowMessage(jsonKey + ":" + jsonValue);
}
//String res = jsonObj->ToJSON(); // *** access violation ***
String res = jsonObj->ToString(); // *** access violation ***
slwrite->Add(res);
// Release the object parsed for this line before moving on to the next one.
jsonObj->Free();
}
// 3. output
slwrite->SaveToFile(dstFileName);
ShowMessage(L"Done");
}
//---------------------------------------------------------------------------
Lately, I've been attempting to get SEH exception handling to work in LLVM (3.8.1) together with MCJIT. So far without any luck.
From what I understand from the website ( http://llvm.org/docs/ExceptionHandling.html ), this is pretty much how this should be implemented. Compiling a minimal piece of code with clang gives pretty much the same LLVM IR code. However, when I try it, the program crashes with a nasty "Stack cookie instrumentation code detected a stack-based buffer overrun." error.
To illustrate what I've been attempting to do, I've created a minimum test case (I apologise for the amount of code...):
#include <string>
#include <iostream>
#include <exception>
#pragma warning(push)
#pragma warning(disable: 4267)
#pragma warning(disable: 4244)
#pragma warning(disable: 4800)
#pragma warning(disable: 4996)
#pragma warning(disable: 4141)
#pragma warning(disable: 4146)
#pragma warning(disable: 4624)
#pragma warning(disable: 4291)
#define DONT_GET_PLUGIN_LOADER_OPTION
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Triple.h"
#include "llvm/PassRegistry.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/LinkAllPasses.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/ExecutionEngine/Interpreter.h"
#include "llvm/ExecutionEngine/MCJIT.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PluginLoader.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#pragma warning(pop)
// Host-side function that the JIT-compiled code invokes; it always throws, so
// the generated catch handler has something to catch.
// NOTE(review): std::exception constructed from a string literal looks like
// the MSVC-specific constructor - confirm if this must build elsewhere.
static void test()
{
// You can use this to see that function calls work fine.
// std::cout << "Foo!" << std::endl;
throw std::exception("Something we should try to catch.");
}
int main()
{
// Initialize LLVM
std::cout << "Initializing LLVM." << std::endl;
llvm::InitializeNativeTarget();
llvm::InitializeAllTargetMCs();
llvm::InitializeNativeTargetAsmPrinter();
llvm::InitializeNativeTargetAsmParser();
llvm::PassRegistry *Registry = llvm::PassRegistry::getPassRegistry();
llvm::initializeCore(*Registry);
llvm::initializeScalarOpts(*Registry);
llvm::initializeObjCARCOpts(*Registry);
llvm::initializeVectorization(*Registry);
llvm::initializeIPO(*Registry);
llvm::initializeAnalysis(*Registry);
llvm::initializeTransformUtils(*Registry);
llvm::initializeInstCombine(*Registry);
llvm::initializeInstrumentation(*Registry);
llvm::initializeTarget(*Registry);
// For codegen passes, only passes that do IR to IR transformation are
// supported.
llvm::initializeCodeGenPreparePass(*Registry);
llvm::initializeAtomicExpandPass(*Registry);
llvm::initializeRewriteSymbolsPass(*Registry);
llvm::initializeWinEHPreparePass(*Registry);
llvm::initializeDwarfEHPreparePass(*Registry);
llvm::initializeSjLjEHPreparePass(*Registry);
llvm::StringRef MCPU = llvm::sys::getHostCPUName();
std::string MTrip = llvm::sys::getProcessTriple();
static llvm::StringMap<bool, llvm::MallocAllocator> features;
llvm::sys::getHostCPUFeatures(features);
// Initialize module & context:
auto context = std::unique_ptr<llvm::LLVMContext>(new llvm::LLVMContext());
auto module = std::unique_ptr<llvm::Module>(new llvm::Module("native", *context));
// Create 'main' method:
llvm::Type* returnType = llvm::Type::getInt32Ty(*context);
std::vector<llvm::Type*> arguments;
// MCJIT only supports main(int, char**)
arguments.push_back(llvm::Type::getInt32Ty(*context));
arguments.push_back(llvm::Type::getInt8PtrTy(*context)->getPointerTo());
llvm::Function *fcn = llvm::cast<llvm::Function>(module->getOrInsertFunction("main", llvm::FunctionType::get(returnType, arguments, false)));
// Generate exception handler info for main:
llvm::AttrBuilder argBuilder;
argBuilder.addAttribute(llvm::Attribute::UWTable);
argBuilder.addAttribute("stack-protector-buffer-size", "8");
fcn->addAttributes(llvm::AttributeSet::FunctionIndex, llvm::AttributeSet::get(*context, llvm::AttributeSet::FunctionIndex, argBuilder));
// Exception handling requires a personality function. We want to use the SEH personality handler
llvm::Function *personalityHandler = llvm::cast<llvm::Function>(module->getOrInsertFunction("__CxxFrameHandler3", llvm::FunctionType::get(llvm::Type::getInt32Ty(*context), true)));
auto personalityPtr = llvm::ConstantExpr::getBitCast(personalityHandler, llvm::Type::getInt8PtrTy(*context));
fcn->setPersonalityFn(personalityPtr);
// Create some code. Basically we want to invoke our 'test' method
auto block = llvm::BasicBlock::Create(*context, "code", fcn);
llvm::IRBuilder<> builder(block);
// all other cases might throw an exception
auto continueBlock = llvm::BasicBlock::Create(*context, "invoke.cont", fcn);
auto catchDispatch = llvm::BasicBlock::Create(*context, "catch.dispatch", fcn);
// Register 'test' as an external function:
const void* testFunctionPtr = &test;
auto testFunctionType = llvm::FunctionType::get(builder.getVoidTy(), false);
auto testFunction = llvm::Function::Create(testFunctionType, llvm::Function::ExternalLinkage, "test", module.get());
// %call = invoke i32 #"test"() to label %invoke.cont unwind label %catch.dispatch
auto call = builder.CreateInvoke(testFunction, continueBlock, catchDispatch);
// return [ 0 from ok, 1 from catch handler ]
builder.SetInsertPoint(continueBlock);
auto phi = builder.CreatePHI(builder.getInt32Ty(), 2, "result");
phi->addIncoming(builder.getInt32(0), block);
builder.CreateRet(phi);
// Create exception handler:
// Create default catch block. Basically handles the exception and returns '1'.
builder.SetInsertPoint(catchDispatch);
auto parentPad = llvm::ConstantTokenNone::get(*context);
// %0 = catchswitch within none [label %catch] unwind to caller
auto catchSwitch = builder.CreateCatchSwitch(parentPad, nullptr, 1);
auto catchBlock = llvm::BasicBlock::Create(*context, "catch", fcn);
builder.SetInsertPoint(catchBlock);
catchSwitch->addHandler(catchBlock);
// MSVC code:
// %1 = catchpad within %0 [i8* null, i32 64, i8* null] == "catch all"
llvm::Value *nullPtr = llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(*context));
auto catchPad = builder.CreateCatchPad(catchSwitch, { nullPtr, builder.getInt32(0x40), nullPtr });
// catchret from %1 to label %return
auto const1 = builder.getInt32(1);
builder.CreateCatchRet(catchPad, continueBlock);
// set 1 for the catch handler
phi->addIncoming(builder.getInt32(1), catchBlock);
// *DONE* building the code.
// Dump the LLVM IR:
module->dump();
// Let's JIT the code:
std::string error;
auto trip = llvm::Triple::normalize(MTrip);
llvm::Triple triple(trip);
const llvm::Target *target = llvm::TargetRegistry::lookupTarget("x86-64", triple, error);
if (!target)
{
throw error.c_str();
}
llvm::TargetOptions Options;
std::unique_ptr<llvm::TargetMachine> targetMachine(
target->createTargetMachine(trip, MCPU, "", Options, llvm::Reloc::Default, llvm::CodeModel::Default, llvm::CodeGenOpt::Aggressive));
if (!targetMachine.get())
{
throw "Could not allocate target machine!";
}
// Create the target machine; set the module data layout to the correct values.
auto DL = targetMachine->createDataLayout();
module->setDataLayout(DL);
module->setTargetTriple(trip);
// Pass manager builder:
llvm::PassManagerBuilder pmbuilder;
pmbuilder.OptLevel = 3;
pmbuilder.BBVectorize = false;
pmbuilder.SLPVectorize = true;
pmbuilder.LoopVectorize = true;
pmbuilder.Inliner = llvm::createFunctionInliningPass(3, 2);
llvm::TargetLibraryInfoImpl *TLI = new llvm::TargetLibraryInfoImpl(triple);
pmbuilder.LibraryInfo = TLI;
// Generate pass managers:
// 1. Function pass manager:
llvm::legacy::FunctionPassManager FPM(module.get());
pmbuilder.populateFunctionPassManager(FPM);
// 2. Module pass manager:
llvm::legacy::PassManager PM;
PM.add(llvm::createTargetTransformInfoWrapperPass(targetMachine->getTargetIRAnalysis()));
pmbuilder.populateModulePassManager(PM);
// 3. Execute passes:
// - Per-function passes:
FPM.doInitialization();
for (llvm::Module::iterator I = module->begin(), E = module->end(); I != E; ++I)
{
if (!I->isDeclaration())
{
FPM.run(*I);
}
}
FPM.doFinalization();
// - Per-module passes:
PM.run(*module);
// All done, *RUN*.
llvm::EngineBuilder engineBuilder(std::move(module));
engineBuilder.setEngineKind(llvm::EngineKind::JIT);
engineBuilder.setMCPU(MCPU);
engineBuilder.setMArch("x86-64");
engineBuilder.setUseOrcMCJITReplacement(false);
engineBuilder.setOptLevel(llvm::CodeGenOpt::None);
llvm::ExecutionEngine* engine = engineBuilder.create();
// Register global 'test' function:
engine->addGlobalMapping(testFunction, const_cast<void*>(testFunctionPtr)); // Yuck...
// Finalize
engine->finalizeObject();
// Invoke:
std::vector<llvm::GenericValue> args(2);
args[0].IntVal = llvm::APInt(32, static_cast<uint64_t>(0), true);
args[1].PointerVal = nullptr;
llvm::GenericValue gv = engine->runFunction(fcn, args);
auto result = int(gv.IntVal.getSExtValue());
std::cout << "Result after execution: " << result << std::endl;
std::string s;
std::getline(std::cin, s);
}
This produces the following IR code:
; ModuleID = 'native'
; Function Attrs: uwtable
define i32 @main(i32, i8**) #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
code:
invoke void @test()
to label %invoke.cont unwind label %catch.dispatch
invoke.cont: ; preds = %catch, %code
%result = phi i32 [ 0, %code ], [ 1, %catch ]
ret i32 %result
catch.dispatch: ; preds = %code
%2 = catchswitch within none [label %catch] unwind to caller
catch: ; preds = %catch.dispatch
%3 = catchpad within %2 [i8* null, i32 64, i8* null]
catchret from %3 to label %invoke.cont
}
declare i32 @__CxxFrameHandler3(...)
declare void @test()
attributes #0 = { uwtable "stack-protector-buffer-size"="8" }
Q: What am I missing, why doesn't this work and how to fix it?
After posting this issue on the llvm developer list, I got a friendly reply explaining how this issue is related to a known bug: https://llvm.org/bugs/show_bug.cgi?id=24233 .
Basically what happens is that LLVM doesn't implement the code that Windows (more specifically: SEH and debugging) requires for handling stack frames. I'm by no means an expert on this subject, but until this is implemented, SEH won't know what to do, which means C++ exception basically won't work.
An obvious workaround is of course to pass the object as a pointer during the function call and perform an if-then-else. That way, exceptions are avoided. However, this is pretty nasty and will probably give a serious performance penalty. Also, this makes the flow in the compiler as well as the generated program much more complicated. In other words: let's just say I'd rather not.
I'll leave the question open; if someone happens to find a hack or figure out a workaround, I'll gladly accept it.
In StringTemplate - which I've used in some projects to emit C code - whitespace prefixes are automatically added in the output lines:
PrintCFunction(linesGlobal, linesLocal) ::= <<
void foo() {
if (someRuntimeFlag) {
<linesGlobal>
if (anotherRuntimeFlag) {
<linesLocal>
}
}
}
>>
When this template is rendered in StringTemplate, the whitespace
prefixing the multilined linesGlobal and linesLocal strings,
is copied for all the lines emitted. The generated C code is
e.g.:
void foo() {
if (someRuntimeFlag) {
int i;
i=1; // <=== whitespace prefix copied in 2nd
i++; // <=== and 3rd line
if (anotherRuntimeFlag) {
int j=i;
j++; // <=== ditto
}
}
}
I am new to Jinja2 - and tried to replicate this, to see if I can use Python/Jinja2 to do the same thing:
#!/usr/bin/env python
# Renders a small C function from a Jinja2 template, substituting two
# multi-line snippets, and prints the result.
from jinja2 import Template

# Multi-line snippets to be inserted into the template.
linesGlobal = '\n'.join(['int i;', 'i=1;'])
linesLocal = '\n'.join(['int j=i;', 'j++;'])

# The trailing backslash after the opening quotes suppresses the leading newline.
tmpl = Template(u'''\
void foo() {
if (someRuntimeFlag) {
{{linesGlobal}}
if (anotherRuntimeFlag) {
{{linesLocal}}
}
}
}
''')

# print() with a single parenthesised argument works on Python 2 and Python 3;
# the bare print statement is a syntax error on Python 3.
print(tmpl.render(
    linesGlobal=linesGlobal,
    linesLocal=linesLocal))
...but saw it produce this:
void foo() {
if (someRuntimeFlag) {
int i;
i=1;
if (anotherRuntimeFlag) {
int j=i;
j++;
}
}
}
...which is not what I want.
I managed to make the output emit proper whitespace prefixes with this:
...
if (someRuntimeFlag) {
{{linesGlobal|indent(8)}}
if (anotherRuntimeFlag) {
{{linesLocal|indent(12)}}
}
}
...but this is arguably bad, since I need to manually count whitespace
for every string I emit...
Surely Jinja2 offers a better way that I am missing?
There's no answer (yet), because quite simply, Jinja2 doesn't support this functionality.
There is, however, an open ticket for this feature - if you care about it, join the discussion there.