Unicode html buffers not working with libmicrohttpd - html

I am trying to create a server which will serve pages in a non-English language, and I am testing libmicrohttpd with this code:
/*
 * MHD request handler that serves a static UTF-8 HTML page.
 *
 * libmicrohttpd is encoding-agnostic: it transmits the buffer byte for
 * byte.  The "?????" symptom comes from the string literal not being
 * stored as UTF-8; the u8 prefix (C11) guarantees UTF-8 encoding of the
 * literal regardless of the compiler's execution character set.
 */
static int
answer_to_connection(void* cls, struct MHD_Connection* connection,
                     const char* url, const char* method,
                     const char* version, const char* upload_data,
                     size_t* upload_data_size, void** con_cls)
{
    /* const + static: string literals are read-only and live for the
       whole program, which is exactly what MHD_RESPMEM_PERSISTENT
       requires (MHD keeps the pointer, no copy is made). */
    static const char page[] =
        u8"<html><head><meta charset='UTF-8'></head><body>हैलो यूनिकोड</body></html>";
    struct MHD_Response* response;
    int ret;

    response = MHD_create_response_from_buffer(strlen(page), (void*)page,
                                               MHD_RESPMEM_PERSISTENT);
    ret = MHD_queue_response(connection, MHD_HTTP_OK, response);
    MHD_destroy_response(response);
    return ret;
}
But it's not working and gives ????? characters in the browser.
Can anyone tell me if libmicrohttpd supports Unicode, if yes then how?

As I already wrote in the comment, you have to be sure that your string is encoded in UTF-8. A plain char string is encoded according to the selected codepage or locale, which yields malformed characters if the bytes are interpreted as UTF-8.
If you're using a C11 compiler prefix string with u8 as:
char *page = u8"<html><head><meta charset='UTF-8'></head><body>हैलो यूनिकोड</body></html>";
If your compiler doesn't support UTF-8 you need an external tool that formats the string using hex escaping or octal or the like.

Related

Using SWIG to wrap structures containing const char * without memory leak

I'm attempting to use SWIG to wrap a pre-existing library interface that expects the caller to manage the lifetime of some const char * values.
/* Library configuration consumed by Initialize().
 * The caller owns log_file; per the comment below, the struct and its
 * members only need to stay valid for the duration of Initialize(). */
struct Settings {
const char * log_file; /* borrowed pointer; the library does not free it */
int log_level;
};
// The Settings struct and all members only need to be valid for the duration of this call.
int Initialize(const struct Settings* settings);
int DoStuff();
int Deinitialize();
I started off using the most basic input to SWIG to wrap the library:
// Minimal SWIG interface: wrap everything declared in lib.h as Python module "lib".
%module lib
%{
#include "lib.h"
%}
%include "lib.h"
This leads to SWIG warning about a potential memory leak:
lib.h(2) : Warning 451: Setting a const char * variable may leak memory.
Which is entirely understandable as looking at lib_wrap.c, SWIG has generated code that will malloc a buffer into the log_file value but never frees it:
/* SWIG-generated setter for Settings.log_file when declared const char *.
 * Quoted verbatim (only comments added): it malloc's a fresh copy on
 * every assignment but never frees the previous value, which is exactly
 * what Warning 451 is about. */
SWIGINTERN PyObject *_wrap_Settings_log_file_set(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
PyObject *resultobj = 0;
struct Settings *arg1 = (struct Settings *) 0 ;
char *arg2 = (char *) 0 ;
void *argp1 = 0 ;
int res1 = 0 ;
int res2 ;
char *buf2 = 0 ;
int alloc2 = 0 ;
PyObject *swig_obj[2] ;
if (!SWIG_Python_UnpackTuple(args, "Settings_log_file_set", 2, 2, swig_obj)) SWIG_fail;
res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_Settings, 0 | 0 );
if (!SWIG_IsOK(res1)) {
SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Settings_log_file_set" "', argument " "1"" of type '" "struct Settings *""'");
}
arg1 = (struct Settings *)(argp1);
res2 = SWIG_AsCharPtrAndSize(swig_obj[1], &buf2, NULL, &alloc2);
if (!SWIG_IsOK(res2)) {
SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "Settings_log_file_set" "', argument " "2"" of type '" "char const *""'");
}
arg2 = (char *)(buf2);
if (arg2) {
/* LEAK: new allocation on each call; the old arg1->log_file is never freed. */
size_t size = strlen((const char *)((const char *)(arg2))) + 1;
arg1->log_file = (char const *)(char *)memcpy(malloc((size)*sizeof(char)), arg2, sizeof(char)*(size));
} else {
arg1->log_file = 0;
}
resultobj = SWIG_Py_Void();
/* The temporary buffer from SWIG_AsCharPtrAndSize IS released correctly. */
if (alloc2 == SWIG_NEWOBJ) free((char*)buf2);
return resultobj;
fail:
if (alloc2 == SWIG_NEWOBJ) free((char*)buf2);
return NULL;
}
If I change the type of log_file to char * then the warning goes away and it appears that multiple attempts to set the value of log_file will no longer leak memory:
/* SWIG-generated setter when log_file is declared plain char *.
 * Quoted verbatim (only comments added): re-assignment no longer leaks
 * because the previous value is freed first, but the final copy still
 * leaks when the Python Settings object is garbage collected. */
SWIGINTERN PyObject *_wrap_Settings_log_file_set(PyObject *SWIGUNUSEDPARM(self), PyObject *args) {
PyObject *resultobj = 0;
struct Settings *arg1 = (struct Settings *) 0 ;
char *arg2 = (char *) 0 ;
void *argp1 = 0 ;
int res1 = 0 ;
int res2 ;
char *buf2 = 0 ;
int alloc2 = 0 ;
PyObject *swig_obj[2] ;
if (!SWIG_Python_UnpackTuple(args, "Settings_log_file_set", 2, 2, swig_obj)) SWIG_fail;
res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_Settings, 0 | 0 );
if (!SWIG_IsOK(res1)) {
SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "Settings_log_file_set" "', argument " "1"" of type '" "struct Settings *""'");
}
arg1 = (struct Settings *)(argp1);
res2 = SWIG_AsCharPtrAndSize(swig_obj[1], &buf2, NULL, &alloc2);
if (!SWIG_IsOK(res2)) {
SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "Settings_log_file_set" "', argument " "2"" of type '" "char *""'");
}
arg2 = (char *)(buf2);
/* This free of the prior copy is what fixes the re-assignment leak. */
if (arg1->log_file) free((char*)arg1->log_file);
if (arg2) {
size_t size = strlen((const char *)(arg2)) + 1;
arg1->log_file = (char *)(char *)memcpy(malloc((size)*sizeof(char)), (const char *)(arg2), sizeof(char)*(size));
} else {
arg1->log_file = 0;
}
resultobj = SWIG_Py_Void();
if (alloc2 == SWIG_NEWOBJ) free((char*)buf2);
return resultobj;
fail:
if (alloc2 == SWIG_NEWOBJ) free((char*)buf2);
return NULL;
}
However it still appears that the memory allocated for log_file will be leaked when the Settings object is garbage collected in Python.
What is the recommended way of managing lifetimes of char * struct values in SWIG in a way which avoids these memory leaks?
Strings are a bit awkward to do right here. There are several ways to side-step the issue you're seeing. Simplest is to use a fixed size array in the struct, but it's 2019. Personally I'd wholeheartedly recommend using idiomatic C++ instead (it's 2019!), which would mean std::string and then the whole issue evaporates.
Failing that you're stuck in a case where to make the interface Pythonic you'll have to do some extra work. We can keep the total amount of work low and the nice thing about SWIG is that we can pick and choose where we target the extra effort we make, there's no "all or nothing". The main problem here is that we want to tie the lifespan of the buffer the log_file path is stored in to the lifespan of the Python Settings object itself. We can achieve that in multiple different ways depending on your preference for writing Python code, C or Python C API calls.
What we can't really solve is the case where you're given a borrowed pointer to a Settings struct by some other code (i.e. it's not owned/managed by Python) and you want to change the log_file string in that borrowed object. The API you've got doesn't really give us a way to do that, but it seems like this isn't a case that really matters in your current module.
So without further ado below are a few options for tying the lifespan of a buffer that holds your string to a Python object that points to the buffer.
Option #1: Make Settings wholly or partially immutable, use a single malloc call to hold both the struct itself and the string it refers to. For this use case that's probably my preferred option.
We can do that fairly simply by giving the Settings type a constructor in Python which handles this and it doesn't force you to use C++:
// Option #1: immutable log_file, set only through a custom constructor.
%module lib
%{
#include "lib.h"
%}
// Don't let anybody change this other than the ctor
%immutable Settings::log_file;
%include "lib.h"
// Replace the default ctor with one that co-allocates struct + string,
// so the single free() in the generated destructor releases both.
%extend Settings {
Settings(const char *log_file) {
assert(log_file); // TODO: handle this properly
// Single allocation for both things means the single free() is sufficient and correct
struct Settings *result = malloc(strlen(log_file) + 1 + sizeof *result);
char *buf = (void*)&result[1]; /* string storage sits right after the struct */
strcpy(buf, log_file);
result->log_file = buf;
return result;
}
}
If you wanted to make the path mutable you could write a little extra Python code that wraps this up and acts a proxy which creates a new immutable object every time you "mutate" it on the Python side. You could also go the other way and make the other members of settings immutable. (Thinking about it some more it'd be neat if SWIG could optionally auto synthesize a kwargs constructor for aggregate/POD types and wouldn't be too hard to add that as a patch).
This is my personal preference here, I like immutable things and overall it's a fairly small tweak to the generated interface to get something sane.
Option #2a: Make another Python object that manages the lifespan of the string buffer and then "stash" a reference to that inside the Python side of every Settings struct that's owned by Python.
// Option #2a: stash a reference to the Python string on the Settings
// object so the buffer's lifetime is tied to the owning PyObject.
%module lib
%{
#include "lib.h"
%}
// "in" typemap: runs when Python assigns settings.log_file = "...".
%typemap(in) const char *log_file %{
// Only works for Python owned objects:
assert(SWIG_Python_GetSwigThis($self)->own & SWIG_POINTER_OWN); // TODO: exception...
// Python 2.7 specific, 3 gets more complicated, use bytes buffers instead.
$1 = PyString_AsString($input);
assert($1); // TODO: errors etc.
// Force a reference to the original input string to stick around to keep the pointer valid
PyObject_SetAttrString($self, "_retained_string", $input);
%}
// "memberin" typemap: store the borrowed pointer directly, no copy.
%typemap(memberin) const char *log_file %{
// Because we trust the in typemap has retained the pointer for us this is sufficient now:
$1 = $input;
%}
%include "lib.h"
These typemaps work together to keep a reference to the PyObject string stashed inside the Settings PyObject as an attribute. It only works safely here because a) we assume Python owns the object, and we're not using -builtin in SWIG, so we can safely stash things in attributes to keep them around and b) because it's const char *, not char * we can be pretty sure that (unless there's some K&R silliness going on) that nobody will be changing the buffer.
Option #2b: The general idea is the same, but instead of using typemaps, which means writing Python C API calls use something like this:
// Option #2b sketch only: same retention idea implemented with a Python
// property via %pythoncode; intentionally not fleshed out by the author.
%extend Settings {
%pythoncode {
#property
# ....
}
}
To do the same thing. Similar code could also be produced using %pythonprepend instead if preferred. However this is my least preferred solution here, so I've not fully fleshed it out.
You can tell SWIG to use char* semantics for log_file. Unfortunately, it doesn't seem possible to use Settings::log_file (the required memberin does not show up in the pattern matching), so there could be clashes if that data member name is used in other structs as well with the same type but different semantics. This would look like:
// Reuse SWIG's char* typemaps (malloc/copy semantics) for this member.
// NOTE(review): matching is on "log_file" alone (Settings::log_file does
// not match for memberin), so other structs with a member of the same
// name and type would pick these typemaps up as well.
%module lib
%{
#include "lib.h"
%}
%typemap(out) char const *log_file = char *;
%typemap(memberin) char const *log_file = char *;
%extend Settings {
// C++ path: SWIG emits delete[] for the string, so pair allocations accordingly.
Settings() {
Settings* self = new Settings{};
self->log_file = nullptr; // guarantee the dtor's delete[] is safe on a fresh object
self->log_level = 0;
return self;
}
// Runs from _wrap_delete_Settings when the Python object is collected.
~Settings() {
delete[] self->log_file; self->log_file = nullptr;
delete self;
}
}
%include "lib.h"
(Note that SWIG in my case produces delete[], not free().)
EDIT: added a custom destructor to delete the log_file memory on garbage collection. (And for good measure also a constructor to make sure that an uninitialized log_file is nullptr, not some random memory.) What this does, is add an internal function delete_Settings to the wrapper file, which gets called in _wrap_delete_Settings, which is called on object destruction. Yes, syntax is a bit odd, b/c you're effectively describing Python's __del__ (taking a self), only labeled as a C++ destructor.

C/ HTML - Printing the username and password from an HTML Form

We were asked to create a login page using HTML, C/CGI and CSS. I wanted to start with the basics, HTML and C/CGI first so I created this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Minimal CGI program: reads user/password from QUERY_STRING and echoes
 * them back inside an HTML page.
 *
 * Fixes relative to the original:
 *  - user/password are real buffers; passing &user (a char**) for %s on
 *    uninitialized pointers was undefined behaviour -- the source of the
 *    compiler warning and the HTTP 500.
 *  - %s stops at whitespace, not '&', so "user=%s" would have swallowed
 *    "&password=..." into user; the %[^&] scanset stops at the separator.
 *  - field widths (%99...) prevent overflow from an over-long query.
 *  - the sscanf return value is checked before the results are used.
 */
int main(void)
{
    char *data;
    char user[100];
    char password[100];

    printf("Content-type:text/html\r\n\r\n");
    printf("<!DOCTYPE html><html><head><title>Welcome!</title></head><body>");
    data = getenv("QUERY_STRING");
    if (data && sscanf(data, "user=%99[^&]&password=%99s", user, password) == 2) {
        printf("Hello Mr./Ms. %s\n", user);
        printf("You entered your password %s\n", password);
    }
    printf("<form action='http://localhost/inch.html'>");
    printf("<input type=submit value='Compute'>");
    printf("</body></html>");
    exit(EXIT_SUCCESS);
}
However, the compiler is showing me this problem:
warning: format ‘%s’ expects argument of type ‘char *’, but argument 3
has type ‘char **’ [-Wformat=]
sscanf(data,"user=%s&password=%s", &user, &password);
Also, the HTML is showing me Error 500.
Any ideas on how to fix this? I'm fairly new at this HTML/CGI thing, and I would appreciate help along the way. Thanks!
The problem is, sscanf needs a pointer to char:
int sscanf ( const char * s, const char * format, ...);
Try this:
/* Use real buffers, not uninitialized char* pointers. Also note that %s
 * reads up to the next whitespace, so "user=%s" would swallow the whole
 * "&password=..." tail into user; a %[^&] scanset stops at the '&'
 * separator, and the %99 widths keep the 100-byte buffers safe. */
char user[100];
char password[100];
// ...
if (sscanf(data, "user=%99[^&]&password=%99s", user, password) == 2) {
    /* user and password are now NUL-terminated C strings */
}

CUresult vs cudaError - how to get readable error descriptions?

I want to get a human readable description of the result returned by cuInit. Every single page I've found recommends using cudaGetErrorString for this purpose, but this results in an error:
error: cannot convert CUresult {aka cudaError_enum} to cudaError_t
{aka cudaError} for argument 1 to const char* cudaGetErrorString(cudaError_t)
What's the difference between CUresult and cudaError and what function can I use to interpret the former?
The current driver API, as of the time of writing, does support translating a CUresult to its string representation and description.
From the Driver API doc:
CUresult cuGetErrorName ( CUresult error, const char** pStr )
Gets the string representation of an error code enum name.
CUresult cuGetErrorString ( CUresult error, const char** pStr )
Gets the string description of an error code.
LIBRARY RETURN TYPE VALUES
CUDA Driver CUresult enum cudaError_enum in cuda.h
CUDA Runtime cudaError_t enum cudaError in driver_types.h
The CUDA Runtime provides the function cudaGetErrorString to convert a cudaError enum value to a string.
The CUDA Driver API does not provide a function to return string.
The error names and values for CUresult and cudaError_t do not match.
NVIDIA does not currently supply a library for returning an error string for CUresult. It should only take a few minutes to convert the data in cuda.h into a function.

how to convert from NPVariant* to char*?

I have an NPAPI plugin method which receives an NPVariant*. I need to convert it to char*. I heard that an NPVariant string does not end with a NULL character, so before I process the argument I need to convert it to char*. Can anyone please tell me how to convert it to char*?
Thanks in Advance...
An NPVariant has a type and value, the value has a stringValue which contains a pointer UTF8Characters and UTF8Length, so it tells you the length as well.
For example:
// NPVariant strings are counted (UTF8Length), not NUL-terminated, so
// always read the length together with the character pointer.
NPVariant yourVariant; //Pretend this is initialized
if (yourVariant.type == NPVariantType_String) {
const NPUTF8* characters = yourVariant.value.stringValue.UTF8Characters; // not NUL-terminated
int length = yourVariant.value.stringValue.UTF8Length; // byte count of the UTF-8 data
}
UTF8Characters is a NPUTF8, which may be a typedef to a char.
Knowing the length, you can construct your own char* that is null terminated.

SSIS: Redirecting error rows to file. File contains additional unexpected carriage return

I would like to redirect rows that could not be loaded into a table to an error file.
I see that the red exception path has 3 Input columns coming in, the "Flat File Source Error Output Column" contains the original data in the file.
The problem is that when I open the file, there is an extra Carriage Return\Line Feed character after every row. I'd like to be able to manually fix the errors and reprocess them without having to delete all of the added CRLF chars. So I added a Script Component to shave off the characters being added.
// Question code, kept verbatim: the conversion helpers below are the bug.
// BlockCopy reinterprets raw single-byte text as UTF-16 code units (char
// is 2 bytes in .NET), which is why the MessageBox shows block characters
// and why "Test" round-trips with a NUL byte after every letter.
public override void Input0_ProcessInputRow(Input0Buffer Row)
{
string buffer = GetString(Row.FlatFileSourceErrorOutputColumn.GetBlobData(0, (int)(Row.FlatFileSourceErrorOutputColumn.Length)));
System.Windows.Forms.MessageBox.Show(buffer);
byte[] ThisBytes = GetBytes("Test");
Row.FlatFileSourceErrorOutputColumn.ResetBlobData();
Row.FlatFileSourceErrorOutputColumn.AddBlobData(ThisBytes);
}
// BUG: pairs single bytes into 2-byte chars instead of decoding the text.
static string GetString(byte[] bytes)
{
char[] chars = new char[bytes.Length / sizeof(char)];
System.Buffer.BlockCopy(bytes, 0, chars, 0, bytes.Length);
return new string(chars);
}
// BUG: emits the raw UTF-16 bytes (a NUL after each ASCII letter).
static byte[] GetBytes(string str)
{
byte[] bytes = new byte[str.Length * sizeof(char)];
System.Buffer.BlockCopy(str.ToCharArray(), 0, bytes, 0, bytes.Length);
return bytes;
}
But my debug message box shows non displayable characters that appear as blocks.
When I try to force a literal "TEST" to the output file as a test to see if I could control what goes into the file, I get NULL (ASCII 0) characters after every letter.
Why is SSIS adding a CRLF when I just simply redirect the Output column to the file w/o using a Scripting block component to attempt to modify the data written? How can I get rid of the CRLF? Why am I unable to read the byte array in the data column and display it as a string? Why is the "TEST" literal having NULLS between every letter? Is my ByteArray conversion functions incorrect?
Got it.
// Working version: convert through System.Text.Encoding instead of
// reinterpreting raw bytes, and drop the trailing CRLF via Length - 2.
public override void Input0_ProcessInputRow(Input0Buffer Row)
{
string buffer = GetString(Row.FlatFileSourceErrorOutputColumn.GetBlobData(0, (int)(Row.FlatFileSourceErrorOutputColumn.Length - 2)));
System.Windows.Forms.MessageBox.Show(buffer);
byte[] ThisBytes = GetBytes(buffer);
Row.FlatFileSourceErrorOutputColumn.ResetBlobData();
Row.FlatFileSourceErrorOutputColumn.AddBlobData(ThisBytes);
}
// Decode the blob bytes as single-byte ASCII text.
static string GetString(byte[] bytes)
{
System.Text.Encoding enc = System.Text.Encoding.ASCII;
return enc.GetString(bytes);
}
// Encode the string back to ASCII bytes for the blob column.
static byte[] GetBytes(string str)
{
System.Text.Encoding enc = System.Text.Encoding.ASCII;
return enc.GetBytes(str);
}
I would still like to know why SSIS is adding the CRLF!