I am new to this, but I need to emulate RISC-V using QEMU. As a start for my fuzzing project, how can I give QEMU a sequence of instructions and get the resulting changes in the registers as output?
I think I understand your question. Because I don't have a RISC-V environment here, I can only outline a solution rather than provide a tested one.
For example, in riscv, we design a function to get the values of all registers, relying on qemu's plugin module (such as qemu_plugin_register_vcpu_insn_exec_cb()).
plugin_test.c
#include <inttypes.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <stdio.h>
#include <glib.h>
#include <qemu-plugin.h>
/* Exported plugin API version, set to the QEMU_PLUGIN_VERSION this file was built against. */
QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
/* Number of registers inspected per vCPU; also the size of the cpu_value cache. */
#define CPU_SIZE 32
/* Number of vCPUs to scan; assigned in qemu_plugin_install(). */
static int cpu_num;
/* Reference register values that vcpu_insn_exec_before() compares against.
 * NOTE(review): there is a single array shared by all vCPUs and nothing in the
 * visible code writes to it - confirm this is intended. */
static int cpu_value[CPU_SIZE]={0};
/*
 * Per-instruction callback: compares each of the CPU_SIZE registers of every
 * vCPU with the reference value stored in cpu_value[].
 *
 * cpu_index: index of the vCPU reported by QEMU (unused, all vCPUs are scanned).
 * udata:     user pointer supplied at registration time (unused).
 */
static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata)
{
    (void)cpu_index;
    (void)udata;

    for (size_t i = 0; i < (size_t)cpu_num; i++)
    {
        /* The inner loop must advance j; advancing i here skipped registers
         * and never terminated the inner loop correctly. */
        for (size_t j = 0; j < CPU_SIZE; j++)
        {
            const uint32_t current = get_cpu_register((unsigned int)i, (unsigned int)j);

            if ((uint32_t)cpu_value[j] != current) {
                /* The value of the register has changed.
                 * TODO: report the change and/or record the new value. */
            } else {
                /* The value of the register has not changed.
                 * TODO: nothing to report. */
            }
        }
    }
}
/*
 * Translation-block callback: registers vcpu_insn_exec_before() as the
 * execution callback of every instruction in the block.
 *
 * id: plugin id (unused here).
 * tb: translation block being translated.
 */
static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
{
    const size_t n = qemu_plugin_tb_n_insns(tb);

    (void)id;
    for (size_t i = 0; i < n; i++) {
        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);

        /* The callback reads register state, so request read access instead
         * of QEMU_PLUGIN_CB_NO_REGS; no user data is needed, hence NULL
         * (the original passed the type name `void *`, which does not compile). */
        qemu_plugin_register_vcpu_insn_exec_cb(
            insn, vcpu_insn_exec_before, QEMU_PLUGIN_CB_R_REGS, NULL);
    }
}
/* At-exit hook registered in qemu_plugin_install(); it currently does nothing. */
static void plugin_exit(qemu_plugin_id_t id, void *p)
{
    /* Both arguments are deliberately unused. */
    (void)id;
    (void)p;
}
/*
 * Plugin entry point: records how many vCPUs to scan and registers the
 * translation-block and at-exit callbacks. Always returns 0.
 */
QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
                                           const qemu_info_t *info,
                                           int argc, char **argv)
{
    /* System emulation reports its vCPU count; otherwise a single CPU is assumed. */
    cpu_num = info->system_emulation ? info->system.smp_vcpus : 1;

    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);

    return 0;
}
api-ext.c
/* Provided by QEMU itself; returns the CPU object for the given index. */
void *qemu_get_cpu(int index);

/*
 * Reads one 32-bit register straight out of the CPU object's memory.
 *
 * The two fixed offsets (33488 and 5424) are added to the object's address and
 * registers are then assumed to be 4 bytes apart.
 * NOTE(review): according to the accompanying text these offsets were taken
 * from an ARM setup - they must be re-derived for RISC-V and for the exact
 * QEMU build in use.
 */
static uint32_t get_cpu_register(unsigned int cpu_index, unsigned int reg) {
    uint8_t *state = qemu_get_cpu(cpu_index);
    uint8_t *slot = state + 33488 + 5424 + reg * 4;

    return *(uint32_t *)slot;
}
Note that the content of api-ext.c was obtained from another source: it is the function used to read the register values of an ARM CPU. You will need to check the QEMU source code or documentation to find the equivalent offsets for RISC-V.
I am using 2 Arduinos, 1 ESP-01, one 433 MHz receiver and one 433 MHz transmitter. The transmitter is sending the uint8_t data "85648217". When I write the code below, I can receive the messages and I get a response from the API.
#include <ArduinoJson.h>
#include <RH_ASK.h>
#include <SPI.h>
// RX / TX pin numbers; not referenced anywhere in the visible sketch.
#define RX 0
#define TX 1
using namespace std;
// 512-byte scratch buffer; not referenced anywhere in the visible sketch.
unsigned char rxBuf[512];
// RadioHead ASK driver instance, constructed with its default arguments.
RH_ASK rf_driver;
// One-time initialisation: starts the ASK radio driver, then opens the serial
// port at 9600 baud. The return value of init() is not checked.
void setup() {
rf_driver.init();
Serial.begin(9600);
}
void loop() {
{
uint8_t buf[8];
uint8_t buflen = sizeof(buf);
if (rf_driver.recv(buf, &buflen))
{
Serial.println("https://example.com/example/example/"+(String((char*)buf))+";headers\n");
//(String((char*)buf) is number from transmitter
String message = Serial.readString();
const size_t capacity = JSON_OBJECT_SIZE(1) + 20;
DynamicJsonDocument doc(capacity);
// DeserializationError error = deserializeJson(doc, message);
//if (error) {
// Serial.print(F("deserializeJson() failed: "));
//Serial.println(error.f_str());
// return;
// }
Serial.println(doc["attribute"].as<const char*>());
Serial.println(message);
}
}
}
When I write the code like this, I can't receive the message correctly: I receive "07413943⸮" or "07413943⸮ " instead. I don't know why this is happening. Does anyone know how I can fix this?
#include <ArduinoJson.h>
#include <RH_ASK.h>
#include <SPI.h>
// RX / TX pin numbers; not referenced anywhere in the visible sketch.
#define RX 0
#define TX 1
using namespace std;
// 512-byte scratch buffer; not referenced anywhere in the visible sketch.
unsigned char rxBuf[512];
// RadioHead ASK driver instance, constructed with its default arguments.
RH_ASK rf_driver;
// One-time initialisation: starts the ASK radio driver, then opens the serial
// port at 9600 baud. The return value of init() is not checked.
void setup() {
rf_driver.init();
Serial.begin(9600);
}
void loop() {
{
uint8_t buf[8];
uint8_t buflen = sizeof(buf);
if (rf_driver.recv(buf, &buflen))
{
Serial.println("https://example.com/example/example/"+(String((char*)buf))+";headers\n");
//(String((char*)buf) is number from transmitter
String message = Serial.readString();
const size_t capacity = JSON_OBJECT_SIZE(1) + 20;
DynamicJsonDocument doc(capacity);
DeserializationError error = deserializeJson(doc, message);
if (error) {
Serial.print(F("deserializeJson() failed: "));
Serial.println(error.f_str());
return;
}
Serial.println(doc["attribute"].as<const char*>());
Serial.println(message);
}
}
}
I found the solution. It seems I must not convert the buffer with (String((char*)buf)); instead I need to build the String from the received bytes one at a time, using the reported length. Here is how I solved it.
uint8_t buflen = sizeof(buf)
if (rf_driver.recv(buf, &buflen))
{
rf_driver.printBuffer("Got:",buf,buflen);
String rcv;
for (int i=0; i<buflen;i++){
rcv+=(char)buf[i];
}
Serial.print("example.com/example"+rcv+";header");
String input = Serial.readString();
StaticJsonDocument<32> doc;
DeserializationError error = deserializeJson(doc,input);
if (error) {
Serial.print(F("deserializeJson() failed: "));
Serial.println(error.f_str());
return;
}
float attribute= doc["attribute"]; // 428.5
Serial.println(float(attribute));
Serial.println(input);
TRANSMITTER.ino (COM8 port)
#include <SPI.h>
#include <nRF24L01.h>
#include <RF24.h>
#include<ArduinoJson.h>
// nRF24L01 driver; constructor arguments are the CE and CSN pins.
RF24 radio(7, 8); // CE, CSN
// Pipe address handed to openWritingPipe(): the five characters "00001" plus '\0'.
const byte addresses[6] = {"00001"};
// One-time radio configuration for the sending side: opens the writing pipe on
// `addresses`, selects the RF24_PA_MIN power level and calls stopListening().
void setup() {
radio.begin();
radio.openWritingPipe(addresses);
radio.setPALevel(RF24_PA_MIN);
radio.stopListening();
}
void loop() {
DynamicJsonBuffer jBuffer;
JsonObject& root = jBuffer.createObject();
root["North"] = "true";
root["South"] = "false";
root["East"] = "true";
root["West"] = "true";
radio.write(&root, sizeof(root));
delay(1000);
}
RECEIVER.ino (COM9 port)
#include <SPI.h>
#include <nRF24L01.h>
#include <RF24.h>
#include<ArduinoJson.h>
// nRF24L01 driver; constructor arguments are the CE and CSN pins.
RF24 radio(7, 8); // CE, CSN
// Pipe address handed to openReadingPipe(): the five characters "00001" plus '\0'.
const byte addresses[6] = {"00001"};
// One-time configuration for the receiving side: opens the serial port at
// 9600 baud, opens reading pipe 0 on `addresses`, selects the RF24_PA_MIN
// power level and calls startListening().
void setup() {
Serial.begin(9600);
radio.begin();
radio.openReadingPipe(0, addresses);
radio.setPALevel(RF24_PA_MIN);
radio.startListening();
}
void loop() {
if ( radio.available()) {
unsigned char data[1024];
radio.read(&data, sizeof(data));
StaticJsonBuffer<1024> jsonBuffer;
JsonObject& toor = jsonBuffer.parseObject(data);
String n = toor["North"];
String s = toor["South"];
String e = toor["East"];
String w = toor["West"];
Serial.println(n);
Serial.println(s);
Serial.println(e);
Serial.println(w);
delay(1000);
}
}
I am trying to transmit JSON data over an NRF24L01 and print it to the serial monitor, but I don't see any output in my serial monitor (the COM9 serial monitor). What mistake am I making here?
I am using arduino JSON 5.13.5 version
I wrote code that gets the first _var positions of a vector of possibilities (i.e., a _size × _var matrix with _var = 3 and _size = 27), and I call this function in my kernel (32 threads, i.e., each one has its own object), but I do not get any return value from the function, not even the NULL pointer.
The program exits without error, but the printf lines in the kernel are not executed or displayed (even when compiled for sm_20 or higher), as if the program had stopped earlier.
dataIntern.h:
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Smallest and largest value a table entry can take.
#define _MIN -1
#define _MAX 1
// Makes the member functions callable from host and device code under nvcc.
#ifdef __CUDACC__
#define CUDA_CALLABLE_MEMBER __host__ __device__
#else
#define CUDA_CALLABLE_MEMBER
#endif

/**
 * Table of every combination of `_var` values taken from {_MIN, ..., _MAX}.
 *
 * The table is stored row by row in one heap buffer: `_size` rows of `_var`
 * values each (3^_var rows right after construction). Rows can be filtered
 * with retify() and consumed one by one with unstack().
 *
 * The object owns its buffer. Copy construction and copy assignment make a
 * deep copy, so a copy stays valid after the source (for example a temporary)
 * has been destroyed. Assignment releases the target's previous buffer, so
 * the target must be a constructed object.
 */
template <class a_type>
class dataIntern{
private:
    a_type *possibilities;  // _size rows of _var values, row after row
    int _assign;            // 1 while the table is usable, 0 otherwise
    int _size;              // number of rows currently in the table
    int _var;               // number of values per row
    int _maxsize;           // number of elements allocated by the constructor

    /// 3^exponent in integer arithmetic; (int)pow(3.0, x) can truncate to the
    /// wrong value when the floating-point result is slightly below the exact one.
    CUDA_CALLABLE_MEMBER static int pow3(int exponent){
        int result = 1;
        for(int e = 0; e < exponent; e++){
            result *= 3;
        }
        return result;
    }

    /// Reaction to a failed allocation: exit() is only available on the host,
    /// so device code marks the object as unusable instead.
    CUDA_CALLABLE_MEMBER void allocationFailed(){
#ifdef __CUDA_ARCH__
        _assign = 0;
        _size = 0;
#else
        exit(1);
#endif
    }

    /// Makes *this a deep copy of `other`; the previous buffer must already
    /// have been released (or never have existed).
    CUDA_CALLABLE_MEMBER void copyFrom(const dataIntern &other){
        _assign = other._assign;
        _size = other._size;
        _var = other._var;
        _maxsize = other._maxsize;
        possibilities = NULL;
        if(other.possibilities != NULL && _size > 0 && _var > 0){
            const size_t bytes = (size_t)_size*_var*sizeof(a_type);
            possibilities = (a_type*)malloc(bytes);
            if(possibilities == NULL){
                allocationFailed();
                return;
            }
            memcpy(possibilities, other.possibilities, bytes);
        }
    }

public:
    /// Creates an empty, unassigned object that owns no buffer.
    CUDA_CALLABLE_MEMBER dataIntern()
        : possibilities(NULL), _assign(0), _size(0), _var(0), _maxsize(0){
    }

    /// Builds the full table of 3^var rows with `var` values per row.
    CUDA_CALLABLE_MEMBER dataIntern(int var){
        _var = var;
        _size = pow3(_var);
        _maxsize = _size * _var;
        _assign = 1;
        possibilities = (a_type*)malloc(_maxsize*sizeof(a_type));
        if(!possibilities){
            allocationFailed();
            return;
        }
        createTable();
    }

    /// Deep copy: the new object gets its own buffer.
    CUDA_CALLABLE_MEMBER dataIntern(const dataIntern &other){
        copyFrom(other);
    }

    /// Deep-copy assignment; releases the buffer previously owned by *this.
    CUDA_CALLABLE_MEMBER dataIntern& operator=(const dataIntern &other){
        if(this != &other){
            if(possibilities){
                free(possibilities);
            }
            copyFrom(other);
        }
        return *this;
    }

    /// Fills the table: column `col` repeats each value 3^(_var-col-1) times,
    /// cycling _MIN.._MAX, so the rows enumerate every combination.
    CUDA_CALLABLE_MEMBER void createTable(){
        if(_assign != 1){
            return;
        }
        for(int col = 0; col < _var; col++){
            const int limit = pow3(_var - col - 1);
            a_type value = (a_type)_MIN;
            int run = 0;  // how many consecutive rows already hold `value`
            for(int row = 0; row < _size; row++){
                if(run >= limit){
                    value++;
                    if(value > _MAX){
                        value = (a_type)_MIN;
                    }
                    run = 0;
                }
                possibilities[_var*row + col] = value;
                run++;
            }
        }
    }

    /// Prints the table, one row per line, or "Not assigned." when unusable.
    CUDA_CALLABLE_MEMBER void print(){
        printf("Printing.\n");
        if(_assign == 1){
            for(int i = 0; i < _size*_var; i++){
                // Cast so the "%d" conversion is valid whatever a_type is.
                printf("%d ", (int)possibilities[i]);
                if(i%_var == _var-1){
                    printf("\n");
                }
            }
        }
        else{
            printf("Not assigned.\n");
        }
    }

    /// Keeps only the rows whose column `posChanged` equals `valueRetified`.
    /// When no row matches, the table is left as it is and the object is
    /// marked unassigned. An out-of-range column leaves the object untouched.
    CUDA_CALLABLE_MEMBER void retify(int posChanged, a_type valueRetified){
        if(_assign != 1){
            return;
        }
        if(posChanged < 0 || posChanged >= _var){
            return;
        }
        a_type *kept = NULL;
        int count = 0;
        if(_size > 0){
            kept = (a_type*)malloc((size_t)_size*_var*sizeof(a_type));
            if(kept == NULL){
                printf("Allocation fail in newlist retify.\n");
                allocationFailed();
                return;
            }
        }
        for(int row = 0; row < _size; row++){
            if(possibilities[row*_var + posChanged] == valueRetified){
                memcpy(&kept[count*_var], &possibilities[row*_var], _var*sizeof(a_type));
                count++;
            }
        }
        if(count > 0){
            // Swap in the filtered rows and release the old table.
            free(possibilities);
            possibilities = kept;
            _size = count;
        }
        else{
            if(kept){
                free(kept);
            }
            _assign = 0;
        }
    }

    /// Removes the first row and returns a copy of it in a newly malloc'ed
    /// array of `_var` values that the caller must release with free().
    /// Returns NULL when the table is empty, unassigned, or allocation fails.
    CUDA_CALLABLE_MEMBER a_type* unstack(){
        if(_assign != 1 || _size <= 0){
            return NULL;
        }
        a_type *solution = (a_type*)malloc(_var*sizeof(a_type));
        if(solution == NULL){
            printf("Erro to alloc solution pointer on unstack function in data intern\n");
            return NULL;
        }
        memcpy(solution, possibilities, _var*sizeof(a_type));
        _size--;
        // Shift the remaining rows to the front in place; a forward element
        // copy is safe because the destination lies before the source.
        const int remaining = _size*_var;
        for(int i = 0; i < remaining; i++){
            possibilities[i] = possibilities[i + _var];
        }
        return solution;
    }

    /// Number of rows currently in the table.
    CUDA_CALLABLE_MEMBER int get_size(){
        return _size;
    }

    /// Releases the table buffer.
    CUDA_CALLABLE_MEMBER ~dataIntern(){
        _assign = 0;
        if(possibilities){
            free(possibilities);
            possibilities = NULL;
        }
    }
};
deviceCode.h:
// Include guard for the declarations shared by deviceCode.cu and main.cu.
#ifndef DEVICECODE_H
#define DEVICECODE_H
// Host-side entry point: launches `kernel` and waits for it (see deviceCode.cu).
void CallingInMain();
// Device kernel; takes no arguments.
__global__ void kernel();
#endif
deviceCode.cu:
#include "deviceCode.h"
#include "dataIntern.h"
#include <iostream>
#include <stdio.h>
//I declared like this to my kernel:
// Each thread builds its own table for 3 variables and prints every row,
// prefixed with the thread index, until the table is exhausted.
__global__ void kernel(){
    // Construct the object in place. Assigning from a temporary
    // (`data[i] = dataIntern<int>(3)`) copies the buffer pointer and then the
    // temporary's destructor frees that buffer, leaving a dangling pointer.
    // Every thread only ever touched its own element, so a per-thread local
    // object replaces the shared array.
    dataIntern<int> data(3);

    // unstack() returns a freshly allocated row (or NULL when empty); each
    // row is released after use. No separate malloc is needed for `vetor`.
    int *vetor = data.unstack();
    while(vetor != NULL){
        printf("%d %d %d %d\n", threadIdx.x, vetor[0], vetor[1], vetor[2]);
        free(vetor);
        vetor = data.unstack();
    }
    // The loop only ends once unstack() has returned NULL.
    printf("Null final\n");
}
void CallingInMain(){
kernel<<<1, 32>>>();
cudaDeviceSynchronize();
}
main.cu:
#include <iostream>
#include <stdio.h>
#ifndef deviceCode_H
#include "deviceCode.h"
#endif
// Program entry point: runs the CUDA demo once and reports success.
int main(int argc, char* argv[]){
    // Command-line arguments are not used.
    (void)argc;
    (void)argv;

    CallingInMain();

    return 0;
}
Some colleagues pointed out to me that your code seems to have an error in it.
Consider this line in your kernel:
data[threadIdx.x] = dataIntern<int>(3);
This line instantiates a temporary dataIntern<int> object, runs the constructor with a value of 3 on it, and then does a copy from that object to the storage in data[threadIdx.x]. Note that the constructor performs a malloc operation:
CUDA_CALLABLE_MEMBER dataIntern(int var){
...
possibilities = (a_type*)malloc(_maxsize*sizeof(a_type));
But since the original object is temporary, the C++ standard allows the object to be deleted at the termination of the statement, i.e. at the semicolon here:
data[threadIdx.x] = dataIntern<int>(3);
^
after the copy-construction process is complete. But the deletion of the object triggers the destructor, which does a free operation on possibilities:
CUDA_CALLABLE_MEMBER ~dataIntern(){
_assign = 0;
if(possibilities)
free(possibilities);
}
Therefore usage of the pointer so allocated subsequent to this line of code:
data[threadIdx.x] = dataIntern<int>(3);
such as in unstack here:
vetor = data[threadIdx.x].unstack();
will be invalid.
This is a violation of C++ programming rules, and the error is not specific to CUDA.
I have been trying to implement a function in C that multiplies two row-major matrices using cuBLAS, but I don't know where my mistake is.
In the function below, A, B and C are pointers to correctly allocated row-major matrices.
I'd like to keep the option of transposing a matrix before performing the product.
The function below is not working.
/**
 * Computes C = op(A) * op(B) with cuBLAS for matrices stored in ROW-major order.
 *
 * @param handle  cuBLAS context.
 * @param A       device pointer, stored as m rows by n columns.
 * @param B       device pointer, stored as n rows by k columns.
 * @param C       device pointer receiving the row-major result,
 *                rows(op(A)) by cols(op(B)).
 * @param m,n,k   stored dimensions of A and B as described above.
 * @param transA  non-zero to use the transpose of A.
 * @param transB  non-zero to use the transpose of B.
 *
 * cuBLAS works on column-major data. A row-major buffer read as column-major
 * is the transpose of the matrix, so the product is requested as
 * C^T = op(B)^T * op(A)^T: the operands are swapped and the leading dimensions
 * are the stored column counts. Prints "Something wrong" and does nothing
 * when the inner dimensions do not match.
 */
void matrixMul(cublasHandle_t handle,float *A,float *B,float *C, int m,int n,int k,int transA,int transB){
    const float alfa = 1;
    const float beta = 0;

    // Shapes of op(A) and op(B).
    const int rowsA = transA ? n : m;
    const int colsA = transA ? m : n;
    const int rowsB = transB ? k : n;
    const int colsB = transB ? n : k;

    if(colsA != rowsB){
        puts("Something wrong");
        return;
    }

    const cublasOperation_t opA = transA ? CUBLAS_OP_T : CUBLAS_OP_N;
    const cublasOperation_t opB = transB ? CUBLAS_OP_T : CUBLAS_OP_N;

    // Column-major view: (colsB x colsA) * (colsA x rowsA) = (colsB x rowsA) = C^T.
    const cublasStatus_t stat = cublasSgemm_v2(handle, opB, opA,
                                               colsB, rowsA, colsA, &alfa,
                                               B, k,
                                               A, n, &beta,
                                               C, colsB);
    switch (stat) {
    case CUBLAS_STATUS_SUCCESS:
        puts("Sucess");
        break;
    default:
        printf(">>>>ERRO %d<<<<\n", (int)stat);
        break;
    }
}
The entire source code
// Utilities and system includes
#include <assert.h>
#include <helper_string.h> // helper for shared functions common to CUDA Samples
// CUDA runtime
#include <cuda_runtime.h>
#include <cublas_v2.h>
// CUDA and CUBLAS functions
#include <helper_functions.h>
void getFromDevice(float *h_A,float *d_A,int size){
//printf("Copy input data from the host memory to the CUDA device\n");
cudaError_t err = cudaMemcpy(h_A, d_A, size, cudaMemcpyDeviceToHost);
if (err != cudaSuccess)
{
fprintf(stderr, "Failed to copy vector A from host to device (error code %s)!\n", cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
}
//A = (m,n)
//B = (n,k)
//C = (m,k)
/**
 * Computes C = op(A) * op(B) with cuBLAS for matrices stored in ROW-major order.
 *
 * @param handle  cuBLAS context.
 * @param A       device pointer, stored as m rows by n columns.
 * @param B       device pointer, stored as n rows by k columns.
 * @param C       device pointer receiving the row-major result,
 *                rows(op(A)) by cols(op(B)).
 * @param m,n,k   stored dimensions of A and B as described above.
 * @param transA  non-zero to use the transpose of A.
 * @param transB  non-zero to use the transpose of B.
 *
 * cuBLAS works on column-major data. A row-major buffer read as column-major
 * is the transpose of the matrix, so the product is requested as
 * C^T = op(B)^T * op(A)^T: the operands are swapped and the leading dimensions
 * are the stored column counts. Prints "Something wrong" and does nothing
 * when the inner dimensions do not match.
 */
void matrixMul(cublasHandle_t handle,float *A,float *B,float *C, int m,int n,int k,int transA,int transB){
    const float alfa = 1;
    const float beta = 0;

    // Shapes of op(A) and op(B).
    const int rowsA = transA ? n : m;
    const int colsA = transA ? m : n;
    const int rowsB = transB ? k : n;
    const int colsB = transB ? n : k;

    if(colsA != rowsB){
        puts("Something wrong");
        return;
    }

    const cublasOperation_t opA = transA ? CUBLAS_OP_T : CUBLAS_OP_N;
    const cublasOperation_t opB = transB ? CUBLAS_OP_T : CUBLAS_OP_N;

    // Column-major view: (colsB x colsA) * (colsA x rowsA) = (colsB x rowsA) = C^T.
    const cublasStatus_t stat = cublasSgemm_v2(handle, opB, opA,
                                               colsB, rowsA, colsA, &alfa,
                                               B, k,
                                               A, n, &beta,
                                               C, colsB);
    switch (stat) {
    case CUBLAS_STATUS_SUCCESS:
        puts("Sucess");
        break;
    default:
        printf(">>>>ERRO %d<<<<\n", (int)stat);
        break;
    }
}
float *mallocfDevice(int size){
float *d_C = NULL;
cudaError_t err = cudaMalloc((void **)&d_C, size * sizeof(float));
if (err != cudaSuccess)
{
fprintf(stderr, "Failed to allocate device vector C (error code %s)!\n", cudaGetErrorString(err));
exit(EXIT_FAILURE);
}else{
size_t freeM, total;
cudaMemGetInfo ( &freeM, &total);
printf("MEM:%.3f\n",freeM,total,100 - ((double)freeM/total)*100 );
}
return d_C;
}
/**
 * Prints a row-major host matrix of nl rows and nc columns, one row per
 * line, every value with two decimals followed by a space.
 */
void printHostMatrix(int nl, int nc, float *h_s){
    for(int row = 0; row < nl; ++row){
        const float *line = h_s + row*nc;  // first element of this row
        for(int col = 0; col < nc; ++col){
            printf("%.2f ", line[col]);
        }
        printf("\n");
    }
}
/**
 * Copies an m-by-p row-major matrix from the device into a temporary host
 * buffer and prints it. Terminates the program when the host buffer cannot
 * be allocated.
 */
void printfDeviceMatrix(float *d_s,int m, int p){
    float *h_s =(float*) malloc(sizeof(float)*m*p);
    if (h_s == NULL)
    {
        fprintf(stderr, "Failed to allocate host buffer for printing!\n");
        exit(EXIT_FAILURE);
    }
    getFromDevice(h_s,d_s,sizeof(float)*m*p);  // getFromDevice expects a byte count
    printHostMatrix(m,p,h_s);
    free(h_s);
}
void sendTofDevice(float *h_A,float *d_A,int size){
//printf("Copy input data from the host memory to the CUDA device\n");
cudaError_t err = cudaMemcpy(d_A, h_A, size*sizeof(float), cudaMemcpyHostToDevice);
if (err != cudaSuccess)
{
fprintf(stderr, "Failed to copy vector A from host to device (error code %s)!\n", cudaGetErrorString(err));
exit(EXIT_FAILURE);
}
}
int main(int argc,char **argv){
int ma = 2,
na = 3,
mb = 3,
nb = 2;
float A[] = { 1,2,3,
4,5,6};
float B[] = {7, 8,
9,10,
11,12};
float *C = new float[ma*nb];
float *d_a = mallocfDevice(ma*mb),
*d_b = mallocfDevice(mb*nb),
*d_c = mallocfDevice(ma*nb);
sendTofDevice(A,d_a,ma*na);
sendTofDevice(B,d_b,mb*nb);
cublasHandle_t handle ; // CUBLAS context
cublasCreate (&handle );
puts("A");
printfDeviceMatrix(d_a,ma,na);
puts("B");
printfDeviceMatrix(d_b,mb,nb);
matrixMul(handle, d_a,d_b,d_c,
ma,na,nb,0,0);
puts("AB=C");
printfDeviceMatrix(d_c,ma,nb);
}
CUBLAS assumes that the matrix in the device is stored in column major:
"
where α and β are scalars, and A , B and C are matrices stored in column-major format with dimensions op ( A ) m × k , op ( B ) k × n and C m × n , respectively. Also, for matrix A
Read more at: http://docs.nvidia.com/cuda/cublas/index.html#ixzz3mSDJTWrM "
That means the matrix needs to be treated differently on the device than on the host.