How to tell OCaml compiler the path of recently installed module - csv

I already have OCaml installed on my Mac by running these commands:
$ brew install opam
$ opam init --bare -a -y
$ opam switch create cs3110-2022fa ocaml-base-compiler.4.14.0
Running any OCaml code which only uses Standard Library modules works fine. But then I want to do a few things that are not covered by it, e.g. reading CSV files.
$ opam install csv
Let's try to compile this code
open Printf
open Csv
(* Sample CSV data embedded directly in the source.  The backslash at the
   end of each "…\" string line is OCaml's line-continuation escape, and
   the doubled \" escapes are literal quotes inside the CSV fields. *)
let embedded_csv = "\
\"Banner clickins\"
\"Clickin\",\"Number\",\"Percentage\",
\"brand.adwords\",\"4,878\",\"14.4\"
\"vacation.advert2.adwords\",\"4,454\",\"13.1\"
\"affiliates.generic.tc1\",\"1,608\",\"4.7\"
\"brand.overture\",\"1,576\",\"4.6\"
\"vacation.cheap.adwords\",\"1,515\",\"4.5\"
\"affiliates.generic.vacation.biggestchoice\",\"1,072\",\"3.2\"
\"breaks.no-destination.adwords\",\"1,015\",\"3.0\"
\"fly.no-destination.flightshome.adwords\",\"833\",\"2.5\"
\"exchange.adwords\",\"728\",\"2.1\"
\"holidays.cyprus.cheap\",\"574\",\"1.7\"
\"travel.adwords\",\"416\",\"1.2\"
\"affiliates.vacation.generic.onlinediscount.200\",\"406\",\"1.2\"
\"promo.home.topX.ACE.189\",\"373\",\"1.1\"
\"homepage.hp_tx1b_20050126\",\"369\",\"1.1\"
\"travel.agents.adwords\",\"358\",\"1.1\"
\"promo.home.topX.SSH.366\",\"310\",\"0.9\""
(* Load the two example files from disk, pairing each path with its parsed
   contents.  NOTE(review): paths are relative to the working directory, so
   this fails at startup unless run from the project root — confirm. *)
let csvs =
List.map (fun name -> name, Csv.load name)
[ "examples/example1.csv"; "examples/example2.csv" ]
(* Parse the embedded CSV, pretty-print it together with the CSVs loaded
   from disk, then compare the embedded data against the first file. *)
let () =
(* Turn the embedded string into a parsed CSV value. *)
let ecsv = Csv.input_all(Csv.of_string embedded_csv) in
printf "---Embedded CSV---------------------------------\n" ;
Csv.print_readable ecsv;
List.iter (
fun (name, csv) ->
printf "---%s----------------------------------------\n" name;
Csv.print_readable csv
) csvs;
(* Csv.compare follows the usual compare convention: 0 means equal. *)
printf "Compare (Embedded CSV) example1.csv = %i\n"
(Csv.compare ecsv (snd(List.hd csvs)))
(* Re-parse the embedded CSV and write it out to a file in the system
   temporary directory, reporting the path used. *)
let () =
(* Save it to a file *)
let ecsv = Csv.input_all(Csv.of_string embedded_csv) in
let fname = Filename.concat (Filename.get_temp_dir_name()) "example.csv" in
Csv.save fname ecsv;
printf "Saved CSV to file %S.\n" fname
The result is:
$ ocamlopt csvdemo.ml -o csvdemo
File "csvdemo.ml", line 2, characters 5-8:
2 | open Csv
^^^
Error: Unbound module Csv
How to tell OCaml compiler where to find Csv module path?

This seems like a wonderful place to use Dune, but going a little bit old school you can use ocamlfind to locate the package.
% cat test2.ml
open Csv
let () = print_endline "hello"
% ocamlopt -I `ocamlfind query csv` -o test2 csv.cmxa test2.ml
% ./test2
hello
%
Or alternatively:
ocamlfind ocamlopt -package csv -o test2 csv.cmxa test2.ml

Related

Nextflow rename barcodes and concatenate reads within barcodes

My current working directory has the following sub-directories
My Bash script
Hi there
I have compiled the above Bash script to do the following tasks:
rename the sub-directories (barcode01-12) taking information from the metadata.csv
concatenate the individual reads within a sub-directory and move them up in the $PWD
then I use these concatenated reads (one per barcode) for my Nextflow script below:
Query:
How can I get the above pre-processing tasks (renaming and concatenating) or the Bash script added at the beginning of my following Nextflow script?
In my experience, FASTQ files can get quite large. Without knowing too much of the specifics, my recommendation would be to move the concatenation (and renaming) to a separate process. In this way, all of the 'work' can be done inside Nextflow's working directory. Here's a solution that uses the new DSL 2. It uses the splitCsv operator to parse the metadata and identify the FASTQ files. The collection can then be passed into our 'concat_reads' process. To handle optionally gzipped files, you could try the following:
params.metadata = './metadata.csv'
params.outdir = './results'
// Concatenate one sample's FASTQ files inside Nextflow's work directory,
// so the (potentially large) merged file is staged and published by
// Nextflow rather than created in the launch directory.
process concat_reads {

    tag { sample_name }

    publishDir "${params.outdir}/concat_reads", mode: 'copy'

    input:
    tuple val(sample_name), path(fastq_files)

    output:
    tuple val(sample_name), path("${sample_name}.${extn}")

    script:
    // Refuse to mix compressed and uncompressed inputs: a plain `cat`
    // over both would produce a corrupt .fastq.gz.
    if( fastq_files.every { it.name.endsWith('.fastq.gz') } )
        extn = 'fastq.gz'
    else if( fastq_files.every { it.name.endsWith('.fastq') } )
        extn = 'fastq'
    else
        error "Concatenation of mixed filetypes is unsupported"

    """
    cat ${fastq_files} > "${sample_name}.${extn}"
    """
}
// Assemble each sample's concatenated reads with pomoxis `mini_assemble`.
// NOTE(review): no `output:` block is declared, so `publishDir` has
// nothing to publish — confirm whether the assembly results in `results/`
// should be captured as process outputs.
process pomoxis {
tag { sample_name }
publishDir "${params.outdir}/pomoxis", mode: 'copy'
// Fixed CPU request; threaded through to mini_assemble via -t below.
cpus 18
input:
tuple val(sample_name), path(fastq)
"""
mini_assemble \\
-t ${task.cpus} \\
-i "${fastq}" \\
-o results \\
-p "${sample_name}"
"""
}
// Entry workflow: parse the metadata CSV, collect each sample's FASTQ
// files from its directory, then concatenate and assemble them.
workflow {
fastq_extns = [ '.fastq', '.fastq.gz' ]
Channel.fromPath( params.metadata )
| splitCsv()
// NOTE(review): assumes each metadata row is `dir,sample_name` with no
// header line — confirm against the actual metadata.csv layout.
| map { dir, sample_name ->
all_files = file(dir).listFiles()
// Keep only files carrying a recognised FASTQ extension.
fastq_files = all_files.findAll { fn ->
fastq_extns.find { fn.name.endsWith( it ) }
}
tuple( sample_name, fastq_files )
}
| concat_reads
| pomoxis
}

How to extract the name of a Party?

In a DAML contract, how do I extract the name of a party from a Party field?
Currently, toText p gives me Party(Alice). I'd like to only keep the name of the party.
That you care about the precise formatting of the resulting string suggests that you are implementing a codec in DAML. As a general principle DAML excels as a modelling/contract language, but consequently has limited features to support the sort of IO-oriented work this question implies. You are generally better off returning DAML values, and implementing codecs in Java/Scala/C#/Haskell/etc interfacing with the DAML via the Ledger API.
Still, once you have a Text value you also have access to the standard List manipulation functions via unpack, so converting "Party(Alice)" to "Alice" is not too difficult:
daml 1.0 module PartyExtract where
import Base.List
-- Rebuild a Text value from a list of Chars, one character at a time
-- (DAML 1.0 has no direct Chars-to-Text conversion).
def pack (cs: List Char) : Text =
foldl (fun (acc: Text) (c: Char) -> acc <> singleton c) "" cs;
-- Strip a fixed-width prefix (7 chars) and suffix (2 chars) from the
-- rendering produced by toText.  NOTE(review): the drop counts are tied
-- to the exact Party rendering of this DAML version — confirm against
-- the actual toText output before relying on them.
def partyToText (p: Party): Text =
pack $ reverse $ drop 2 $ reverse $ drop 7 $ unpack $ toText p
-- Scenario: extracting the name of party 'Alice' yields "Alice".
test foo : Scenario {} = scenario
let p = 'Alice'
assert $ "Alice" == partyToText p
In DAML 1.2 the standard library has been expanded, so the code above can be simplified:
daml 1.2
module PartyExtract2
where
import DA.Text
-- Trace `b` followed by `a` to the scenario log, returning `a` unchanged
-- (debugging aid; safe to remove in production code).
traceDebug : (Show a, Show b) => b -> a -> a
traceDebug b a = trace (show b <> show a) $ a
-- `show` renders a Party in single quotes; strip them to get the bare name.
partyToText : Party -> Text
partyToText p = dropPrefix "'" $ dropSuffix "'" $ traceDebug "show party: " $ show p
-- Scenario: extracting the name of party "Alice" yields "Alice".
foo : Scenario ()
foo = do
p <- getParty "Alice"
assert $ "Alice" == (traceDebug "partyToText party: " $ partyToText p)
NOTE: I have left the definition and calls to traceDebug so you can see the exact strings being generated in the scenario trace output.

Run R silently from command line, export results to JSON

How might I call an R script from the shell (e.g. from Node.js exec) and export results as JSON (e.g. back to Node.js)?
The R code below basically works. It reads data, fits a model, converts the parameter estimates to JSON, and prints them to stdout:
#!/usr/bin/Rscript --quiet --slave
# Fit a logistic regression on /data/records.csv and emit the coefficient
# estimates as a JSON document on stdout, for consumption by a parent
# process (e.g. Node.js exec).
#
# quiet=TRUE keeps install.packages' download chatter off stdout, and the
# packages installed are the ones actually library()'d below: reshape2
# (for melt) and rjson (for toJSON).  ("cut" is a base R function, not an
# installable package.)
install.packages("reshape2", repos="http://cran.rstudio.com/", quiet=TRUE);
install.packages("Hmisc", repos="http://cran.rstudio.com/", quiet=TRUE);
install.packages("rjson", repos="http://cran.rstudio.com/", quiet=TRUE);
library(rjson)
library(reshape2);
data = read.csv("/data/records.csv", header = TRUE, sep=",");
mylogit <- glm( y ~ x1 + x2 + x3, data=data, family="binomial");
params <- melt(mylogit$coefficients);
json <- toJSON(params);
# cat() writes the bare string; auto-printing `json` would prepend "[1]"
# and escape the quotes, breaking JSON.parse on the consumer side.
cat(json);
Here's how I'd like to call it from Node...
// Run the R script as a child process and parse its stdout as JSON.
var exec = require('child_process').exec;
exec('./model.R', function(err, stdout, stderr) {
  // Fail fast if the script errored; otherwise stdout must contain
  // nothing but the JSON document for the parse below to succeed.
  if (err) {
    console.error(stderr || err);
    return;
  }
  var params = JSON.parse(stdout);
});
Except the R process won't stop printing to stdout. I've tried --quiet --slave --silent which all help a little but not enough. Here's what's sent to stdout:
The downloaded binary packages are in
/var/folders/tq/frvmq0kx4m1gydw26pcxgm7w0000gn/T//Rtmpyk7GmN/downloaded_packages
The downloaded binary packages are in
/var/folders/tq/frvmq0kx4m1gydw26pcxgm7w0000gn/T//Rtmpyk7GmN/downloaded_packages
[1] "{\"value\":[4.04458733165933,0.253895751245782,-0.1142272181932,0.153106007464742,-0.00289013062471735,-0.00282580664375527,0.0970325223603164,-0.0906967639834928,0.117150317941983,0.046131890754108,6.48538603593323e-06,6.70646151749708e-06,-0.221173770066275,-0.232262366060079,0.163331098409235]}"
What's the best way to use R scripts on the command line?
Running R --silent --slave CMD BATCH model.R per the post below still results in a lot of extraneous text printed to model.Rout:
Run R script from command line
Those options only stop R's own system messages from printing; they won't stop another R function from printing. Be careful not to suppress everything, though, or you'll stop your last line from printing and you won't get your JSON to stdout!
Those messages are coming from install.packages, so try:
install.packages(-whatever-, quiet=TRUE)
which claims to reduce the amount of output. If it reduces it to zero, job done.
If not, then you can redirect stdout with sink, or run things inside capture.output.

Undefined symbol error when testing Netezza UDX

I'm trying to implement a Netezza UDX that uses R language through Rserve (therefore I use Rconnection library for C++). Currently I've written only the bare minimum required for the UDX to run.
It looks as follows:
#define MAIN
// Netezza UDX libraries
#include "udxinc.h"
#include "udxhelpers.h"
// C++ libraries
#include <cstdlib>
#include <cstring>
// external Rserve and R headers
#include "Rconnection.h"
#include "sisocks.h"
using namespace nz::udx::dthelpers;
using namespace nz::udx_ver2;
// Minimal Netezza UDX that holds one Rserve connection per instance.
// extends UDF base class
class My_UDX : public Udf {
public:
Rconnection * rc;
My_UDX(UdxInit *pInit) : Udf(pInit) {
initsocks();
// IP address in place of ...
// NOTE(review): connecting to Rserve (default port 6311) in the
// constructor means every instantiation performs network I/O —
// confirm the Rserve host is reachable from the SPU environment.
rc = new Rconnection("...", 6311);
};
// Tear down the Rserve connection when the instance is destroyed.
~My_UDX() {
delete rc;
}
static Udf* instantiate(UdxInit *pInit);
virtual ReturnValue evaluate();
};
// Factory entry point required by the Netezza UDX framework.
Udf* My_UDX::instantiate(UdxInit *pInit) {
return new My_UDX(pInit);
}
// called once for each row of data during execution
ReturnValue My_UDX::evaluate() {
// Placeholder body: always return the constant 15 through the
// framework's return macro; the Rserve connection is not used yet.
int32 retval = 15;
NZ_UDX_RETURN_INT32(retval);
}
The problem arises when I try to test the UDX with a SQL query. My query looks like this:
SELECT * FROM table_name WHERE My_UDX(table_field) = 1;
And this is the error message I get:
Testing...
nzsql -d database_name -u my_username -pw my_password -f sql/test.sql
nzsql:sql/test.sql:1: ERROR: /dev/shm/spuplans/63_1_3.o: undefined symbol: _ZN11RconnectionC1EPKci : loading executable code
make: *** [update] Error 2
If I remove the Rconnection object, UDX works just fine.
I've checked different manuals about Netezza UDXs, Rserve, and Rconnection but with no results. Couldn't find any explanation to this error and what could be done to resolve it.
Then I assumed there might be something wrong with the compiling process, inclusion of needed libraries and flags. I'm not so familiar with makefiles so this is quite a possibility. My makefile looks like so:
# Build, install and test the My_UDX Netezza UDX.
# list of files
function_name = My_UDX
object_files = $(function_name).o_x86
spu_files = $(function_name).o_spu10
# database connection
db_call = nzsql -d database_name -u my_username -pw my_password -f
# path to directories
lib_dir = /home/nz/libs
r_connection_dir = /home/nz/Rconnection
# Default target: recompile, reinstall and re-test the function.
all: update
# Drop the old function, stage the fresh objects, then recreate and test.
# NOTE(review): the intermediate objects are deleted before create/test
# run, so any failure after `rm` forces a full recompile — confirm this
# ordering is intended.
update: compile
$(db_call) sql/unload.sql
cp $(object_files) $(lib_dir)
cp $(spu_files) $(lib_dir)
rm -f *.o_spu10 *.o_x86
$(db_call) sql/create.sql
$(db_call) sql/test.sql
# Compile the UDX, pointing the compiler at the Rconnection headers.
compile:
nzudxcompile $(function_name).cpp --args -I$(r_connection_dir)
unload.sql just drops the previously loaded function and create.sql just creates it. The problem arises only when the test.sql with the previously shown SQL query is executed.
Have been trying to resolve this issue for quite a long time so any help would be appreciated!
As mentioned above by Alex, Rconnection had to be loaded as a shared library or compiled along with the UDF. Solved this by compiling with the UDF.
# Compile Rconnection.cpp together with the UDF and link their objects,
# so the Rconnection symbols are present in the final UDX object (this
# fixes the "undefined symbol: _ZN11RconnectionC1EPKci" loader error).
# for host environment
nzudxcompile --host Rconnection.cpp -o temp_rconnection.o_x86
nzudxcompile --host myudf.cpp -o temp_myudf.o_x86
nzudxcompile --host --objs temp_rconnection.o_x86 --objs temp_myudf.o_x86 -o myudf.o_x86
# for spu environment
nzudxcompile Rconnection.cpp --spu -o temp_rconnection.o_spu10
nzudxcompile myudf.cpp --spu -o temp_myudf.o_spu10
nzudxcompile --spu --objs temp_rconnection.o_spu10 --objs temp_myudf.o_spu10 -o myudf.o_spu10

How to read gzip-File with fopen in octave 3.2.4?

I need to open a gzip file with fopen. The manual (help fopen) explains to add b and z to the mode string:
[f, msg] = fopen('file.gz', 'rbz')
results to the error:
f = -1
msg =
rb and r work separately, but not with z. Do I misunderstand the manual?
An example file can be generated by
echo -e "1,2\n2,3\n3,4\n4,3\n5,5" | gzip > file.gz
The octave version 3.2.4 is caused by my operating system: Ubuntu 12.04.3 LTS
function data = zcatcsvfile(filename, firstline)
  % Read a gzip-compressed text file, skipping the first FIRSTLINE-1
  % lines, and parse the remainder as a numeric matrix.
  %
  % filename  - path to the gzipped file
  % firstline - 1-based line number at which the data starts
  %             (e.g. 6 to skip a 5-line header)
  %
  % The filename is double-quoted so paths containing spaces survive the
  % shell; the command's exit status is checked instead of silently
  % parsing garbage output.
  cmd = sprintf('zcat "%s" | tail -n +%d', filename, firstline);
  [status, content] = system(cmd);
  if (status != 0)
    error('zcatcsvfile: command failed: %s', cmd);
  endif
  % str2num treats commas as column and newlines as row separators.
  data = str2num(content);
endfunction
Use this function to read a gzipped file filename and read as first line firstline. If the file has a header of 5 lines:
data=zcatcsvfile('data.gz', 6)