How to create a parametric survival learner for mlr in R

I am following the instructions at https://mlr.mlr-org.com/articles/tutorial/create_learner.html to create a parametric survival learner to use with mlr. My code is below.
When I call makeLearner(id = "AFT", "surv.parametric"), I get the error that dist is missing and no default is set, even though I already specified the default of dist to be "weibull" in my code.
makeRLearner.surv.parametric = function() {
  makeRLearnerSurv(
    cl = "surv.parametric",
    package = "survival",
    par.set = makeParamSet(
      makeDiscreteLearnerParam(id = "dist", default = "weibull",
        values = c("weibull", "exponential", "lognormal", "loglogistic"))
    ),
    properties = c("numerics", "factors", "weights", "prob", "rcens"),
    name = "Parametric Survival Model",
    short.name = "Parametric",
    note = "This is created based on the mlr3 surv.parametric learner"
  )
}
trainLearner.surv.parametric = function(.learner, .task, .subset, .weights = NULL, ...) {
  f = getTaskFormula(.task)
  data = getTaskData(.task, subset = .subset)
  if (is.null(.weights)) {
    mod = survival::survreg(formula = f, data = data, ...)
  } else {
    mod = survival::survreg(formula = f, data = data, weights = .weights, ...)
  }
  mod
}
predictLearner.surv.parametric = function(.learner, .model, .newdata, ...) {
  survival::predict.survreg(.model$learner.model, newdata = .newdata, type = "response", ...)
}

Based on here, the prediction function needs to return linear predictors, i.e. type = "lp" rather than "response". Also, mlr's cindex measure does not seem to be consistent with the output of survreg; based on this discussion, adding a minus sign resolves the issue. The prediction function would then be as below.
predictLearner.surv.parametric = function(.learner, .model, .newdata, ...) {
  -predict(.model$learner.model, newdata = .newdata, type = "lp", ...)
}
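For completeness, here is a minimal, untested sketch of how the finished learner could be registered and used, assuming the three S3 methods above are defined in the current session and that mlr and survival are installed. The lung data set and the conversion of its status column are only illustrative:
library(mlr)
library(survival)

# illustrative data: convert status (1 = censored, 2 = dead) to a logical event indicator
lung2 = survival::lung
lung2$status = lung2$status == 2
lung2 = lung2[complete.cases(lung2), ]   # the learner does not declare "missings"

task = makeSurvTask(data = lung2, target = c("time", "status"))
lrn = makeLearner("surv.parametric", id = "AFT", dist = "weibull")

mod = train(lrn, task)
pred = predict(mod, task)
performance(pred, measures = cindex)  # negated lp keeps cindex oriented correctly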


How can I create RLearner_regr_QRNN.R?

Original question
I would like to create a new learner for the quantile regression neural network. It is not in the list of learning methods already integrated with mlr. Its format must be like "RLearner_regr_QRNN.R".
Added after answer was accepted
I would like to define the quantile regression neural network as a new type of learner that has special properties and does not fit into one of the existing schemes. My code is below. The code runs, but when I use the learner on my data it gives the error that 'qrnn' is not an exported object from 'namespace:qrnn'. Thank you in advance; I look forward to hearing from you.
makeRLearner.regr.qrnn = function() {
  makeRLearnerRegr(
    cl = "regr.qrnn",
    package = "qrnn",
    par.set = makeParamSet(
      makeIntegerLearnerParam(id = "n.hidden", default = 2L, lower = 1L),
      makeUntypedLearnerParam(id = "n.hidden2", default = NULL),
      makeUntypedLearnerParam(id = "w", default = NULL),
      makeNumericVectorLearnerParam(id = "tau", default = c(0.1, 0.5, 0.9)),
      makeIntegerLearnerParam(id = "iter.max", default = 5000L),
      makeIntegerLearnerParam(id = "n.trials", default = 5L),
      makeNumericLearnerParam(id = "lower", default = 0),
      makeNumericVectorLearnerParam(id = "init.range", default = c(-0.5, 0.5, -0.5, 0.5, -0.5, 0.5)),
      makeUntypedLearnerParam(id = "monotone", default = NULL),
      makeNumericVectorLearnerParam(id = "eps.seq", default = c(2^(-8), 2^(-12), 2^(-16), 2^(-20), 2^(-24), 2^(-28), 2^(-32))),
      makeDiscreteLearnerParam(id = "Th", values = c("sigmoid", "elu", "softplus"), default = "sigmoid"),
      makeDiscreteLearnerParam(id = "Th.prime", values = c("sigmoid.prime", "elu.prime", "softplus.prime"), default = "sigmoid.prime"),
      makeNumericLearnerParam(id = "penalty", default = 0),
      makeIntegerLearnerParam(id = "n.errors.max", default = 10L),
      makeLogicalLearnerParam(id = "trace", default = TRUE),
      makeDiscreteLearnerParam(id = "method", values = c("nlm", "adam"), default = "nlm")
    ),
    par.vals = list(n.hidden = 5L, penalty = 0),
    properties = c("numerics", "factors", "ordered", "oobpreds", "featimp", "se", "weights"),
    name = "QRNN",
    short.name = "qrnn",
    callees = "qrnn"
  )
}
#' @export
trainLearner.regr.qrnn = function(.learner, .task, .subset, .weights = NULL, ...) {
  f = getTaskFormula(.task)
  if (is.null(.weights)) {
    qrnn::qrnn(f, data = getTaskData(.task, .subset), linout = TRUE, ...)
  } else {
    qrnn::qrnn(f, data = getTaskData(.task, .subset), linout = TRUE, weights = .weights, ...)
  }
}
#' @export
predictLearner.regr.qrnn = function(.learner, .model, .newdata, ...) {
  predict(.model$learner.model, newdata = .newdata, ...)[, 1L]
}
You can find instructions on how to create a custom learner on our website.
Also, you might want to consider writing that learner for the new mlr3 package instead. Instructions are here.
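As for the error itself: the qrnn package does not export a function called qrnn, so qrnn::qrnn fails. Its exported fitting and prediction functions are qrnn.fit and qrnn.predict, and they work on numeric matrices rather than a formula. A minimal, untested sketch of train/predict methods along those lines (the as.matrix conversion assumes purely numeric features):
trainLearner.regr.qrnn = function(.learner, .task, .subset, .weights = NULL, ...) {
  d = getTaskData(.task, .subset, target.extra = TRUE)
  # qrnn.fit() expects numeric matrices, not a formula interface
  qrnn::qrnn.fit(
    x = as.matrix(d$data),
    y = as.matrix(d$target),
    w = .weights,
    ...
  )
}

predictLearner.regr.qrnn = function(.learner, .model, .newdata, ...) {
  # qrnn.predict() returns one column per tau; take the first for a single response
  qrnn::qrnn.predict(as.matrix(.newdata), .model$learner.model)[, 1L]
}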

Kotlin - How to make a for loop that iterates and returns multiple values

I created a function that iterates over a list with an if statement in order to find a match. When a match is found I want to return the matched value, but that only happens once; the return statements are at the end of the function and of the if statement.
The question is: how can I keep this function from stopping after the first match? Is there another way, or other functions that I'm not using?
When I run this code I get this:
Anything
Not a match
Not a match
Here is my code:
class Class1(var self: String, var tipo: String, var element: String)

var test_class = Class1("", "", "")

fun giver() {
    test_class.self = "Anything"
    test_class.tipo = "Something"
    test_class.element = "Nothing"
}

class Funciones() {
    fun match_finder(texto: String): Any {
        var lista = listOf<String>(test_class.self, test_class.tipo, test_class.element)
        var lista_de_listas = listOf<String>("test_class.self", "test_class.tipo", "test_class.element")
        var count = -1
        var variable = ""
        for (i in lista_de_listas) {
            count = count + 1
            println(count)
            if (texto == i) {
                lista_de_listas = lista
                var variable = lista_de_listas[count]
                return variable
            }
        }
        return "Not a match"
    }
}

fun main() {
    giver()
    var x = "test_class.self"
    var z = "test.class.tipo"
    var t = "test.class.element"
    var funcion = Funciones()
    var y = funcion.match_finder(x)
    var c = funcion.match_finder(z)
    var r = funcion.match_finder(t)
    println(y)
    println(c)
    println(r)
}
You have some typos in your example: you query for test.class.tipo, but in your lista_de_listas you have test_class.tipo with an underscore. The same is true for test.class.element.
But you should consider using a Map instead of two lists for the lookup:
fun match_finder(texto: String): Any {
    val map = mapOf(
        "test_class.self" to test_class.self,
        "test_class.tipo" to test_class.tipo,
        "test_class.element" to test_class.element
    )
    return map.getOrDefault(texto, "Not a match")
}

How can I describe a network for function approximation with CNTK BrainScript?

I want a network that approximates arbitrary functions.
Here, I assume that the network has one input, n hidden layers with n nodes each, and one output.
I mainly work with CNTK config files. How can I describe such a network in a CNTK config file?
I've been trying it out with SimpleNetworkBuilder as below, using sine-wave training data.
Here are the config file and training data, please refer to them:
https://drive.google.com/open?id=0B83LnG3hRTNGUGxvTHJfYmZuMEE
To evaluate this network, I slightly modified an example project from the CNTK package, CNTKLibraryCPPEvalGPUExamples; I've only put 'EvalMultithreads.cpp' in the above zip file.
The network starts its learning iterations, but it outputs wrong values during my evaluation process (I've been using the C++ Eval library).
I gave a few data points to the evaluation program and it outputs '1' for all of them, so the network has obviously learnt the wrong thing.
command = trainNetwork:testNetwork

precision = "float";
traceLevel = 1;
deviceId = 0;

rootDir = "."
dataDir = "."
outputDir = "./Output"
modelPath = "$outputDir$/Models/mynn2"

dimension = 1
labelDimension = 1

# TRAINING CONFIG
trainNetwork = {
    action = "train"

    SimpleNetworkBuilder = [
        layerSizes = 1:50*1:1
        trainingCriterion = "CrossEntropyWithSoftmax"
        evalCriterion = "ErrorPrediction"
        layerTypes = "Sigmoid"
        applyMeanVarNorm = true
    ]

    SGD = [
        epochSize = 10000
        minibatchSize = 1
        learningRatesPerSample = 0.0001
        momentumAsTimeConstant = 0.0
        maxEpochs = 1
    ]

    reader = {
        readerType = "CNTKTextFormatReader"
        file = "train_sine.txt"
        input = {
            features = { dim = $dimension$; format = "dense" }
            labels   = { dim = $labelDimension$; format = "dense" }
        }
    }
}

# TEST CONFIG
testNetwork = {
    action = "test"
    minibatchSize = 1  # reduce this if you run out of memory

    reader = {
        readerType = "CNTKTextFormatReader"
        file = "test.txt"
        input = {
            features = { dim = $dimension$; format = "dense" }
            labels   = { dim = $labelDimension$; format = "dense" }
        }
    }
}
When doing regression you should use SquaredError not CrossEntropyWithSoftmax.

Search box in network plot

I have created a network of protein mutations using the forceNetwork() function of the networkD3 package. It gets rendered in RStudio's "Viewer" pane.
I can then save this as an HTML file for sharing, with the dynamic behaviour (clicking nodes, highlighting connections, etc.) preserved.
A PNG version of my network plot looks like the one below:
This is a representation of only the top 20% of my original data; the complete data is even larger and more complex.
I need to be able to add a search box to this forceNetwork, so that specific nodes can be located in a complex network. The JavaScript or jQuery part of this can easily be achieved by editing a copy of the networkD3 package and repackaging it, but my main challenge is adding the HTML code for the search box.
My main R code looks like this:
library(networkD3)
library(XLConnect)

wb <- loadWorkbook("input.xlsx")
nodes <- readWorksheet(wb, sheet = "Node", startRow = 1, startCol = 1, header = TRUE)
links <- readWorksheet(wb, sheet = "Edges", startRow = 1, startCol = 1, header = TRUE)

fn <- forceNetwork(Links = links, Nodes = nodes,
                   Source = "Source", Target = "ID", Value = "Combo",
                   NodeID = "Mutation", linkDistance = JS('function(d){return d.value * 50;}'),
                   Nodesize = "IF", Group = "Combo", radiusCalculation = JS("d.nodesize+6"),
                   zoom = T, bounded = F, legend = T,
                   opacity = 0.8,
                   fontSize = 16)
fn
My inspiration comes from the jsfiddle by Simon Raper.
What would be the best way to include a search box in this situation? The option I have thought of is to first save the rendering as HTML, and then read and edit that HTML to insert the code for the search box.
I tried to use Rhtml for this, but it doesn't seem trivial. Any pointers would be greatly appreciated.
Although I'm not crazy about this interactivity, I thought it would be a good opportunity to demonstrate how to use htmltools with htmlwidgets. Later, I will recreate it with crosstalk, but for now, here is how I would replicate the example provided.
direct replication
library(htmltools)
library(networkD3)

data(MisLinks)
data(MisNodes)

# make a forceNetwork as shown in ?forceNetwork
fn <- forceNetwork(
  Links = MisLinks, Nodes = MisNodes, Source = "source",
  Target = "target", Value = "value", NodeID = "name",
  Group = "group", opacity = 0.4, zoom = TRUE
)
fn <- htmlwidgets::onRender(
  fn,
  '
function(el, x) {
  debugger;
  var optArray = [];
  for (var i = 0; i < x.nodes.name.length - 1; i++) {
    optArray.push(x.nodes.name[i]);
  }
  optArray = optArray.sort();

  $(function () {
    $("#search").autocomplete({
      source: optArray
    });
  });

  d3.select(".ui-widget button").node().onclick = searchNode;

  function searchNode() {
    debugger;
    // find the node
    var selectedVal = document.getElementById("search").value;
    var svg = d3.select(el).select("svg");
    var node = d3.select(el).selectAll(".node");
    if (selectedVal == "none") {
      node.style("stroke", "white").style("stroke-width", "1");
    } else {
      var selected = node.filter(function (d, i) {
        return d.name != selectedVal;
      });
      selected.style("opacity", "0");
      var link = svg.selectAll(".link");
      link.style("opacity", "0");
      d3.selectAll(".node, .link").transition()
        .duration(5000)
        .style("opacity", 1);
    }
  }
}
  '
)
browsable(
  attachDependencies(
    tagList(
      tags$head(
        tags$link(
          href = "http://code.jquery.com/ui/1.11.0/themes/smoothness/jquery-ui.css",
          rel = "stylesheet"
        )
      ),
      HTML(
        '
<div class="ui-widget">
  <input id="search">
  <button type="button">Search</button>
</div>
        '
      ),
      fn
    ),
    list(
      rmarkdown::html_dependency_jquery(),
      rmarkdown::html_dependency_jqueryui()
    )
  )
)
crosstalk version
note: crosstalk is experimental, so this might change
I did not spend time optimizing and perfecting it, but here is a version that does roughly the same thing as the example, using crosstalk instead of custom code and a jQuery UI autocomplete.
library(htmltools)
library(networkD3)

# demonstrate with experimental crosstalk
# this will get much easier once we start converting
# htmlwidgets to work natively with crosstalk
# devtools::install_github("rstudio/crosstalk")
library(crosstalk)

data(MisLinks)
data(MisNodes)

# make a forceNetwork as shown in ?forceNetwork
fn <- forceNetwork(
  Links = MisLinks, Nodes = MisNodes, Source = "source",
  Target = "target", Value = "value", NodeID = "name",
  Group = "group", opacity = 0.4, zoom = TRUE
)

sd <- SharedData$new(MisNodes, key = ~name, group = "grp1")

# no autocomplete so not the same
# but will use this instead of writing something new
fs <- filter_select(
  id = "filter-node",
  label = "Search Nodes",
  sharedData = sd,
  group = ~name
)
fn <- htmlwidgets::onRender(
  fn,
  '
function(el, x) {
  // get the crosstalk group
  // we used grp1 in the SharedData from R
  var ct_grp = crosstalk.group("grp1");
  debugger;

  ct_grp
    .var("filter")
    .on("change", function(val) { searchNode(val.value) });

  function searchNode(filter_nodes) {
    debugger;
    // find the node
    var selectedVal = filter_nodes ? filter_nodes : [];
    var svg = d3.select(el).select("svg");
    var node = d3.select(el).selectAll(".node");
    if (selectedVal.length === 0) {
      node.style("opacity", "1");
      svg.selectAll(".link").style("opacity", "1");
    } else {
      var selected = node.filter(function (d, i) {
        return selectedVal.indexOf(d.name) >= 0;
      });
      node.style("opacity", "0");
      selected.style("opacity", "1");
      var link = svg.selectAll(".link").style("opacity", "0");
      /*
      svg.selectAll(".node, .link").transition()
        .duration(5000)
        .style("opacity", 1);
      */
    }
  }
}
  '
)
browsable(
  tagList(
    fs,
    fn
  )
)
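Since the question also mentions saving the result as an HTML file for sharing, here is a hedged, untested sketch using htmltools::save_html() to write the crosstalk version out as a standalone page (the same approach works for the browsable() object in the first example); the file name is arbitrary:
library(htmltools)

# the widget and filter_select bring their own HTML dependencies along
save_html(
  tagList(fs, fn),
  file = "network_with_search.html"
)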

Linq-2-Sql code: Does this scale?

I'm just starting to use LINQ to SQL. I'm hoping that someone can verify that LINQ to SQL defers execution until the foreach loop is executed. Overall, can someone tell me whether this code scales? It's a simple get method with a few search parameters. Thanks!
Code:
public static IList<Content> GetContent(int contentTypeID, int feedID, DateTime? date, string text)
{
    List<Content> contentList = new List<Content>();
    using (DataContext db = new DataContext())
    {
        var contentTypes = db.ytv_ContentTypes.Where(p => contentTypeID == -1 || p.ContentTypeID == contentTypeID);
        var feeds = db.ytv_Feeds.Where(p => p.FeedID == -1 || p.FeedID == feedID);

        var targetFeeds = from f in feeds
                          join c in contentTypes on f.ContentTypeID equals c.ContentTypeID
                          select new { FeedID = f.FeedID, ContentType = f.ContentTypeID };

        var content = from t in targetFeeds
                      join c in db.ytv_Contents on t.FeedID equals c.FeedID
                      select new { Content = c, ContentTypeID = t.ContentType };

        if (!String.IsNullOrEmpty(text))
        {
            content = content.Where(p => p.Content.Name.Contains(text) || p.Content.Description.Contains(text));
        }

        if (date != null)
        {
            DateTime dateTemp = Convert.ToDateTime(date);
            content = content.Where(p => p.Content.StartDate <= dateTemp && p.Content.EndDate >= dateTemp);
        }

        // Execution has been deferred to this point, correct?
        foreach (var c in content)
        {
            Content item = new Content()
            {
                ContentID = c.Content.ContentID,
                Name = c.Content.Name,
                Description = c.Content.Description,
                StartDate = c.Content.StartDate,
                EndDate = c.Content.EndDate,
                ContentTypeID = c.ContentTypeID,
                FeedID = c.Content.FeedID,
                PreviewHtml = c.Content.PreviewHTML,
                SerializedCustomXMLProperties = c.Content.CustomProperties
            };
            contentList.Add(item);
        }
    }

    // TODO
    return contentList;
}
It depends on what you mean by 'scales'. On the DB side, this code has the potential to cause trouble if you are dealing with large tables: SQL Server's optimizer is really poor at handling the OR operator in where-clause predicates and tends to fall back to table scans when there are several of them. I'd go for a couple of .Union calls instead, to avoid the possibility that SQL Server falls back to table scans just because of the ||'s.
If you can share more details about the underlying tables and the data in them, it will be easier to give a more detailed answer.