Error upon emloying parallel processing of tabnet in tidymodels - tidymodels

I am trying to make use of tabnet with tidymodels and the Titanic dataset. Here is my code:
pacman::p_load(tidyverse,
tidymodels,
tabnet,
torch,
doParallel,
reprex)
data(Titanic)
Titanic <- as.data.frame(Titanic)
#partition data
set.seed(1711)
titanic_split <- initial_split(Titanic, prop = 0.75, strata = Survived)
titanic_train <- training(titanic_split)
titanic_test <- testing(titanic_split)
#create cross-validation folds of training data
set.seed(1712)
folds <- vfold_cv(titanic_train,
folds = 3,
strata = Survived)
# define recipes for different models
titanic_rec <- recipe(formula = Survived ~ ., data = titanic_train) %>%
update_role(Survived, new_role = "outcome") %>%
step_zv() %>%
step_novel(all_nominal_predictors()) %>%
step_dummy(all_nominal_predictors(), one_hot = TRUE) %>%
step_YeoJohnson()
juice(prep(titanic_rec))
#define model
tab_spec <- tabnet(
mode = "classification",
epochs = 1, batch_size = 16384, decision_width = tune(), attention_width = tune(),
num_steps = tune(), penalty = 0.000001, virtual_batch_size = 512, momentum = 0.6,
feature_reusage = 1.5, learn_rate = tune()
) %>%
set_engine("torch", verbose= T)
wf <- workflow() %>%
add_model(tab_spec) %>%
add_recipe(titanic_rec)
grid <-
wf %>%
extract_parameter_set_dials() %>%
update(
decision_width = decision_width(range = c(20, 40)),
attention_width = attention_width(range = c(20, 40)),
num_steps = num_steps(range = c(4, 6)),
learn_rate = learn_rate(range = c(-2.5, -1))
) %>%
grid_max_entropy(size = 8)
auc_metric <- metric_set(yardstick::roc_auc)
auc_ctrl <- control_race(
verbose_elim = TRUE)
auc_results <- wf %>%
tune_grid(
resamples = folds,
control = auc_ctrl,
grid = grid)
This works like a charm . If i try however to use paraller processing I´m getting an error:
cl7 <- makePSOCKcluster(7)
registerDoParallel(cl7)
auc_results <- wf %>%
tune_grid(
resamples = folds,
control = auc_ctrl,
grid = grid)
Warning message:
All models failed. Run show_notes(.Last.tune.result) for more information.
Upon running shoe_notes i get the following : unique notes:
Error in UseMethod("filter"): no applicable method for 'filter' applied to an object of class "NULL"
Anyone knows how to fix this ?

Related

Several lines with different style in Caption in both html and docx - flextable

I need to show data caption, computer name and period in the header of table.
I have also requirements: zebra theme, merging cells if needed. That's why I chose flextable.
Here is my code:
library(officer) # border settings library
library(flextable) # drawing tables library
library(dplyr)
Caption <- "<b><big>Computer01.domain.com</big></b><br>Network Interface<br>Gbit Total/sec<br><small>2021-05-14 18:04 to 2021-05-25 13:29</small>"
bold_border <- fp_border(color="gray", width =2)
std_border <- fp_border(color="gray")
stub <- "2021-05-14 01:40 to 2021-05-17 08:26"
table_data <- data.frame (
Instance = c("Intel[R] Ethernet 10G",
"Intel[R] Ethernet Converged Network Adapter _1",
"Intel[R] Ethernet Converged Network Adapter _2",
"Intel[R] Ethernet 10G",
"Intel[R] Gigabit"),
Max = c(2.45, 2.41, 2.29, 2.17, 0),
Avg = c(0.15, 0.15, 0.15, 0.17, 0)
)
table <- table_data %>% flextable() %>%
set_caption(caption = Caption , html_escape = F) %>%
bg(bg = "#579FAD", part = "header") %>%
color(color = "white", part = "header") %>%
theme_zebra(
odd_header = "#579FAD",
odd_body = "#E0EFF4",
even_header = "transparent",
even_body = "transparent"
) %>%
set_table_properties(width = 1, layout = "autofit") %>%
hline(part="all", border = std_border ) %>%
vline(part="all", border = std_border ) %>%
border_outer( border = bold_border, part = "all" ) %>%
fix_border_issues() %>%
set_header_labels(
values = list(Instance = InstanceName ) ) %>%
flextable::font (part = "all" , fontname = "Calibri")
save_as_docx( table, path = file.path("c:\\temp", "test01.docx") )
save_as_html (table, path = file.path("c:\\temp", "test01.html"))
Here is what I got in html which is okay for me:
But in docx format my header style is not applied:
How can I create header like I did for html that can be saved to both html and docx?
If I have to create separate tables - one for html, other for docx - it's not so good but acceptable options. That case my question how to create header I made in html but for docx format?

R: Converting ggplot objects to interactive graphs

I am using the R programming language. I am trying to take different types of graphs (bar graphs, pie charts) and put them on the same page. I generated some fake data and made several graphs - then I put them together (see : Combining Different Types of Graphs Together (R))
library(dplyr)
library(ggplot2)
library(cowplot)
library(gridExtra)
library(plotly)
date= seq(as.Date("2014/1/1"), as.Date("2016/1/1"),by="day")
var <- rnorm(731,10,10)
group <- sample( LETTERS[1:4], 731, replace=TRUE, prob=c(0.25, 0.22, 0.25, 0.25) )
data = data.frame(date, var, group)
data$year = as.numeric(format(data$date,'%Y'))
data$year = as.factor(data$year)
###Pie
Pie_2014 <- data %>%
filter((data$year == "2014")) %>%
group_by(group) %>%
summarise(n = n())
Pie_2014_graph = ggplot(Pie_2014, aes(x="", y=n, fill=group)) +
geom_bar(stat="identity", width=1) +
coord_polar("y", start=0) +ggtitle( "Pie Chart 2014")
Pie_2015 <- data %>%
filter((data$year == "2015")) %>%
group_by(group) %>%
summarise(n = n())
Pie_2015_graph = ggplot(Pie_2015, aes(x="", y=n, fill=group)) +
geom_bar(stat="identity", width=1) +
coord_polar("y", start=0) +ggtitle( "Pie Chart 2015")
Pie_total = data %>%
group_by(group) %>%
summarise(n = n())
Pie_total_graph = ggplot(Pie_total, aes(x="", y=n, fill=group)) +
geom_bar(stat="identity", width=1) +
coord_polar("y", start=0) +ggtitle( "Pie Chart Average")
###bars
Bar_years = data %>%
group_by(year, group) %>%
summarise(mean = mean(var))
Bar_years_plot = ggplot(Bar_years, aes(fill=group, y=mean, x=year)) +
geom_bar(position="dodge", stat="identity") + ggtitle("Bar Plot All Years")
Bar_total = data %>%
group_by(group) %>%
summarise(mean = n())
Bar_total_plot = ggplot(Bar_total, aes(x=group, y=mean, fill=group)) +
geom_bar(stat="identity")+theme_minimal() + ggtitle("Bar Plot Average")
#assembling the graphs can be done two different ways
#first way
g1 <- grid.arrange(Pie_2014_graph, Pie_2015_graph , Pie_total_graph, nrow = 1)
g2 <- grid.arrange(Bar_total_plot, Bar_years_plot, nrow = 1)
g = grid.arrange(g1, g2, ncol = 1)
#second way
# arrange subplots in rows
top_row <- plot_grid(Pie_2014_graph, Pie_2015_graph, Pie_total_graph)
middle_row <- plot_grid(Bar_years_plot, Bar_total_plot)
# arrange our new rows into combined plot
p <- plot_grid(top_row, middle_row, nrow = 2)
p
From here, I am trying to use the plotly::ggplotly() command to make the above output "interactive" (move the mouse over the graphs and see labels). I know that this works for individual plots:
ggplotly(Bar_years_plot)
However, this command does not seem to work with the "cowplot" and the "gridExtra" outputs:
#gridExtra version:
ggplotly(g)
Error in UseMethod("ggplotly", p) :
no applicable method for 'ggplotly' applied to an object of class "c('gtable', 'gTree', 'grob', 'gDesc')"
#cowplot version: (produces empty plot)
ggplotly(p)
Warning messages:
1: In geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]) :
geom_GeomDrawGrob() has yet to be implemented in plotly.
If you'd like to see this geom implemented,
Please open an issue with your example code at
https://github.com/ropensci/plotly/issues
2: In geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]) :
geom_GeomDrawGrob() has yet to be implemented in plotly.
If you'd like to see this geom implemented,
Please open an issue with your example code at
https://github.com/ropensci/plotly/issues
Does anyone know if there is a quick way to use the ggplotly() function for objects created with "gridExtra" or "cowplot"?
I know that with a bit of work, it might be possible using "htmltools":
library(htmltools)
doc <- htmltools::tagList(
div(Pie_2014_graph, style = "float:left;width:50%;"),
div(Pie_2015_graph,style = "float:left;width:50%;"),
div(Pie_total_graph, style = "float:left;width:50%;"),
div(Bar_years_plot, style = "float:left;width:50%;"),
div(Bar_total_plot, style = "float:left;width:50%;"))
save_html(html = doc, file = "out.html")
But I am not sure how to do this.
Can someone please show me how to make the collections of graphs interactive either using ggplotly() or with htmltools()?
Thanks.
You should apply ggplotly() to the individual graphs, not the collection graphs.
For example:
Pie_2014_graph = ggplotly(ggplot(Pie_2014, aes(x="", y=n, fill=group)) +
geom_bar(stat="identity", width=1) +
coord_polar("y", start=0) +ggtitle( "Pie Chart 2014") )

R: expss package: Error in do.call(data.frame, c(x, alis)) : variable names are limited to 10000 bytes

I defined some table functions in R using the expss package to automate tabulation.
One of my tables wants to show cases or percentages on categories followed by the mean. The mean can be based on the same category variable or it can be defined to be another variable.
Overall the code works perfect. For some variables though I keep struggling with the error code "Error in do.call(data.frame, c(x, alis)) : variable names are limited to 10000 bytes"
the code for this table
Table2 = function (Q, banner=banner, caption , Q.mean, ddata=d, questlab=dquest, mis.val=999) {
x_totaln<-eval(substitute(x),ddata)
x_totaln[is.na(eval(substitute(Q),ddata))]<-NA
if(missing(Q.mean))
{Q_mean<-eval(substitute(Q),ddata)}
else
{Q_mean<-eval(substitute(Q.mean),ddata)}
Q_mean[Q_mean==mis.val]<-NA
if(missing(caption))
{caption<-eval(substitute(var_lab(Q_mean)),questlab)}
eval.parent(substitute(
{
banner %>%
tab_cells (x_totaln) %>%
tab_stat_cases(total_row_position = c("none"),label = 'N') %>%
tab_cells (Q) %>%
tab_stat_cases(total_row_position = c("none"),label = 'N') %>%
tab_stat_cpct(total_row_position = c("none"), label = '%') %>%
tab_cells (Q_mean) %>%
tab_stat_mean(label = 'Mean') %>%
tab_pivot (stat_position = "inside_rows") %>%
drop_c () %>%
custom_format2() %>%
set_caption(caption)
}
))
}
This code is overall working perfect.
Table2(Q8_cat)
For some variables though it generates the error code
Table2(age_cat,Q.mean=age,caption="Your age at the start of the programme?")
Error in do.call(data.frame, c(x, alis)) :
variable names are limited to 10000 bytes
19.
do.call(data.frame, c(x, alis))
while including the variables in the code works again perfect
Table2test = function () {
x_totaln<-eval(substitute(x),d)
x_totaln[is.na(eval(substitute(age_cat),d))]<-NA
Q_mean<-eval(substitute(age),d)
Q_mean[Q_mean==999]<-NA
banner %>%
tab_cells (x_totaln) %>%
tab_stat_cases(total_row_position = c("none"),label = 'N') %>%
tab_cells (age_cat) %>%
tab_stat_cases(total_row_position = c("none"),label = 'N') %>%
tab_stat_cpct(total_row_position = c("none"), label = '%') %>%
tab_cells (Q_mean) %>%
tab_stat_mean(label = 'Mean') %>%
tab_pivot (stat_position = "inside_rows") %>%
drop_c () %>%
custom_format2() %>%
set_caption("Your age at the start of the programme?")
}
Any advice? Or anyone any idea why the error occurs?
Thanks
When you substitute variables in some cases they are represented as structure. In this case there is no variable name in the expression but only value: tab_cells(structure(c(22, 23, 22, 23, ... many numbers))). And we try to use this long representation as name in the resulted table. But R has limitation on the length of the names. And here the function fails. Solution is quite simple - we will always set variable labels which we will use as names. So the following code run without any errors:
Table2 = function (Q, banner=banner, caption , Q.mean, ddata=d, questlab=dquest, mis.val=999) {
x_totaln<-eval(substitute(x),ddata)
x_totaln[is.na(eval(substitute(Q),ddata))]<-NA
var_lab(x_totaln) = "Total" # add label for total
if(missing(Q.mean))
{Q_mean<-eval(substitute(Q),ddata)}
else
{Q_mean<-eval(substitute(Q.mean),ddata)}
Q_mean[Q_mean==mis.val]<-NA
if(missing(caption))
{caption<-eval(substitute(var_lab(Q_mean)),questlab)}
eval.parent(substitute(
{
banner %>%
tab_cells (x_totaln) %>%
tab_stat_cases(total_row_position = c("none"),label = 'N') %>%
tab_cells (Q) %>%
tab_stat_cases(total_row_position = c("none"),label = 'N') %>%
tab_stat_cpct(total_row_position = c("none"), label = '%') %>%
tab_cells ("|" = Q_mean) %>% # "|" suppress label for mean
tab_stat_mean(label = 'Mean') %>%
tab_pivot (stat_position = "inside_rows") %>%
drop_c () %>%
custom_format2() %>%
set_caption(caption)
}
))
}

How can I get the data of the second web page?

I am trying to get the data from a web in R using rvest package: https://etfdb.com/stock/AAPL/
But no matter how I tried, I can only get the table of the first page. Could anybody help me do this? Thank you so much.
See code below. tb1 and tb2 are the same!! That's wired.
url1 <- "https://etfdb.com/stock/AAPL/#etfs&sort_name=weighting&sort_order=desc&page=1"
url2 <- "https://etfdb.com/stock/AAPL/#etfs&sort_name=weighting&sort_order=desc&page=2"
tbs1 <- rvest::html_nodes(xml2::read_html(url1), "table")
tbs2 <- rvest::html_nodes(xml2::read_html(url2), "table")
tb1 <- rvest::html_table(tbs1[1])[[1]]
tb2 <- rvest::html_table(tbs2[1])[[1]]
This website post GET requests to update JSON data to the table. After some attempts, this is the code I came up with to deal with JSON data: (not a beautiful code but it works)
library(rjson)
library(rvest)
library(writexl)
lastpage <- 9;
df <- data.frame();
for (i in 1:lastpage){
x <- fromJSON(file = paste("https://etfdb.com/data_set/?tm=40274&cond={%22by_stock%22:25}&no_null_sort=&count_by_id=true&limit=25&sort=weighting&order=desc&limit=25&offset=", 25 * (i-1), sep = ""));
x <- x[2][[1]];
pg_df <- data.frame(matrix(unlist(x), nrow=length(x), byrow=T),stringsAsFactors=FALSE);
df <- rbind(df, pg_df);
}
for (i in 1:nrow(df)){
df$X1[i] <- read_html(df$X1[i]) %>% html_text(trim = TRUE);
df$X3[i] <- read_html(df$X3[i]) %>% html_text(trim = TRUE);
df$X5[i] <- read_html(df$X5[i]) %>% html_text(trim = TRUE);
}
df <- data.frame(df$X1, df$X3, df$X5, df$X7, df$X9);
colnames(df) <- c("Ticker", "ETF", "ETFdb.com Category", "Expense Ratio", "Weighting");
write_xlsx(
df,
path = "stock.xlsx",
col_names = TRUE,
format_headers = TRUE,
use_zip64 = FALSE
)
Update:
You can see the data source at the attribute data-url of the table here:
I'll update the code that makes it easier for you:
library(rjson)
library(rvest)
library(writexl)
stock_ticket <- "AAPL";
url <- paste("https://etfdb.com/stock/", stock_ticket, sep = "");
lastpage <- 9;
df <- data.frame();
data_url <- read_html(url) %>% html_node(xpath = "//table[#id='etfs']") %>% html_attr("data-url");
for (i in 1:lastpage){
x <- fromJSON(file = paste("https://etfdb.com", data_url, "&offset=", 25 * (i-1), sep = ""));
x <- x[2][[1]];
pg_df <- data.frame(matrix(unlist(x), nrow=length(x), byrow=T),stringsAsFactors=FALSE);
df <- rbind(df, pg_df);
}
for (i in 1:nrow(df)){
df$X1[i] <- read_html(df$X1[i]) %>% html_text(trim = TRUE);
df$X3[i] <- read_html(df$X3[i]) %>% html_text(trim = TRUE);
df$X5[i] <- read_html(df$X5[i]) %>% html_text(trim = TRUE);
}
df <- data.frame(df$X1, df$X3, df$X5, df$X7, df$X9);
colnames(df) <- c("Ticker", "ETF", "ETFdb.com Category", "Expense Ratio", "Weighting");
write_xlsx(
df,
path = "stock.xlsx",
col_names = TRUE,
format_headers = TRUE,
use_zip64 = FALSE
)

Scraping table with multiple headers in R using any package? (XML, rCurl, rlist htmltab, rvest etc)

I am attempting to scrape this table
http://www.hko.gov.hk/cis/dailyExtract_e.htm?y=1999&m=1
Here are all my attempts. None of them get even close to extracting any information. Am i missing something?
library("rvest")
library("tidyverse")
# METHOD 1
url <- "http://www.hko.gov.hk/cis/dailyExtract_e.htm?y=1999&m=1"
data <- url %>%
read_html() %>%
html_nodes(xpath='//*[#id="t1"]/tbody/tr[1]') %>%
html_table()
data <- data[[1]]
# METHOD 2
library(XML)
library(RCurl)
library(rlist)
theurl <- getURL("http://www.hko.gov.hk/cis/dailyExtract_e.htm?y=1999&m=1",.opts = list(ssl.verifypeer = FALSE) )
tables <- readHTMLTable(theurl)
tables <- list.clean(tables, fun = is.null, recursive = FALSE)
n.rows <- unlist(lapply(tables, function(t) dim(t)[1]))
tables[[which.max(n.rows)]]
# METHOD 3
library(htmltab)
tab <- htmltab("http://www.hko.gov.hk/cis/dailyExtract_e.htm?y=1999&m=1",
which = '//*[#id="t1"]/tbody/tr[4]',
header = '//*[#id="t1"]/tbody/tr[3]',
rm_nodata_cols = TRUE)
# METHOD 4
website <-read_html("http://www.hko.gov.hk/cis/dailyExtract_e.htm?y=1999&m=1")
scraped <- website %>%
html_nodes("table") %>%
.[(2)] %>%
html_table(fill = TRUE) %>%
`[[`(1)
# METHOD 5
getHrefs <- function(node, encoding)
if (!is.null(xmlChildren(node)$a)) {
paste(xpathSApply(node, './a', xmlGetAttr, "href"), collapse = ",")
} else {
return(xmlValue(xmlChildren(node)$text))
}
data <- ( readHTMLTable("http://www.hko.gov.hk/cis/dailyExtract_e.htm?y=1999&m=1", which = 1, elFun = getHrefs) )
The expected results should be the 12 colnames in the table & the data below