How do I add significance asterisks next to my values in a correlation matrix heat map? - heatmap

I found this code online at http://www.sthda.com/english/wiki/ggplot2-quick-correlation-matrix-heatmap-r-software-and-data-visualization
It provides instructions for how to create a correlation matrix heat map and it works well. However, I was wondering how to get little stars * next to the values in the matrix that are significant. How would I go about doing that. Any help is greatly appreciated!!
mydata <- mtcars[, c(1,3,4,5,6,7)]
head(mydata)
cormat <- round(cor(mydata),2)
head(cormat)
library(reshape2)
melted_cormat <- melt(cormat)
head(melted_cormat)
library(ggplot2)
ggplot(data = melted_cormat, aes(x=Var1, y=Var2, fill=value)) +
geom_tile()
# Get lower triangle of the correlation matrix
get_lower_tri<-function(cormat){
cormat[upper.tri(cormat)] <- NA
return(cormat)
}
# Get upper triangle of the correlation matrix
get_upper_tri <- function(cormat){
cormat[lower.tri(cormat)]<- NA
return(cormat)
}
upper_tri <- get_upper_tri(cormat)
# Melt the correlation matrix
library(reshape2)
melted_cormat <- melt(upper_tri, na.rm = TRUE)
# Heatmap
library(ggplot2)
ggplot(data = melted_cormat, aes(Var2, Var1, fill = value))+
geom_tile(color = "white")+
scale_fill_gradient2(low = "blue", high = "red", mid = "white",
midpoint = 0, limit = c(-1,1), space = "Lab",
name="Pearson\nCorrelation") +
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, vjust = 1,
size = 12, hjust = 1))+
coord_fixed()
reorder_cormat <- function(cormat){
# Use correlation between variables as distance
dd <- as.dist((1-cormat)/2)
hc <- hclust(dd)
cormat <-cormat[hc$order, hc$order]
}
# Reorder the correlation matrix
cormat <- reorder_cormat(cormat)
upper_tri <- get_upper_tri(cormat)
# Melt the correlation matrix
melted_cormat <- melt(upper_tri, na.rm = TRUE)
# Create a ggheatmap
ggheatmap <- ggplot(melted_cormat, aes(Var2, Var1, fill = value))+
geom_tile(color = "white")+
scale_fill_gradient2(low = "blue", high = "red", mid = "white",
midpoint = 0, limit = c(-1,1), space = "Lab",
name="Pearson\nCorrelation") +
theme_minimal()+ # minimal theme
theme(axis.text.x = element_text(angle = 45, vjust = 1,
size = 12, hjust = 1))+
coord_fixed()
# Print the heatmap
print(ggheatmap)
ggheatmap +
geom_text(aes(Var2, Var1, label = value), color = "black", size = 4) +
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.grid.major = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.ticks = element_blank(),
legend.justification = c(1, 0),
legend.position = c(0.6, 0.7),
legend.direction = "horizontal")+
guides(fill = guide_colorbar(barwidth = 7, barheight = 1,
title.position = "top", title.hjust = 0.5))

cor() doesn't show the significance level, you may have to use rcorr() from Hmisc package
This is quite similar to what you want (the graphic output is not so nice though)
library(ggplot2)
library(reshape2)
library(Hmisc)
library(stats)
abbreviateSTR <- function(value, prefix){ # format string more concisely
lst = c()
for (item in value) {
if (is.nan(item) || is.na(item)) { # if item is NaN return empty string
lst <- c(lst, '')
next
}
item <- round(item, 2) # round to two digits
if (item == 0) { # if rounding results in 0 clarify
item = '<.01'
}
item <- as.character(item)
item <- sub("(^[0])+", "", item) # remove leading 0: 0.05 -> .05
item <- sub("(^-[0])+", "-", item) # remove leading -0: -0.05 -> -.05
lst <- c(lst, paste(prefix, item, sep = ""))
}
return(lst)
}
d <- mtcars
cormatrix = rcorr(as.matrix(d), type='spearman')
cordata = melt(cormatrix$r)
cordata$labelr = abbreviateSTR(melt(cormatrix$r)$value, 'r')
cordata$labelP = abbreviateSTR(melt(cormatrix$P)$value, 'P')
cordata$label = paste(cordata$labelr, "\n",
cordata$labelP, sep = "")
cordata$strike = ""
cordata$strike[cormatrix$P > 0.05] = "X"
txtsize <- par('din')[2] / 2
ggplot(cordata, aes(x=Var1, y=Var2, fill=value)) + geom_tile() +
theme(axis.text.x = element_text(angle=90, hjust=TRUE)) +
xlab("") + ylab("") +
geom_text(label=cordata$label, size=txtsize) +
geom_text(label=cordata$strike, size=txtsize * 4, color="red", alpha=0.4)
Source

difference_p is the P_value of correlation matrix,
ax5 draws the sns.heatmap and return as ax5
data=correlation_p
for y in range(data.shape[0]):
for x in range(data.shape[1]):
if data[y,x]<0.1:
ax4.text(x + 0.5, y + 0.5, '-',size=48,
horizontalalignment='center',
verticalalignment='center',
)

Related

R: Forcing Plotly to Save "Full Sized" Plots?

I am working with the R programming language. I am following the tutorial here :
I was able to make the following plot:
library(dplyr)
library(plotly)
data <- read.csv("https://raw.githubusercontent.com/plotly/datasets/master/gapminderDataFiveYear.csv")
data_2007 <- data[which(data$year == 2007),]
data_2007 <- data_2007[order(data_2007$continent, data_2007$country),]
slope <- 2.666051223553066e-05
data_2007$size <- sqrt(data_2007$pop * slope)
colors <- c('#4AC6B7', '#1972A4', '#965F8A', '#FF7070', '#C61951')
fig <- plot_ly(data_2007, x = ~gdpPercap, y = ~lifeExp, color = ~continent, size = ~size, colors = colors,
type = 'scatter', mode = 'markers', sizes = c(min(data_2007$size), max(data_2007$size)),
marker = list(symbol = 'circle', sizemode = 'diameter',
line = list(width = 2, color = '#FFFFFF')),
text = ~paste('Country:', country, '<br>Life Expectancy:', lifeExp, '<br>GDP:', gdpPercap,
'<br>Pop.:', pop))
fig <- fig %>% layout(title = 'Life Expectancy v. Per Capita GDP, 2007',
xaxis = list(title = 'GDP per capita (2000 dollars)',
gridcolor = 'rgb(255, 255, 255)',
range = c(2.003297660701705, 5.191505530708712),
type = 'log',
zerolinewidth = 1,
ticklen = 5,
gridwidth = 2),
yaxis = list(title = 'Life Expectancy (years)',
gridcolor = 'rgb(255, 255, 255)',
range = c(36.12621671352166, 91.72921793264332),
zerolinewidth = 1,
ticklen = 5,
gridwith = 2),
paper_bgcolor = 'rgb(243, 243, 243)',
plot_bgcolor = 'rgb(243, 243, 243)')
Then, I tried saving the file:
htmltools::save_html(html = fig, file = "file.html")
The problem is, that this plot is saving in a "stretched" format:
Is there any way that I can "force" R/plotly to save a "full sized" plot instead of this "stretched" version?
Thanks!
You could specify the width and height in plot_ly like this:
library(dplyr)
library(plotly)
data <- read.csv("https://raw.githubusercontent.com/plotly/datasets/master/gapminderDataFiveYear.csv")
data_2007 <- data[which(data$year == 2007),]
data_2007 <- data_2007[order(data_2007$continent, data_2007$country),]
slope <- 2.666051223553066e-05
data_2007$size <- sqrt(data_2007$pop * slope)
colors <- c('#4AC6B7', '#1972A4', '#965F8A', '#FF7070', '#C61951')
fig <- plot_ly(data_2007, x = ~gdpPercap, y = ~lifeExp, color = ~continent, size = ~size, colors = colors,
type = 'scatter', mode = 'markers', sizes = c(min(data_2007$size), max(data_2007$size)),
marker = list(symbol = 'circle', sizemode = 'diameter',
line = list(width = 2, color = '#FFFFFF')),
text = ~paste('Country:', country, '<br>Life Expectancy:', lifeExp, '<br>GDP:', gdpPercap,
'<br>Pop.:', pop),
autosize = F, width = 1200, height = 600)
fig <- fig %>% layout(title = 'Life Expectancy v. Per Capita GDP, 2007',
xaxis = list(title = 'GDP per capita (2000 dollars)',
gridcolor = 'rgb(255, 255, 255)',
range = c(2.003297660701705, 5.191505530708712),
type = 'log',
zerolinewidth = 1,
ticklen = 5,
gridwidth = 2),
yaxis = list(title = 'Life Expectancy (years)',
gridcolor = 'rgb(255, 255, 255)',
range = c(36.12621671352166, 91.72921793264332),
zerolinewidth = 1,
ticklen = 5,
gridwith = 2),
paper_bgcolor = 'rgb(243, 243, 243)',
plot_bgcolor = 'rgb(243, 243, 243)')
htmltools::save_html(html = fig, file = "file.html")
Created on 2022-08-31 with reprex v2.0.2
Output:
You can change the size to whatever you want.

Error while exporting graph in html format in R

I got this error when trying to plot a graph with plotly and export it using saveWidget: 'options' must be a fully named list, or have no names (NULL). Here is how I plot and save my graph:
Age <-c(69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183)
Pertubation <- c(0.002967936,-0.794788368,-1.561544673,-2.938300977,-2.956057282,-3.724813586,-3.84656989,-4.000326195,-3.491082499,-3.705838803,-3.008595108,-3.439351412,-2.518107716,-2.108864021,-1.892620325,-2.175376629,-2.547132934,-2.694889238,-3.985645543,-2.702401847,-3.203158151,-3.238914456,-2.60867076,-2.564427064,-3.254183369,-1.973939673,-0.731695977,-0.625452282,-0.966208586,-0.811964891,-0.018721195,0.320522501,-0.227233804,-0.046990108,-0.820746412,-0.846502717,1.067740979,1.470984675,1.00222837,0.448472066,1.139715762,1.275959457,0.989203153,0.904446848,0.607690544,0.20693424,1.084177935,1.114421631,0.625665327,0.292909022,-0.833847282,-2.208603586,-3.397359891,-4.339116195,-4.950872499,-5.518628804,-6.992385108,-7.023141413,-8.193897717,-8.580654021,-9.428410326,-10.92316663,-11.37392293,-12.21167924,-12.77643554,-13.14219185,-13.67394815,-14.19370446,-14.16046076,-15.29121706,-16.06197337,-16.11472967,-16.29348598,-17.62424228,-17.79699859,-19.42675489,-19.6625112,-20.3642675,-21.4460238,-22.05378011,-23.87553641,-24.06129272,-25.00604902,-25.80180533,-26.77656163,-27.40431793,-28.86407424,-29.45483054,-29.37758685,-30.48534315,-30.07209946,-29.86985576,-29.82561207,-30.46936837,-30.56412467,-30.24788098,-29.40463728,-29.76939359,-29.11314989,-29.0989062,-29.2466625,-29.1654188,-29.28217511,-29.11493141,-28.44568772,-28.65344402,-28.43520033,-28.71795663,-29.06871293,-29.66546924,-28.39422554,-29.68398185,-29.73973815,-29.34349446,-29.45325076)
temp <- cbind(Age, Pertubation)
Pertubation_graph <- plot_ly(data.frame(temp), x = ~Age, y = ~res,
cex.main = 1.5, cex.lab = 1.2, axes = F) %>%
layout(title = paste( "Pig ID:", i, ", Lambda =", lambda, "\nDifference between CFI and TTC"),mtext("Percentage of difference: CFI - TTC (%)", side=4, line = 3, cex = 1.2, col= "blue"), plot_bgcolor = "fffff", xaxis = list(title = 'Age (d)'),
yaxis = list(title = 'Amount of difference: CFI - TTC (kg)'),type = "p", pch = 10, cex = 0.5,
ylim = c(min(B$dif.CFI, res), max(B$dif.CFI, res)), cex.main = 1.5, cex.lab = 1.2, axes = F)
Pertubation_graph <- Pertubation_graph %>% layout(axis(1, at= seq(dif$eval_day[1],dif$eval_day[length(dif$eval_day)], by = 5), cex.axis = 1.1),
axis(2, at=, cex.axis = 1.1),
axis(4, at=, cex.axis = 1.1, col.axis = "blue", col = "blue"),
abline(crit1,0, col = "red", lty = 2) ,
abline(0,0, col = "red"))
saveWidget(ggplotly(Pertubation_graph), file=paste0("C:/Users/Kevin Le/PycharmProjects/Pig Data Black Box/Graphs/Step3_graphs/", idc, ".", ID[idc], ".html"))
Can anyone help me out? Thanks everyone in advance.
The dif data table is in this link: https://docs.google.com/spreadsheets/d/1-Xy0ct9GaWU0VLmpgbBA3CA8RLSffLPKlJ-P1bvrT5A/edit?usp=sharing

Plot and table in one figure in R markdown for HTML output

I'm working in Rbookdown and I want to place a plot and a table in one figure, how can I achieve that? Below is the code i used so far. Can you help?
```{r echo=FALSE, message=FALSE, warning=FALSE, fig.height = 3.5, out.width = '70%', fig.align = "center"}
library(knitr)
library(kableExtra)
library(tidyverse)
library(latex2exp)
options(scipen=999)
mu = 0
sigma = 1
x = 1
# draw normal distribution
range = seq(mu - 4*sigma, mu + 4*sigma, 0.01)
y = dnorm(range, mu, sigma)
plot(range, y,
main = "Standard Normal Distribution", xlab = "Z-score", ylab = " ",
type = 'l', ylim = c(0, max(y) + 0.01), axes = FALSE)
axis(1, at = seq(mu - 4*sigma, mu + 4*sigma, sigma))
# Add area to the left of x
cord.a = c(0, seq(min(range), x, 0.01))
cord.b = c(dnorm(seq(min(range), x, 0.01), mu, sigma), 0)
polygon(cord.a, cord.b, col = "#61a5ff")
text(x = 1.1, y = -.06, TeX('$z = 1.00$'), cex = .8, xpd=NA)
text(x = 0, y = .15, TeX('$p = .8413$'), cex = .8, xpd=NA)
# Create standard normal table
options(digits = 4)
u=seq(0,3.09,by=0.01)
p=pnorm(u)
m=matrix(p,ncol=10,byrow=TRUE)
df.m = as.data.frame(m)
z.values = c("**0.0**", "**0.1**", "**0.2**", "**0.3**", "**0.4**", "**0.5**", "**0.6**",
"**0.7**", "**0.8**", "**0.9**", "**1.0**", "**1.1**", "**1.2**", "**1.3**",
"**1.4**", "**1.5**", "**1.6**", "**1.7**", "**1.8**", "**1.9**","**2.0**",
"**2.1**", "**2.2**", "**2.3**", "**2.4**", "**2.5**", "**2.6**", "**2.7**",
"**2.8**", "**2.9**", "**3.0**")
df.z.values = as.data.frame(z.values)
new.m = df.z.values %>%
bind_cols(df.m)
kable(new.m,
booktabs = TRUE,
col.names = c("$Z$", "0.00","0.01", "0.02", "0.03", "0.04",
"0.05", "0.06", "0.07", "0.08", "0.09"),
escape = FALSE,
caption = "Standaard Normaalverdeling",
linesep = "",
align = c('r')) %>%
kable_styling(font_size = 10)
Try this solution:
```{r echo=FALSE, message=FALSE, warning=FALSE, include = FALSE}
library(kableExtra)
#make and save our table into working directory
table1 <- head(mtcars[1:5]) %>%
kbl() %>%
kable_styling(full_width = F) %>%
save_kable("tab_kbl.png")
#make and save our plot into working directory
png('norm_pl.png')
plot(rnorm(10))
dev.off()
```
```{r,echo=FALSE, message=FALSE, warning=FALSE, fig.cap="My image", fig.align = "center"}
library(cowplot)
#combine our images in the one
img1 <- ggdraw() + draw_image("norm_pl.png", scale = 1)
img2 <- ggdraw() + draw_image("tab_kbl.png", scale = 1)
plot_grid(img1, img2)
```
An another variant
```{r, fig.align='center', fig.cap="My beautiful image"}
library(gridExtra)
library(grid)
library(cowplot)
t1 <- tableGrob(head(mtcars[1:5]), theme = ttheme_minimal())
p2 <- ggplot(mtcars, aes(cyl, mpg)) +
geom_point()
plot_grid(t1, p2, ncol = 2, rel_widths = c(2,1))
```

Difference in Computation Speed and Results Between MLR and MLR3

I don't get similar results when I use the same data and models using mlr and mlr3. Also I find mlr runs at least 20-fold faster. I used lung data from survival and I was able to replicate the difference in computation speed and results since I can't share my data.
mlr was completed in 1 min with C-index generally low compared to mlr3 that took 21 min to complete with C-index being much higher despite using same data, same preprocessing, same model and setting seed.
library(tidyverse)
library(tidymodels)
library(PKPDmisc)
library(mlr)
library(parallelMap)
library(survival)
# Data and Data Splitting
data = as_tibble(lung) %>%
mutate(status = if_else(status==1, 0, 1),
sex = factor(sex, levels = c(1:2), labels = c("male", "female")),
ph.ecog = factor(ph.ecog))
na <- sample(1:228, 228*0.1)
data$sex[na] <- NA
data$ph.ecog[na]<- NA
set.seed(123)
split <- data %>% initial_split(prop = 0.8, strata = status)
train <- split %>% training()
test <- split %>% testing()
# Task
task = makeSurvTask(id = "Survival", data = train, target = c("time", "status"))
# Resample
# For model assessment before external validation on test data
set.seed(123)
outer_cv = makeResampleDesc("CV", iter=4, stratify.cols = c("status")) %>%
makeResampleInstance(task)
# For feature selection and parameter tuning
set.seed(123)
inner_cv = makeResampleDesc("CV", iter=4, stratify.cols = c("status"))
# Learners
cox1 = makeLearner(id = "COX1", "surv.coxph") %>%
makeImputeWrapper(classes = list(factor = imputeMode(), numeric = imputeMedian()),
# Create dummy variable for factor features
dummy.classes = "factor") %>%
makePreprocWrapperCaret(ppc.center = TRUE, ppc.scale = TRUE) %>%
makeFeatSelWrapper(resampling = inner_cv, show.info = TRUE,
control = makeFeatSelControlSequential(method = "sfs"))
cox_lasso = makeLearner(id = "COX LASSO", "surv.glmnet") %>%
makeImputeWrapper(classes = list(factor = imputeMode(), numeric = imputeMedian()),
# Create dummy variable for factor features
dummy.classes = "factor") %>%
# Normalize numeric features
makePreprocWrapperCaret(ppc.center = TRUE, ppc.scale = TRUE) %>%
makeTuneWrapper(resampling = inner_cv, show.info = TRUE,
par.set = makeParamSet(makeNumericParam("lambda",lower = -3, upper = 0,
trafo = function(x) 10^x)),
control = makeTuneControlGrid(resolution = 10L))
cox_net = makeLearner(id = "COX NET", "surv.glmnet") %>%
makeImputeWrapper(classes = list(factor = imputeMode(), numeric = imputeMedian()),
# Create dummy variable for factor features
dummy.classes = "factor") %>%
# Normalize numeric features
makePreprocWrapperCaret(ppc.center = TRUE, ppc.scale = TRUE) %>%
makeTuneWrapper(resampling = inner_cv, show.info = TRUE,
par.set = makeParamSet(makeNumericParam("alpha", lower = 0, upper = 1,
trafo = function(x) round(x,2)),
makeNumericParam("lambda",lower = -3, upper = 1,
trafo = function(x) 10^x)),
control = makeTuneControlGrid(resolution = 10L))
# Benchmark
# parallelStartSocket(4)
start_time <- Sys.time()
set.seed(123)
mlr_bmr = benchmark(learners = list(cox1, cox_lasso, cox_net),
tasks = task,
resamplings = outer_cv,
keep.extract= TRUE,
models = TRUE)
end_time <- Sys.time()
mlr_time = end_time - start_time
# parallelStop()
mlr_res <- getBMRPerformances(mlr_bmr, as.df = TRUE) %>%
select(Learner = learner.id, Task = task.id, Cindex = cindex) %>%
mutate(Color_Package = "mlr",
Learner = word(str_replace(Learner, "\\.", " "), 1, -2))
##################################################################
library(mlr3verse)
# Task
task2 = TaskSurv$new(id = "Survival2", backend = train, time = "time", event = "status")
task2$col_roles$stratum = c("status")
# Resmaple
set.seed(123)
outer_cv2 = rsmp("cv", folds = 4)$instantiate(task2)
# For feature selection and parameter tuning
set.seed(123)
inner_cv2 = rsmp("cv", folds = 4)
# Learners
preproc = po("imputemedian", affect_columns = selector_type("numeric")) %>>%
po("imputemode", affect_columns = selector_type("factor")) %>>%
po("scale") %>>%
po("encode")
cox2 = AutoFSelector$new(learner = as_learner(preproc %>>%
lrn("surv.coxph")),
resampling = inner_cv2,
measure = msr("surv.cindex"),
terminator = trm("none"), # need to increase later
fselector = fs("sequential", strategy = "sfs")) # sfs is the default
cox2$id = "COX1"
cox_lasso2 = AutoTuner$new(learner = as_learner(preproc %>>%
lrn("surv.glmnet",
lambda = to_tune(p_dbl(lower = -3, upper = 0,
trafo = function(x) 10^x)))),
resampling = inner_cv2,
measure = msr("surv.cindex"),
terminator = trm("none"),
tuner = tnr("grid_search", resolution = 10))
cox_lasso2$id = "COX LASSO"
cox_net2 = AutoTuner$new(learner = as_learner(preproc %>>%
lrn("surv.glmnet",
alpha = to_tune(p_dbl(lower = 0, upper = 1)),
lambda = to_tune(p_dbl(lower = -3, upper = 1,
trafo = function(x) 10^x)))),
resampling = inner_cv2,
measure = msr("surv.cindex"),
terminator = trm("none"),
tuner = tnr("grid_search", resolution = 10))
cox_net2$id = "COX NET"
# Benchmark
desgin = benchmark_grid(tasks = task2,
learners = c(cox2, cox_lasso2, cox_net2),
resamplings = outer_cv2)
# future::plan("multisession")
# Error: Output type of PipeOp select during training (Task) incompatible with input type of PipeOp surv.coxph (TaskSurv)
start_time <- Sys.time()
set.seed(123)
mlr3_bmr = mlr3::benchmark(desgin)
end_time <- Sys.time()
mlr3_time = end_time - start_time
mlr3_res <- as.data.table(mlr3_bmr$score()) %>%
select(Task=task_id, Learner=learner_id, Cindex=surv.harrell_c) %>%
mutate(Color_Package = "mlr3")
mlr_res %>%
bind_rows(mlr3_res) %>%
ggplot(aes(Learner, Cindex, fill= Color_Package )) +
geom_boxplot(position=position_dodge(.8)) +
stat_summary(fun= mean, geom = "point", aes(group = Color_Package ),
position=position_dodge(.8), size = 3) +
labs(x="", y = " C-Index") +
theme_bw() + base_theme() + theme(legend.position = "top")

R markdown html document not properly show in Internet Explorer

I create a rmarkdown html document and following is my code:
---
title: "PA"
output:
html_document:
css: custom.css
theme: journal
toc: true
toc_float:
collapsed: false
smooth_scroll: false
toc_depth: 4
---
<style>
.list-group-item.active, .list-group-item.active:focus, .list-group-item.active:hover {
background-color: blue;
}
</style>
<style type="text/css">
#TOC {
color: purple;
}
.toc-content {
padding-left: 30px;
padding-right: 40px;
}
h1 { /* Header 1 */
color: DarkBlue;
}
</style>
<div class="datatables html-widget html-widget-static-bound"
id="htmlwidget-3efe8ca4aa087193f03e"
style="width:960px;height:500px;">
```{r, echo=FALSE, message=FALSE, warning=FALSE}
knitr::opts_chunk$set(echo = FALSE)
library(lubridate)
library(ggplot2)
library(plotly)
library(DT)
library(knitr)
library(htmltools)
library(scales)
PA <- read.csv("PA.csv" , header = T , colClasses = c("Year"="factor", "Month"="factor"))
PA$Month <- as.numeric(as.character(PA$Month))
PA$Year <- as.numeric(as.character(PA$Year))
PA$date_label <- ymd( paste( PA$Year, month.abb[ PA$Month ], 01, sep = "-"))
```
```{r include = FALSE}
# If I want to use for-loop to print htmltools::tagList, this chunk is very important.
# However, I don't know why must need it.
DT::datatable(PA, extensions = 'Buttons', options = list(dom = 'Bfrtip',
buttons = list(c(I('colvis'), 'copy', 'print'), list(extend = 'collection',
buttons = c('csv', 'excel', 'pdf'),
text = 'Download'
)), searchHighlight = TRUE
))
ggplotly(ggplot(PA))
plot_ly(PA)
```
```{r, echo=FALSE, message=FALSE, warning=FALSE, results = 'asis'}
red.bold.italic.text <- element_text(face = "bold.italic", color = "red")
colors <- c('rgb(211,94,96)', 'rgb(128,133,133)', 'rgb(144,103,167)', 'rgb(171,104,87)', 'rgb(114,147,203)')
for(i in unique(PA$Products)) {
cat(" \n#", i, "{.tabset .tabset-pills} \n")
cat(paste("## Summary"), sep = "\n")
print(knitr::kable(mutate(summarise(group_by(PA[PA$Products == i,], Year, Company),sum(EPSUM), sum(Incurred)), LR.percent = `sum(Incurred)`/`sum(EPSUM)`/0.01), format = 'html'))
cat(paste("## Line Graph {.tabset .tabset-pills}"), sep = "\n")
cat(paste("### Line1"), sep = "\n")
print(htmltools::tagList(
ggplotly(ggplot(PA[PA$Products == i,], aes(x=date_label , y=EPSUM, col=Company , group=Company))
+ geom_line(size = 1)
+ scale_x_date(name = "Month", date_labels = "%b %Y", date_breaks = "2 month" )
+ labs(title = "EPSUM", x = "Time", y = "")
+ theme(title = red.bold.italic.text, axis.title = red.bold.italic.text)
+ scale_y_continuous(labels = comma)
)))
cat(paste("### Line2"), sep = "\n")
print(htmltools::tagList(
ggplotly(ggplot(PA[PA$Products == i,], aes(x=date_label , y=EPSUM, col=Company , group=Company))
+ geom_line(size = 1)
+ scale_x_date(name = "Month", date_labels = "%b %Y", date_breaks = "5 month" )
+ labs(title = "EPSUM", x = "Time", y = "")
+ theme(title = red.bold.italic.text, axis.title = red.bold.italic.text)
+ scale_y_continuous(labels = comma)
+ facet_wrap(~Company)
)))
cat(paste("## Bar Chart {.tabset .tabset-pills}"), sep = "\n")
cat(paste("### BAR1"), sep = "\n")
print(htmltools::tagList(
ggplotly(ggplot(summarise(group_by(PA[PA$Products == i,], Year, Company), sum(EPSUM)), aes(x=Year, y=`sum(EPSUM)`, fill=Company))
+ geom_bar(stat="identity", position=position_dodge())
+ labs(title = "EPSUM", x = "Time", y = "")
+ theme(title = red.bold.italic.text, axis.title = red.bold.italic.text)
+ scale_y_continuous(labels = comma)
)))
cat(paste("### BAR2"), sep = "\n")
print(htmltools::tagList(
ggplotly(ggplot(summarise(group_by(PA[PA$Products == i,], Year, Company), sum(EPSUM)), aes(x=as.factor(Year), y=`sum(EPSUM)`, fill=Year))
+ geom_bar(stat="identity", position=position_dodge())
+ labs(title = "EPSUM", x = "Time", y = "")
+ theme(title = red.bold.italic.text, axis.title = red.bold.italic.text)
+ scale_y_continuous(labels = comma)
+ facet_wrap(~Company)
)))
cat(paste("## Pie Chart"), sep = "\n")
print(htmltools::tagList(
plot_ly(summarise(group_by(PA[PA$Products == i,], Company, Year), sum(EPSUM)), labels = ~Company, values = ~`sum(EPSUM)`, type = 'pie',
textposition = 'inside',
textinfo = 'label+percent',
insidetextfont = list(color = '#FFFFFF'),
hoverinfo = 'text',
text = ~paste(Company, 'EPSUM', `sum(EPSUM)`),
marker = list(colors = colors,
line = list(color = '#FFFFFF', width = 1)),
showlegend = T) %>%
layout(title = '',
xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE))
))
cat(paste("## Table"), sep = "\n")
print(htmltools::tagList(
DT::datatable(PA[PA$Products == i,][,c(-1,-12)], extensions = c('Buttons','Scroller'),
rownames = F, selection = list(target = 'row+column'),
options = list(pageLength = 400, dom = 'Bfrtip', scrollY = 200, scroller = TRUE,
buttons = list(c(I('colvis'), 'copy', 'print'),
list(extend = 'collection', buttons = c('csv', 'excel', 'pdf'),
text = 'Download')),
searchHighlight = TRUE)) %>%
formatCurrency(4:10, '', interval = 3, mark = ",")
))
cat(" \n")
}
```
data introduction:
5191 obs. of 11 variables (variables are not important, just know that there are 6 variables should remember: Products, Year, Month, Company, EPSUM, Incurred)
My question is: when I knit the html document, it takes too much time, is there any way to decrease the time?
After I knit, Internet Explorer cannot properly show, but Google Chrome can. I think it is because ActiveX. However, after I open it, it still cannot properly show. Any suggestion?
Not properly show.
It should show like this.
I think this post also figures out many other users' questions about how to use for-loop in R markdowm. And there are few reference on StackOverFlow.
Very appreciate.