Saving a leaflet map as an html file - html

I've created a leaflet map of corn yield in Kansas using USDA NASS data. The problem I'm running into is exporting the leaflet into an html file using the command:
htmlwidgets::saveWidget(my_interactive_map, "kansas_corn2.html")
but I get this error:
Error in system.file(config, package = package) : 'package' must be of length 1
However, I can produce an html file by using Export > Save as Web Page.. from the Viewer pane.
How can I achieve the same export result using the command line?
My code for making the map is:
my_interactive_map <- tm_shape(STATE) +
tm_polygons("Value", textNA = "Not Reported",
title = unit_desc, palette=c('#8290af','#512888','#190019'),
auto.palette.mapping=FALSE, n = 6, style = "quantile", contrast = 0.9, colorNA = "#C0C0C0",
border.col = "#E8E8E8", showNA = FALSE, legend.is.portrait = FALSE,
legend.hist = FALSE, popup.vars = c("County: " = "COUNTY_NAME", "Value: " = "Value")) +
tm_credits("U.S. Department of Agriculture, National Agriculture Statistics Service") +
tm_format_World(title = paste(year_filt, prodn_practice_desc, commodity_desc, statisticcat_desc, "by",
agg_level_desc, "for", state, sep = " "))
my_interactive_map

You appear to be using the tmap library. For that you could use the function detailed here:
library(tmap)
save_tmap(my_interactive_map, "kansas_corn2.html")

Related

I am trying to create a hit using html file for amazon mturk

import boto3
#making client object
MTURK_SANDBOX = 'https://mturk-requester-sandbox.us-east-1.amazonaws.com'
mturk = boto3.client('mturk',
aws_access_key_id = "AKIA3RTXAGOQVVBX3PWF",
aws_secret_access_key = "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",
region_name='us-east-1',
endpoint_url = MTURK_SANDBOX
)
questionfile = open("/home/nm6088/mturk files/prac4_Jun27/index.html","r")
questions = questionfile .read()
localRequirements = [{
'QualificationTypeId': '00000000000000000071',
'Comparator': 'EqualTo',
'LocaleValues': [{
'Country': 'US'
}],
'RequiredToPreview': True
}]
hit = mturk.create_hit(
Title='Write a simple version of the test',
Description='A test HIT that requires the user to write a simple text.',
Keywords='simple, qualification, test',
Reward='0.01',
MaxAssignments=1,
LifetimeInSeconds=3600,
AssignmentDurationInSeconds=600,
AutoApprovalDelayInSeconds=200,
Question = questions,
QualificationRequirements=localRequirements
)
print ("A new HIT has been created. You can preview it here:")
print ("https://workersandbox.mturk.com/mturk/preview?groupId=" + hit['HIT']['HITGroupId'])
print ("HITID = " + hit['HIT']['HITId'] + " (Use to Get Results)")
botocore.exceptions.ClientError: An error occurred (ParameterValidationError) when calling the CreateHIT operation: There was an error parsing the XML question or answer data in your request. Please make sure the data is well-formed and validates against the appropriate schema. Details: cvc-elt.1.a: Cannot find the declaration of element 'HTMLQuestion'. (1656352060543 s)

Not saving html interactive file with R

I am trying to design a circos plot using BioCircos R package. BioCircos allows to save the plots as .html interactive files. However, when I run the package using RScript the saved .html file is empty. To save the .html file I used saveWidget option from htmlwidgets package. Is it something wrong with saveWidget option? The code I used follows:
#!/usr/bin/Rscript
######R script for BioCircos test
library(htmlwidgets)
library(BioCircos)
genomes <- list("chra1" = 217471166, "chra2" = 181034961, "chra3" = 153873357, "chra4" = 153961319, "chra5" = 164033575,
"chra6" = 154486312, "chra7" = 133565930, "chra8" = 147241510, "chra9" = 91218944, "chra10" = 52432566, "chrb1" = 843366180, "chrb2" = 842558404, "chrb3" = 707956555, "chrb4" = 635713434, "chrb5" = 567300182,
"chrb6" = 439630435, "chrb7" = 236595445, "chrb8" = 231667822, "chrb9" = 230778867, "chrb10" = 151572763, "chrb11" = 103205957) # custom genome
links_chromosomes_01 <- c("chra1", "chra2", "chra3", "chra4", "chra4", "chra5", "chra6", "chra7", "chra7", "chra8", "chra8", "chra9", "chra10") # Chromosomes on which the links should start
links_chromosomes_02 <- c("chrb2", "chrb3", "chrb1", "chrb9", "chrb10", "chrb4", "chrb5", "chrb6", "chrb1", "chrb8", "chrb3", "chrb7", "chrb6") # Chromosomes on which the links should end
links_pos_01 <- c(115060347, 102611974, 14761160, 128700431, 128681496, 42116205, 58890582, 40356090,
146935315, 136481944, 157464876, 39323393, 84752508, 136164354,
99573657, 102580613,
111139346, 120764772, 90748238, 122164776,
44933176, 18823342,
48771409, 128288229, 150613881, 18509106, 123913217, 51237349,
34237851, 53357604, 78270031,
25306417, 25320614,
94266153,
41447919, 28810876, 2802465,
45583472,
81968637, 27858237, 17263637,
30569409) ### links chra chromosomes
links_pos_02 <- c(410543481, 463189512, 825903588, 353914638, 354135472, 717707494, 643107332, 724899652,
583713545, 558756961, 642015290, 154999098, 340216235, 557731577,
643350872, 655077847,
85356666, 157889318, 226411560, 161566470,
109857786, 25338955,
473876792, 124495704, 46258030, 572314729, 141584107, 426419779,
531245660, 220131772, 353941099,
62422773, 62387030,
116923325,
76544045, 33452274, 7942164,
642047816,
215981114, 39278129, 23302654,
418922633) ### links chrb chromosomes
links_labels <- c("aldh1a3", "amh", "cyp26b1", "dmrt1", "dmrt3", "fgf20", "hhip", "srd5a3",
"amhr2", "dhh", "fgf9", "nr0b1", "rspo1", "wnt1",
"aldh1a2", "cyp19a1",
"lhx9", "pdgfb", "ptch2", "sox10",
"cbln1", "wt1",
"esr1", "foxl2", "gata4", "lrpprc", "serpine2", "srd5a2",
"asns", "ctnnb1", "srd5a1",
"cyp26a1", "cyp26c1",
"wnt4",
"ar", "nr5a1", "ptgds",
"fgf16",
"cxcr4", "pdgfa", "sox8",
"sox9")
tracklist <- BioCircosLinkTrack('myLinkTrack', links_chromosomes_01, links_pos_01,
links_pos_01, links_chromosomes_02, links_pos_02, links_pos_02,
maxRadius = 0.55, labels = links_labels)
#plotting results
plot_chra_chrb <- BioCircos(tracklist, genome = chra_chrb_genomes, genomeFillColor = "RdBu", chrPad = 0.02, displayGenomeBorder = FALSE, genomeLabelTextSize = "10pt", genomeTicksScale = 4e+3,
elementId = "chra_chrb_comp_plot_test.html")
saveWidget(plot_chra_chrb, "chra_chrb_comp_plot_test.html", selfcontained = F, libdir = "lib")
The command line to run this script:
Rscript /path_to/Circle_plot_test.r
I tried to use this script in RStudio (without saveWidget() command), however it took too long to run in my personnel computer and the results was not displayed. However, this could be due to memory usage setup because when I took off some data, the script easily generates the plot in RStudio and I am able to save it. Is there other way to save the .hmtl interactive files in R or am I doing something wrong using htmlwidgets package in my script?
Thanks all in advance for any help and comments.
When you said it took too long to run, that was a sign that something was wrong! You weren't getting anything when you used saveWidget, because there is nothing returned from BioCiros.
I found two things that are a problem. The first one will result in a blank output—you can't use a '.' in the element ID. This ID will be used in the HTML coding.
You were getting huge delays due to the scale you set for genomeTickScale. That scaling value is for a tick mark attribute. I'm not sure why you set it to .004. However, when I comment out that line, it renders immediately. I have no issues with saving the widget, either.
--One other thing, you had chra_chrb_genomes as the object name assigned to the parameter genome in the function BioCircos. I assumed it was the object genome from your question since it was the only unused object.
The only things I changed were in the BioCircos function:
(plot_chra_chrb <- BioCircos(tracklist, genome = genomes, #chra_chrb_genomes,
genomeFillColor = "RdBu",
chrPad = 0.02,
displayGenomeBorder = FALSE,
genomeLabelTextSize = "10pt",
# genomeTicksScale = 4e+3, # problematic
elementId = "chra_chrb_comp_plot_test" # no periods
))

Running timeseries graphing function in Rmd producing cluttered x-axis labels (not present in test code)

I have a folder of xx .csv timeseries that I want to graph and knit into a clean HTML document. I have a ggplot code that produces the plot that I want using a single timeseries.csv. However, when I try to put the bones of that ggplot code in a function inside of a for loop to run each of the timeseries.csv files through the function I get a some plots with pretty different formatting.
Plot generated with my test ggplot code:
Plot generated with function and for loop:
Changes I'm trying to make to the ugly Rmd plot:
Nicely space the x-axis tick marks to whole mins (i.e. "11:14:00", "11:15:00")
Connect the data points (solved with subbing geom_line() with geom_path())
Example Rmd Code Below. Please Note that the graphs produced still have nice formatting, I'm not sure how to reproduce this problem sort of posting a 500 row dataframe. I also don't know how to post my rmd code without SO using the formatting commands in this post, so I threw in at 3 of " around my header formatting and at the end of the code to disable it.
Edits and Updates
I am getting a persistent error geom_path: Each group consists of only one observation. Do you need to adjust the group
aesthetic?.
As suggested by the commenters I tried removing plot() and using the the createChlDiffPlot() directly and replacing plot() with print(). Both produce the same ugly plots as before.
Replaced geom_line() with geom_path(). The points are now connected! x-axis cluttering is still there.
Time variable is reading as hms num
Many thanks for any help on this!
```
---
title: "Chl Filtration"
output:
flexdashboard::flex_dashboard:
theme: yeti
orientation: rows
editor_options:
chunk_output_type: console
---
```{r setup}
library(flexdashboard)
library(dplyr)
library(ggplot2)
library(hms)
library(ggthemes)
library(readr)
library(data.table)
#### Example Data
df1 <- data.frame(Time = as_hms(c("11:22:33","11:22:34","11:22:35","11:22:38","11:23:00","11:23:01","11:23:02")),
Chl_ug_L_Up = c(0.2,0.1,0.25,-0.2,-0.3,-0.15,0.1),
Chl_ug_L_Down = c(0.5,0.4,0.3,0.2,0.1,0,-0.1))
df2 <- data.frame(Time = as_hms(c("08:02:33","08:02:34","08:02:35","08:02:40","08:02:42","08:02:43","08:02:49")),
Chl_ug_L_Up = c(-0.2,-0.1,-0.25,0.2,0.3,0.15,-0.1),
Chl_ug_L_Down = c(-0.1,0,0.1,0.2,0.3,0.4,0.1))
data_directory = "./" # data folder in R project folder in the real deal
output_directory = "./" # output graph directory in R project folder
write_csv(df1, file.path(data_directory, "SO_example_df1.csv"))
write_csv(df2, file.path(data_directory, "SO_example_df2.csv"))
#### Function to create graphs
createChlDiffPlot = function(aTimeSeriesFile, aFileName, aGraphOutputDirectory, aType)
{
aFile_Mod = aTimeSeriesFile %<>%
select(Time, Chl_ug_L_Up, Chl_ug_L_Down) %>%
mutate(Chl_diff = Chl_ug_L_Up - Chl_ug_L_Down)
one_plot = ggplot(data = aFile_Mod, aes(x = Time, y = Chl_diff)) + # tried adding 'group = 1' in aes to connect points
geom_path(size = 1, color = "green") +
geom_point(color = "green") +
theme_gdocs() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.title = element_blank()) +
labs(x = "", y = "Chl Difference", title = paste0(aFileName, " - ", "Filtration"))
one_graph_name = paste0(gsub(".csv", "", aFileName), "_", aType, ".pdf")
ggsave(one_graph_name, one_plot, dpi = 600, width = 7, height = 5, units = "in", device = "pdf", aGraphOutputDirectory)
return(one_plot)
}
"``` ### remove the quotes when running example
Plots - After Velocity Adjustment
=====================================" ### remove quotes when running example
```{r, fig.width=13.5, fig.height=5}
all_files_Filtration = list.files(data_directory, pattern = ".csv")
# Loop to plot function
for(file in 1 : length(all_files_Filtration))
{
file_name = all_files_Filtration[file]
one_file = fread(file.path(data_directory, file_name))
# plot the time series agains
plot(createChlDiffPlot(one_file, file_name, output_directory, "Velocity_Paired"))
}
"``` #remove quotes when running example
```
I finally figured it out.
1) Replacing geom_line() with geom_path() connected the data points when rendered in Rmd.
2) df1$Time was formatted as a difftime object. When I looked at the dataframe in the global environment, Time :hmsnum 11:11:09 .... This made me think my format was ok, but when I ran class(df1$Time) I got [1] "hms" "difftime". With a quick google I found out difftime objects are not quite the same as hms, and my original time was generated by subtracting times. I added a conversion into my mutate function:
select(Time, Chl_ug_L_Up, Chl_ug_L_Down) %>%
mutate(Chl_diff = Chl_ug_L_Up - Chl_ug_L_Down,
Time = as_hms(Time)) # convert difftime objecct to hms
ggplot I think has some auto-formatting for hms variables, which is why difftime variable was producing ugly crowded x- axes.

Is there a way to take a list of strings and create a JSON file, where both the key and value are list items?

I am creating a python script that can read scanned, and tabular .pdfs and extract some important data and insert it into a JSON to later be implemented into a SQL database (I will also be developing the DB as a project for learning MongoDB).
Basically, my issue is I have never worked with any JSON files before but that was the format I was recommended to output to. The scraping script works, the pre-processing could be a lot cleaner, but for now it works. The issue I run into is the keys, and values are in the same list, and some of the values because they had a decimal point are two different list items. Not really sure where to even start.
I don't really know where to start, I suppose since I know what the indexes of the list are I can easily assign keys and values, but then it may not be applicable to any .pdf, that is the script cannot be coded explicitly.
import PyPDF2 as pdf2
import textract
with "TestSpec.pdf" as filename:
pdfFileObj = open(filename, 'rb')
pdfReader = pdf2.pdfFileReader(pdfFileObj)
num_pages = pdfReader.numpages
count = 0
text = ""
while count < num_pages:
pageObj = pdfReader.getPage(0)
count += 1
text += pageObj.extractText()
if text != "":
text = text
else:
text = textract.process(filename, method="tesseract", language="eng")
def cleanText(x):
'''
This function takes the byte data extracted from scanned PDFs, and cleans it of all
unnessary data.
Requires re
'''
stringedText = str(x)
cleanText = stringedText.replace('\n','')
splitText = re.split(r'\W+', cleanText)
caseingText = [word.lower() for word in splitText]
cleanOne = [word for word in caseingText if word != 'n']
dexStop = cleanOne.index("od260")
dexStart = cleanOne.index("sheet")
clean = cleanOne[dexStart + 1:dexStop]
return clean
cleanText = cleanText(text)
This is the current output
['n21', 'feb', '2019', 'nsequence', 'lacz', 'rp', 'n5', 'gat', 'ctc', 'tac', 'cat', 'ggc', 'gca', 'cat', 'ttc', 'ccc', 'gaa', 'aag', 'tgc', '3', 'norder', 'no', '15775199', 'nref', 'no', '207335463', 'n25', 'nmole', 'dna', 'oligo', '36', 'bases', 'nproperties', 'amount', 'of', 'oligo', 'shipped', 'to', 'ntm', '50mm', 'nacl', '66', '8', 'xc2', 'xb0c', '11', '0', '32', '6', 'david', 'cook', 'ngc', 'content', '52', '8', 'd260', 'mmoles', 'kansas', 'state', 'university', 'biotechno', 'nmolecular', 'weight', '10', '965', '1', 'nnmoles']
and we want the output as a JSON setup like
{"Date | 21feb2019", "Sequence ID: | lacz-rp", "Sequence 5'-3' | gat..."}
and so on. Just not sure how to do that.
here is a screenshot of the data from my sample pdf
So, i have figured out some of this. I am still having issues with grabbing the last 3rd of the data i need without explicitly programming it in. but here is what i have so far. Once i have everything working then i will worry about optimizing it and condensing.
# for PDF reading
import PyPDF2 as pdf2
import textract
# for data preprocessing
import re
from dateutil.parser import parse
# For generating the JSON file array
import json
# This finds and opens the pdf file, reads the data, and extracts the data.
filename = "*.pdf"
pdfFileObj = open(filename, 'rb')
pdfReader = pdf2.PdfFileReader(pdfFileObj)
text = ""
pageObj = pdfReader.getPage(0)
text += pageObj.extractText()
# checks if extracted data is in string form or picture, if picture textract reads data.
# it then closes the pdf file
if text != "":
text = text
else:
text = textract.process(filename, method="tesseract", language="eng")
pdfFileObj.close()
# Converts text to string from byte data for preprocessing
stringedText = str(text)
# Removed escaped lines and replaced them with actual new lines.
formattedText = stringedText.replace('\\n', '\n').lower()
# Slices the long string into a workable piece (only contains useful data)
slice1 = formattedText[(formattedText.index("sheet") + 10): (formattedText.index("secondary") - 2)]
clean = re.sub('\n', " ", slice1)
clean2 = re.sub(' +', ' ', clean)
# Creating the PrimerData dictionary
with open("PrimerData.json",'w') as file:
primerDataSlice = clean[clean.index("molecular"): -1]
primerData = re.split(": |\n", primerDataSlice)
primerKeys = primerData[0::2]
primerValues = primerData[1::2]
primerDict = {"Primer Data": dict(zip(primerKeys,primerValues))}
# Generatring the JSON array "Primer Data"
primerJSON = json.dumps(primerDict, ensure_ascii=False)
file.write(primerJSON)
# Grabbing the date (this has just the date, so json will have to add date.)
date = re.findall('(\d{2}[\/\- ](\d{2}|january|jan|february|feb|march|mar|april|apr|may|may|june|jun|july|jul|august|aug|september|sep|october|oct|november|nov|december|dec)[\/\- ]\d{2,4})', clean2)
Without input data it is difficult to give you working code. A minimal working example with input would help. As for JSON handling, python dictionaries can dump to json easily. See examples here.
https://docs.python-guide.org/scenarios/json/
Get a json string from a dictionary and write to a file. Figure out how to parse the text into a dictionary.
import json
d = {"Date" : "21feb2019", "Sequence ID" : "lacz-rp", "Sequence 5'-3'" : "gat"}
json_data = json.dumps(d)
print(json_data)
# Write that data to a file
So, I did figure this out, the problem was really just that because of the way my pre-processing was pulling all the data into a single list wasn't really that great of an idea considering that the keys for the dictionary never changed.
Here is the semi-finished result for making the Dictionary and JSON file.
# Collect the sequence name
name = clean2[clean2.index("Sequence") + 11: clean2.index("Sequence") + 19]
# Collecting Shipment info
ordered = input("Who placed this order? ")
received = input("Who is receiving this order? ")
dateOrder = re.findall(
r"(\d{2}[/\- ](\d{2}|January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|July|Jul|August|Aug|September|Sep|October|Oct|November|Nov|December|Dec)[/\- ]\d{2,4})",
clean2)
dateReceived = date.today()
refNo = clean2[clean2.index("ref.No. ") + 8: clean2.index("ref.No.") + 17]
orderNo = clean2[clean2.index("Order No.") +
10: clean2.index("Order No.") + 18]
# Finding and grabbing the sequence data. Storing it and then finding the
# GC content and melting temp or TM
bases = int(clean2[clean2.index("bases") - 3:clean2.index("bases") - 1])
seqList = [line for line in clean2 if re.match(r'^[AGCT]+$', line)]
sequence = "".join(i for i in seqList[:bases])
def gc_content(x):
count = 0
for i in x:
if i == 'G' or i == 'C':
count += 1
else:
count = count
return round((count / bases) * 100, 1)
gc = gc_content(sequence)
tm = mt.Tm_GC(sequence, Na=50)
moleWeight = round(mw(Seq(sequence, generic_dna)), 2)
dilWeight = float(clean2[clean2.index("ug/OD260:") +
10: clean2.index("ug/OD260:") + 14])
dilution = dilWeight * 10
primerDict = {"Primer Data": {
"Sequence": sequence,
"Bases": bases,
"TM (50mM NaCl)": tm,
"% GC content": gc,
"Molecular weight": moleWeight,
"ug/0D260": dilWeight,
"Dilution volume (uL)": dilution
},
"Shipment Info": {
"Ref. No.": refNo,
"Order No.": orderNo,
"Ordered by": ordered,
"Date of Order": dateOrder,
"Received By": received,
"Date Received": str(dateReceived.strftime("%d-%b-%Y"))
}}
# Generating the JSON array "Primer Data"
with open("".join(name) + ".json", 'w') as file:
primerJSON = json.dumps(primerDict, ensure_ascii=False)
file.write(primerJSON)

sparkline R creating html table

I found package sparkline:https://github.com/htmlwidgets/sparkline
but I have no idea how to create in markdown/html data.frame with sparkcharts.
I know there is an example in link above, but I don't know how to create that data frame in html automatically.
I'm not really sure what your problem is or which data frame you refer to. The example on the link you provide is working perfectly. Try the following steps
Start R Studio
Install the sparkline package if you haven't
library(devtools)
install_github('htmlwidgets/sparkline')
Use File -> New File -> R markdown
Copy in the example from the htmlwidgets in the editor and hit the knitr button.
and you will get an html file with several examples.
I think this is what you are looking for: https://leonawicz.github.io/HtmlWidgetExamples/ex_dt_sparkline.html
you can also find a dummy example here:
R combining data tables DT package and sparkline package box plot with target value
As an example:
Boiled down dummy example as follows (disregard the last column in the data/table)
library(data.table)
library(DT)
library(sparkline)
hist.A<-rnorm(100)
hist.B<-rnorm(100)
hist.C<-rnorm(100)
current.A<-rnorm(1)
current.B<-rnorm(1)
current.C<-rnorm(1)
#whisker should show full range of data
boxval.A<-paste(quantile(hist.A,probs=c(0,0.25,0.5,0.75,1)),collapse = ",")
boxval.B<-paste(quantile(hist.B,probs=c(0,0.25,0.5,0.75,1)),collapse = ",")
boxval.C<-paste(quantile(hist.C,probs=c(0,0.25,0.5,0.75,1)),collapse = ",")
data<-data.frame(Variable=c("A","B","C"),Current=c(current.A,current.B,current.C),boxplot=c(boxval.A,boxval.B,boxval.C))
data$boxWithTarget<-paste(data$boxplot,data$Current,Sep=",")
cd <- list(list(targets = 2, render = JS("function(data, type, full){ return '<span class=sparkSamples>' + data + '</span>' }")))
line_string <- "type: 'line', lineColor: 'black', fillColor: '#ccc', highlightLineColor: 'orange', highlightSpotColor: 'orange'"
box_string <- "type: 'box', raw:true, showOutliers:false,lineColor: 'black', whiskerColor: 'black', outlierFillColor: 'black', outlierLineColor: 'black', medianColor: 'black', boxFillColor: 'orange', boxLineColor: 'black'"
cb = JS("function (oSettings, json) {\n $('.sparkSeries:not(:has(canvas))').sparkline('html', { ",
line_string, " });\n $('.sparkSamples:not(:has(canvas))').sparkline('html', { ",
box_string, " });\n}")
d <- datatable(data.table(data), rownames = FALSE, options = list(columnDefs = cd,
fnDrawCallback = cb))
d$dependencies <- append(d$dependencies, htmlwidgets:::getDependency("sparkline"))
d