Related
I have a dataframe grouped by ID1, ID2, ID3 and variables V1, V2, V3 and V4. I am trying to capture the values per group in R which are most different from the median. For this, I have subtracted the median from each value and squared the result (because some of the differences are negative). Below is an example dataframe.
# Example data: three ID columns followed by numeric variables.
# Only five of the seven names in `colnames` have data in this example.
colnames <- c("ID1", "ID2", "ID3", "V1", "V2", "V3", "V4")
a <- c("A", "B", "C", "D")
b <- c("X", "Y", "Z", "T")
c <- c("1", "2", "3", "4")
d <- c(1.23, 2.03, 2.45, 5.66)
e <- c(1, 2, 3, 4)
df <- data.frame(a, b, c, d, e)
# Apply the intended names; otherwise the columns are called a..e and a
# group_by(ID1, ID2, ID3) on this data frame cannot find them.
names(df) <- colnames[seq_along(df)]
I have made a function med_removed as follows.
#' Largest squared deviations from the median
#'
#' Computes `(x - median(x))^2` for every element and returns the `n`
#' largest results, i.e. the deviations of the values that differ most
#' from the median.
#'
#' @param x A numeric vector.
#' @param na.rm Passed to `median()`: ignore `NA` when computing the median?
#' @param ... Unused; accepted so extra arguments from callers are tolerated.
#' @param n Maximum number of deviations to return (default 4).
#' @return A numeric vector of at most `n` squared deviations in decreasing
#'   order. `NA` deviations are dropped by `sort()`.
med_removed <- function(x, na.rm = TRUE, ..., n = 4L) {
  sq_dev <- (x - median(x, na.rm = na.rm))^2
  # Sort largest first so head() keeps the values furthest from the median.
  head(sort(sq_dev, decreasing = TRUE), n)
}
# Apply med_removed() to every non-grouping column within each
# ID1/ID2/ID3 group.
df_selected <- df %>%
  group_by(ID1, ID2, ID3) %>%
  mutate_all(med_removed)
The problem is that I want to select rows in the original dataframe based on the (x-median(x))^2 to pick up the top 2 values.
Does anyone know a good way of doing that.
Thanks
I'm looking to use the Excel Power Query to import some json that looks like the following (but much bigger, more fields etc.):
example-records.json
{
"records": {
"record_id_1": {
"file_no": "5792C",
"loads": {
"load_id_1": {
"docket_no": "3116115"
},
"load_id_2": {
"docket_no": "3116118"
},
"load_id_3": {
"docket_no": "3208776"
}
}
},
"record_id_2": {
"file_no": "5645C",
"loads": {
"load_id_4": {
"docket_no": "2000527155"
},
"load_id_5": {
"docket_no": "2000527156"
},
"load_id_6": {
"docket_no": "2000527146"
}
}
}
}
}
I want to get a table like the following at the load_id / docket level. A row per load_id
What I've tried
Clicking buttons in power query UI I get the following.
The problem is I can't include a file_no column and this doesn't work when there are lots of load ids.
// Attempt built with the Power Query UI: expand every record's loads into one
// column per load id, then unpivot those columns into rows.
let
Source = Json.Document(File.Contents("H:\Software\Site Apps\example-records.json")),
records = Source[records],
// One row per field of the records object: field name in Name, record body in Value.
#"Converted to Table" = Record.ToTable(records),
#"Expanded Value" = Table.ExpandRecordColumn(#"Converted to Table", "Value", {"file_no", "loads"}, {"Value.file_no", "Value.loads"}),
// file_no is removed here, so it is not available in the final table.
#"Removed Columns" = Table.RemoveColumns(#"Expanded Value",{"Value.file_no"}),
// The load ids are listed by name; a load id that is not in this list is not expanded.
#"Expanded Value.loads" = Table.ExpandRecordColumn(#"Removed Columns", "Value.loads", {"load_id_1", "load_id_2", "load_id_3", "load_id_4", "load_id_5", "load_id_6"}, {"Value.loads.load_id_1", "Value.loads.load_id_2", "Value.loads.load_id_3", "Value.loads.load_id_4", "Value.loads.load_id_5", "Value.loads.load_id_6"}),
// Turn the per-load columns into Attribute/Value rows, keeping Name as the key.
#"Unpivoted Columns" = Table.UnpivotOtherColumns(#"Expanded Value.loads", {"Name"}, "Attribute", "Value"),
#"Expanded Value1" = Table.ExpandRecordColumn(#"Unpivoted Columns", "Value", {"docket_no"}, {"Value.docket_no"})
in
#"Expanded Value1"
You can use
// Flatten the whole document with the JSON function, then carry a value down
// from the rows without a third name level onto the rows that have one.
let Source = JSON(Json.Document(File.Contents("c:\temp\example.json"))),
#"Removed Other Columns" = Table.SelectColumns(Source,{"Name.1", "Name.3", "Value"}),
// Custom holds Value only on rows where Name.3 is null
// (presumably the file_no rows - verify against the flattened output).
#"Added Custom" = Table.AddColumn(#"Removed Other Columns", "Custom", each if [Name.3]=null then [Value] else null),
// Copy that value down onto the rows below it.
#"Filled Down" = Table.FillDown(#"Added Custom",{"Custom"}),
// Keep only the rows that have a third name level.
#"Filtered Rows" = Table.SelectRows(#"Filled Down", each ([Name.3] <> null))
in #"Filtered Rows"
This is based on a function I named JSON, which comes from Imke (https://www.thebiccountant.com/2018/06/17/automatically-expand-all-fields-from-a-json-document-in-power-bi-and-power-query/) and is reproduced below:
// Table.JsonExpandAll (name taken from the documentation record below).
// Takes JSON, either unparsed or already parsed, and expands it one nesting
// level at a time into a table with a Value column, a Level column, sort keys
// and one "Name.N" column per nesting level.
let
func = (JSON) =>
let
Source = JSON,
// Parse the input; if parsing fails, use the input as it is.
ParseJSON = try Json.Document(Source) otherwise Source,
// A record becomes a Name/Value table; anything else is handled as a list
// whose items are paired with a constant Name of 0.
TransformForTable =
if Value.Is(ParseJSON, type record) then
Record.ToTable(ParseJSON)
else
#table(
{"Name", "Value"},
List.Zip({List.Repeat({0}, List.Count(ParseJSON)), ParseJSON})
),
AddSort = Table.Buffer(Table.AddColumn(TransformForTable, "Sort", each 0)),
// One List.Generate step per nesting level. The seed state has no [Finished]
// field, so its selector result is dropped with List.Skip.
LG = List.Skip(
List.Generate(
() => [Next = AddSort, Counter = 1, AddIndex = #table({"Sort"}, {{""}})],
// Continue until the state's AddIndex table starts with the "End" sentinel,
// which the Next field below produces when nothing is left to expand.
each [AddIndex]{0}[Sort] <> "End",
each [
// Extend the dotted Sort key with the row position at this level.
AddIndex = Table.AddIndexColumn([Next], "Index", 0, 1),
MergeSort = Table.CombineColumns(
Table.TransformColumnTypes(
AddIndex,
{{"Sort", type text}, {"Index", type text}},
"en-GB"
),
{"Sort", "Index"},
Combiner.CombineTextByDelimiter(".", QuoteStyle.None),
"Sort"
),
PJson = Table.TransformColumns(
MergeSort,
{{"Value", each try Json.Document(_) otherwise _}}
),
// Classify each value; only "other" values are treated as finished.
AddType = Table.AddColumn(
PJson,
"Type",
each
if Value.Is([Value], type record) then
"Record"
else if Value.Is([Value], type list) then
"List"
else if Value.Is([Value], type table) then
"Table"
else
"other"
),
AddStatus = Table.AddColumn(
AddType,
"Status",
each if [Type] = "other" then "Finished" else "Unfinished"
),
Finished = Table.SelectRows(AddStatus, each ([Status] = "Finished")),
Unfinished = Table.SelectRows(AddStatus, each ([Status] = "Unfinished")),
// Wrap a record in a one-item list so records and lists can both go
// through Table.ExpandListColumn.
AddNext = Table.AddColumn(
Unfinished,
"Next",
each if [Type] = "Record" then {[Value]} else [Value]
),
RemoveCols = Table.RemoveColumns(AddNext, {"Value", "Type", "Status"}),
ExpandNext = Table.ExpandListColumn(RemoveCols, "Next"),
AddIndex2 = Table.AddIndexColumn(ExpandNext, "Index", 0, 1),
MergeSort2 = Table.CombineColumns(
Table.TransformColumnTypes(
AddIndex2,
{{"Sort", type text}, {"Index", type text}},
"en-GB"
),
{"Sort", "Index"},
Combiner.CombineTextByDelimiter(".", QuoteStyle.None),
"Sort"
),
// Records become Name/Value tables; values that Text.From accepts become
// one-cell tables; everything else is passed through unchanged.
TransformRecord = Table.TransformColumns(
MergeSort2,
{
{
"Next",
each try
Record.ToTable(_)
otherwise
try
if Value.Is(Text.From(_), type text) then
#table({"Value"}, {{_}})
else
_
otherwise
_
}
}
),
FilterOutNulls = Table.SelectRows(TransformRecord, each [Next] <> null),
// Input for the next step: the "End" sentinel when nothing is left, otherwise
// the expanded tables with this level's names in column "Name.<Counter>".
Next =
if Table.IsEmpty(FilterOutNulls) then
#table({"Sort"}, {{"End"}})
else if Value.Is(FilterOutNulls[Next]{0}, type table) = true then
Table.ExpandTableColumn(
FilterOutNulls,
"Next",
{"Name", "Value"},
{"Name." & Text.From([Counter]), "Value"}
)
else
Table.RenameColumns(FilterOutNulls, {{"Next", "Value"}}),
Counter = [Counter] + 1
],
// Each step contributes its finished rows, tagged with Level = Counter - 2.
each Table.AddColumn([Finished], "Level", (x) => _[Counter] - 2)
)
),
// NOTE(review): Check is not referenced by any later step in this function.
Check = LG{2},
Combine = Table.Combine(LG),
Clean = Table.RemoveColumns(Combine, {"Status", "Type"}),
Trim = Table.TransformColumns(Clean, {{"Sort", each Text.Trim(_, "."), type text}}),
// Dynamic Padding for the sort-column so that it sorts by number in text strings
SelectSort = Table.SelectColumns(Trim, {"Sort"}),
SplitSort = Table.AddColumn(
SelectSort,
"Custom",
each List.Transform(try Text.Split([Sort], ".") otherwise {}, Number.From)
),
ToTable = Table.AddColumn(
SplitSort,
"Splitted",
each Table.AddIndexColumn(Table.FromColumns({[Custom]}), "Pos", 1, 1)
),
ExpandTable = Table.ExpandTableColumn(ToTable, "Splitted", {"Column1", "Pos"}),
// Largest number found at each position of the Sort key.
// NOTE(review): Max is ascribed type text although Column1 holds the results
// of Number.From - confirm the intended type.
GroupPos = Table.Group(
ExpandTable,
{"Pos"},
{{"All", each _, type table}, {"Max", each List.Max([Column1]), type text}}
),
// Number of digits needed at each position.
Digits = Table.AddColumn(GroupPos, "Digits", each Text.Length(Text.From([Max]))),
FilteredDigits = List.Buffer(Table.SelectRows(Digits, each ([Digits] <> null))[Digits]),
// SortBy is the Sort key with every segment left-padded with zeros to the
// digit count of its position.
SortNew = Table.AddColumn(
Trim,
"SortBy",
each Text.Combine(
List.Transform(
List.Zip({Text.Split([Sort], "."), List.Positions(Text.Split([Sort], "."))}),
each Text.PadStart(_{0}, FilteredDigits{_{1}}, "0")
),
"."
)
),
FilterNotNull = Table.SelectRows(SortNew, each ([Value] <> null)),
// Put Value, Level, Sort and SortBy first, followed by the remaining columns.
Reorder = Table.ReorderColumns(
FilterNotNull,
{"Value", "Level", "Sort", "SortBy"}
& List.Difference(
Table.ColumnNames(FilterNotNull),
{"Value", "Level", "Sort", "SortBy"}
)
),
// Dots holds, on every row, the list of column names that start with "Name".
Dots = Table.AddColumn(
#"Reorder",
"Dots",
each List.Select(Table.ColumnNames(#"Reorder"), (l) => Text.StartsWith(l, "Name"))
),
// This sort is just to view in the query editor. When loaded to the data model it will not be kept. Use "Sort by column" in the data model instead.
Sort = Table.Sort(Dots, {{"SortBy", Order.Ascending}})
in
Sort,
// Metadata attached to the function's type by Value.ReplaceType below.
documentation = [
Documentation.Name = " Table.JsonExpandAll ",
Documentation.Description
= " Dynamically expands the <Json> Record and returns values in one column and additional columns to navigate. ",
Documentation.LongDescription
= " Dynamically expands the <Json> Record and returns values in one column and additional columns to navigate. Input can be JSON in binary format or the already parsed JSON. ",
Documentation.Category = " Table ",
Documentation.Version = " 1.2: Added column [Dots] (22/02/2019)",
Documentation.Author = " Imke Feldmann: www.TheBIccountant.com . ",
Documentation.Examples = {[Description = " ", Code = " ", Result = " "]}
]
in
Value.ReplaceType(func, Value.ReplaceMetadata(Value.Type(func), documentation))
I managed to solve it with an added custom column; this is the step that enables the expansion to one load id per row:
#"Added Custom" = Table.AddColumn(#"Expanded Value", "Custom", each Record.ToTable([Value.loads]))
// Produces one row per load id, keeping the record id and file_no on every row.
let
Source = Json.Document(File.Contents("H:\Software\Site Apps\example-records.json")),
records = Source[records],
// One row per field of the records object: field name in Name, record body in Value.
#"Converted to Table" = Record.ToTable(records),
#"Expanded Value" = Table.ExpandRecordColumn(#"Converted to Table", "Value", {"file_no", "loads"}, {"Value.file_no", "Value.loads"}),
// Turn each loads record into a nested Name/Value table so the load ids do not
// have to be listed by name.
#"Added Custom" = Table.AddColumn(#"Expanded Value", "Custom", each Record.ToTable([Value.loads])),
#"Removed Columns" = Table.RemoveColumns(#"Added Custom",{"Value.loads"}),
// Expanding the nested table yields one row per load id.
#"Expanded Custom" = Table.ExpandTableColumn(#"Removed Columns", "Custom", {"Name", "Value"}, {"Custom.Name", "Custom.Value"}),
#"Expanded Custom.Value" = Table.ExpandRecordColumn(#"Expanded Custom", "Custom.Value", {"docket_no"}, {"Custom.Value.docket_no"}),
#"Renamed Columns" = Table.RenameColumns(#"Expanded Custom.Value",{{"Name", "record_id"}, {"Value.file_no", "file_no"}, {"Custom.Name", "load_id"}, {"Custom.Value.docket_no", "docket_no"}})
in
#"Renamed Columns"
## This is the container that I am using in the data table, but the <br> tag in the column name is not working.
I used escape = FALSE in the data table but I am still facing the same issue,
and \n is not working either. I want something like this: column name = first name, then (on the next line) last name.
# Build a centred header cell (<th>) that spans `n` columns and shows `group`.
test <- function(group, n) {
  htmltools::tags$th(colspan = n, group, class = "dt-center")
}
# Custom two-row table header for the datatable.
# htmltools escapes plain character children, so a "<br>" inside an ordinary
# string is displayed literally. Labels that need a line break are wrapped in
# htmltools::HTML() so the markup is emitted as-is.
myContainer <- htmltools::withTags(table(
  class = '', style = "width:100%",
  thead(
    tr(
      th(rowspan = 2, ' '),
      th(colspan = 1, 'group 1', class = "dt-center"),
      th(colspan = 2, 'group 2', class = "dt-center"),
      th(colspan = 2, 'group 3', class = "dt-center")
    ),
    tr(
      th(htmltools::HTML("new<br>ID")),
      lapply(
        c("SUBJID", "SITE<br>ID", "AG<br>E", "SUBJID", "RACE"),
        function(label) htmltools::tags$th(htmltools::HTML(label))
      )
    )
  )
))
# Shiny server: reads the ADAE dataset and renders it as a DT table that uses
# the custom header in `myContainer`.
Server <- function(input, output, session) {
  adae <- read_sas("C:/Arinjay_Intern/Work/ADaM/adae.sas7bdat")
  # NOTE(review): `adae_df` was used below without being defined in the visible
  # code. It is derived here from the six columns that formatStyle() refers to;
  # confirm this matches the intended subset.
  adae_df <- adae[, c("USUBJID", "SUBJID", "SITEID", "AGE", "SEX", "RACE")]

  output$intTable <- renderDT({
    adae_df %>%
      datatable(
        class = 'compact', extensions = 'Buttons', rownames = FALSE,
        container = myContainer, escape = FALSE,
        # Remove the top borders that the default styling draws on the header.
        callback = JS(c(
          "$('table.dataTable thead th').css('border-top', 'none');",
          "$('table.dataTable.no-footer').css('border-top', 'none');"
        )),
        options = list(
          dom = 'tB', pageLength = 5,
          ordering = FALSE, class = "compact",
          columnDefs = list(list(className = "dt-center", targets = "_all")),
          buttons = 'pdf'
        ),
        caption = htmltools::tags$caption(
          style = 'caption-side: bottom; text-align: left;',
          htmltools::em(HTML('N = number of subjects in the specified population. <br>n=number of subjects in each category. % = 100*n/N.'))
        )
      ) %>%
      formatStyle(
        c("USUBJID", "SUBJID", "SITEID", "AGE", "SEX", "RACE"),
        backgroundColor = 'white'
      )
  })
}
# Page layout: a navbar with a single tab that holds the group selector,
# a free-text input and the table output.
UI <- navbarPage(
  title = "DT Interactive Tables",
  tabPanel(
    title = "ADaM DataSets",
    fluidPage(
      checkboxGroupInput(
        inputId = "group",
        label = "Please select a group",
        choices = c("FD_Cohort", "MRD_Cohort")
      ),
      textInput(inputId = "n", label = "any value", value = 2),
      DTOutput(outputId = "intTable")
    )
  )
)

# Launch the app.
shinyApp(ui = UI, server = Server)
Expected output:
Neither \n nor <br> works in xtable, so you could define the header rows explicitly as shown below:
# Each column title is split over two header rows, so it reads as two lines.
row1 <- c(" USUB", "SUBJ", "SITE", "AG", "SEX", "RACE")
row2 <- c("JID", "ID", "ID", "E", " ", "")

# Header: one row of group labels (two columns each), then the two title rows.
myContainer <- htmltools::tags$table(
  class = "dt-center", style = "width:100%",
  htmltools::tags$thead(
    htmltools::tags$tr(
      htmltools::tags$th(colspan = 2, "group 1", class = "dt-center"),
      htmltools::tags$th(colspan = 2, "group 2", class = "dt-center"),
      htmltools::tags$th(colspan = 2, "group 3", class = "dt-center")
    ),
    htmltools::tags$tr(lapply(row1, htmltools::tags$th)),
    htmltools::tags$tr(lapply(row2, htmltools::tags$th))
  )
)
or you can write something in css or js to handle it. The above code gives the following output on a dummy dataset:
I am parsing JSON data to write a CSV file, using the tidyjson package.
At some point I need to put each of the subjects below into its own column, with the score as its value. That is, Physics and Mathematics will be column names and the scores will be their values.
{
"results": {
"subjects": [
{
"subject": {
"name": "Physics",
"code": "PHY"
},
"score": 70
},
{
"subject": {
"name": "Mathematics",
"code": "MATH"
},
"score": 50
}
]
}
}
I have tried as below:
# One row per (user, subject): spread the score, step into the nested
# "subject" object and spread its "name", then copy the score into the
# column that matches the subject name.
json_data %>%
  as.tbl_json %>%
  gather_array %>%
  spread_values(user_id = jstring("user_id")) %>%
  enter_object("results") %>%
  enter_object("subjects") %>%
  gather_array("subjects") %>%
  spread_values(score = jstring("score")) %>%
  enter_object("subject") %>%
  # Inside "subject" the keys are "name" and "code"; there is no "subject" key,
  # so the name has to be spread here for the mutate() below to find it.
  spread_values(name = jstring("name")) %>%
  mutate(
    Physics = case_when(name == "Physics" ~ score),
    Mathematics = case_when(name == "Mathematics" ~ score)
  )
But this shows multiple rows for one student. I need to show single row with each subject and score as a column value.
But this shows multiple rows for one student. I need to show single row with each subject and score as a column value.
Does that mean you need a unique row per subject name? In that case you can use aggregate.
if you have a data frame named df like,
# Sample data: two subjects, each with one real score and one missing score.
subject <- rep(c("phy", "math"), times = 2)
Score <- c(10, NA, NA, 20)
df <- data.frame(subject = subject, Score = Score)
then,
# Collapse to one row per subject, keeping the largest non-missing score.
aggregate(
  df["Score"],
  by = list(subjectName = df$subject),
  FUN = max,
  na.rm = TRUE
)
output
subjectName Score
math 20
phy 10
I am trying to read files that has json content and convert that to tabular data based on some fields.
The file includes content like this:
{"senderDateTimeStamp":"2016/04/08 10:03:18","senderHost":null,"senderCode":"web_app","senderUsecase":"appinternalstats_prod","destinationTopic":"web_app_appinternalstats_realtimedata_topic","correlatedRecord":false,"needCorrelationCacheCleanup":false,"needCorrelation":false,"correlationAttributes":null,"correlationRecordCount":0,"correlateTimeWindowInMills":0,"lastCorrelationRecord":false,"realtimeESStorage":true,"receiverDateTimeStamp":1460124283554,"payloadData":{"timestamp":"2016-04-08T10:03:18.244","status":"get","source":"MSG1","ITEM":"TEST1","basis":"","pricingdate":"","content":"","msgname":"","idlreqno":"","host":"web01","Webservermember":"Web"},"payloadDataText":"","key":"web_app:appinternalstats_prod","destinationTopicName":"web_app_appinternalstats_realtimedata_topic","esindex":"web_app","estype":"appinternalstats_prod","useCase":"appinternalstats_prod","Code":"web_app"}
I need to be able to convert the timestamp, source, host and status fields within the payloadData section of each line into a data frame in R.
I've tried this:
library(rjson)
# Parse the file with rjson and print the parsed object as R code.
d <- fromJSON(file = "file.txt")
dput(d)
structure(list(senderDateTimeStamp = "2016/04/08 10:03:18", senderHost = NULL,
senderAppcode = "web", senderUsecase = "appinternalstats_prod",
destinationTopic = "web_appinternalstats_realtimedata_topic",
correlatedRecord = FALSE, needCorrelationCacheCleanup = FALSE,
needCorrelation = FALSE, correlationAttributes = NULL, correlationRecordCount = 0,
correlateTimeWindowInMills = 0, lastCorrelationRecord = FALSE,
realtimeESStorage = TRUE, receiverDateTimeStamp = 1460124283554,
payloadData = structure(list(timestamp = "2016-04-08T10:03:18.244",
status = "get", source = "MSG1",
region = "", evetid = "", osareqid = "", basis = "",
pricingdate = "", content = "", msgname = "", recipient = "",
objid = "", idlreqno = "", host = "web01", webservermember = "webSingleton"),
.Names = c("timestamp",
"status", "source", "region", "evetid",
"osareqid", "basis", "pricingdate", "content", "msgname",
"recipient", "objid", "idlreqno", "host", "webservermember"
)), payloadDataText = "", key = "web:appinternalstats_prod",
destinationTopicName = "web_appinternalstats_realtimedata_topic",
hdfsPath = "web/appinternalstats_prod", esindex = "web",
estype = "appinternalstats_prod", useCase = "appinternalstats_prod",
appCode = "web"), .Names = c("senderDateTimeStamp", "senderHost",
"senderAppcode", "senderUsecase", "destinationTopic", "correlatedRecord",
"needCorrelationCacheCleanup", "needCorrelation", "correlationAttributes",
"correlationRecordCount", "correlateTimeWindowInMills", "lastCorrelationRecord",
"realtimeESStorage", "receiverDateTimeStamp", "payloadData",
"payloadDataText", "key", "destinationTopicName", "hdfsPath",
"esindex", "estype", "useCase", "appCode"))
Any ideas how I could convert payloadData section of the json entry into a data frame?
This might be something you want:
library(rjson)

# The file holds one JSON document per line, so each non-empty line is parsed
# on its own. fromJSON(file = ) would return a single record, and iterating
# over that would walk its fields instead of the records.
json_lines <- readLines("file.txt", warn = FALSE)
json_lines <- json_lines[nzchar(trimws(json_lines))]
d <- lapply(json_lines, function(line) fromJSON(json_str = line))

# Extract one payloadData field from a parsed record; NA when it is absent, so
# that data.frame() always receives a length-one value.
payload_field <- function(record, key) {
  value <- record$payloadData[[key]]
  if (is.null(value)) NA_character_ else value
}

# One row per record with the four requested payloadData fields.
myDf <- do.call("rbind", lapply(d, function(x) {
  data.frame(TimeStamp = payload_field(x, "timestamp"),
             Source = payload_field(x, "source"),
             Host = payload_field(x, "host"),
             Status = payload_field(x, "status"))
}))
Consider the package tidyjson:
library(tidyjson)
library(magrittr)
json <- '{"senderDateTimeStamp":"2016/04/08 10:03:18","senderHost":null,"senderCode":"web_app","senderUsecase":"appinternalstats_prod","destinationTopic":"web_app_appinternalstats_realtimedata_topic","correlatedRecord":false,"needCorrelationCacheCleanup":false,"needCorrelation":false,"correlationAttributes":null,"correlationRecordCount":0,"correlateTimeWindowInMills":0,"lastCorrelationRecord":false,"realtimeESStorage":true,"receiverDateTimeStamp":1460124283554,"payloadData":{"timestamp":"2016-04-08T10:03:18.244","status":"get","source":"MSG1","ITEM":"TEST1","basis":"","pricingdate":"","content":"","msgname":"","idlreqno":"","host":"web01","Webservermember":"Web"},"payloadDataText":"","key":"web_app:appinternalstats_prod","destinationTopicName":"web_app_appinternalstats_realtimedata_topic","esindex":"web_app","estype":"appinternalstats_prod","useCase":"appinternalstats_prod","Code":"web_app"}'
# List the top-level keys of the document.
gather_keys(json)
# head() of above
# document.id key
# 1 1 senderDateTimeStamp
# 2 1 senderHost
# 3 1 senderCode
# 4 1 senderUsecase
# 5 1 destinationTopic
# 6 1 correlatedRecord

# Step into payloadData, list its keys and append each value as a string.
append_values_string(
  gather_keys(
    enter_object(json, "payloadData")
  )
)
# head() of above
# document.id key string
# 1 1 timestamp 2016-04-08T10:03:18.244
# 2 1 status get
# 3 1 source MSG1
# 4 1 ITEM TEST1
# 5 1 basis
# 6 1 pricingdate