Getting warning message - warnings

I am trying to run this code:
x = 0
y = 0
newdata <- subset(data, subject_ids == 25773861)
for(i in newdata$classification_id){
if(newdata$value == "Yes"){
x = x + 1
} else {
y = y + 1
}
}
But keep getting this warning:
Warning messages:
1: In if (newdata$value_simple == 0) { :
the condition has length > 1 and only the first element will be used
2: In if (newdata$value_simple == 0) { :
the condition has length > 1 and only the first element will be used
Any advice or help in solving this?

The general code of
if(condition){
some code
}else{
some code}
only look at the first value in the vector. So it is basically warning you that it is only looking at the first value in the object newdata$value. I am assuming you are getting all in either x or y not a split like you would want.
The two things I would fix in that code, start at the first two lines of the for loop
x = 0
y = 0
newdata <- subset(data, subject_ids == 25773861)
for(i in seq_along(newdata$classification_id)){ #seq_along makes a vector 1 to the length of your vector
if(newdata$value[[i]] == "Yes"){ #This will subset the newdata$Value into single values
x = x + 1
} else {
y = y + 1
}
}
Another option is to use the tidyverse, assuming you have it installed
library(tidyverse)
data %>%
filter(subject_ids == 25773861) %>%
group_by(value) %>%
count()

Related

Can't save data using push button (MATLAB)

I'm trying to create a figure where the user can select cells to turn on or off. Then, the user can click a button 'Enter' to save the board as an array. I successfully found a way to create interactivity in my plot thanks to a very useful explanation I found here. I just made some changes to suit my needs.
However, I can't find a way to save the board. The button is working (or at least isn't not working), but the data isn't saved. And I don't know how to fix that. Any help would be appreciated.
Here is my code:
function CreatePattern
hFigure = figure;
hAxes = axes;
axis equal;
axis off;
hold on;
% create a button to calculate the difference between 2 points
h = uicontrol('Position',[215 5 150 30],'String','Enter','Callback', #SaveArray);
function SaveArray(ButtonH, eventdata)
global initial
initial = Board;
close(hFigure)
end
N = 1; % for line width
M = 20; % board size
squareEdgeSize = 1;
% create the board of patch objects
hPatchObjects = zeros(M,M);
for j = M:-1:1
for k = 1:M
hPatchObjects(M - j+ 1, k) = rectangle('Position', [k*squareEdgeSize,j*squareEdgeSize,squareEdgeSize,squareEdgeSize], 'FaceColor', [0 0 0],...
'EdgeColor', 'w', 'LineWidth', N, 'HitTest', 'on', 'ButtonDownFcn', {#OnPatchPressedCallback, M - j+ 1, k});
end
end
%global Board
Board = zeros(M,M);
playerColours = [1 1 1; 0 0 0];
xlim([squareEdgeSize M*squareEdgeSize]);
ylim([squareEdgeSize M*squareEdgeSize]);
function OnPatchPressedCallback(hObject, eventdata, rowIndex, colIndex)
% change FaceColor to player colour
value = Board(rowIndex,colIndex);
if value == 1
set(hObject, 'FaceColor', playerColours(2, :));
Board(rowIndex,colIndex) = 0; % update board
else
set(hObject, 'FaceColor', playerColours(1, :));
Board(rowIndex,colIndex) = 1; % update board
end
end
end
%imwrite(~pattern,'custom_pattern.jpeg')

Recall from nltk.metrics.score returning None

I'm trying to calculate the precision and recall using the nltk.metrics.score (http://www.nltk.org/_modules/nltk/metrics/scores.html) with my NLTK.NaiveBayesClassifier.
However, I stumble upon the error:
"unsupported operand type(s) for +: 'int' and 'NoneType".
which I suspect is from my 10-fold cross-validation where in some reference sets, there are zero negative (the data set is a bit imbalanced where 87% of it is positive).
According to nltk.metrics.score,
def precision(reference, test):
"Given a set of reference values and a set of test values, return
the fraction of test values that appear in the reference set.
In particular, return card(``reference`` intersection
``test``)/card(``test``).
If ``test`` is empty, then return None."
It seems that some of my 10-fold set is returning recall as None since there are no Negative in the reference set. Any idea on how to approach this problem?
My full code is as follow:
trainfeats = negfeats + posfeats
n = 10 # 5-fold cross-validation
subset_size = len(trainfeats) // n
accuracy = []
pos_precision = []
pos_recall = []
neg_precision = []
neg_recall = []
pos_fmeasure = []
neg_fmeasure = []
cv_count = 1
for i in range(n):
testing_this_round = trainfeats[i*subset_size:][:subset_size]
training_this_round = trainfeats[:i*subset_size] + trainfeats[(i+1)*subset_size:]
classifier = NaiveBayesClassifier.train(training_this_round)
refsets = collections.defaultdict(set)
testsets = collections.defaultdict(set)
for i, (feats, label) in enumerate(testing_this_round):
refsets[label].add(i)
observed = classifier.classify(feats)
testsets[observed].add(i)
cv_accuracy = nltk.classify.util.accuracy(classifier, testing_this_round)
cv_pos_precision = precision(refsets['Positive'], testsets['Positive'])
cv_pos_recall = recall(refsets['Positive'], testsets['Positive'])
cv_pos_fmeasure = f_measure(refsets['Positive'], testsets['Positive'])
cv_neg_precision = precision(refsets['Negative'], testsets['Negative'])
cv_neg_recall = recall(refsets['Negative'], testsets['Negative'])
cv_neg_fmeasure = f_measure(refsets['Negative'], testsets['Negative'])
accuracy.append(cv_accuracy)
pos_precision.append(cv_pos_precision)
pos_recall.append(cv_pos_recall)
neg_precision.append(cv_neg_precision)
neg_recall.append(cv_neg_recall)
pos_fmeasure.append(cv_pos_fmeasure)
neg_fmeasure.append(cv_neg_fmeasure)
cv_count += 1
print('---------------------------------------')
print('N-FOLD CROSS VALIDATION RESULT ' + '(' + 'Naive Bayes' + ')')
print('---------------------------------------')
print('accuracy:', sum(accuracy) / n)
print('precision', (sum(pos_precision)/n + sum(neg_precision)/n) / 2)
print('recall', (sum(pos_recall)/n + sum(neg_recall)/n) / 2)
print('f-measure', (sum(pos_fmeasure)/n + sum(neg_fmeasure)/n) / 2)
print('')
Perhaps not the most elegant, but guess the most simple fix would be setting it to 0 and the actual value if not None, e.g.:
cv_pos_precision = 0
if precision(refsets['Positive'], testsets['Positive']):
cv_pos_precision = precision(refsets['Positive'], testsets['Positive'])
And for the others as well, of course.

How to i load a json.rows file into R which has multiple inconsistent nested data?

I have 2 json.rows files with multiple nested data. I tried the below code to convert it into a dataframe and it worked for the first file.
tl <- function(e) { if (is.null(e)) return(NULL); ret <- typeof(e); if (ret == 'list' && !is.null(names(e))) ret <- list(type='namedlist') else ret <- list(type=ret,len=length(e)); ret; };
mkcsv <- function(v) paste0(collapse=',',v);
keyListToStr <- function(keyList) paste0(collapse='','/',sapply(keyList,function(key) if (is.null(key)) '*' else paste0(collapse=',',key)));
extractLevelColumns <- function(
nodes, ## current level node selection
..., ## additional arguments to data.frame()
keyList=list(), ## current key path under main list
sep=NULL, ## optional string separator on which to join multi-element vectors; if NULL, will leave as separate columns
mkname=function(keyList,maxLen) paste0(collapse='.',if (is.null(sep) && maxLen == 1L) keyList[-length(keyList)] else keyList) ## name builder from current keyList and character vector max length across node level; default to dot-separated keys, and remove last index component for scalars
) {
cat(sprintf('extractLevelColumns(): %s\n',keyListToStr(keyList)));
if (length(nodes) == 0L) return(list()); ## handle corner case of empty main list
tlList <- lapply(nodes,tl);
typeList <- do.call(c,lapply(tlList,`[[`,'type'));
if (length(unique(typeList)) != 1L) stop(sprintf('error: inconsistent types (%s) at %s.',mkcsv(typeList),keyListToStr(keyList)));
type <- typeList[1L];
if (type == 'namedlist') { ## hash; recurse
allKeys <- unique(do.call(c,lapply(nodes,names)));
ret <- do.call(c,lapply(allKeys,function(key) extractLevelColumns(lapply(nodes,`[[`,key),...,keyList=c(keyList,key),sep=sep,mkname=mkname)));
} else if (type == 'list') { ## array; recurse
lenList <- do.call(c,lapply(tlList,`[[`,'len'));
maxLen <- max(lenList,na.rm=T);
allIndexes <- seq_len(maxLen);
ret <- do.call(c,lapply(allIndexes,function(index) extractLevelColumns(lapply(nodes,function(node) if (length(node) < index) NULL else node[[index]]),...,keyList=c(keyList,index),sep=sep,mkname=mkname))); ## must be careful to translate out-of-bounds to NULL; happens automatically with string keys, but not with integer indexes
} else if (type%in%c('raw','logical','integer','double','complex','character')) { ## atomic leaf node; build column
lenList <- do.call(c,lapply(tlList,`[[`,'len'));
maxLen <- max(lenList,na.rm=T);
if (is.null(sep)) {
ret <- lapply(seq_len(maxLen),function(i) setNames(data.frame(sapply(nodes,function(node) if (length(node) < i) NA else node[[i]]),...),mkname(c(keyList,i),maxLen)));
} else {
## keep original type if maxLen is 1, IOW don't stringify
ret <- list(setNames(data.frame(sapply(nodes,function(node) if (length(node) == 0L) NA else if (maxLen == 1L) node else paste(collapse=sep,node)),...),mkname(keyList,maxLen)));
}; ## end if
} else stop(sprintf('error: unsupported type %s at %s.',type,keyListToStr(keyList)));
if (is.null(ret)) ret <- list(); ## handle corner case of exclusively empty sublists
ret;
}; ## end extractLevelColumns()
## simple interface function
flattenList <- function(mainList,...) do.call(cbind,extractLevelColumns(mainList,...));
but when i tried using the above function for my second file, I kept getting an error which said
Error in extractLevelColumns(lapply(nodes, `[[`, key), ..., keyList = c(keyList, :
error: inconsistent types (character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,character,cha
Here are a few sample images of the rows in my json file. The columns are very inconsistent.
https://i.stack.imgur.com/ZKgKk.png
https://i.stack.imgur.com/f3kNS.png
I know it's an old question but I recently faced a similar error while working with a nested list. Your error is because the function doesn't support the type inconsistencies between parallel nodes. So, one or more of your nodes have non-character type elements - either NULL or a list.
If it's NULL, you can convert the NULL to "NA" and it should work fine. If it's a list, unfortunately I couldn't make it work without throwing away information. I removed the node with type list and it worked.

Continuing a while loop after error in R

I am trying to use a while loop to download a bunch of JSON files.
They are numbered from 255 to 1. However, some of them are missing (for example, 238.json does not exist).
scheduleurl <- "http://blah.blahblah.com/schedulejsonfile="
i <- 255
while ( i > 0) {
last = paste0(as.character(i), ".json")
path = "/Users/User/Desktop/Temp"
fullpath = paste0(path, last)
ithscheduleurl <- paste0(scheduleurl, as.character(i))
download.file(ithscheduleurl, fullpath)
i <- i - 1
}
I basically want to write my while loop such that if it encounters a nonexisting file (as it will when i = 238), it basically continues to 237 instead of stopping.
I tried the tryCatch() function this way, but it didn't work (keeps trying the same URL) :
while ( i > 0) {
possibleError <- tryCatch({
last = paste0(as.character(i), ".json")
path = "/Users/dlopez/Desktop/Temp"
fullpath = paste0(path, last)
ithscheduleurl <- paste0(scheduleurl, as.character(i))
download.file(ithscheduleurl, fullpath)
i <- i - 1}
, error=function(e) e)
if(inherits(possibleError, "error")) {
next
}
}
Any help would be appreciated!
url.exists from the RCurl package should do the trick.
library(RCurl)
while ( i > 0) {
last = paste0(as.character(i), ".json")
path = "/Users/User/Desktop/Temp"
fullpath = paste0(path, last)
ithscheduleurl <- paste0(scheduleurl, as.character(i))
if (url.exists(ithscheduleurl)) {
download.file(ithscheduleurl, fullpath)
}
i <- i - 1
}

Display struct fields without the mess

I have a struct in Octave that contains some big arrays.
I'd like to know the names of the fields in this struct without having to look at all these big arrays.
For instance, if I have:
x.a=1;
x.b=rand(3);
x.c=1;
The obvious way to take a gander at the structure is as follows:
octave:12> x
x =
scalar structure containing the fields:
a = 1
b =
0.7195967 0.9026158 0.8946427
0.4647287 0.9561791 0.5932929
0.3013618 0.2243270 0.5308220
c = 1
In Matlab, this would appear as the more succinct:
>> x
x =
a: 1
b: [3x3 double]
c: 1
How can I see the fields/field names without seeing all these big arrays?
Is there a way to display a succinct overview (like Matlab's) inside Octave?
Thanks!
You might want to take a look at Basic Usage & Examples. There's several functions mentioned that sound like they'll control the displaying in the terminal.
struct_levels_to_print
print_struct_array_contents
These two functions sound like they're do what you want. I tried both and couldn't get the 2nd one to work. The 1st function changed the terminal output like so:
octave:1> x.a=1;
octave:2> x.b=rand(3);
octave:3> x.c=1;
octave:4> struct_levels_to_print
ans = 2
octave:5> x
x =
{
a = 1
b =
0.153420 0.587895 0.290646
0.050167 0.381663 0.330054
0.026161 0.036876 0.818034
c = 1
}
octave:6> struct_levels_to_print(0)
octave:7> x
x =
{
1x1 struct array containing the fields:
a: 1x1 scalar
b: 3x3 matrix
c: 1x1 scalar
}
I'm running a older version of Octave.
octave:8> version
ans = 3.2.4
If I get a chance I'll check that other function, print_struct_array_contents, to see if it does what you want. Octave 3.6.2 looks to be the latest version as of 11/2012.
Use fieldnames ()
octave:33> x.a = 1;
octave:34> x.b = rand(3);
octave:35> x.c = 1;
octave:36> fieldnames (x)
ans =
{
[1,1] = a
[2,1] = b
[3,1] = c
}
Or you you want it to be recursive, add the following to your .octaverc file (you may want to adjust it to your preferences)
function displayfields (x, indent = "")
if (isempty (indent))
printf ("%s: ", inputname (1))
endif
if (isstruct (x))
printf ("structure containing the fields:\n");
indent = [indent " "];
nn = fieldnames (x);
for ii = 1:numel(nn)
if (isstruct (x.(nn{ii})))
printf ("%s %s: ", indent, nn{ii});
displayfields (x.(nn{ii}), indent)
else
printf ("%s %s\n", indent, nn{ii})
endif
endfor
else
display ("not a structure");
endif
endfunction
You can then use it in the following way:
octave> x.a=1;
octave> x.b=rand(3);
octave> x.c.stuff = {2, 3, 45};
octave> x.c.stuff2 = {"some", "other"};
octave> x.d=1;
octave> displayfields (x)
x: structure containing the fields:
a
b
c: structure containing the fields:
stuff
stuff2
d
in Octave, version 4.0.0 configured for "x86_64-pc-linux-gnu".(Ubuntu 16.04)
I did this on the command line:
print_struct_array_contents(true)
sampleFactorList % example, my nested structure array
Output: (shortened):
sampleFactorList =
scalar structure containing the fields:
sampleFactorList =
1x6 struct array containing the fields:
var =
{
[1,1] = 1
[1,2] =
2 1 3
}
card =
{
[1,1] = 3
[1,2] =
3 3 3
}
To disable/get back to the old behaviour
print_struct_array_contents(false)
sampleFactorList
sampleFactorList =
scalar structure containing the fields:
sampleFactorList =
1x6 struct array containing the fields:
var
card
val
I've put this print_struct_array_contents(true) also into the .octaverc file.