Convert list elements to data frame column - reshape

Creating List:
my <- list(Low=c("21.1","126.8","78.3","32.4","34.5"),
High = ("301.3","17.3","20.3" ))
Display list output:
my
$`Low`
[1] "21.1" "126.8" "78.3" "32.4" "34.5"
$High
[1] "301.3" "17.3" "20.3"
Convert into Data Frame:
as.data.frame(my)
Getting Error:
Error in (function (..., row.names = NULL, check.rows = FALSE, check.names = TRUE, :
arguments imply differing number of rows: 5, 3
My Desired Output is: two column dataframe.
Low
21.1
126.8
78.3
32.4
34.5
High
301.3
17.3
20.3

Below one will work for your request
my <- list(Low=c("21.1","126.8","78.3","32.4","34.5"),
High = c("301.3","17.3","20.3" ))
n.obs <- sapply(my, length)
seq.max <- seq_len(max(n.obs))
my_df <- data.frame(sapply(my, "[", i = seq.max))
names(my_df) = names(my)
print(my_df)

Related

Error in coxModelFrame.coxph(object) : invalid object set x=TRUE in the call to coxph

The following example is for anyone who is building a Cox Proportional Hazards models and trying to produce prediction error curves, but get an error stating:
Error in coxModelFrame.coxph(object) : invalid object
set x=TRUE in the call to coxph.
Here is the code to reproduce the error:
LIBRARIES
library(survival)
library(survminer)
library(pec)
library(Hmisc)
library(rms)
library(riskRegression)
#install.packages("doMC", repos="http://R-Forge.R-project.org")
library(doMC)
The Data
#Load and store the data
lcOrig <- read.csv("cancer.csv")
#Replace all the 1's with 0's (censored)
lcOrig$status <- gsub(pattern = "1", replacement = "0", x = lcOrig$status, fixed = TRUE)
#Replace all the 2's with 1's (death)
lcOrig$status <- gsub (pattern = "2", replacement = "1", x = lcOrig$status, fixed = TRUE)
#Do the same thing for sex (0 = Males, 1 = Females)
lcOrig$sex <- gsub(pattern = "1", replacement = "0", x = lcOrig$sex, fixed = TRUE)
lcOrig$sex <- gsub(pattern = "2", replacement = "1", x = lcOrig$sex, fixed = TRUE)
#Change the class of these variables to integer.
lcOrig$status <- as.integer(lcOrig$status)
lcOrig$sex <- as.integer(lcOrig$sex)
lcOrig$ph.ecog <- as.integer(lcOrig$ph.ecog)
#Remove missing values and column with over 20% missing data.
apply(lcOrig, 2, function(x) sum(is.na(x))/length(x))
lcOrig <- lcOrig[, c(1:9, 11)]
lc <- lcOrig[complete.cases(lcOrig), ]
Cox Proportional Hazards
fitform1 <- Surv(time, status) ~ inst + age + sex + ph.ecog + ph.karno + pat.karno + wt.loss
cox1 <- coxph(fitform1, data = lc)
PREDICTION ERROR CURVES
extends <- function(...) TRUE
library("doMC")
registerDoMC()
set.seed(0692)
fitpec1 <- pec(list("CPH" = cox1), data = lc, formula = fitform1, splitMethod = "cv10", B = 5, keep.index = TRUE, keep.matrix = TRUE)
The last line of code results in the following error:
Error in coxModelFrame.coxph(object) : invalid object
set x=TRUE in the call to coxph
SOLUTION
Change:
cox1 <- coxph(fitform1, data = lc)
To:
cox1 <- coxph(fitform1, data = lc, x = TRUE)
This did not use to be a requirement 2 years ago, but is now. I hope this helps save you some time!

How to loop values of a variable through a nested while loop calling on a function in Python

I have a while loop that calls on a function (def update(i)) and performs the calculation for the required number of times (until the while loop condition is no longer met) in Python3. What I now want to do is put different values of 'i' through the while loop and therefore through the equation 'dv' (shown below). So when the while loop ends I need the whole process to repeat with the next 'i' value. All the i values are in an np.arange array called 'i_es'. I have tried to implement this with the while loop nested inside a for loop as shown below...
import numpy as np
def update(i) :
dv = (e_l - v_s[-1] + i*r_m)/tau_m
v_s.append(v_s[-1] + (dv*dt))
return is_spiked()
def is_spiked() :
if v_s[-1] > v_th:
v_s[-1] = v_r
return True
return False
r_m = 10
tau_m = 10
v_th = -40
e_l = -70
v_r = -70
v_s = [v_r]
spike_count = 0
t = 0
t_total = 1000
dt = 1
i_e_start = 4
i_e_step = 0.1
i_e_final = 5
i_es = np.arange(i_e_start,i_e_final+i_e_step,i_e_step)
for i in i_es :
while t < t_total :
if update(i) :
spike_count += 1
t += dt
print ("Current = ",+ i, " Spike count = ", + spike_count)
However, when I run this I get the following output:
Current = 4.0 Spike count = 71
Current = 4.1 Spike count = 71
Current = 4.2 Spike count = 71
Current = 4.3 Spike count = 71
Current = 4.4 Spike count = 71
Current = 4.5 Spike count = 71
Current = 4.6 Spike count = 71
Current = 4.7 Spike count = 71
Current = 4.8 Spike count = 71
Current = 4.9 Spike count = 71
Current = 5.0 Spike count = 71
I can see that the current ('i') values are increasing as they should each time but the spike count is not changing.. The answer is always from the first value ('i' = 4) run through the loop.
Can anyone help with this?
Thanks in advance.

Mean of one variable when another variable equals 1 in matlab

I would like to get the mean of my reaction time column when my stimnum column is equal to 1
I am not sure if i can do this with one simple line of code or if i need to do a for loop.
stimnum = randi([1 3], [1 100]);
y = 1 + 1.*randn(1, 100);
rt = (y.^2) +.01;
A = rand(1,100);
correct = A>=0.2;
Data= [stimnum; rt; correct ]';
Data = dataset({ Data, 'Stimnum', 'RT', 'Correct'});
rtmean = mean (Data.RT{Data.Stimnum == 1});

Lua splitting a string without separators

Is there an easy way to split a string like this:
M34a79 or M2ab943 or M4c4
into
M,34,a,79 or M,2,ab,943 or M,4,c,4
without any separators?
You can do it with a pair of gsub calls:
x = "M34a79 or M2ab943 or M4c4"
x, _ = x:gsub( "(%d)(%a)", "%1,%2" )
x, _ = x:gsub( "(%a)(%d)", "%1,%2" )
print( x )
M,34,a,79 or M,2,ab,943 or M,4,c,4
Might not work in all cases, but does work on your example.
If you don’t mind using the LPEG
library:
local lpeg = require "lpeg"
local C, Ct, P, R = lpeg.C, lpeg.Ct, lpeg.P, lpeg.R
local lpegmatch = lpeg.match
local extract
do
local digit = R"09"
local lower = R"az"
local comma = P","
local space = P" "
local schema = Ct( C(P"M")
* (digit^1 / tonumber)
* C(lower^1)
* (digit^1 / tonumber))
local extractor = Ct((schema + 1)^0)
extract = function (str)
return lpegmatch (extractor, str)
end
end
This will match all sequences of characters of the input
that consist of (in that order)
the letter M,
a sequence of 1 or more decimal digits,
a sequence of 1 or more lowercase characters, and
another sequence of 1 or more decimal digits.
When processing the input each match is put in a subtable,
the digits are converted to Lua numbers on the fly.
Since the question requested it, the leading M is included
in the entries.
Usage example:
local data = extract [[M34a79 or M2ab943 or M4c4]]
for i = 1, #data do
local elm = data[i]
print (string.format ("[%d] = { [1] = %q, [2] = %d, [3] = %q, [4] = %d },",
i, table.unpack (elm)))
end
Output:
[1] = { [1] = "M", [2] = 34, [3] = "a", [4] = 79 },
[2] = { [1] = "M", [2] = 2, [3] = "ab", [4] = 943 },
[3] = { [1] = "M", [2] = 4, [3] = "c", [4] = 4 },
Solution:
http://www.coronalabs.com/blog/2013/04/16/lua-string-magic/
function string:split( inSplitPattern, outResults )
...
end
function val(x)
x = x:gsub( "(%d)(%a)", "%1,%2" )
x = x:gsub( "(%a)(%d)", "%1,%2" )
Table = string.split(x,",")
for i = 1, #Table do
print( Table[i] )
end
end
val("M3a5")
returns M 3 a 5

constructing an identifier string for each row in data

I have the following data:
library(data.table)
d = data.table(a = c(1:3), b = c(2:4))
and would like to get this result (in a way that would work with arbitrary number of columns):
d[, c := paste0('a_', a, '_b_', b)]
d
# a b c
#1: 1 2 a_1_b_2
#2: 2 3 a_2_b_3
#3: 3 4 a_3_b_4
The following works, but I'm hoping to find something shorter and more legible.
d = data.table(a = c(1:3), b = c(2:4))
d[, c := apply(mapply(paste, names(.SD), .SD, MoreArgs = list(sep = "_")),
1, paste, collapse = "_")]
one way, only slightly cleaner:
d[, c := apply(d, 1, function(x) paste(names(d), x, sep="_", collapse="_")) ]
a b c
1: 1 2 a_1_b_2
2: 2 3 a_2_b_3
3: 3 4 a_3_b_4
Here is an approach using do.call('paste'), but requiring only a single call to paste
I will benchmark on a situtation where the columns are integers (as this seems a more sensible test case
N <- 1e4
d <- setnames(as.data.table(replicate(5, sample(N), simplify = FALSE)), letters[seq_len(5)])
f5 <- function(d){
l <- length(d)
o <- c(1L, l + 1L) + rep_len(seq_len(l) -1L, 2L * l)
do.call('paste',c((c(as.list(names(d)),d))[o],sep='_'))}
microbenchmark(f1(d), f2(d),f5(d))
Unit: milliseconds
expr min lq median uq max neval
f1(d) 41.51040 43.88348 44.60718 45.29426 52.83682 100
f2(d) 193.94656 207.20362 210.88062 216.31977 252.11668 100
f5(d) 30.73359 31.80593 32.09787 32.64103 45.68245 100
To avoid looping through rows, you can use this:
do.call(paste, c(lapply(names(d), function(n)paste0(n,"_",d[[n]])), sep="_"))
Benchmarking:
N <- 1e4
d <- data.table(a=runif(N),b=runif(N),c=runif(N),d=runif(N),e=runif(N))
f1 <- function(d)
{
do.call(paste, c(lapply(names(d), function(n)paste0(n,"_",d[[n]])), sep="_"))
}
f2 <- function(d)
{
apply(d, 1, function(x) paste(names(d), x, sep="_", collapse="_"))
}
require(microbenchmark)
microbenchmark(f1(d), f2(d))
Note: f2 inspired in #Ricardo's answer.
Results:
Unit: milliseconds
expr min lq median uq max neval
f1(d) 195.8832 213.5017 216.3817 225.4292 254.3549 100
f2(d) 418.3302 442.0676 451.0714 467.5824 567.7051 100
Edit note: previous benchmarking with N <- 1e3 didn't show much difference in times. Thanks again #eddi.

Resources