Is there a better way to do the below? I am using R's data.table to carry out sampling.
It tries to sample from a table (samp.from.data) using weights, drawing a specific number of rows per group based on a count, so that the sampled values can be added to the original data...
# Weighted sampling per group with data.table.
#
# For every group `cp`, draw as many rows from `samp.from.data` as that group
# has rows in `orig.data` (the per-group size is stored in `count.data`),
# sampling with replacement and with probability proportional to `weight`.
# The sampled `uid`s are then attached to `orig.data`.
library(data.table)

# Example data ----

# Number of rows each group `cp` contributes to orig.data.
count.data <- data.table(
  cp = letters[1:10],
  count = sample(10:60, 10, replace = TRUE)
)

# Original data: group `cp` is repeated `count` times; `vc` is random filler.
orig.data <- data.table(
  cp = rep(letters[1:10], times = count.data$count),
  vc = sample(letters[1:6], size = sum(count.data$count), replace = TRUE)
)

# Check that count.data is a faithful representation of orig.data.
# (data.table equivalent of group_by(cp) %>% summarise(count = n()).)
orig.data[, .(count = .N), by = cp]

# Pool to sample from: 20 candidate rows per group, each with a weight.
samp.from.data <- data.table(
  cp = rep(letters[1:10], each = 20),
  uid = seq_len(200L),
  weight = runif(200, 1, 2)
)

# Sampling ----

# Keying sorts every table by `cp`, so the tables share one group order.
setkey(count.data, "cp")
setkey(samp.from.data, "cp")
setkey(orig.data, "cp")

# Attach each group's target sample size to every candidate row.
ll <- count.data[samp.from.data, ]

# Within each group, draw `count` candidate rows with replacement, weighted
# by `weight`. `count` is constant within a group, so its first value is used.
ll1 <- ll[
  ,
  .SD[sample(.N, count[1L], replace = TRUE, prob = weight)],
  by = cp
]
setkey(ll1, "cp")

# Add the sampled values to the original data ----

# The assignment below is positional, so make sure the sampled rows line up
# group-for-group with orig.data before attaching them.
stopifnot(identical(ll1$cp, orig.data$cp))

# Is there a better way of sampling and adding to the original data more
# directly? `:=` at least adds the column by reference instead of copying.
orig.data[, uid := ll1$uid]
Comments
Post a Comment