Using the data.table library
is a faster solution, because setorder
is faster than both order
and sort
:
library(data.table)
#' Indices of the `n_top` largest scores (data.table implementation)
#'
#' Pairs every score with its original position, sorts the pairs by
#' descending score with `data.table::setorder()`, and returns the positions
#' of the leading rows.
#'
#' @param scores A vector of scores to rank.
#' @param n_top Number of top-scoring positions to return. Values larger than
#'   `length(scores)` are capped, so the result never contains padding `NA`s.
#' @return An integer vector of at most `n_top` positions into `scores`,
#'   ordered from the highest score downwards.
select_top_n <- function(scores, n_top) {
  d <- data.table(
    # setorder() reorders columns by reference; copy() guarantees the
    # caller's vector is never touched.
    x = copy(scores),
    # seq_along() instead of seq(1, length(scores)), which yields c(1, 0)
    # for a zero-length input.
    indice = seq_along(scores)
  )
  # setorder() defaults to na.last = FALSE, which would rank NA scores first;
  # na.last = TRUE matches the behaviour of order().
  setorder(d, -x, na.last = TRUE)
  # seq_len() instead of 1:n_top: 1:0 is c(1, 0) and would return one element
  # when none were requested.
  d$indice[seq_len(min(n_top, nrow(d)))]
}
#' Indices of the `n_top` largest scores (base `order()` implementation)
#'
#' @param scores A vector of scores to rank.
#' @param n_top Number of top-scoring positions to return. Values larger than
#'   `length(scores)` are capped, so the result never contains padding `NA`s.
#' @return An integer vector of at most `n_top` positions into `scores`,
#'   ordered from the highest score downwards.
select_top_n2 <- function(scores, n_top) {
  # decreasing = TRUE rather than negating the input: same ranking for
  # numeric scores, and it also works for types that cannot be negated.
  ranking <- order(scores, decreasing = TRUE)
  # seq_len() instead of 1:n_top: 1:0 is c(1, 0) and would return one element
  # when none were requested.
  ranking[seq_len(min(n_top, length(ranking)))]
}
#' Indices of the `n_top` largest scores (base `sort()` implementation)
#'
#' @param scores A vector of scores to rank.
#' @param n_top Number of top-scoring positions to return. Values larger than
#'   the number of sorted elements are capped, so the result never contains
#'   padding `NA`s.
#' @return An integer vector of at most `n_top` positions, taken from the
#'   `ix` component returned by `sort(..., index.return = TRUE)`, ordered
#'   from the highest score downwards.
select_top_n3 <- function(scores, n_top) {
  # Sort the `scores` argument itself; the previous version read a variable
  # named `s` from the calling environment and ignored its own parameter.
  ranking <- sort(scores, index.return = TRUE, decreasing = TRUE)$ix
  # seq_len() instead of 1:n_top: 1:0 is c(1, 0) and would return one element
  # when none were requested.
  ranking[seq_len(min(n_top, length(ranking)))]
}
Testing:
# Benchmark the three implementations on the same random input.
library(microbenchmark)

# Fixed seed so the generated scores are reproducible.
set.seed(123)
s <- runif(100000)

# Time each implementation 10 times while extracting the top 100 indices.
mbm <- microbenchmark(
  ind1 = select_top_n(s, 100),
  ind2 = select_top_n2(s, 100),
  ind3 = select_top_n3(s, 100),
  times = 10L
)
Output:
Unit: milliseconds
expr min lq mean median uq max neval
ind1 5.824576 5.98959 6.209746 6.052658 6.270312 7.422736 10
ind2 9.627950 10.08661 10.274867 10.377451 10.560912 10.588223 10
ind3 10.397383 11.32129 12.087122 12.498817 12.856840 13.155845 10
For further reference, see "Getting the top values by group".