performance - Double for-loop operation in R (with an example) -


please @ following small working example:

#### pseudo data nobs1 <- 4000 nobs2 <- 5000 mylon1 <- runif(nobs1, min=0, max=1)-76 mylat1 <- runif(nobs1, min=0, max=1)+37 mylon2 <- runif(nobs2, min=0, max=1)-76 mylat2 <- runif(nobs2, min=0, max=1)+37  #### define distance function thedistance <- function(lon1, lat1, lon2, lat2) {  r <- 6371 # earth mean radius [km]  delta.lon <- (lon2 - lon1)  delta.lat <- (lat2 - lat1)  <- sin(delta.lat/2)^2 + cos(lat1) * cos(lat2) * sin(delta.lon/2)^2  c <- 2 * asin(min(1,sqrt(a)))  d = r * c  return(d) }  ptm <- proc.time()  #### calculate distances between locations # initiate resulting distance vector ndistance <- nobs1*nobs2 # number of distances mydistance <- vector(mode = "numeric", length = ndistance)  k=1 (i in 1:nobs1) {  (j in 1:nobs2) {   mydistance[k] = thedistance(mylon1[i],mylat1[i],mylon2[j],mylat2[j])   k=k+1  } }  proc.time() - ptm 

the computation time:

  user  system elapsed  249.85    0.16  251.18 

here, question whether there still room speeding double for-loop calculation. thank much.

here's option decreases runtime ~2 seconds on machine because part of vectorized.

a direct comparison original solution follows.

test data:

nobs1 <- 4000 nobs2 <- 5000 mylon1 <- runif(nobs1, min=0, max=1)-76 mylat1 <- runif(nobs1, min=0, max=1)+37 mylon2 <- runif(nobs2, min=0, max=1)-76 mylat2 <- runif(nobs2, min=0, max=1)+37 

original solution:

#### define distance function thedistance <- function(lon1, lat1, lon2, lat2) {   r <- 6371 # earth mean radius [km]   delta.lon <- (lon2 - lon1)   delta.lat <- (lat2 - lat1)   <- sin(delta.lat/2)^2 + cos(lat1) * cos(lat2) * sin(delta.lon/2)^2   c <- 2 * asin(min(1,sqrt(a)))   d = r * c   return(d) }  ptm <- proc.time()  #### calculate distances between locations # initiate resulting distance vector ndistance <- nobs1*nobs2 # number of distances mydistance <- vector(mode = "numeric", length = ndistance)  k=1 (i in 1:nobs1) {   (j in 1:nobs2) {     mydistance[k] = thedistance(mylon1[i],mylat1[i],mylon2[j],mylat2[j])     k=k+1   } }  proc.time() - ptm    user      system     elapsed  148.243       0.681     148.901  

my approach:

# modified (vectorized) distance function: thedistance2 <- function(lon1, lat1, lon2, lat2) {   r <- 6371 # earth mean radius [km]   delta.lon <- (lon2 - lon1)   delta.lat <- (lat2 - lat1)   <- sin(delta.lat/2)^2 + cos(lat1) * cos(lat2) * sin(delta.lon/2)^2   c <- 2 * asin(pmin(1,sqrt(a)))   # pmin instead of min   d = r * c   return(d) }  ptm2 <- proc.time()  lst <- vector("list", length = nobs1)  (i in seq_len(nobs1)) {     lst[[i]] = thedistance2(mylon1[i],mylat1[i],mylon2,mylat2) }  res <- unlist(lst)  proc.time() - ptm2    user      system     elapsed   1.988       0.331       2.319  

are results equal?

all.equal(mydistance, res) #[1] true 

Comments

Popular posts from this blog

python - mat is not a numerical tuple : openCV error -

c# - MSAA finds controls UI Automation doesn't -

wordpress - .htaccess: RewriteRule: bad flag delimiters -