performance - Double for-loop operation in R (with an example) -
please @ following small working example:
#### pseudo data nobs1 <- 4000 nobs2 <- 5000 mylon1 <- runif(nobs1, min=0, max=1)-76 mylat1 <- runif(nobs1, min=0, max=1)+37 mylon2 <- runif(nobs2, min=0, max=1)-76 mylat2 <- runif(nobs2, min=0, max=1)+37 #### define distance function thedistance <- function(lon1, lat1, lon2, lat2) { r <- 6371 # earth mean radius [km] delta.lon <- (lon2 - lon1) delta.lat <- (lat2 - lat1) <- sin(delta.lat/2)^2 + cos(lat1) * cos(lat2) * sin(delta.lon/2)^2 c <- 2 * asin(min(1,sqrt(a))) d = r * c return(d) } ptm <- proc.time() #### calculate distances between locations # initiate resulting distance vector ndistance <- nobs1*nobs2 # number of distances mydistance <- vector(mode = "numeric", length = ndistance) k=1 (i in 1:nobs1) { (j in 1:nobs2) { mydistance[k] = thedistance(mylon1[i],mylat1[i],mylon2[j],mylat2[j]) k=k+1 } } proc.time() - ptm
the computation time:
user system elapsed 249.85 0.16 251.18
here, question whether there still room speeding double for-loop calculation. thank much.
here's option decreases runtime ~2 seconds on machine because part of vectorized.
a direct comparison original solution follows.
test data:
nobs1 <- 4000 nobs2 <- 5000 mylon1 <- runif(nobs1, min=0, max=1)-76 mylat1 <- runif(nobs1, min=0, max=1)+37 mylon2 <- runif(nobs2, min=0, max=1)-76 mylat2 <- runif(nobs2, min=0, max=1)+37
original solution:
#### define distance function thedistance <- function(lon1, lat1, lon2, lat2) { r <- 6371 # earth mean radius [km] delta.lon <- (lon2 - lon1) delta.lat <- (lat2 - lat1) <- sin(delta.lat/2)^2 + cos(lat1) * cos(lat2) * sin(delta.lon/2)^2 c <- 2 * asin(min(1,sqrt(a))) d = r * c return(d) } ptm <- proc.time() #### calculate distances between locations # initiate resulting distance vector ndistance <- nobs1*nobs2 # number of distances mydistance <- vector(mode = "numeric", length = ndistance) k=1 (i in 1:nobs1) { (j in 1:nobs2) { mydistance[k] = thedistance(mylon1[i],mylat1[i],mylon2[j],mylat2[j]) k=k+1 } } proc.time() - ptm user system elapsed 148.243 0.681 148.901
my approach:
# modified (vectorized) distance function: thedistance2 <- function(lon1, lat1, lon2, lat2) { r <- 6371 # earth mean radius [km] delta.lon <- (lon2 - lon1) delta.lat <- (lat2 - lat1) <- sin(delta.lat/2)^2 + cos(lat1) * cos(lat2) * sin(delta.lon/2)^2 c <- 2 * asin(pmin(1,sqrt(a))) # pmin instead of min d = r * c return(d) } ptm2 <- proc.time() lst <- vector("list", length = nobs1) (i in seq_len(nobs1)) { lst[[i]] = thedistance2(mylon1[i],mylat1[i],mylon2,mylat2) } res <- unlist(lst) proc.time() - ptm2 user system elapsed 1.988 0.331 2.319
are results equal?
all.equal(mydistance, res) #[1] true
Comments
Post a Comment