
R workshop hands-on exercises and instructions

There are a few examples you can run through the queue on Sherlock: Rmpi.R, snow-rmpi.R, and svd.R. The ksmooth files are for the example of compiling C code into an R library.

On Sherlock, copy the files to a directory that you should create for the workshop:

cp /share/sw/free/R/3.2.5/intel.tcltk/Tutorial/* /your/local/directory
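For example (the directory name here is just a suggestion):

mkdir -p $HOME/R_workshop
cp /share/sw/free/R/3.2.5/intel.tcltk/Tutorial/* $HOME/R_workshop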

To submit to the queue:

sbatch slurm.Rmpi

sbatch slurm.Rmpi.svd

sbatch slurm.snow.rmpi

To start an interactive R session for the hands-on exercises, follow these steps.

Note: you need to ssh -X yourSUNetID@sherlock.stanford.edu to get graphical displays working.
 
(this srun asks for 2 hours; make it longer or shorter as you like)
 
srun -N 1 -n 2 --time=2:00:00 --reservation=R_workshop  --x11 --pty bash
[zyzhang@sh-5-31 ~]$ ml load R/3.2.5.intel.tcltk
[zyzhang@sh-5-31 ~]$ R
> .libPaths()
[1] "/home/zyzhang/R/x86_64-pc-linux-gnu-library/3.3"   
[2] "/share/sw/free/R/3.2.5/intel.tcltk/lib64/R/library"
> .libPaths("/share/sw/free/R/3.2.5/intel.tcltk/lib64/R/library")
> require(Rmpi)
Loading required package: Rmpi

 

To submit a job to the queue, follow these steps.

Create a file named slurm.R with the following content:
 
#!/bin/bash
#SBATCH --nodes=2
#SBATCH --tasks-per-node=2
#SBATCH --time=00:30:00
#SBATCH -p normal
#SBATCH --reservation=R_workshop
 
module load R/3.2.5.intel.tcltk
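# Rmpi spawns its own slave processes at run time, so mpirun launches only one R process here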
mpirun -np 1 Rscript Rmpi.R
 
Then submit to the queue with the following: 
[zyzhang@sherlock-ln02 ~]$ sbatch slurm.R
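You can then check on the job with squeue:

squeue -u $USER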
 

The following are R code snippets that you can run in an interactive R session. They are designed so that you only need to copy and paste them into your session. If any problem occurs, it is most likely because a comment line got broken across lines or the required libraries are not loaded. Comments are the lines starting with "#". For each code snippet there are minimal explanations, mostly to separate the different pieces of code and describe what they do. If you are interested in a particular piece of code that illustrates a particular capability of R, try running it line by line to see what each line does and what output it produces.

I only had time to copy and paste the following without being able to format it in a way that makes it easy to navigate. My apologies. Hopefully it is not too difficult to scroll back and forth.

#[zyzhang@sherlock-ln02 ~]$ R
source("Rmpi.R")
install.packages("pkg")
remove.packages("pkg")
help(solve)
example(solve)
# getting help: ?<command-name> or ??<command-name>
ls(pos = "package:Rmpi")
.packages()
path.package("Rmpi")
installed.packages()
.libPaths()
.Library
 
a <- list(name="Joe", 4, foo=c(3,8,9)) # creates a list
seq(1, 10, 3) # creates a sequence
m <- array( c(1,2,3,4,5,6), dim=c(2,3) ) # creates a 2x3 matrix m
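Elements of the list a above can be accessed by name or by position:

a$name   # "Joe"
a[[2]]   # 4
a$foo[2] # 8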
 

# how to define a function

f <- function(a=10, b) {
  return(a + b)
}
f(20,40)
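Since a has a default value, it can be dropped when b is passed by name:

f(b=40) # a defaults to 10, so this returns 50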
 

# R: memory profiling

#install.packages("ggplot2"); #install.packages("pryr")
require(ggplot2); require(pryr)
 
sizes <- sapply(0:50, function(n) object_size(seq_len(n)))
plot(0:50, sizes, xlab = "Length", ylab = "Size (bytes)",  type = "s")
plot(0:50, sizes - 40, xlab = "Length",  ylab = "Bytes excluding overhead", type = "n")
abline(h = 0, col = "grey80")
abline(h = c(8, 16, 32, 48, 64, 128), col = "grey80")
abline(a = 0, b = 4, col = "grey90", lwd = 4)
lines(sizes - 40, type = "s")
x1 <- 1:1e6
y1 <- list(1:1e6, 1:1e6, 1:1e6)
object_size(x1); object_size(y1) 
object_size("banana"); object_size(rep("banana", 10))
 
mem_used() # mem_change(expr) reports the change in memory from evaluating expr
mem_change(x <- 1:1e6); mem_change(rm(x)); mem_change(NULL)
gc() # R does garbage collection automatically
 

# Try to understand memory leaks: f1 discards x, but f2 and f3 return a formula and a closure that capture the function's environment, so the large x stays alive.

 
f1 <- function() {  x <- 1:1e6;   10}
mem_change(x <- f1())
object_size(x) 
 
f2 <- function() {  x <- 1:1e6;   a ~ b}
mem_change(y <- f2())
object_size(y)
 
f3 <- function() {  x <- 1:1e6;   function() 10}
mem_change(z <- f3())
object_size(z)

 

#R: memory profiling with lineprof

 
require(devtools); require(lineprof); #devtools::install_github("hadley/lineprof")
read_delim <- function(file, header = TRUE, sep = ",") {
  first <- scan(file, what = character(1), nlines = 1,
    sep = sep, quiet = TRUE)
  p <- length(first)
  all <- scan(file, what = as.list(rep("character", p)),
    sep = sep, skip = if (header) 1 else 0, quiet = TRUE)
  all[] <- lapply(all, type.convert, as.is = TRUE)
  if (header) {
    names(all) <- first
  } else {
    names(all) <- paste0("V", seq_along(all))
  }
  as.data.frame(all)
}
library(ggplot2)
write.csv(diamonds, "diamonds.csv", row.names = FALSE)
source("read-delim.r")
prof <- lineprof(read_delim("diamonds.csv"))
#shine(prof)
prof
 

#Speed up R code: Rprof

system.time({n=10^7; data1=rnorm(n); data2=rnorm(10^7);
for(i in 1:(n-1)){data1[i]=data1[i]+data1[i+1]; data1[i]=exp(data1[i]^2)};
data1=data1*data2; matrix.data1=matrix(data1,nrow=100,byrow=TRUE);
matrix.data2=matrix(data2,nrow=100,byrow=TRUE);
almost.final.result=matrix.data1%*%t(matrix.data2);
final.result=solve(almost.final.result)})[]
 
Rprof("profiling.out")
n=10^7; data1=rnorm(n); data2=rnorm(10^7);
for(i in 1:(n-1)){data1[i]=data1[i]+data1[i+1]; data1[i]=exp(data1[i]^2)};
data1=data1*data2; matrix.data1=matrix(data1,nrow=100,byrow=TRUE);
matrix.data2=matrix(data2,nrow=100,byrow=TRUE);
almost.final.result=matrix.data1%*%t(matrix.data2); 
final.result=solve(almost.final.result)
Rprof()
summaryRprof("profiling.out")
 
line_RProf_function=function() {
  data1=rnorm(10^7)
  for(i in 1:(length(data1)-1)) {
    data1[i]=data1[i]+data1[i+1]
    data1[i]=exp(data1[i]^2)
  }
  data2=rnorm(10^7)
  data1=data2*data1
  matrix.data1=matrix(data1, nrow=100, byrow=TRUE)
  matrix.data2=matrix(data2, nrow=100, byrow=TRUE)
  almost.final.result=matrix.data1%*%t(matrix.data2)
  final.result=solve(almost.final.result)
}
Rprof("profiling.out", line.profiling=TRUE)
line_RProf_function()
Rprof()
 
summaryRprof("profiling.out", lines="show")
 
#writing to disk
system.time({data=rnorm(10^7)
write.table(data, "data.txt") })[]
 
# Speed up R code: measuring time
 
ptm <- proc.time()
for (i in 1:50) mad(stats::runif(500))
proc.time() - ptm
   
system.time(for (i in 1:50) mad(stats::runif(500)))
 
 

# The following are exercises for understanding vector allocation, operations, and speed.

 
n=100
a=NULL
system.time(for (i in 1:n) a=c(a, sqrt(i)))
system.time(for (i in 1:n) a=c(a,stats::runif(500)))
# Speed up R code: vectors
n=1000
a=NULL #  vector grows one by one
system.time(for (i in 1:n) {a=c(a,stats::runif(500))})
 
n=1000
a=vector(length=n) # vector declared, concatenation
system.time(for (i in 1:n) {print(i); a=c(a,stats::runif(500))})
system.time(for (i in 1:n) {a[i]=stats::runif(500)}) # element assignment (note: only the first of the 500 values is stored, with a warning)

# Speed up R code: vectors
n=1000
a=NULL
system.time(for (i in 1:n) {a=c(a,stats::runif(500))})
system.time(for (i in 1:n) {a=c(a,stats::runif(500))})
system.time(for (i in 1:n) {a=c(a,stats::runif(500))})
system.time(for (i in 1:n) {a=c(a,stats::runif(500))})
system.time(for (i in 1:n) {a=c(a,stats::runif(500))})
system.time(for (i in 1:n) {a=c(a,stats::runif(500))})

# Speed up R code: vectors
n=1000000
a=vector(length=n)
n=10000000
system.time(for(i in 1:n) a[i]=sqrt(i)*2)
 
 
n=10000000
a=vector(length=n)
system.time(for(i in 1:n) a[i]=sqrt(i)*2)
system.time({for(i in 1:n) a[i]=sqrt(i); a=a*2})
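For comparison, the fully vectorized version with no explicit loop at all is faster still:

system.time({a=sqrt(1:n)*2})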
# Speed up R code: vectors
m=100000
n=10000
a=matrix(nrow=m,ncol=n)
rm(a)
system.time({a=matrix(nrow=m,ncol=n)})
system.time({vrow=vector(length=m)}) 
system.time({vcol=vector(length=n)})
 
system.time(for(i in 1:m){for(j in 1:n){a[i,j]=vrow[i]*vcol[j]}})
 
system.time(for(i in 1:m){for(j in 1:n){a[i,j]=vrow[i]*vcol[j]}}) 
 
system.time(for(i in 1:m){a[i,]=vrow[i]*vcol}) # vector operation, row wise assignments
 
system.time(for(i in 1:n){a[,i]=vcol[i]*vrow}) # vector operation, column-wise assignments
   
 
system.time(for(i in 1:n){a[,i]=vcol[i]*vrow})
 
system.time(for(j in 1:n){for(i in 1:m){a[i,j]=vrow[i]*vcol[j]}})
 
 

# Speed up R code: vectors, matrices, and tables 

system.time({
a=NULL
for(i in 1:10000)a=c(a,i^2)})[]
 
system.time({
a=NULL
for(i in 1:1000000)a=c(a,i^2)})[]
 
system.time({a=vector(length=1000000)
for(i in 1:1000000)a[i]=i^2})[]
system.time({a=vector(length=1000000)
for(i in 1:1000000) a[i]=sin(i)*2*pi})[]
 
system.time({twopi=2*pi
a=vector(length=1000000)
for(i in 1:1000000) a[i]=sin(i)*twopi})[]
 
system.time({a=vector(length=1000000)
for(i in 1:1000000)a[i]=sin(i)
a=2*pi*a})[]
system.time({n=100000; y=rnorm(n);  w=vector(length=n); 
for(i in 1:n)w[i]=sum(y[1:i]<y[i])})[]
 
system.time({n=100000; y=rnorm(n); 
w=matrix(1:n,nrow=n,ncol=1)
w=apply(w,1,function(x) sum(y[1:x]<y[x]))})[]
 

# Exercise: compiling C functions and calling them from R

ksmooth1 <- function(data, xpts, h) {
  dens <- double(length(xpts))
  n <- length(data)
  for(i in 1:length(xpts)) {
    ksum <- 0
    for(j in 1:length(data)) {
      d <- xpts[i] - data[j]
      ksum <- ksum + dnorm(d / h)
    }
    dens[i] <- ksum / (n * h)
  }
  return(dens)
}
data=rchisq(500, 3)
xpts=seq(0, 10, length=10000)
h=0.75
system.time({fx_estimated=ksmooth1(data, xpts, h)})[[3]]
 

# The following is a C function that does the same thing and that can be compiled into an R shared library. Save it as ksmooth1.c.

#include <R.h>
#include <Rmath.h>
void kernel_smooth(double *data, int *n, double *xpts, 
int *nxpts, double *h, double *result){
    int i, j; double d, ksum; 
    for(i=0; i < *nxpts; i++){
        ksum = 0;
        for (j=0; j < *n; j++){
            d = xpts[i] - data[j];
            ksum += dnorm(d / *h, 0, 1, 0);
        }
        result[i] = ksum / ((*n) * (*h));
    }
}
 

# The following assumes that you are at a shell prompt, not inside an R session. It shows how to compile
# the C function into an R shared library. Make sure you have the R module loaded.

 
R CMD SHLIB ksmooth1.c 
 

# do the following in the directory where ksmooth1.c is located. 

 
ls ksmooth1.*
ksmooth1.c  ksmooth1.o ksmooth1.so
 

# Run the following in an R session and notice

# that it is almost 50 times faster!

dyn.load("ksmooth1.so")
ksmooth2 <- function(data, xpts, h) {
  n <- length(data)
  nxpts <- length(xpts)
  dens <- .C("kernel_smooth",
    as.double(data),
    as.integer(n),
    as.double(xpts),
    as.integer(nxpts),
    as.double(h),
    result = double(length(xpts)))
  return(dens[[6]])
}
data=rchisq(500, 3)
xpts=seq(0, 10, length=10000)
h=0.75
system.time({result=ksmooth2(data, xpts, h)})[[3]]
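As a quick sanity check (not part of the original slides), you can verify that the compiled version agrees with the pure-R ksmooth1, assuming fx_estimated from the earlier run is still in your session:

all.equal(fx_estimated, result) # should be TRUE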
 

# The following is for the OpenMP-enabled math library pnmath

library(pnmath)
ls(pos="package:pnmath")
getNumProcs()
getNumPnmathThreads()
system.time({A=matrix(1:10^9,nrow=1000); B=tan(sin(cos(tan(A))))})[]
 
system.time({A=matrix(1:10^9,nrow=1000); B=tan(sin(cos(tan(A))))})[]
 

# examples for doMC  

require(doMC)
# Loading required package: doMC
# Loading required package: foreach
# Loading required package: iterators
# Loading required package: parallel
# registerDoMC() # Default is 2 cores
# registerDoMC(32) # For example, register 32 cores
registerDoMC(8) # use 8 cores 
# getDoParWorkers() # Check number of registered cores
# registerDoSEQ() # For sequential, also necessary when 
# changing # of cores before registerDoMC()
getDoParWorkers()
 

# R in parallel: foreach with combine option

library(foreach) 
x <- foreach(a=1:1000, b=rep(10, 2)) %do% {a + b} # note: only 2 iterations; the shortest argument controls the iteration count
 
m <- matrix(rnorm(9), 3, 3)
foreach(i=1:ncol(m), .combine=c) %do%  mean(m[,i])
 
d <- data.frame(x=1:10, y=rnorm(10))
s <- foreach(d=iter(d, by='row'), .combine=rbind) %dopar% d
identical(d, s)
 
library(iterators)
 a <- matrix(1:16, 4, 4)
 b <- t(a)
 foreach(b=iter(b, by='col'), .combine=cbind) %dopar%   (a %*% b)
 
qsort <- function(x) {
  n <- length(x)
  if (n == 0) {
    x
  } else {
    p <- sample(n, 1)
    smaller <- foreach(y=x[-p], .combine=c) %:% when(y <= x[p]) %do% y
    larger  <- foreach(y=x[-p], .combine=c) %:% when(y >  x[p]) %do% y
    c(qsort(smaller), x[p], qsort(larger))
  }
}
qsort(runif(12))

 

# R in parallel: foreach and combine

cfun <- function(...) c(...) # .combine needs a function that exists; this simple one just concatenates
x <- foreach(i=1:4, .combine='cfun', .multicombine=TRUE) %do% rnorm(4)
foreach(i = 1:3) %do% sqrt(i)
x <- foreach(i=1:3, .combine='c') %do% exp(i)
x <- foreach(i=1:4, .combine='cbind') %do% rnorm(4)
x
 
library(iterators)
x <- foreach(a=irnorm(4, count=4), .combine='cbind') %do% a; x

 

#R in parallel: combine option

x <- iris[which(iris[,5] != "setosa"), c(1,5)]
trials <- 10000
ptime <- system.time({
r <- foreach(icount(trials), .combine=cbind) %dopar% {
ind <- sample(100, 100, replace=TRUE)
result1 <- glm(x[ind,2]~x[ind,1], family=binomial(logit))
coefficients(result1)}})[3]
ptime

#R in parallel: parallel

require(parallel, quiet=TRUE)
detectCores() 
n.cores <- detectCores()
pause <- function(i) {Sys.sleep(i)}
 
system.time(lapply(1:10, function(i) pause(0.25)))
system.time(mclapply(1:10, function(i) pause(0.25), mc.cores = n.cores))
 

#R in parallel: mcparallel()

require(parallel, quiet=TRUE)
detectCores() 
n.cores <- detectCores()
system.time({
   a1 <- mcparallel(1:5)
   a2 <- mcparallel(1:10)
   a3 <- mcparallel(1:15)
   a4 <- mcparallel(1:20)
   res <- mccollect(list(a1,a2,a3,a4)) })
 
  
getDoParRegistered()
getDoParWorkers()
registerDoMC()
getDoParRegistered()
getDoParWorkers()
registerDoMC(32)
getDoParWorkers()

 

# R in parallel: doMC 

library(doMC) #foreach, iterators, parallel
options(cores = 4)
registerDoMC()
system.time({data=vector(length=1000); data=foreach(i=1:1000) %dopar% {sqrt(1/(sin(i))^2)-sum(rnorm(10^6))};data=unlist(data) })[]
 
options(cores = 1)
registerDoMC()
system.time({data=vector(length=1000); data=foreach(i=1:1000) %dopar% {sqrt(1/(sin(i))^2)-sum(rnorm(10^6))};data=unlist(data) })[]
 

# Example of random forest, first run serially:

x <- matrix(runif(500), 100)
y <- gl(2, 50)
library(randomForest)
rf <- foreach(ntree=rep(250, 4), .combine=combine) %do%
randomForest(x, y, ntree=ntree)
rf
 

# run it in parallel

library(doMC,quiet=TRUE)
detectCores()
registerDoMC()
getDoParRegistered()
getDoParWorkers()
rf <- foreach(ntree=rep(250, 4), .combine=combine, .packages='randomForest') %dopar%
randomForest(x, y, ntree=ntree)
rf
 

# R in parallel: user-defined applyKernel functions.

applyKernel <- function(newX, FUN, d2, d.call, dn.call=NULL, ...) {
  ans <- vector("list", d2)
  for(i in 1:d2) {
    tmp <- FUN(array(newX[,i], d.call, dn.call), ...)
    if(!is.null(tmp)) ans[[i]] <- tmp
  }
  ans
}
applyKernel(matrix(1:16, 4), mean, 4, 4)
 
applyKernel <- function(newX, FUN, d2, d.call, dn.call=NULL, ...) {
foreach(i=1:d2) %dopar% { # iterate over dimensions
FUN(array(newX[,i], d.call, dn.call), ...)}}
applyKernel(matrix(1:16, 4), mean, 4, 4)
 
applyKernel <- function(newX, FUN, d2, d.call, dn.call=NULL, ...) {
foreach(x=iter(newX,by='col')) %dopar% { # use iter
FUN(array(x,d.call, dn.call),...)}}
applyKernel(matrix(1:16, 4), mean, 4, 4)
 
applyKernel <- function(newX, FUN, d2, d.call, dn.call=NULL, ...) {
foreach(x=iblkcol(newX, 3), .combine='c', .packages='foreach') %dopar% {
foreach(i=1:ncol(x)) %do% FUN(array(x[,i], d.call, dn.call), ...)}}
# iblkcol must be a real iterator over blocks of columns; a sketch follows below.
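A minimal sketch of such a block-column iterator, following the custom-iterator pattern from the foreach/iterators vignettes (illustrative rather than the official implementation), is:

iblkcol <- function(a, chunks) {
  # iterate over matrix 'a' in 'chunks' roughly equal groups of columns
  n <- ncol(a)
  i <- 1
  nextEl <- function() {
    if (chunks <= 0 || n <= 0) stop('StopIteration')
    m <- ceiling(n / chunks) # columns in this chunk
    r <- seq(i, length.out = m)
    i <<- i + m
    n <<- n - m
    chunks <<- chunks - 1
    a[, r, drop = FALSE]
  }
  obj <- list(nextElem = nextEl)
  class(obj) <- c('abstractiter', 'iter')
  obj
}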
applyKernel(matrix(1:16, 4), mean, 4, 4)
 

# R in parallel: Rmpi

require(Rmpi)
#help(mpi.apply)
x=c(1,7:10)
mpi.spawn.Rslaves(nslaves=5)
mpi.apply(x,runif)
 
meanx=1:5
meanx
mpi.apply(meanx,rnorm,n=2,sd=4)
 
mpi.hostinfo()
slave.hostinfo()
mpi.comm.size()
mpi.comm.rank()
paste("I am",mpi.comm.rank(),"of",mpi.comm.size())
mpi.remote.exec(paste("I am", mpi.comm.rank(),"of",mpi.comm.size()))
mpi.close.Rslaves()

 

# Communicating data/cmd and executing commands

library(Rmpi)
mpi.spawn.Rslaves(nslaves=5)
 
mpi.bcast.cmd(id<-mpi.comm.rank())
mpi.bcast.cmd(x<-paste("I am no.", id))
mpi.bcast.cmd(mpi.gather.Robj(x))
x<-"I am a master"
mpi.gather.Robj(x)
mpi.remote.exec(x) # note the difference between gather and remote execution
mpi.close.Rslaves()
mpi.quit()

 

# Send data/commands and execute commands

mpi.spawn.Rslaves(nslaves=3)
x<-c("fruits","apple","banana","orange")
mpi.bcast.cmd(x<-mpi.scatter.Robj(x))
x<-mpi.scatter.Robj(x)
mpi.remote.exec(x); x
mpi.bcast.cmd(x<-mpi.allgather.Robj(x)); x<-mpi.allgather.Robj(x)
mpi.remote.exec(x); x
mpi.close.Rslaves()
 

 

# Communicating data/cmd and executing commands

mpi.spawn.Rslaves(nslaves=3)
red<-function(option="sum"){
  mpi.reduce(x,type=2,op=option)}
mpi.bcast.Robj2slave(red)
x<-c(1,2,3,4)
mpi.bcast.cmd(x<-mpi.scatter.Robj(x)); x<-mpi.scatter.Robj(x)
mpi.remote.exec(x);  x
mpi.remote.exec(red("sum")); mpi.reduce(x,2,"sum")
x
mpi.close.Rslaves()
 

# Communicating data/cmd and executing commands

library(Rmpi)
mpi.spawn.Rslaves(nslaves=3)
srecv<-function(){
  if(mpi.comm.rank()==2)
    x <<- mpi.recv(x,1,0,1,1) # rank 2 receives one integer from the master (rank 0), tag 1
}
mpi.bcast.Robj2slave(srecv)
mpi.bcast.cmd(x<-integer(1)) # allocate a receive buffer on every slave
mpi.bcast.cmd(srecv()) # rank 2 now waits for the message
x<-as.integer(21.34)
x
mpi.send(x,1,2,1,1) # send x (integer type) to rank 2, tag 1
mpi.remote.exec(x) # rank 2 should now hold 21
mpi.close.Rslaves()
 
# R in parallel: doMPI
library(doMPI)
cl <- startMPIcluster(count=2)
registerDoMPI(cl)
# when you are completely done (these end the MPI session and quit R):
# closeCluster(cl); mpi.finalize(); mpi.quit()
 
x <- foreach(i=1:3, .combine="c") %dopar% sqrt(i)
 
x <- foreach(seed=c(7, 11, 13), .combine="cbind") %dopar% { 
set.seed(seed) ; rnorm(3) }

 

# parallel version of the code (uses %dopar%)

x <- seq(-8, 8, by=0.5) 
v <- foreach(y=x, .combine="cbind") %dopar% { 
r <- sqrt(x^2 + y^2) +.Machine$double.eps ; sin(r) / r } 
persp(x, x, v)

# serial version of the code (plain lapply)

x <- seq(-8, 8, by=0.5) 
sinc <- function(y) {r <- sqrt(x^2 + y^2) + .Machine$double.eps; sin(r)/r}
r <- lapply(x, sinc)
v <- do.call("cbind", r)
persp(x, x, v)

 

# R in parallel: doMPI/foreach, processing .csv files. You need to have the .csv files in the current directory!

ifiles <- list.files(pattern="\\.csv$") 
ofiles <- sub("\\.csv$", ".png", ifiles)
foreach(i=ifiles, o=ofiles, .packages="randomForest") %dopar% { 
d <- read.csv(i) 
rf <- randomForest(Species~., data=d, proximity=TRUE) 
png(filename=o) 
MDSplot(rf, d$Species) 
dev.off() 
NULL
}
 

# R in parallel: this code shows how to pass messages among slaves. If you are interested, you can read and understand it. Running it as-is will not work, because the function has to be shipped to spawned slaves first; see the sketch after the function.

message.pass <- function() {
 # Get this slave's rank
 myrank <- mpi.comm.rank()

 # Pick a partner: the next rank, wrapping around and skipping the master (rank 0)
 otherrank <- (myrank+1) %% mpi.comm.size()
 otherrank <- otherrank + (otherrank==0)

 # Send a message to the partner
 mpi.send.Robj(paste("I am rank",myrank), dest=otherrank, tag=myrank)
 # Receive a message & tag (the tag includes the source)
 recv.msg <- mpi.recv.Robj(mpi.any.source(), mpi.any.tag())
 recv.tag <- mpi.get.sourcetag()
 paste("Received message '", recv.msg, "' from process ", recv.tag[1], ".\n", sep="")
}
 
 

# R in parallel: Rmpi and snow

library(snow) 
ls(pos = "package:snow")
 
cl <- makeSOCKcluster(c("localhost","localhost"))
clusterApply(cl, 1:2, get("+"), 3)
clusterEvalQ(cl, library(boot))
 
x<-1; clusterExport(cl, "x")
clusterCall(cl, function(y) x + y, 2)
stopCluster(cl)
 
library(Rmpi); library(snow);
cluster <- makeMPIcluster(4)
sayhello <- function() { info <- Sys.info()[c("nodename", "machine")]
paste("Hello from", info[1], "with CPU type", info[2]) }
names <- clusterCall(cluster, sayhello)
print(unlist(names))
parallelSum <- function(m, n){ A <- matrix(rnorm(m*n), nrow = m, ncol = n)
row.sums <- parApply(cluster, A, 1, sum)
print(sum(row.sums))}
parallelSum(500, 500)
stopCluster(cluster)
 

#R in parallel: Rmpi and snow

library(snow); library(Rmpi)
nclus=6; cl <-makeMPIcluster(nclus)
n=2000; mc=2000000
mcperclus=round(mc/nclus)
x=matrix(runif(n),n,1)
x=cbind(1,x)
# estimatebetas: x is an n x 2 design matrix; nmc is the number of Monte Carlo columns per worker
estimatebetas=function(x,n,nmc){
e=matrix(rnorm(n*nmc),n,nmc)
y=1+rep(x[,2],nmc)+e
solve(t(x)%*%x)%*%t(x)%*%y }
ptim=proc.time()[3]
b=clusterCall(cl,estimatebetas,x=x,n=n,nmc=mcperclus)
b=cbind(b[[1]],b[[2]],b[[3]],b[[4]],b[[5]],b[[6]])
tim=proc.time()[3]-ptim
cat(dim(b)," ",apply(b,1,mean),"\n")
cat(mc," iterations with sample sizes of ",n," took ",tim,"seconds.\n")
stopCluster(cl)
 

# The following is Rmpi code for singular value decomposition. It uses the master/slave model: the master and the slaves each run a loop, sending messages to and receiving messages from one another. Most of the parallel R code you will write is unlikely to be more complicated than this, so once you understand the structure and order of execution of this piece of code, you should be able to write very complicated R code with Rmpi! You can run the following in an interactive session just by copying and pasting. There is also an example in the directory where you copied the files, which you can submit to the queue on Sherlock; it uses 2 nodes and 2 cores on each node. Remember that the submission script uses the reservation we made for the workshop, so after the workshop you need to modify the reservation part of the script.

 
library(Matrix)
library(irlba)
library("Rmpi")
 
nslvs <- 2
mpi.spawn.Rslaves(nslaves=nslvs)
 
if (mpi.comm.size() < 2) {
   print("More slave processes are required.")
   mpi.quit()}
 
.Last <- function(){
      if (is.loaded("mpi_initialize")){
         if (mpi.comm.size(1) > 0){
            print("Please use mpi.close.Rslaves() to close slaves.")
            mpi.close.Rslaves() }
         print("Please use mpi.quit() to quit R")
         .Call("mpi_finalize")}}
 
n=500 # dim of matrices
num.sv = n/2
m = 20  # number of matrices
set.seed(1234)
 
rand1 <- runif(m*n); rand2 <- runif(m*n)
x1 <- apply(cbind(rand1,rand2),1,min)
x2 <- abs(rand1-rand2)
x3 <- 1-x1-x2
 
# Sample 3 column indices from 1:n for each of the m*n sets of weights
col.ind.matrix <- sapply(1:(m*n),function(i) sample(1:n,size=3))
col.ind.asvector <- matrix(col.ind.matrix,nrow=(3*m*n))
row.ind.matrix <- sapply(1:n,function(x) rep(x,3)) # the row indices for one matrix
row.ind.asvector <- matrix(row.ind.matrix,nrow=(3*n))
weights.asvector <- matrix(rbind(x1,x2,x3),nrow=(3*m*n))
thedata = data.frame(weights=weights.asvector,row=row.ind.asvector,col=col.ind.asvector)
 
partialsvd <- function() {
  # tags sent to master: 1=ready_for_task, 2=done_task, 3=exiting
  # tags received from master: 1=task, 2=all_tasks_done
  junk <- 0
  done <- 0
  while (done != 1) {
    # Signal being ready to receive a new task
    mpi.send.Robj(junk,0,1)
    task <- mpi.recv.Robj(mpi.any.source(),mpi.any.tag()) # Receive a task
    task_info <- mpi.get.sourcetag()
    tag <- task_info[2]
    if (tag == 1) {
      matrixNumber <- task$matrixNumber
      # Construct the corresponding sparse matrix
      col.ind.temp = thedata$col[(3*n*(matrixNumber-1)+1):(3*n*matrixNumber)]
      weight.ind.temp = thedata$weights[(3*n*(matrixNumber-1)+1):(3*n*matrixNumber)]
      # Generate row indices
      row.ind.temp = matrix(sapply(1:n,function(x) rep(x,3)),nrow=(3*n))
      weight.mat = sparseMatrix(i=row.ind.temp,j=col.ind.temp,x=weight.ind.temp)
      result.svd = irlba(weight.mat,nu=num.sv)
      # Send the results back to the master
      results <- list(result=result.svd,matrixNumber=matrixNumber)
      mpi.send.Robj(results,0,2)
    } else if (tag == 2) {
      done <- 1
    } # ignore any unknown messages
  }
  # Tell the master this slave is exiting
  mpi.send.Robj(junk,0,3)
}
mpi.bcast.Robj2slave(thedata)
mpi.bcast.Robj2slave(n)
mpi.bcast.Robj2slave(num.sv)
mpi.bcast.cmd(library("Matrix")) # for using sparse matrices
mpi.bcast.cmd(library("irlba")) # for fast computation of singular values
mpi.bcast.Robj2slave(partialsvd) # Send the function to the slaves
mpi.bcast.cmd(partialsvd()) # Call the function in all the slaves
 
# Create task list
tasks <- vector('list')
for (i in 1:m) {    tasks[[i]] <- list(matrixNumber=i) }
# Create data structures to store the results
svd.result = list()
weightMat = list()
junk <- 0
closed_slaves <- 0
n_slaves <- mpi.comm.size()-1
t1.mpi <- proc.time()
 
while (closed_slaves < n_slaves) {
  # Receive a message from a slave
  message <- mpi.recv.Robj(mpi.any.source(),mpi.any.tag())
  message_info <- mpi.get.sourcetag()
  slave_id <- message_info[1]
  tag <- message_info[2]
  if (tag == 1) {
    # Slave is ready for a task: give it the next task, or tell it all tasks are done
    if (length(tasks) > 0) {
      # Send a task, and then remove it from the task list
      mpi.send.Robj(tasks[[1]], slave_id, 1)
      tasks[[1]] <- NULL
    } else {
      mpi.send.Robj(junk, slave_id, 2)
    }
  } else if (tag == 2) {
    # A result message: store it in the data structure
    matrixNumber <- message$matrixNumber
    svd.result[[matrixNumber]] <- message$result
  } else if (tag == 3) {
    closed_slaves <- closed_slaves + 1
  }
}
 
mpi.close.Rslaves()
t.mpi <- proc.time()-t1.mpi # system time for mpi implementation
 
# Extract the singular values
singular.vals = sapply(svd.result,with,d)
 
# Plot the approximate singular values found for the first 5 matrices
x = 1:(n/2)
matrixNumber = 1
plot(x=x,y=singular.vals[,1],ylab="approx. singular values", main=paste(n/2,"largest approximate singular values of matrices 1 to 5"))
for (matrixNumber in 2:5){
    points(x=singular.vals[,matrixNumber],col=matrixNumber)}
 

### The end !!!