## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) library("gpuMagic") ## ----------------------------------------------------------------------------- getDeviceList() ## ----------------------------------------------------------------------------- getDeviceInfo(1) setDevice(1) ## ----------------------------------------------------------------------------- gpuMagic.getMemUsage() ## ----------------------------------------------------------------------------- #C=A%*%B #Each time the function will compute a column of C matMul<-function(ind,A,B){ C=A%*%B[,ind] return(C) } ## ----------------------------------------------------------------------------- n=10 m=50 k=100 A=matrix(runif(n*m),n,m) B=matrix(runif(m*k),m,k) C_sapply=sapply(1:k,matMul,A,B) #The internal matrix operation function in R C_internel=A%*%B #Compute error max(abs(C_sapply-C_internel)) ## ----------------------------------------------------------------------------- C_gpu=gpuSapply(1:k,matMul,A,B) #Compute error max(abs(C_gpu-C_internel)) ## ----------------------------------------------------------------------------- n=1000 m=1000 k=1000 A=matrix(runif(n*m),n,m) B=matrix(runif(n*m),m,k) system.time( gpuSapply(1:k,matMul,A,B) ) system.time( A%*%B ) ## ----eval = FALSE------------------------------------------------------------- # n=sample(1:10,1) # A=matrix(0,n,n) ## ----------------------------------------------------------------------------- dynamic_example1<-function(ind,A,n){ B=matrix(0,n) C=A+B return(C) } n=10 A=matrix(runif(n),n) #The code below wouldn't work, #the value of n is unknown in the compilation stage #res_device=gpuSapply(1,dynamic_example1,A,n) #works, the value of n is knwown as a macro in the compilation stage res_device=gpuSapply(1,dynamic_example1,A,n,.macroParms = c("n")) ## ----------------------------------------------------------------------------- #Perform the column sum of the first n elements in each column of A dynamic_example2<-function(ind,A,n){ #Dynamically subsetting the first n elements in the ind column of the matrix A #Almost equivalent to A_dyn=A[1:n,ind] #The size of A_dyn is unknown in the compilation stage since the value of n is unknown A_dyn=subRef(A,1:n,ind) C=sum(A_dyn) return(C) } n=10 M=100 A=matrix(runif(M),M,M) res_device=gpuSapply(1:M,dynamic_example2,A,n) res_cpu=sapply(1:M,dynamic_example2,A,n) #check error range(res_device-res_cpu) ## ----------------------------------------------------------------------------- dynamic_return<-function(ind,A,n){ #Dynamically subsetting the first n elements in the ind column of the matrix A #equivalent to A_dyn=A[1:n,ind] A_dyn=subRef(A,1:n,ind) #return A_dyn, the size is unknown return(A_dyn) #The largest length of the variable A_dyn tmp=matrix(0,nrow(A)) #tell the compiler the return size should be at least the same as the variable tmp return(tmp) } n=10 M=20 A=matrix(runif(M*M),M,M) #Expect a warning message: Undetermined return size has been found suppressWarnings({ res_device=gpuSapply(1:M,dynamic_return,A,n) }) #retrieve the result (The first n rows) res_device=res_device[1:n,] #check error range(res_device-A[1:n,]) ## ----------------------------------------------------------------------------- largestK<-function(ind,A,k){ #get a column of A A_col=subRef(A,,ind) #the largest k values and indices of A_col largest_value=matrix(0,k) largest_ind=matrix(0,k) #loop over A_col to find the largest k values for(i in 1:length(A_col)){ for(j in 1:k){ if(A_col[i]>largest_value[j]){ if(j!=k){ #Backward assignment to avoid self assignment problem #(see section: Important note) ind_src=(k-1):j largest_value[ind_src+1]=largest_value[ind_src] largest_ind[ind_src+1]=largest_ind[ind_src] } largest_value[j]=A_col[i] largest_ind[j]=i break } } } return_nocpy(largest_ind) } N=1000 M=1000 k=10 A=matrix(runif(N*M),N,M) #Warmup warm_res=gpuSapply(1:M,largestK,A=A,k=k,.macroParms = "k") warm_res=sapply(1:M,largestK,A=A,k=k) #count the time system.time({res_device=gpuSapply(1:M,largestK,A=A,k=k,.macroParms = "k")}) system.time({res_cpu=sapply(1:M,largestK,A=A,k=k)}) #Check the error range(res_device-res_cpu) ## ----------------------------------------------------------------------------- computeLoss<-function(ind,x,y,parms){ #Find the parameters for the thread parm=parms[ind,] #Compute y hat, use no-copy transpose y_hat=x%*%t_nocpy(parm) #absolute loss value(L1 loss) loss=sum(abs(y-y_hat)) return(loss) } #Sample size n=1000 #Number of parameters p=2 beta=c(2,3) #Generate data x=matrix(runif(n*p),n,p) e=runif(n) y=x%*%beta+e #Brute-force search #The seaching range and precision search_range=seq(0,10,0.1) parms=expand.grid(search_range,search_range) #Warmup warm_res=gpuSapply(seq_len(nrow(parms)),computeLoss,x=x,y=y,parms=parms) warm_res=sapply(seq_len(nrow(parms)),computeLoss,x=x,y=y,parms=parms) #count the time system.time({res_device=gpuSapply(seq_len(nrow(parms)),computeLoss,x=x,y=y,parms=parms)}) system.time({res_cpu=sapply(seq_len(nrow(parms)),computeLoss,x=x,y=y,parms=parms)}) #print out the parameters which minimize the loss function parms[which.min(res_device),] parms[which.min(res_cpu),] ## ----------------------------------------------------------------------------- version sessionInfo()