Mark Bishop

Analytical Chemistry and Signal Acquisition/Analysis



/**
 * Copyright 2013 Mark Bishop
 * Adapted from example code provided by JCuda.org.
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details: http://www.gnu.org/licenses
 *
 *  The author makes no warranty for the accuracy, completeness, safety, or usefulness
 *  of any information provided and does not represent that its use would not infringe
 *  privately owned rights.
 */
 
import jcuda.Pointer;
import jcuda.Sizeof;
import jcuda.jcublas.JCublas;
import jcuda.runtime.JCuda;
 
/*
 * CUDA (cuBLAS) uses column-major, one-dimensional vector representations of matrices.
 */
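 
/*
 * Worked example (added for illustration, not part of the original listing):
 * the 2 x 3 matrix
 *
 *   | 1  2  3 |
 *   | 4  5  6 |
 *
 * is stored column major as the vector { 1, 4, 2, 5, 3, 6 }, whereas a
 * row-major flattening of the same matrix would give { 1, 2, 3, 4, 5, 6 }.
 * The methods below account for this difference when handing row-major
 * host data to cuBLAS.
 */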
 
public class GpuMatrixOps {
 
	/**
	 * Performs a trivial device allocation and free so that the CUDA
	 * runtime is initialized up front, rather than during the first
	 * real computation.
	 */
	public static void SeedGpu() {
		Pointer pointer = new Pointer();
		JCuda.cudaMalloc(pointer, 4);
		JCuda.cudaFree(pointer);
	}
 
	/**
	 * 
	 * @param A
	 *            Vector representation of A(m * n)
	 * @param B
	 *            Vector representation of B(m * n)
	 * @param m
	 *            number of rows
	 * @param n
	 *            number of columns
	 * @return C Vector representation of C(m * n) = A + B
	 */
	public static float[] GpuAdd(float[] A, float[] B, int m, int n) {
		float[] C = new float[n * m];
		float alpha = 1.0f;
		Pointer d_A = new Pointer();
		Pointer d_B = new Pointer();
		/* Initialize JCublas */
		JCublas.cublasInit();
		/* Allocate device memory */
		JCublas.cublasAlloc(m * n, Sizeof.FLOAT, d_A);
		JCublas.cublasAlloc(m * n, Sizeof.FLOAT, d_B);
		/* Initialize the device matrices with the host matrices */
		JCublas.cublasSetVector(m * n, Sizeof.FLOAT, Pointer.to(A), 1, d_A, 1);
		JCublas.cublasSetVector(m * n, Sizeof.FLOAT, Pointer.to(B), 1, d_B, 1);
		/* Compute B = alpha * A + B on the device (SAXPY); with alpha = 1 this is A + B */
		JCublas.cublasSaxpy(m * n, alpha, d_A, 1, d_B, 1);
		/* Read the result back */
		JCublas.cublasGetVector(m * n, Sizeof.FLOAT, d_B, 1, Pointer.to(C), 1);
		/* Free device memory. */
		JCublas.cublasFree(d_A);
		JCublas.cublasFree(d_B);
		/* Shut down JCublas */
		JCublas.cublasShutdown();
		return C;
	}
 
	/**
	 * 
	 * @param A
	 *            float[][] representation of A(m,n)
	 * @param B
	 *            float[][] representation of B(m,n)
	 * @return C float[][] representation of C(m,n) = A + B
	 */
	public static float[][] GpuAdd(float[][] A, float[][] B) {
		int m = A.length;
		int n = A[0].length;
		float[] h_A = Utilities.To1D(A);
		float[] h_B = Utilities.To1D(B);
		float[] h_C = GpuAdd(h_A, h_B, m, n);
		return Utilities.To2D(h_C, m, n);
	}
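 
	/*
	 * Usage sketch (added for illustration, not part of the original
	 * listing): adds two small matrices on the GPU and prints the result.
	 * Assumes a CUDA-capable device and the Utilities class are available.
	 */
	public static void demoAdd() {
		float[][] A = { { 1f, 2f }, { 3f, 4f } };
		float[][] B = { { 5f, 6f }, { 7f, 8f } };
		float[][] C = GpuAdd(A, B); // expected { {6, 8}, {10, 12} }
		for (float[] row : C) {
			System.out.println(java.util.Arrays.toString(row));
		}
	}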
 
	/**
	 * 
	 * @param A
	 *            Vector representation of A(m * p)
	 * @param B
	 *            Vector representation of B(p * n)
	 * @param m
	 *            number of rows of A
	 * @param n
	 *            number of columns of B
	 * @param p
	 *            number of columns of A / rows of B
	 * @return Column-major vector representation of C(m * n) = AB
	 */
	public static float[] GpuMultiply(float[] A, float[] B, int m, int n, int p) {
		float[] C = new float[m * n];
		Pointer d_A = new Pointer();
		Pointer d_B = new Pointer();
		Pointer d_C = new Pointer();
		float alpha = 1.0f;
		float beta = 0.0f;
		JCublas.cublasInit();
		JCublas.cublasAlloc(m * p, Sizeof.FLOAT, d_A);
		JCublas.cublasAlloc(p * n, Sizeof.FLOAT, d_B);
		JCublas.cublasAlloc(m * n, Sizeof.FLOAT, d_C);
		JCublas.cublasSetVector(m * p, Sizeof.FLOAT, Pointer.to(A), 1, d_A, 1);
		JCublas.cublasSetVector(p * n, Sizeof.FLOAT, Pointer.to(B), 1, d_B, 1);
		JCublas.cublasSetVector(m * n, Sizeof.FLOAT, Pointer.to(C), 1, d_C, 1);
		/*
		 * cuBLAS expects column-major storage, but the host arrays are row
		 * major. A row-major A(m, p) occupies the same memory as a
		 * column-major A-transpose(p, m), so both operands are passed with
		 * the 't' flag to transpose them back; the product C(m, n) is
		 * returned in column-major order.
		 */
		JCublas.cublasSgemm('t', 't', m, n, p, alpha, d_A, p, d_B, n, beta,
				d_C, m);
 
		JCublas.cublasGetVector(m * n, Sizeof.FLOAT, d_C, 1, Pointer.to(C), 1);
		JCublas.cublasFree(d_A);
		JCublas.cublasFree(d_B);
		JCublas.cublasFree(d_C);
		JCublas.cublasShutdown();
		return C;
	}
 
	/**
	 * 
	 * @param A
	 *            Row major matrix as float[m][p]
	 * @param B
	 *            Row major matrix as float[p][n]
	 * @return C[m][n] = AB
	 */
	public static float[][] GpuMultiply(float[][] A, float[][] B) {
		int m = A.length;
		int n = B[0].length;
		int p = A[0].length;
		float[] h_A = Utilities.To1D(A);
		float[] h_B = Utilities.To1D(B);
		float[] h_C = GpuMultiply(h_A, h_B, m, n, p);
		return CpuMatrixOps.Transpose(Utilities.To2D(h_C, n, m));
	}
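 
	/*
	 * Usage sketch (added for illustration, not part of the original
	 * listing): multiplies a 2 x 3 matrix by a 3 x 2 matrix on the GPU.
	 * Assumes a CUDA-capable device and the Utilities class are available.
	 */
	public static void demoMultiply() {
		float[][] A = { { 1f, 2f, 3f }, { 4f, 5f, 6f } };
		float[][] B = { { 7f, 8f }, { 9f, 10f }, { 11f, 12f } };
		float[][] C = GpuMultiply(A, B); // expected { {58, 64}, {139, 154} }
		for (float[] row : C) {
			System.out.println(java.util.Arrays.toString(row));
		}
	}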
 
	/**
	 * 
	 * @param alpha scalar multiplier
	 * @param A Vector representation of A(m * n)
	 * @return alpha * A
	 */
	public static float[] Scale(float alpha, float[] A) {
		int mn = A.length;
		float[] alphaA = new float[mn];
 
		Pointer d_A = new Pointer();
		JCublas.cublasInit();
		JCublas.cublasAlloc(mn, Sizeof.FLOAT, d_A);
		JCublas.cublasSetVector(mn, Sizeof.FLOAT, Pointer.to(A), 1, d_A, 1);
 
		JCublas.cublasSscal(mn, alpha, d_A, 1);
 
		JCublas.cublasGetVector(mn, Sizeof.FLOAT, d_A, 1, Pointer.to(alphaA), 1);
		JCublas.cublasFree(d_A);
		JCublas.cublasShutdown();
 
		return alphaA;
	}
 
	/**
	 * 
	 * @param alpha scalar multiplier
	 * @param A A(m,n)
	 * @return alpha * A
	 */
	public static float[][] Scale(float alpha, float[][] A) {
		int m = A.length;
		int n = A[0].length;
		float[] h_A = Utilities.To1D(A);
		float[] h_B = Scale(alpha, h_A);
		return Utilities.To2D(h_B, m, n);
	}
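 
	/*
	 * Usage sketch (added for illustration, not part of the original
	 * listing): scales a 2 x 2 matrix by 0.5 on the GPU.
	 */
	public static void demoScale() {
		float[][] A = { { 2f, 4f }, { 6f, 8f } };
		float[][] halfA = Scale(0.5f, A); // expected { {1, 2}, {3, 4} }
		for (float[] row : halfA) {
			System.out.println(java.util.Arrays.toString(row));
		}
	}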
 
}
 
 
 