Spark MLlib之零 构建通用的解析矩阵程序
在使用 Spark MLlib 时,有时需要用到一些基础的矩阵(向量),例如全零矩阵、全一矩阵,以及矩阵之间的运算操作。这里整理了一些常用的矩阵操作方法:
矩阵:
package utils
import java.util.Random
/**
 * Dense matrix of `Double` values, used to hold model parameters.
 *
 * Storage is an array of `rowNum` rows, each holding `columnNum` values, all
 * initially 0.0. Every initializer and arithmetic operator mutates this
 * instance in place and returns `this`, so calls can be chained but never
 * produce a copy.
 *
 * @param rowNum    number of rows
 * @param columnNum number of columns
 */
class DenseMatrix(rowNum: Int, columnNum: Int) extends Serializable {
  /** Backing storage, indexed as `matrix(row)(column)`. */
  var matrix: Array[Array[Double]] = Array.ofDim[Double](rowNum, columnNum)

  /** Number of rows this matrix was constructed with. */
  def rows(): Int = rowNum

  /** Number of columns this matrix was constructed with. */
  def columns(): Int = columnNum

  /** Returns the backing array of row `i` (the live array, not a copy). */
  def apply(i: Int): Array[Double] = matrix(i)

  /**
   * Resets every element to 0.
   *
   * @return this matrix
   */
  def zeros(): DenseMatrix = {
    for (i <- 0 until rowNum; j <- 0 until columnNum) {
      matrix(i)(j) = 0
    }
    this
  }

  /**
   * Fills the matrix, row by row, with pseudo-random values drawn from
   * `java.util.Random.nextDouble`.
   *
   * The generator is created with the fixed seed 42 on every call, so the
   * resulting values are identical from call to call.
   *
   * @return this matrix
   */
  def rand(): DenseMatrix = {
    val generator = new Random(42)
    for (i <- 0 until rowNum; j <- 0 until columnNum) {
      matrix(i)(j) = generator.nextDouble
    }
    this
  }

  /** Stores `value` at row `i`, column `j`. */
  def set(i: Int, j: Int, value: Double): Unit = {
    matrix(i)(j) = value
  }

  /** Returns the element at row `i`, column `j`. */
  def get(i: Int, j: Int): Double = matrix(i)(j)

  /**
   * Adds `scalar` to every element in place.
   *
   * @return this matrix
   */
  def +(scalar: Double): DenseMatrix = {
    for (i <- 0 until rowNum; j <- 0 until columnNum) {
      matrix(i)(j) += scalar
    }
    this
  }

  /**
   * Subtracts `scalar` from every element in place.
   *
   * @return this matrix
   */
  def -(scalar: Double): DenseMatrix = {
    // Delegate to scalar addition; calling `this - scalar` here would
    // re-enter this very method and never terminate.
    this + (-scalar)
  }

  /**
   * Adds `other` to this matrix element-wise, in place.
   *
   * Only the `rowNum` x `columnNum` region of `other` is read, so `other`
   * must be at least that large; `other` itself is not modified.
   *
   * @return this matrix
   */
  def +(other: DenseMatrix): DenseMatrix = {
    for (i <- 0 until rowNum; j <- 0 until columnNum) {
      matrix(i)(j) += other(i)(j)
    }
    this
  }

  /**
   * Subtracts `other` from this matrix element-wise, in place.
   *
   * Only the `rowNum` x `columnNum` region of `other` is read, so `other`
   * must be at least that large. The operand is left untouched: the
   * subtraction is done directly instead of negating `other` first, because
   * `*` mutates its receiver.
   *
   * @return this matrix
   */
  def -(other: DenseMatrix): DenseMatrix = {
    for (i <- 0 until rowNum; j <- 0 until columnNum) {
      matrix(i)(j) -= other(i)(j)
    }
    this
  }

  /**
   * Multiplies every element by `scalar` in place.
   *
   * @return this matrix
   */
  def *(scalar: Double): DenseMatrix = {
    for (i <- 0 until rowNum; j <- 0 until columnNum) {
      matrix(i)(j) *= scalar
    }
    this
  }
}
/** Companion of the `DenseMatrix` class; provides an entry point that does nothing. */
object DenseMatrix {
  /** No-op entry point: the arguments are ignored. */
  def main(args: Array[String]): Unit = ()
}
向量:
package utils
import scala.collection.mutable.HashMap
import org.apache.spark.util.Vector
/**
 * Sparse vector backed by a mutable `HashMap` from index to value.
 *
 * @param dimNum length of the dense vector produced by `*`
 */
class SparserVector(dimNum: Int) {
  /** Explicitly stored entries, keyed by index. */
  var elements = new HashMap[Int, Double]

  /**
   * Stores `value` at `index`, replacing any value already stored there.
   *
   * The index is not range-checked here; an index outside `[0, dimNum)` only
   * fails later, when `*` writes it into the dense array.
   */
  def insert(index: Int, value: Double): Unit = {
    elements(index) = value
  }

  /**
   * Builds a dense `Vector` of length `dimNum` holding every stored entry
   * multiplied by `scale`; positions without a stored entry stay 0.0.
   *
   * @throws ArrayIndexOutOfBoundsException if a stored index is outside
   *                                        `[0, dimNum)`
   */
  def *(scale: Double): Vector = {
    val dense = new Array[Double](dimNum)
    // Walk the entries directly rather than looking each key up a second time.
    elements.foreach { case (index, value) => dense(index) = scale * value }
    Vector(dense)
  }
}
/** Companion of the `SparserVector` class; provides an entry point that does nothing. */
object SparserVector {
  /** No-op entry point: the arguments are ignored. */
  def main(args: Array[String]): Unit = ()
}