目录
import numpy as np >>> import numpy as np >>> group = np.array([[1, 1.1], [1,1], [0,0], [0,0.1]]) >>> group array([[ 1. , 1.1], [ 1. , 1. ], [ 0. , 0. ], [ 0. , 0.1]]) >>> groupMat = np.mat(group) >>> groupMat matrix([[ 1. , 1.1], [ 1. , 1. ], [ 0. , 0. ], [ 0. , 0.1]]) sIdx = groupMat[:,0] == 1
numpy筛选符合条件的索引
np.where() np.nonzero()
mat0 = groupMat[np.nonzero(groupMat[:,0] == 1)[0],:]
mat0 = dataSet[np.nonzero(dataSet[:, feature] > value)[0], :][0] mat1 = dataSet[np.nonzero(dataSet[:, feature] <= value)[0], :][0] 在上面两式的末尾加[0]与不加有什么区别?
fig = plt.figure() ax = fig.add_subplot(111) ax.scatter(mat0[:,0].flatten().A[0], mat0[:,1].flatten().A[0], s=8, c='red') plt.show()
np.array的arr.shape, arr.shape[0]与np.shape()函数的关系
np.tile函数
import numpy
numpy.tile([0,0],5)#在列方向上重复[0,0]5次,默认行1次
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
numpy.tile([0,0],(1,1))#在列方向上重复[0,0]1次,行1次
array([[0, 0]])
numpy.tile([0,0],(2,1))#在列方向上重复[0,0]1次,行2次
array([[0, 0],
[0, 0]])
numpy.tile([0,0],(3,1))
array([[0, 0],
[0, 0],
[0, 0]])
numpy.tile([0,0],(1,3))#在列方向上重复[0,0]3次,行1次
array([[0, 0, 0, 0, 0, 0]])
numpy.tile([0,0],(2,3)) #在列方向上重复[0,0]3次,行2次
array([[0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0]])
numpy sum函数,sort函数,argsort()函数, srtInd = xMat[:,1].argsort(0) xSort = xMat[srtInd][:,0,:]
python内置排序函数:sorted(classCount.iteritems(), key=operator.itemgetter(1), reverse=True)
dataSetSize = dataSet.shape[0]
#计算距离
diffMat = tile(inX, (dataSetSize,1)) - dataSet
sqDiffMat = diffMat**2
sqDistances = sqDiffMat.sum(axis=1)
distances = sqDistances**0.5
sortedDistIndicies = distances.argsort()
classCount={}
#选取距离最小的k个点
for i in range(k):
voteIlabel = labels[sortedDistIndicies[i]]
classCount[voteIlabel] = classCount.get(voteIlabel,0) + 1
#排序
sortedClassCount = sorted(classCount.iteritems(), key=operator.itemgetter(1), reverse=True)
归一化 minVals = dataSet.min(0) maxVals = dataSet.max(0) ranges = maxVals - minVals normDataSet = zeros(shape(dataSet)) m = dataSet.shape[0] normDataSet = dataSet - tile(minVals, (m,1)) normDataSet = normDataSet/tile(ranges, (m,1)) #element wise divide return normDataSet, ranges, minVals
np.sum函数
np.zeros()函数,np.zeros((m,1))参数是一个元组吗?
numpy matrix的 mat.copy()函数。使用copy函数会重新分配内存吗?
numpy 矩阵过滤功能 alphas[alphas>0] numpy数组过滤问题
def stumpClassify(dataMatrix, dimen, threshVal, threshIneq): retArray = ones((shape(dataMatrix)[0],1)) #生成二维数组,有shape(dataMatrix)[0]个元素,每个元素为一个list,有一个元素 if threshIneq == 'lt': retArray[dataMatrix[:,dimen] <= threshVal] = -1.0 else: retArray[dataMatrix[:,dimen] > threshVal] = -1.0 return retArray
errArr[predictedVals == labelMat] = 0
np.nonzero()函数 np.multiply()
np.mat(classLabels).transpose()与mat.T
mat.A是什么意思?
np.exp(), np.log(), np.min(), np.max()
np.ones()和np.zeros()一样,都是返回numpy.ndarray类型,而不是numpy.matrix类型,所以不能用flatten().A[0]这种
expon = multiply(-1*alpha*mat(classLabels).T, classEst) D = multiply(D, exp(expon)) D = D/D.sum() ufunc 'multiply' did not contain a loop with signature matching types dtype('S32') dtype('S32') dtype('S32') 这个错误说明其中一个数组为字符串,不能直接对应元素相乘
np中matrix的mat.argsort()
rangeMin = dataMatrix[:,i].min(); rangeMax = dataMatrix[:,i].max();
np.max(), np.min()
np.inf
np.sign()
err = np.mat(np.ones((m,1)))
numpy linalg.det(xTx)计算行列式
numpy linalg.solve()函数 ws = xTx.I * (xMat.T * yMat)等价于ws = linalg.solve(xTx, xMat.T * yMat)
numpy corrcoef方法 corrcoef(yHat.T, yMat) corrcoef(yHat, testMat[:,1], rowvar=0)[0, 1]
yHat.T.A
np.mean
np.var
numpy如何计算方差
regularize函数(注意:numpy本身没有regularize函数,这里的regularize是自定义的标准化函数) xMat = np.mat(xArr) yMat = np.mat(yArr).T yMean = np.mean(yMat, 0) yMat = yMat - yMean xMat = regularize(xMat)
map函数是什么意思?(map是Python内置函数,不是numpy的函数) fltLine = map(float, curLine)
dataSet[:, -1].T.tolist()[0]
np.power
np.eye函数
np.cov函数 covMat = np.cov(meanRemoved, rowvar=0) eigVals, eigVects = linalg.eig(np.mat(covMat))计算特征向量 eigValInd = np.argsort(eigVals) eigValInd = eigValInd[:-(topNfeat+1):-1] #list的用法倒序 redEigVects = eigVects[:, eigValInd]
meanVal = np.mean(datMat[np.nonzero(~np.isnan(datMat[:,i].A))[0], i]) datMat[np.nonzero(np.isnan(datMat[:,i].A))[0], i] = meanVal
numpy linalg.svd函数 U, Sigma, VT = np.linalg.svd(corMat) 返回的Sigma是一个array,只包含原对角矩阵主对角线上的元素 U, Sigma, VT = np.linalg.svd(dataMat) Sig4 = np.mat(np.eye(4) * Sigma[:4]) #建立对角矩阵
from numpy import linalg as la la.norm(inA - inB)
linalg.norm函数可以计算范数
mat.A与mat.getA()相同
np.logical_and函数 overLap = np.nonzero(np.logical_and(dataMat[:, item].A > 0, dataMat[:, j].A > 0))[0]