Matplotlib学习笔记(一)

"Matplotlib学习笔记"

Posted by jhljx on January 28, 2018

目录

1. Pandas读取CSV
2. Pandas写入CSV

matplotlib画图无法显示中文:

```python
from pylab import *
mpl.rcParams['font.sans-serif'] = ['SimHei']
```

注意下面画图的方法:

```python
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(xMat[:,1].flatten().A[0], yMat[:,0].flatten().A[0], marker='o', s=10, c='red')  # marker可以取'o'或者'^'
xCopy = xMat.copy()
xCopy.sort(0)
yHat = xCopy*ws
ax.plot(xCopy[:,1], yHat)
plt.show()
```

ax.plot(ridgeWeights)在画矩阵的时候按照什么规则来画?

```python
import matplotlib.pyplot as plt
plt.style.use('ggplot')  # 将风格设置为类似R中的ggplot库
```

```python
fig, ax = plt.subplots(figsize=(6,4))
ax.hist(df['petal width'], color='black');
ax.set_ylabel('Count', fontsize=12)
ax.set_xlabel('Width', fontsize=12)
plt.title('Iris Petal Width', fontsize=14, y=1.01)
```

上面的plt.title('Iris Petal Width', fontsize=14, y=1.01)与ax.set_title('Petal Scatterplot')的区别?

```python
fig, ax = plt.subplots(2,2, figsize=(6,4))  # 返回的ax是一个数组
ax[0][0].hist(df['petal width'], color='black');
ax[0][0].set_ylabel('Count', fontsize=12)
ax[0][0].set_xlabel('Width', fontsize=12)
ax[0][0].set_title('Iris Petal Width', fontsize=14, y=1.01)
```

fig.tight_layout() 在matplotlib中,用subplots画子图时,有时候需要调整子图间距,包括子图与边框的间距,子图间上下间距,子图间左右间距,可以使用fig.tight_layout()函数。

fig.tight_layout与plt.tight_layout

```python
fig, ax = plt.subplots(figsize=(6,6))
bar_width = .8
labels = [x for x in df.columns if 'length' in x or 'width' in x]
ver_y = [df[df['class']=='Iris-versicolor'][x].mean() for x in labels]
vir_y = [df[df['class']=='Iris-virginica'][x].mean() for x in labels]
set_y = [df[df['class']=='Iris-setosa'][x].mean() for x in labels]
x = np.arange(len(labels))
ax.bar(x, vir_y, bar_width, bottom=set_y, color='darkgrey')
ax.bar(x, set_y, bar_width, bottom=ver_y, color='white')
ax.bar(x, ver_y, bar_width, color='black')
ax.set_xticks(x + (bar_width/2))
ax.set_xticklabels(labels, rotation=-70, fontsize=12);
ax.set_title('Mean Feature Measurement By Class', y=1.01)
ax.legend(['Virginica','Setosa','Versicolor'])
```

matplotlib.pyplot和pylab的区别 https://stackoverflow.com/questions/16849483/which-is-the-recommended-way-to-plot-matplotlib-or-pylab

pylab is a convenience module that bulk imports matplotlib.pyplot (for plotting) and numpy (for mathematics and working with arrays) in a single name space. Although many examples use pylab, it is no longer recommended. pylab將許多常用的module集中到統一的namespace,目標是提供一個類matlab的工作環境,使用者無需自行import所需功能。不過import explicitly是編程的好習慣,讓命名空間乾淨些,如無必要應避免使用pylab。

```python
import matplotlib.pyplot as plt
dataMat, labelMat = loadDataSet()
dataArr = np.array(dataMat)
n = np.shape(dataArr)[0]
xcord1, ycord1, xcord2, ycord2 = [], [], [], []
for i in range(n):
    if(int(labelMat[i]) == 1):
        xcord1.append(dataArr[i,1])
        ycord1.append(dataArr[i,2])
    else:
        xcord2.append(dataArr[i,1])
        ycord2.append(dataArr[i,2])
fig = plt.figure()
ax = fig.add_subplot(111)
ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
ax.scatter(xcord2, ycord2, s=30, c='green')
x = np.arange(-3.0, 3.0, 0.1)
y = (-weights[0] - weights[1] * x) / weights[2]
ax.plot(x, y)
plt.xlabel('X1')
plt.ylabel('X2')
plt.show()
```

Matplotlib用法

```python
fig = plt.figure(1, facecolor='white',figsize=(7,7), dpi=105)
fig.clf()
axprops = dict(xticks=[], yticks=[])
createPlot.ax1 = plt.subplot(111, frameon=False, **axprops)
# 全局变量totalW存储树的宽度,totalD存储树的深度
plotTree.totalW = float(getNumLeafs(inTree))
plotTree.totalD = float(getTreeDepth(inTree))
# 树的宽度用于计算放置判断节点的位置,主要的计算原则是将它放在所有叶子节点中间,而不仅仅是它子节点的中间。
# 下面的两个全局变量用来追踪已经绘制的节点位置,以及放置下一个节点的恰当位置。
plotTree.xOff = -0.5/plotTree.totalW
plotTree.yOff = 1.0
plotTree(inTree, (0.5,1.0), '')
plt.show()
```

RuntimeWarning: More than 20 figures have been opened. Figures created through the pyplot interface (matplotlib.pyplot.figure) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam figure.max_num_figures). max_open_warning, RuntimeWarning)

matplotlib.pyplot

```python
plt.clf()
plt.cla()
fig, ax = plt.subplots()
# plt.xticks(xrange, xticks)  # 这个貌似不管用
label_text = gene_name + ', score:' + str(gene_score)
ax.scatter(x, y, color=c, s=2.0, label=label_text)
ax.boxplot(box_data,sym='')
ax.set_xticks(xrange)
ax.set_xticklabels(xticks)
ax.set_xlim([0, len(merged_stage) - 0.5])
ax.set_ylim([0, 1.0])
ax.set_title(gene_name + " methylation for different cancer stage")
plt.legend()
plt.savefig(out_fig_path)
plt.close('all')  # 注意关闭,否则容易出warning,plt.close(fig)或者plt.close('all')
```

其他可视化库:Seaborn

```python
import seaborn as sns
sns.pairplot(df, hue="class")
```

hue参数是啥意思?

```python
fig, ax = plt.subplots(2, 2, figsize=(7, 7))
sns.set(style='white', palette='muted')
sns.violinplot(x=df['class'], y=df['sepal length'], ax=ax[0,0])  # 为四个特征分别生成了小提琴图,小提琴图显示了特征的分布
sns.violinplot(x=df['class'], y=df['sepal width'], ax=ax[0,1])
sns.violinplot(x=df['class'], y=df['petal length'], ax=ax[1,0])
sns.violinplot(x=df['class'], y=df['petal width'], ax=ax[1,1])
fig.suptitle('Violin Plots', fontsize=16, y=1.03)
for i in ax.flat:
    plt.setp(i.get_xticklabels(), rotation=-90)
fig.tight_layout()
```

ax.scatter(df['sepal width'][:50], df['sepal length'][:50])可以直接绘制pandas的dataframe

```python
fig,ax = plt.subplots(figsize=(10,6))
plt.scatter(np.arange(len(fares['price'])),fares['price']);
```

其他可视化库:folium (热图)

daily_rtn.hist(bins=50, color='lightblue', figsize=(12,8))

dataframe画图:

```python
fig, ax = plt.subplots(figsize=(15,10))
rf[rf['predicted']==1]['$ chg'].plot(kind='bar')
ax.set_title('Model Predicted Buys', y=1.01)
ax.set_ylabel('$ Change Open to Close')
ax.set_xlabel('Index')
```

matplotlib画箱线图

```python
f_importances = clf_rf.feature_importances_
f_names = X_train
f_std = np.std([tree.feature_importances_ for tree in clf_rf.estimators_], axis=0)

zz = zip(f_importances, f_names, f_std)
zzs = sorted(zz, key=lambda x: x[0], reverse=True)

imps = [x[0] for x in zzs[:20]]
labels = [x[1] for x in zzs[:20]]
errs = [x[2] for x in zzs[:20]]

plt.subplots(figsize=(15,10))
plt.bar(range(20), imps, color="r", yerr=errs, align="center")
plt.xticks(range(20), labels, rotation=-70);
```