# coding=utf-8
"""Train and inspect a multilayer perceptron on the MNIST digit dataset.

The script runs four stages in order:

1. load MNIST, preview a few images, flatten and scale the pixels, and
   one-hot encode the labels;
2. build a 784 -> 1000 -> 1000 -> 10 dense network with dropout;
3. train it, plot the training history, evaluate it on the test set and
   predict the test labels;
4. build a confusion matrix and a label/prediction table to inspect the
   misclassified digits.
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow
import keras
# NOTE(review): `keras.utils.np_utils` may be missing in recent Keras
# releases; `keras.utils.to_categorical` looks like the portable spelling --
# confirm against the installed version before switching.
from keras.utils import np_utils
from keras.datasets import mnist
from keras.models import Sequential  # linear stack of layers
from keras.layers import Dense  # fully connected layer
from keras.layers import Dropout  # regularisation against overfitting

np.random.seed(10)  # fix the NumPy seed so runs are repeatable


def plot_image(image):
    """Show a single image in a 2x2 inch figure using the 'binary' colormap."""
    fig = plt.gcf()
    fig.set_size_inches(2, 2)
    plt.imshow(image, cmap='binary')
    plt.show()


def plot_image_labels_prediction(image, lables, prediction, idx, num=10):
    """Plot up to 25 consecutive images with their labels (and predictions).

    Args:
        image: indexable collection of images.
        lables: true labels, indexed in parallel with ``image``.
        prediction: predicted labels indexed in parallel with ``image``;
            pass an empty sequence to show the true labels only.
        idx: index of the first image to show.
        num: number of images to show. Capped at 25 (the 5x5 grid) and at
            the number of images available from ``idx`` onwards.
    """
    fig = plt.gcf()
    fig.set_size_inches(12, 14)
    # Never ask for more cells than the grid holds or more images than exist
    # past `idx`; the original raised IndexError near the end of the data.
    num = min(num, 25, len(image) - idx)
    for offset in range(num):
        pos = idx + offset
        ax = plt.subplot(5, 5, offset + 1)
        ax.imshow(image[pos], cmap='binary')
        title = "lable=" + str(lables[pos])
        if len(prediction) > 0:
            title += ",predict=" + str(prediction[pos])
        ax.set_title(title, fontsize=10)
        ax.set_xticks([])
        ax.set_yticks([])
    plt.show()


def show_train_history(train_history, train, validation):
    """Plot one training metric against its validation counterpart per epoch.

    Args:
        train_history: object returned by ``model.fit``; its ``history``
            mapping is read.
        train: key of the training metric in ``train_history.history``.
        validation: key of the validation metric in the same mapping.
    """
    plt.plot(train_history.history[train])
    plt.plot(train_history.history[validation])
    plt.title('Train History')
    plt.ylabel(train)
    plt.xlabel('Epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()


# 1. Data preprocessing
(x_train_image, y_train_label), \
    (x_test_image, y_test_label) = mnist.load_data()  # download / read MNIST
print('train data=', len(x_train_image))
print('test data=', len(x_test_image))
print('x_train_image:', x_train_image.shape)
print('y_train_label:', y_train_label.shape)

# A bare expression only displays in a notebook; print it so a script run
# shows the value too.
print(y_train_label[0])

plot_image_labels_prediction(x_train_image, y_train_label, [], 0, 10)
plot_image_labels_prediction(x_test_image, y_test_label, [], 0, 10)

# Flatten every image into one float32 vector; the sample count is taken
# from the array instead of being hard-coded.
x_Train = x_train_image.reshape(len(x_train_image), -1).astype('float32')
x_Test = x_test_image.reshape(len(x_test_image), -1).astype('float32')
x_Train_normalize = x_Train / 255  # scale the features by 1/255
x_Test_normalize = x_Test / 255
# One-hot encode the training and test labels.
y_Train_OneHot = np_utils.to_categorical(y_train_label)
y_Test_OneHot = np_utils.to_categorical(y_test_label)

# 2. Build the model
model = Sequential()
# Input layer and first hidden layer
model.add(Dense(units=1000,
                input_dim=784,
                kernel_initializer='normal',
                activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(units=1000,
                kernel_initializer='normal',
                activation='relu'))
model.add(Dropout(0.5))
# Output layer
model.add(Dense(units=10,
                kernel_initializer='normal',
                activation='softmax'))
# summary() prints the table itself; wrapping it in print() also emitted a
# stray "None".
model.summary()

# 3. Training
# Configure the loss function, optimizer and reported metric.
model.compile(loss='categorical_crossentropy',
              optimizer='adam', metrics=['accuracy'])
# Hold out 20% of the training data for validation and train for 10 epochs
# with a batch size of 200.
train_history = model.fit(x=x_Train_normalize,
                          y=y_Train_OneHot, validation_split=0.2,
                          epochs=10, batch_size=200, verbose=2)

# Show the training curves. The accuracy metric is recorded as 'acc' by
# older Keras releases and as 'accuracy' by newer ones, so look it up
# instead of hard-coding one spelling.
acc_key = 'acc' if 'acc' in train_history.history else 'accuracy'
show_train_history(train_history, acc_key, 'val_' + acc_key)  # accuracy
show_train_history(train_history, 'loss', 'val_loss')  # loss

# Evaluate on the test data: (features, one-hot ground truth).
scores = model.evaluate(x_Test_normalize, y_Test_OneHot)
print()
print('accuracy', scores[1])

# Predict the test labels. The model was trained on the scaled features, so
# it must be fed the scaled test features as well (the original passed the
# raw 0-255 pixels). argmax over the softmax output replaces
# `predict_classes`, which newer Keras releases no longer provide.
prediction = np.argmax(model.predict(x_Test_normalize), axis=-1)
print(prediction)
plot_image_labels_prediction(x_test_image, y_test_label, prediction, idx=340)

# 4. Inspect the errors to help improve prediction accuracy
# Confusion matrix of true labels against predicted labels.
print(pd.crosstab(y_test_label, prediction,
                  rownames=['label'], colnames=['predict']))
# Table of true and predicted values.
df = pd.DataFrame({'label': y_test_label, 'predict': prediction})
print(df[:2])
# Test samples whose true label is 5 but which were predicted as 3.
print(df[(df.label == 5) & (df.predict == 3)])
plot_image_labels_prediction(x_test_image, y_test_label, prediction,
                             idx=340, num=1)
# Article link: http://task.lmcjl.com/news/12157.html