# ====== Sample code ======
# Basic MLP (multi-layer perceptron) network trained on the MNIST digits.
# Nice documentation is available at https://keras.io/
#
# NOTE: this script uses the Keras 1 argument names `init` and `nb_epoch`;
# Keras 2 spells them `kernel_initializer` and `epochs`.

# Step 1: import classes and functions
import numpy  # package for scientific computing, support for large arrays etc.
from keras.datasets import mnist  # dataset
from keras.models import Sequential  # model
from keras.layers import Dense  # layer
from keras.layers import Dropout  # layer
from keras.utils import np_utils  # for transforming data
import matplotlib.pyplot as plt  # to plot images

# Step 2: load the MNIST data set
(X_train, y_train), (X_test, y_test) = mnist.load_data()  # Keras function

# Print the shape of the training set: a tuple holding the number of images,
# their height and their width, i.e. (60000, 28, 28).
# Comment out if you do not want to print.
print(X_train.shape)

# Plot a few images with matplotlib's subplot function (documented on the
# official matplotlib webpage). Its arguments are the number of rows, the
# number of columns and the index of the subplot being drawn. The three-digit
# shorthand used here (e.g. 221) only works while all three values are below
# 10; otherwise pass them comma-separated, e.g. plt.subplot(2, 2, 1).
# You can experiment. Comment out if you do not want to plot.
plt.subplot(221)
plt.imshow(X_train[0], cmap=plt.get_cmap('gray'))  # first image of the training set
plt.subplot(222)
plt.imshow(X_train[134], cmap=plt.get_cmap('gray'))  # 135th image of the training set
plt.subplot(223)
plt.imshow(X_test[2444], cmap=plt.get_cmap('gray'))  # 2445th image of the test set
plt.subplot(224)
plt.imshow(X_test[3], cmap=plt.get_cmap('gray'))  # 4th image of the test set
# show the plot
plt.show()

# Step 3: preprocess input data for Keras
# Flatten each 28*28 image to a 784-element vector stored as 32-bit floats.
num_pixels = X_train.shape[1] * X_train.shape[2]
X_train = X_train.reshape(X_train.shape[0], num_pixels).astype('float32')
X_test = X_test.reshape(X_test.shape[0], num_pixels).astype('float32')

# Normalize inputs from 0-255 to 0-1.
X_train = X_train / 255
X_test = X_test / 255

# Step 4: preprocess class labels
# Check the shape of the class label data.
# Comment out if you do not want to print.
print(y_train.shape)
# We should have 10 different classes, one for each digit, but it looks like
# we only have a 1-dimensional array.

# Check the labels of the first 10 training samples;
# the output is of the form [5 0 4 1 9 2 1 3 1 4].
print(y_train[:10])
# The y_train and y_test data are not split into 10 distinct class labels, but
# are instead represented as a single array holding the class values.

# Convert the 1-dimensional class arrays to 10-dimensional class matrices.
# The class count is passed explicitly (and positionally, so it works under
# both the Keras 1 and Keras 2 keyword names) so that the train and test
# matrices always have the same width, instead of each width being inferred
# from the largest label that happens to be present in that split.
num_classes = 10  # one class per digit 0-9
y_train = np_utils.to_categorical(y_train, num_classes)
y_test = np_utils.to_categorical(y_test, num_classes)

# Check again.
print(y_train.shape)  # (60000, 10)
print(y_train[:10])

# Step 5: define the model architecture
# A very simple model is created in the next few lines. This is the most
# important part => creating a good network.
model = Sequential()
# Only one hidden layer, with relu as the activation function.
model.add(Dense(num_pixels, input_dim=num_pixels, init='normal', activation='relu'))
# Output layer, with softmax as the activation function.
model.add(Dense(num_classes, init='normal', activation='softmax'))

# Step 6: compile the model
# Define the optimizer, loss function and metrics => a very important step
# which will determine the performance of your network.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Step 7: train the model
# The test split doubles as the validation data reported after each epoch.
model.fit(X_train, y_train, validation_data=(X_test, y_test), nb_epoch=2, batch_size=200, verbose=1)

# Step 8: evaluate the model
# scores[1] is the accuracy metric requested in compile(); report its
# complement as the error percentage.
scores = model.evaluate(X_test, y_test)
print("Error: %.2f%%" % (100-scores[1]*100))