How to do backpropagation in NumPy

I have adapted a simple neural net written in Python to illustrate how the back-propagation algorithm works on a small toy problem.

My modifications include printing the intermediate predictions and error, adding a learning rate, and using the leaky ReLU activation function instead of the sigmoid.

import numpy as np
 
# seed the random number generator so the
# calculation is deterministic (just good practice)
np.random.seed(1)
# make printed output easier to read
# fewer decimals and no scientific notation
np.set_printoptions(precision=3, suppress=True)
 
# learning rate
lr = 1e-2
 
# sigmoid function (kept for reference; not used below)
# note: when deriv=True, x is expected to be the sigmoid *output*,
# which is why the derivative is simply x*(1-x)
def sigmoid(x, deriv=False):
    if deriv:
        result = x*(1-x)
    else:
        result = 1/(1+np.exp(-x))
    return result
 
# leaky ReLU function (named prelu here, but the negative slope is a
# fixed constant rather than a learned parameter as in a true PReLU)
def prelu(x, deriv=False):
    c = np.zeros_like(x)
    slope = 1e-1
    if deriv:
        c[x<=0] = slope
        c[x>0] = 1
    else:
        c[x>0] = x[x>0]
        c[x<=0] = slope*x[x<=0]
    return c
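
# quick illustration (added): prelu passes positive inputs through and scales
# negative inputs by the slope; its derivative is 1 or 0.1 accordingly, e.g.
#   prelu(np.array([-2.0, -0.5, 0.5, 2.0]))        -> [-0.2, -0.05, 0.5, 2.0]
#   prelu(np.array([-2.0, -0.5, 0.5, 2.0]), True)  -> [ 0.1,  0.1,  1.0, 1.0]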
 
# non-linearity (activation function)
nonlin = prelu # instead of sigmoid
 
# initialize weights randomly with mean 0
# shape (3, 1): one weight per input feature, single output unit
W = 2*np.random.random((3,1)) - 1
 
# input dataset (the third column is a constant 1 and acts as a bias term)
X = np.array([  [0,0,1],
                [0,1,1],
                [1,0,1],
                [1,1,1] ])
# output dataset (the target is simply the first column of X)
y = np.array([[0,0,1,1]]).T
 
print('X:\n', X)
print('Y:\n', y)
print()
 
for it in range(1000):

    # forward propagation
    l0 = X
    l1 = nonlin(np.dot(l0, W))

    # how much did we miss?
    l1_error = y - l1

    # slope of the activation function at the values in l1
    # (using the output instead of the pre-activation is fine here,
    # because the leaky ReLU preserves the sign of its input)
    l1_gradient = nonlin(l1, True)
    # delta = error * gradient * learning rate
    l1_delta = l1_error * l1_gradient * lr

    # update weights: each input contributes its share of the gradient
    W += np.dot(l0.T, l1_delta)

    if it % 100 == 0:
        print('pred:', l1.squeeze(), 'mse:', (l1_error**2).mean())
 
print ("Output After Training:")
print ('l1:', np.around(l1))
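
To double-check the result, run one more forward pass with the trained weights and compare against the targets. This assumes the script above has just been run, so X, y, W and nonlin are still in scope; a new input row would be scored the same way.

# final forward pass with the trained weights
final_pred = nonlin(np.dot(X, W))
print('targets:    ', y.squeeze())
print('predictions:', final_pred.squeeze())

The weight update in the loop is a plain gradient-descent step, so it can also be checked numerically. The sketch below is separate from the training script above: it compares the analytic gradient used in the loop against a central finite-difference estimate, assuming the loss is half the sum of squared errors, 0.5*((y - nonlin(np.dot(X, W)))**2).sum(); the loop steps along the negative of this gradient, scaled by the learning rate.

# squared-error loss as a function of the weights
def loss(w):
    return 0.5*((y - nonlin(np.dot(X, w)))**2).sum()

# analytic gradient of the loss at the current weights
out = nonlin(np.dot(X, W))
analytic = -np.dot(X.T, (y - out) * nonlin(out, True))

# central finite-difference estimate, one weight at a time
eps = 1e-6
numeric = np.zeros_like(W)
for i in range(W.size):
    w_plus, w_minus = W.copy(), W.copy()
    w_plus.flat[i] += eps
    w_minus.flat[i] -= eps
    numeric.flat[i] = (loss(w_plus) - loss(w_minus)) / (2*eps)

print('analytic gradient:', analytic.squeeze())
print('numeric gradient: ', numeric.squeeze())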
