Preliminary

In [1]:
### Importing Libraries ###

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle

np.set_printoptions(precision=2,suppress=True)
np.warnings.filterwarnings('ignore')
In [2]:
### Some Functions ###

# Function that return a two bit(y1,y2) number given inputs(a1,a2,b1,b2) by taking their bit wise xor.
def get_y(a1,a2,b1,b2):
    y1 = a1^b1
    y2 = a2^b2

    return y1,y2
    
# Returns derivative of Relu for use in backpropgation
def relu_derivative(Zn):
    return np.greater(Zn,np.zeros(Zn.shape)) * np.ones(Zn.shape)

Creating Random Training Set

The Random dataset contains train_size data points (default 1000) that are uniformly sampled. Each point corresponds to one of 32 possible input combinations.

In [3]:
train_size = 1000
In [4]:
### Creating X_train ###

X_train = np.random.randint(0,2,(train_size,4))
X_train = pd.DataFrame(X_train,columns=("A1","A2","B1","B2"))
X_train = X_train.T
X_train.head()
Out[4]:
0 1 2 3 4 5 6 7 8 9 ... 990 991 992 993 994 995 996 997 998 999
A1 0 0 1 0 0 1 1 1 0 0 ... 0 0 1 1 0 0 0 1 1 0
A2 1 0 1 0 1 0 1 1 1 1 ... 0 0 1 0 1 1 0 0 0 1
B1 1 1 1 0 0 1 0 1 1 1 ... 1 1 1 1 1 1 1 1 1 0
B2 1 1 0 0 0 1 0 0 1 1 ... 1 1 1 1 1 0 0 0 1 1

4 rows × 1000 columns

In [5]:
### Creating Y_train ###

Y_train = np.full((train_size,2),-1)

for i in range(train_size):
    a = X_train.iloc[:,i]
    Y_train[i,0],Y_train[i,1] = get_y(a[0],a[1],a[2],a[3])
    
Y_train = pd.DataFrame(Y_train,columns=["Y1","Y2"])
Y_train = Y_train.T
Y_train.head()
Out[5]:
0 1 2 3 4 5 6 7 8 9 ... 990 991 992 993 994 995 996 997 998 999
Y1 1 1 0 0 0 0 1 0 1 1 ... 1 1 0 0 1 1 1 0 0 0
Y2 0 1 1 0 1 1 1 1 0 0 ... 1 1 0 1 0 1 0 0 1 0

2 rows × 1000 columns

Creating Cross-Validation Set

In [6]:
cv_size = 100
In [7]:
### Creating X_cv ###

X_cv = np.random.randint(0,2,(cv_size,4))
X_cv = pd.DataFrame(X_cv,columns=("A1","A2","B1","B2"))
X_cv = X_cv.T
X_cv.head()
Out[7]:
0 1 2 3 4 5 6 7 8 9 ... 90 91 92 93 94 95 96 97 98 99
A1 0 0 0 0 0 0 0 1 1 0 ... 0 0 1 1 1 1 1 1 1 0
A2 0 0 1 1 1 1 0 1 1 1 ... 0 1 1 1 1 0 1 1 1 0
B1 1 0 0 0 0 1 0 1 0 0 ... 1 0 0 1 1 0 0 0 0 0
B2 1 0 1 1 0 1 0 1 1 0 ... 0 0 0 0 1 0 1 0 1 0

4 rows × 100 columns

In [8]:
### Creating Y_cv ###

Y_cv = np.full((cv_size,2),-1)

for i in range(cv_size):
    a = X_cv.iloc[:,i]
    Y_cv[i,0],Y_cv[i,1] = get_y(a[0],a[1],a[2],a[3])
    
Y_cv = pd.DataFrame(Y_cv,columns=["Y1","Y2"])
Y_cv= Y_cv.T
Y_cv.head()
Out[8]:
0 1 2 3 4 5 6 7 8 9 ... 90 91 92 93 94 95 96 97 98 99
Y1 1 0 0 0 0 1 0 0 1 0 ... 1 0 1 0 0 1 1 1 1 0
Y2 1 0 0 0 1 0 0 0 0 1 ... 0 1 1 1 0 0 0 1 0 0

2 rows × 100 columns

In [9]:
### Converting to numpy-ndarray ###

X_train = X_train.values
Y_train = Y_train.values

X_cv = X_cv.values
Y_cv = Y_cv.values

Neural Network Architecture :

  • 2 Layer model with a hidden layer containing 6 nodes.
  • Input Layer(5 nodes) ---> Hidden Layer 1(6 nodes) ---> Output Layer(2 nodes)
  • Activation for Layer 1 : Relu.
  • Activation for Layer 2 : Sigmoid.
In [10]:
### Defining the Architecture ###

n,m = X_train.shape
hidden_layer_nodes = 6

assert(X_train.shape[1] == Y_train.shape[1])
assert(X_cv.shape[1] == Y_cv.shape[1])

n0 = X_train.shape[0]      # Nodes in Input Layer
n1 = hidden_layer_nodes    # Nodes in Hidden Layer
n2 = Y_train.shape[0]      # Nodes in Output Layer
In [11]:
### Initializing the Parameters ###

W1 = np.random.randn(n1,n0) * np.sqrt(2/n0)
b1 = np.random.randn(n1,1) * np.sqrt(2/n0)

W2 = np.random.randn(n2,n1) * np.sqrt(2/n1)
b2 = np.random.randn(n2,1) * np.sqrt(2/n1)

print("Shape of W1 : " + str(W1.shape))
print("Shape of b1 : " + str(b1.shape))
print("Shape of W2 : " + str(W2.shape))
print("Shape of b2 : " + str(b2.shape))
Shape of W1 : (6, 4)
Shape of b1 : (6, 1)
Shape of W2 : (2, 6)
Shape of b2 : (2, 1)
In [12]:
### Setting up Monitering Tools ###

train_cost_vals = []
cv_cost_vals = []
train_acc_vals = []
cv_acc_vals = []
iter_vals = []

iteration = 0
In [13]:
### Initializing the Hyper-parameters ###

learning_rate = 0.03             # Learning Rate = 0.1
num_iterations = 100000          # No of iterations = 1,000,000

Training the Model

In [14]:
### Implementing the Neural Net ###

print("Beginning Training...")
print("----------------------------------------------------------------------------------------")

for i in range(num_iterations):
    iteration += 1
    
    train_crrct = 0
    train_total = 0
    cv_crrct = 0
    cv_total = 0 
    
    ### Forward Pass
    
    # Hidden Layer 1 
    Z1 = np.matmul(W1,X_train) + b1
    A1 = np.maximum(Z1,0)
    
    # Output Layer
    Z2 = np.matmul(W2,A1) + b2
    A2 = 1/(1 + np.exp(-1 * Z2))
    
#     pred = (A2 + 0.5) // 1
#     print(pred)
#     print(Y_train)
        
    ### Cost Calulation
    cost = (-1/m) * np.sum(np.multiply(Y_train, np.log(A2)) + np.multiply(1-Y_train, np.log(1-A2)))
    cost = np.squeeze(cost)
    train_cost = cost
    
    ### Backward Pass
    
    # Output Layer
    dZ2 = A2 - Y_train
    dW2 = (1/m) * np.matmul(dZ2,A1.T)
    db2 = (1/m) * np.sum(dZ2, axis=1, keepdims=True)
    
    # Hidden Layer 1
    dZ1 = np.matmul(W2.T,dZ2) * relu_derivative(Z1)
    dW1 = (1/m) * np.matmul(dZ1,X_train.T)
    db1 = (1/m) * np.sum(dZ1, axis=1, keepdims=True)
    
    ### Updating parameters
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    
    ### Getting Train Accuracy
    train_crrct += (abs(Y_train - A2) <.01).sum()
    train_total += Y_train.size
    train_acc = (100.0 * train_crrct)/train_total
    
    ### Getting Cross Validation Cost and Accuracy
    Z1 = np.matmul(W1,X_cv) + b1
    A1 = np.maximum(Z1,0)
    Z2 = np.matmul(W2,A1) + b2
    A2 = 1/(1 + np.exp(-1 * Z2))
    
    cv_cost = (-1/cv_size) * np.sum(np.multiply(Y_cv, np.log(A2)) + np.multiply(1-Y_cv, np.log(1-A2)))
    cv_cost = np.squeeze(cv_cost)

    cv_crrct += (abs(Y_cv - A2) <.01).sum()
    cv_total += Y_cv.size
    cv_acc = (100.0 * cv_crrct)/cv_total
    
    if(iteration%10==0):
        iter_vals.append(iteration)
        train_cost_vals.append(train_cost)
        cv_cost_vals.append(cv_cost)  
        train_acc_vals.append(train_acc)
        cv_acc_vals.append(cv_acc)
        
        if(iteration%5000==0):
            print("Iteration : " + str(iteration))
            print()
            print("- Train cost       : {:.5f}".format(train_cost))
            print("- Cross Val. cost  : {:.5f}".format(cv_cost))
            print("- Train acc        : {:.5f}".format(train_acc))
            print("- Cross Val. acc   : {:.5f}".format(cv_acc))
            print("----------------------------------------------------------------------------------------")

print("Done")
Beginning Training...
----------------------------------------------------------------------------------------
Iteration : 5000

- Train cost       : 0.43879
- Cross Val. cost  : 0.46600
- Train acc        : 30.85000
- Cross Val. acc   : 31.50000
----------------------------------------------------------------------------------------
Iteration : 10000

- Train cost       : 0.12091
- Cross Val. cost  : 0.16100
- Train acc        : 45.25000
- Cross Val. acc   : 41.50000
----------------------------------------------------------------------------------------
Iteration : 15000

- Train cost       : 0.09806
- Cross Val. cost  : 0.13824
- Train acc        : 59.60000
- Cross Val. acc   : 55.50000
----------------------------------------------------------------------------------------
Iteration : 20000

- Train cost       : 0.07528
- Cross Val. cost  : 0.10975
- Train acc        : 72.25000
- Cross Val. acc   : 62.50000
----------------------------------------------------------------------------------------
Iteration : 25000

- Train cost       : 0.04404
- Cross Val. cost  : 0.06488
- Train acc        : 81.60000
- Cross Val. acc   : 75.50000
----------------------------------------------------------------------------------------
Iteration : 30000

- Train cost       : 0.02583
- Cross Val. cost  : 0.03789
- Train acc        : 84.70000
- Cross Val. acc   : 79.00000
----------------------------------------------------------------------------------------
Iteration : 35000

- Train cost       : 0.01697
- Cross Val. cost  : 0.02470
- Train acc        : 90.70000
- Cross Val. acc   : 85.50000
----------------------------------------------------------------------------------------
Iteration : 40000

- Train cost       : 0.01220
- Cross Val. cost  : 0.01768
- Train acc        : 90.70000
- Cross Val. acc   : 85.50000
----------------------------------------------------------------------------------------
Iteration : 45000

- Train cost       : 0.00934
- Cross Val. cost  : 0.01350
- Train acc        : 90.70000
- Cross Val. acc   : 85.50000
----------------------------------------------------------------------------------------
Iteration : 50000

- Train cost       : 0.00748
- Cross Val. cost  : 0.01079
- Train acc        : 93.75000
- Cross Val. acc   : 91.00000
----------------------------------------------------------------------------------------
Iteration : 55000

- Train cost       : 0.00619
- Cross Val. cost  : 0.00892
- Train acc        : 93.75000
- Cross Val. acc   : 91.00000
----------------------------------------------------------------------------------------
Iteration : 60000

- Train cost       : 0.00478
- Cross Val. cost  : 0.00699
- Train acc        : 96.95000
- Cross Val. acc   : 94.50000
----------------------------------------------------------------------------------------
Iteration : 65000

- Train cost       : 0.00333
- Cross Val. cost  : 0.00474
- Train acc        : 96.95000
- Cross Val. acc   : 94.50000
----------------------------------------------------------------------------------------
Iteration : 70000

- Train cost       : 0.00267
- Cross Val. cost  : 0.00371
- Train acc        : 96.95000
- Cross Val. acc   : 94.50000
----------------------------------------------------------------------------------------
Iteration : 75000

- Train cost       : 0.00224
- Cross Val. cost  : 0.00308
- Train acc        : 96.95000
- Cross Val. acc   : 94.50000
----------------------------------------------------------------------------------------
Iteration : 80000

- Train cost       : 0.00194
- Cross Val. cost  : 0.00264
- Train acc        : 96.95000
- Cross Val. acc   : 94.50000
----------------------------------------------------------------------------------------
Iteration : 85000

- Train cost       : 0.00171
- Cross Val. cost  : 0.00231
- Train acc        : 96.95000
- Cross Val. acc   : 94.50000
----------------------------------------------------------------------------------------
Iteration : 90000

- Train cost       : 0.00152
- Cross Val. cost  : 0.00205
- Train acc        : 100.00000
- Cross Val. acc   : 100.00000
----------------------------------------------------------------------------------------
Iteration : 95000

- Train cost       : 0.00137
- Cross Val. cost  : 0.00184
- Train acc        : 100.00000
- Cross Val. acc   : 100.00000
----------------------------------------------------------------------------------------
Iteration : 100000

- Train cost       : 0.00125
- Cross Val. cost  : 0.00167
- Train acc        : 100.00000
- Cross Val. acc   : 100.00000
----------------------------------------------------------------------------------------
Done

Results

In [15]:
### Summary of Results ###

print("SUMMARY : ")
print("Architecture        : 5 - 6 - 2")
print("Activations         : Relu, Sigmoid")
print("Iterations          : " + str(iteration))
print("Learning Rate       : " + str(learning_rate))
print("Train Dataset       : Random")
print("Train Size          : " + str(train_size))
print("Cross Val. Size     : " + str(cv_size))
print("Train Accuracy      : {:.5f}".format(train_acc_vals[-1]))
print("Cross Val. Accuracy : {:.5f}".format(cv_acc_vals[-1]))
print("Train Cost          : {:.5f}".format(train_cost_vals[-1]))
print("Cross Val. Cost     : {:.5f}".format(cv_cost_vals[-1]))
#test_model()

plt.figure(figsize=[32,12], dpi=100)

plt.subplot(1, 2, 1)
plt.plot(iter_vals, train_acc_vals, '-', lw=3, c='red', label='Train Acc')
plt.plot(iter_vals, cv_acc_vals, '-', lw=3, c='green', label='Test Acc')
plt.legend(fontsize='20')
plt.title('Accuracy vs Iterations', size='30')
plt.xlabel('Number of Iterations', size='25')
plt.ylabel('Accuracy', size='25')
plt.grid(True, linestyle='-.',)
plt.tick_params(labelcolor='k', labelsize='20', width=3)

plt.subplot(1, 2, 2)
plt.plot(iter_vals, train_cost_vals, '-', lw=3, c='red', label='Train Cost')
plt.plot(iter_vals, cv_cost_vals, '-', lw=3, c='green', label='Test Cost')
plt.legend(fontsize='20')
plt.title('Cost vs Iterations', size='30')
plt.xlabel('Number of Iterations', size='25')
plt.ylabel('Cost', size='25')
plt.grid(True, linestyle='-.',)
plt.tick_params(labelcolor='k', labelsize='20', width=3)
SUMMARY : 
Architecture        : 5 - 6 - 2
Activations         : Relu, Sigmoid
Iterations          : 100000
Learning Rate       : 0.03
Train Dataset       : Random
Train Size          : 1000
Cross Val. Size     : 100
Train Accuracy      : 100.00000
Cross Val. Accuracy : 100.00000
Train Cost          : 0.00125
Cross Val. Cost     : 0.00167

Testing the Model

In [16]:
def test_model(test_size=1000):

    X_test = np.random.randint(0,2,(test_size,4))
    X_test = pd.DataFrame(X_test,columns=("A1","A2","B1","B2"))
    X_test = X_test.T

    Y_test = np.full((test_size,2),-1)

    for i in range(test_size):
        a = X_test.iloc[:,i]
        Y_test[i,0],Y_test[i,1] = get_y(a[0],a[1],a[2],a[3])

    Y_test = pd.DataFrame(Y_test,columns=["Y1","Y2"])
    Y_test= Y_test.T

    X_test = X_test.values
    Y_test = Y_test.values

    Z1 = np.matmul(W1,X_test) + b1
    A1 = np.maximum(Z1,0)
    Z2 = np.matmul(W2,A1) + b2
    A2 = 1/(1 + np.exp(-1 * Z2))

    test_acc = (100.0 * (abs(Y_test - A2) <.01).sum()/2) / test_size
    print("Test Accuracy on {} samples is : {:.2f}%".format(test_size,test_acc))
    
test_model()
Test Accuracy on 1000 samples is : 100.00%
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: