function [J grad] = nnCostFunction(nn_params, ...
                                   input_layer_size, ...
                                   hidden_layer_size, ...
                                   num_labels, ...
                                   X, y, lambda)
% NNCOSTFUNCTION Regularized cost and gradient of a two-layer neural network.
%
%   [J grad] = NNCOSTFUNCTION(nn_params, input_layer_size, hidden_layer_size,
%   num_labels, X, y, lambda)
%
%   Inputs:
%     nn_params         - both weight matrices unrolled into a single vector
%                         (Theta1 first, then Theta2).
%     input_layer_size  - number of input features (columns of X).
%     hidden_layer_size - number of hidden units.
%     num_labels        - number of output classes.
%     X                 - m x input_layer_size matrix, one example per row.
%     y                 - m labels, used as column indices 1..num_labels.
%     lambda            - regularization strength.
%
%   Outputs:
%     J    - scalar cross-entropy cost plus the L2 penalty on the non-bias
%            weights.
%     grad - gradients of J with respect to Theta1 and Theta2, unrolled in
%            the same order as nn_params.
%
%   Relies on the external helpers sigmoid and sigmoidGradient.

    % Recover the two weight matrices from the unrolled parameter vector.
    theta1_count = hidden_layer_size * (input_layer_size + 1);
    Theta1 = reshape(nn_params(1:theta1_count), ...
                     hidden_layer_size, (input_layer_size + 1));
    Theta2 = reshape(nn_params((theta1_count + 1):end), ...
                     num_labels, (hidden_layer_size + 1));

    m = size(X, 1);

    % One-hot encode the labels: row i has a single 1 in column y(i).
    Y = zeros(m, num_labels);
    Y(sub2ind(size(Y), (1:m)', y(:))) = 1;

    % ---------------------------------------------------------------------
    % Forward pass (computed once, reused for both cost and gradients).
    % ---------------------------------------------------------------------
    a1 = [ones(m, 1) X];                 % prepend the bias column
    z2 = a1 * Theta1';
    a2 = [ones(m, 1) sigmoid(z2)];       % prepend the bias column
    z3 = a2 * Theta2';
    h  = sigmoid(z3);

    % ---------------------------------------------------------------------
    % Cost: cross-entropy averaged over the examples, plus the L2 penalty.
    % ---------------------------------------------------------------------
    J = (-1 / m) * sum(sum(Y .* log(h) + (1 - Y) .* log(1 - h)));

    % The first column of each Theta holds the bias weights; they are not
    % penalized.
    regular_term = (lambda / (2 * m)) * ...
                   (sum(sum(Theta1(:, 2:end) .^ 2)) + ...
                    sum(sum(Theta2(:, 2:end) .^ 2)));
    J = J + regular_term;

    % ---------------------------------------------------------------------
    % Backpropagation, vectorized over all m examples.
    % ---------------------------------------------------------------------
    delta3 = h - Y;
    % Drop the bias column of Theta2: no error flows back into the bias unit.
    delta2 = (delta3 * Theta2(:, 2:end)) .* sigmoidGradient(z2);

    Theta1_grad = (delta2' * a1) ./ m;
    Theta2_grad = (delta3' * a2) ./ m;

    % Regularize the gradients; zero the bias columns so they stay
    % unpenalized.
    Theta1_reg = Theta1;
    Theta2_reg = Theta2;
    Theta1_reg(:, 1) = 0;
    Theta2_reg(:, 1) = 0;
    Theta1_grad = Theta1_grad + (lambda / m) .* Theta1_reg;
    Theta2_grad = Theta2_grad + (lambda / m) .* Theta2_reg;

    % Unroll the gradients in the same order as nn_params.
    grad = [Theta1_grad(:) ; Theta2_grad(:)];
end