import java.io.FileReader;
import java.util.Arrays;
import java.util.Random;

import weka.core.Instances;

/**
 * General ANN. Two methods are abstract: forward and backPropagation.
 *
 * @author Shi-Huai Wen Email: shihuaiwen@outlook.com.
 */
public abstract class GeneralAnn {
    /**
     * The whole dataset.
     */
    Instances dataset;

    /**
     * Number of layers. It is counted according to nodes instead of edges.
     */
    int numLayers;

    /**
     * The number of nodes for each layer, e.g., [3, 4, 6, 2] means that there
     * are 3 input nodes (conditional attributes), 2 hidden layers with 4 and 6
     * nodes, respectively, and 2 class values (binary classification).
     */
    int[] layerNumNodes;
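    /*
     * The members below are referenced by train(), test(), and the SimpleAnn
     * subclass (learningRate, mobp, random, and the four-argument constructor
     * invoked through super(...)), but are not shown in the listing above. This
     * is a minimal sketch of what they might look like; the arff-reading details
     * are assumptions.
     */

    /**
     * Learning rate.
     */
    double learningRate;

    /**
     * Momentum coefficient.
     */
    double mobp;

    /**
     * Random number generator for weight initialization.
     */
    Random random = new Random();

    /**
     * *******************
     * The constructor.
     *
     * @param paraFilename      The arff filename.
     * @param paraLayerNumNodes The number of nodes for each layer.
     * @param paraLearningRate  Learning rate.
     * @param paraMobp          Momentum coefficient.
     * *******************
     */
    public GeneralAnn(String paraFilename, int[] paraLayerNumNodes, double paraLearningRate,
            double paraMobp) {
        // Read the data; the last attribute is the class.
        try {
            FileReader tempReader = new FileReader(paraFilename);
            dataset = new Instances(tempReader);
            dataset.setClassIndex(dataset.numAttributes() - 1);
            tempReader.close();
        } catch (Exception ee) {
            System.out.println("Error occurred while trying to read '" + paraFilename + "': " + ee);
            System.exit(0);
        } // Of try

        // Accept the remaining parameters and adjust the input/output layer
        // sizes to match the data.
        layerNumNodes = paraLayerNumNodes;
        numLayers = layerNumNodes.length;
        layerNumNodes[0] = dataset.numAttributes() - 1;
        layerNumNodes[numLayers - 1] = dataset.numClasses();
        learningRate = paraLearningRate;
        mobp = paraMobp;
    }// Of the constructor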
    /**
     * *******************
     * Forward prediction.
     *
     * @param paraInput The input data of one instance.
     * @return The data at the output end.
     * *******************
     */
    public abstract double[] forward(double[] paraInput);

    /**
     * *******************
     * Back propagation.
     *
     * @param paraTarget For 3-class data, it is [0, 0, 1], [0, 1, 0] or [1, 0, 0].
     * *******************
     */
    public abstract void backPropagation(double[] paraTarget);
    /**
     * *******************
     * Train using the dataset.
     * *******************
     */
    public void train() {
        double[] tempInput = new double[dataset.numAttributes() - 1];
        double[] tempTarget = new double[dataset.numClasses()];
        for (int i = 0; i < dataset.numInstances(); i++) {
            // Fill the data.
            for (int j = 0; j < tempInput.length; j++) {
                tempInput[j] = dataset.instance(i).value(j);
            } // Of for j

            // Fill the class label.
            Arrays.fill(tempTarget, 0);
            tempTarget[(int) dataset.instance(i).classValue()] = 1;

            // Train with this instance.
            forward(tempInput);
            backPropagation(tempTarget);
        } // Of for i
    }// Of train
    /**
     * *******************
     * Get the index corresponding to the max value of the array.
     *
     * @return the index.
     * *******************
     */
    public static int argmax(double[] paraArray) {
        int resultIndex = -1;
        double tempMax = -1e10;
        for (int i = 0; i < paraArray.length; i++) {
            if (tempMax < paraArray[i]) {
                tempMax = paraArray[i];
                resultIndex = i;
            } // Of if
        } // Of for i

        return resultIndex;
    }// Of argmax
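    // Example: argmax(new double[] {0.1, 0.7, 0.2}) returns 1, the index of the
    // largest value, which serves as the predicted class in test() below.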
    /**
     * *******************
     * Test using the dataset.
     *
     * @return The accuracy.
     * *******************
     */
    public double test() {
        double[] tempInput = new double[dataset.numAttributes() - 1];

        double tempNumCorrect = 0;
        double[] tempPrediction;
        int tempPredictedClass;

        for (int i = 0; i < dataset.numInstances(); i++) {
            // Fill the data.
            for (int j = 0; j < tempInput.length; j++) {
                tempInput[j] = dataset.instance(i).value(j);
            } // Of for j

            // Predict with this instance.
            tempPrediction = forward(tempInput);

            tempPredictedClass = argmax(tempPrediction);
            if (tempPredictedClass == (int) dataset.instance(i).classValue()) {
                tempNumCorrect++;
            } // Of if
        } // Of for i

        System.out.println("Correct: " + tempNumCorrect + " out of " + dataset.numInstances());

        return tempNumCorrect / dataset.numInstances();
    }// Of test
} //Of class GeneralAnn
public class SimpleAnn extends GeneralAnn {
    /**
     * The value of each node that changes during the forward process. The first
     * dimension stands for the layer, and the second stands for the node.
     */
    public double[][] layerNodeValues;

    /**
     * The error on each node that changes during the back-propagation process.
     * The first dimension stands for the layer, and the second stands for the
     * node.
     */
    public double[][] layerNodeErrors;

    /**
     * The weights of edges. The first dimension stands for the layer, the
     * second stands for the node index of the layer, and the third dimension
     * stands for the node index of the next layer.
     */
    public double[][][] edgeWeights;

    /**
     * The change of edge weights. It has the same size as edgeWeights.
     */
    public double[][][] edgeWeightsDelta;
    /**
     * *******************
     * The first constructor.
     *
     * @param paraFilename      The arff filename.
     * @param paraLayerNumNodes The number of nodes for each layer (maybe different).
     * @param paraLearningRate  Learning rate.
     * @param paraMobp          Momentum coefficient.
     * *******************
     */
    public SimpleAnn(String paraFilename, int[] paraLayerNumNodes, double paraLearningRate,
            double paraMobp) {
        // Parent constructor.
        super(paraFilename, paraLayerNumNodes, paraLearningRate, paraMobp);

        // Step 1. Across-layer initialization.
        layerNodeValues = new double[numLayers][];
        layerNodeErrors = new double[numLayers][];
        edgeWeights = new double[numLayers - 1][][];
        edgeWeightsDelta = new double[numLayers - 1][][];

        // Step 2. Inner layer initialization.
        for (int l = 0; l < numLayers; l++) {
            layerNodeValues[l] = new double[layerNumNodes[l]];
            layerNodeErrors[l] = new double[layerNumNodes[l]];

            // One less layer because each edge crosses two layers.
            if (l + 1 == numLayers) {
                break;
            } // Of if

            // In layerNumNodes[l] + 1, the last one is reserved for the offset.
            edgeWeights[l] = new double[layerNumNodes[l] + 1][layerNumNodes[l + 1]];
            edgeWeightsDelta[l] = new double[layerNumNodes[l] + 1][layerNumNodes[l + 1]];
            for (int j = 0; j < layerNumNodes[l] + 1; j++) {
                for (int i = 0; i < layerNumNodes[l + 1]; i++) {
                    // Initialize weights.
                    edgeWeights[l][j][i] = random.nextDouble();
                } // Of for i
            } // Of for j
        } // Of for l
    }// Of the constructor
    /**
     * *******************
     * Forward prediction.
     *
     * @param paraInput The input data of one instance.
     * @return The data at the output end.
     * *******************
     */
    public double[] forward(double[] paraInput) {
        // Initialize the input layer.
        System.arraycopy(paraInput, 0, layerNodeValues[0], 0, layerNodeValues[0].length);

        // Calculate the node values of each layer.
        double z;
        for (int l = 1; l < numLayers; l++) {
            for (int j = 0; j < layerNodeValues[l].length; j++) {
                // Initialize according to the offset, which is always +1.
                z = edgeWeights[l - 1][layerNodeValues[l - 1].length][j];
                // Weighted sum on all edges for this node.
                for (int i = 0; i < layerNodeValues[l - 1].length; i++) {
                    z += edgeWeights[l - 1][i][j] * layerNodeValues[l - 1][i];
                } // Of for i

                // Sigmoid activation.
                // This line should be changed for other activation functions.
                layerNodeValues[l][j] = 1 / (1 + Math.exp(-z));
            } // Of for j
        } // Of for l

        return layerNodeValues[numLayers - 1];
    }// Of forward
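    /*
     * In formula form, forward() computes, for each node j of layer l:
     *   value[l][j] = sigmoid(bias + sum_i edgeWeights[l - 1][i][j] * value[l - 1][i]),
     * where the bias is stored in the extra row edgeWeights[l - 1][layerNumNodes[l - 1]][j]
     * and sigmoid(z) = 1 / (1 + e^(-z)).
     */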
    /**
     * *******************
     * Back propagation and change the edge weights.
     *
     * @param paraTarget For 3-class data, it is [0, 0, 1], [0, 1, 0] or [1, 0, 0].
     * *******************
     */
    public void backPropagation(double[] paraTarget) {
        // Step 1. Initialize the output layer error.
        int l = numLayers - 1;
        for (int j = 0; j < layerNodeErrors[l].length; j++) {
            layerNodeErrors[l][j] = layerNodeValues[l][j] * (1 - layerNodeValues[l][j])
                    * (paraTarget[j] - layerNodeValues[l][j]);
        } // Of for j

        // Step 2. Back-propagate layer by layer. The loop also reaches l == 0 so
        // that the weights out of the input layer are updated, although no error
        // needs to be computed for layer 0 itself.
        while (l > 0) {
            l--;
            // Layer l, for each node.
            for (int j = 0; j < layerNumNodes[l]; j++) {
                double z = 0.0;
                // For each node of the next layer.
                for (int i = 0; i < layerNumNodes[l + 1]; i++) {
                    if (l > 0) {
                        z += layerNodeErrors[l + 1][i] * edgeWeights[l][j][i];
                    } // Of if

                    // Weight adjusting.
                    edgeWeightsDelta[l][j][i] = mobp * edgeWeightsDelta[l][j][i]
                            + learningRate * layerNodeErrors[l + 1][i] * layerNodeValues[l][j];
                    edgeWeights[l][j][i] += edgeWeightsDelta[l][j][i];
                    if (j == layerNumNodes[l] - 1) {
                        // Weight adjusting for the offset part.
                        edgeWeightsDelta[l][j + 1][i] = mobp * edgeWeightsDelta[l][j + 1][i]
                                + learningRate * layerNodeErrors[l + 1][i];
                        edgeWeights[l][j + 1][i] += edgeWeightsDelta[l][j + 1][i];
                    } // Of if
                } // Of for i

                // Record the error according to the differential of Sigmoid.
                // This line should be changed for other activation functions.
                layerNodeErrors[l][j] = layerNodeValues[l][j] * (1 - layerNodeValues[l][j]) * z;
            } // Of for j
        } // Of while
    }// Of backPropagation
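    /**
     * *******************
     * A minimal driver sketch. The arff path, layer sizes, learning rate and
     * momentum below are illustrative assumptions; adjust them to your own data.
     * *******************
     */
    public static void main(String[] args) {
        // Four conditional attributes, two hidden layers, three classes (e.g., iris).
        int[] tempLayerNodes = {4, 8, 8, 3};
        SimpleAnn tempNetwork = new SimpleAnn("sample/iris.arff", tempLayerNodes, 0.01, 0.6);

        // Train for a number of epochs.
        for (int round = 0; round < 5000; round++) {
            tempNetwork.train();
        } // Of for round

        // Report the accuracy on the training set.
        double tempAccuracy = tempNetwork.test();
        System.out.println("The accuracy is: " + tempAccuracy);
    }// Of main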