/*
 Implementation of interfaces for IMinerNeuralNetworkModel objects.
*/

#include <math.h>

#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <memory.h>

#include "IMNeuralNetwork.h"


/*
 Allocate and populate an IMinerNeuralNetworkModel inside pModel.

 The model is an IMiner list of IMINER_NNET_NON_HIDDEN + (nHiddenLayers-1)
 members: a clone of the meta data, the classification flag, the per-input
 numeric flags, the range matrix, the input weights, one weight matrix per
 hidden-to-hidden transition, the output weights and the dependent levels.
 Member names are attached at the end.

 Returns IMINER_SUCCESS on success, IMINER_BAD_INPUT_ARGS when pModel is NULL
 or nInputs/nOutputs is below 1, IMINER_FAIL when the temporary name table
 cannot be allocated, or the status of the first member constructor that
 fails.  The temporary name table is released on every path.

 NOTE(review): when a member constructor fails, pModel is left partially
 built exactly as before this cleanup was added; whether the caller is
 expected to destroy it should be confirmed.
*/
long IMCSRC_STDCALL IMinerNeuralNetworkModel_create(
 IMinerObject* pModel,		    /* out: data object */
 IMinerObject* md,			    /* in:  meta data object */
 long nInputs,                  /* in:  number of input nodes */
 long nOutputs,                 /* in:  number of output nodes */
 long nHiddenLayers,            /* in:  number of hidden layers */ 
 long nNodesPerLayer,           /* in:  number of nodes per layer */ 
 long bClassification,			/* in:  is model classification? */
 const long* pnNumericColumn,   /* in:  stores whether a column is numeric of length (nInputs) */
 const double* pdRangeVals,     /* in:  min & max value of inputs and then dependent variable, stored as a 2 x (nInputs+1) matrix */
 const double* pdInputWeights,  /* in:  input weights of length [(nInputs+1)*(nNodesPerLayer)] */
 const double* pdHiddenWeights, /* in:  hidden weights of length [(nNodesPerLayer+1)*(nNodesPerLayer)]*(nHiddenLayers-1) */
 const double* pdOutputWeights, /* in:  output weights of length [(nNodesPerLayer+1)*(nOutputs)] */
 const char **pcDepLevels		/* in:  dependent levels; (nOutputs+1) entries are used when bClassification is set, none otherwise */
)
{ 
	char *ppszMD = "metaData", *ppszC = "classification", *ppszNC = "numericColumns", 
		 *ppszR = "dependentRange", *ppszIW = "dmInputWeights", *ppszHW = "dmHiddenWeights", 
		 *ppszOW = "dmOutputWeights", *ppszDL="dmDependentLevels", **ppszMemberNames;
	long nStatus=IMINER_SUCCESS, i=0, nHiddenLayerWeights=nHiddenLayers-1;
	const double *hWeights;

	if(pModel == NULL || nInputs < 1L  || nOutputs < 1L)
		return IMINER_BAD_INPUT_ARGS;

	/* scratch table of member names; only the pointers are owned here, the strings are literals */
	ppszMemberNames = (char**)malloc((nHiddenLayerWeights+IMINER_NNET_NON_HIDDEN)*sizeof(char*));
	if(ppszMemberNames == NULL)
		return IMINER_FAIL;

	/* a NeuralNetworkModel always have IMINER_NNET_NON_HIDDEN+nHiddenLayerWeights elements as list */
	nStatus = IMinerList_create(pModel, IMINER_NNET_NON_HIDDEN+nHiddenLayerWeights, IMINER_MODE_MODEL); 
	if(nStatus != IMINER_SUCCESS)
		goto cleanup;

	/* member: meta data */
	ppszMemberNames[IMINER_NNET_META_DATA_NUM] = ppszMD;
	nStatus = IMinerList_clone(IMINER_NNET_META_DATA(pModel), md);
	if(nStatus != IMINER_SUCCESS)
		goto cleanup;

	/* member: classification */
	ppszMemberNames[IMINER_NNET_TYPE_NUM] = ppszC;
	nStatus = IMinerVector_create(IMINER_NNET_TYPE(pModel), 1, IMINER_MODE_LONG, &bClassification);
	if(nStatus != IMINER_SUCCESS)
		goto cleanup;

	/* member: which columns numeric */
	ppszMemberNames[IMINER_NNET_NUMERIC_NUM] = ppszNC;
	nStatus = IMinerVector_create(IMINER_NNET_NUMERIC(pModel), nInputs, IMINER_MODE_LONG, pnNumericColumn);
	if(nStatus != IMINER_SUCCESS)
		goto cleanup;

	/* member: range (2 rows: min and max; one column per input plus one for the dependent) */
	ppszMemberNames[IMINER_NNET_RANGE_NUM] = ppszR;
	nStatus = IMinerDoubleMatrix_create(IMINER_NNET_RANGE(pModel), 2, nInputs+1, pdRangeVals, NULL, NULL);
	if(nStatus != IMINER_SUCCESS)
		goto cleanup;

	/* member: input weights (the extra row is read as the bias term by predict) */
	ppszMemberNames[IMINER_NNET_INPUT_NUM] = ppszIW;
	nStatus = IMinerDoubleMatrix_create(IMINER_NNET_INPUT(pModel), nInputs+1, nNodesPerLayer, pdInputWeights, NULL, NULL);
	if(nStatus != IMINER_SUCCESS)
		goto cleanup;

	/* member: hidden weights, one (nNodesPerLayer+1) x nNodesPerLayer slice of pdHiddenWeights per matrix */
	for (i=0; i<nHiddenLayerWeights; i++) {
		ppszMemberNames[i+IMINER_NNET_HIDDEN_NUM] = ppszHW;
		hWeights = &pdHiddenWeights[i*(nNodesPerLayer+1)*(nNodesPerLayer)];
		nStatus = IMinerDoubleMatrix_create(IMINER_NNET_HIDDEN(pModel, i), nNodesPerLayer+1, nNodesPerLayer, hWeights, NULL, NULL);
		if(nStatus != IMINER_SUCCESS)
			goto cleanup;
	}

	/* member: output weights */
	ppszMemberNames[IMINER_NNET_OUTPUT] = ppszOW;
	nStatus = IMinerDoubleMatrix_create(IMINER_NNET_OUTPUT_PTR(pModel), nNodesPerLayer+1, nOutputs, pdOutputWeights, NULL, NULL);
	if(nStatus != IMINER_SUCCESS)
		goto cleanup;

	/* member: dependent levels (empty vector for a regression model) */
	ppszMemberNames[IMINER_NNET_DEP_LEVELS] = ppszDL;
	nStatus = IMinerVector_create(IMINER_NNET_DEP_LEVELS_PTR(pModel), (bClassification) ? nOutputs+1 : 0, IMINER_MODE_STRING, pcDepLevels);
	if(nStatus != IMINER_SUCCESS)
		goto cleanup;

	/* special member: member names */
	nStatus = IMinerList_setNamesFromStrings(pModel, (const char**)ppszMemberNames);

	/* set the class ID (done even if naming failed, as before) */
	pModel->m_nMode = IMINER_MODE_MODEL;

cleanup:
	/* single exit so the scratch name table is never leaked */
	free(ppszMemberNames);
	
	return nStatus;
}

/*
 Check that pModel is a structurally valid neural-network model.

 Returns 1L when pModel is a valid list whose meta data, classification
 flag, numeric-column flags, range matrix, input weights, dependent levels,
 every hidden weight matrix and the output weights each pass their own
 validity check, and whose member-name vector is a string vector with the
 same length as the list.  Returns 0L otherwise (including pModel == NULL).
*/
long IMCSRC_STDCALL IMinerNeuralNetworkModel_isValid(const IMinerObject* pModel)
{
	long i=0, nLayers;
	IMinerObject* pElement;

	if(pModel==NULL  || !IMinerList_isValid(pModel))
		return 0L;

	if (!IMinerList_isValid(IMINER_NNET_META_DATA(pModel))) return 0L;

	pElement = IMINER_NNET_TYPE(pModel);
	if(!IMinerVector_isValid(pElement) )
		return 0L;

	pElement = IMINER_NNET_NUMERIC(pModel);
	if(!IMinerVector_isValid(pElement) )
		return 0L;
	
	pElement = IMINER_NNET_RANGE(pModel);
	if(!IMinerDoubleMatrix_isValid(pElement) )
		return 0L;
	
	pElement = IMINER_NNET_INPUT(pModel);
	if(!IMinerDoubleMatrix_isValid(pElement) )
		return 0L;

	pElement = IMINER_NNET_DEP_LEVELS_PTR(pModel);
	if(!IMinerVector_isValid(pElement) )
		return 0L;

	/* there is one hidden weight matrix fewer than there are layers */
	nLayers = IMINER_NNET_GET_LAYERS(pModel);
	for (i=0; i<nLayers-1; i++) {
		pElement = IMINER_NNET_HIDDEN(pModel, i);
		if(!IMinerDoubleMatrix_isValid(pElement) )
			return 0L;
	}

	pElement = IMINER_NNET_OUTPUT_PTR(pModel);
	if(!IMinerDoubleMatrix_isValid(pElement))
		return 0L;
	
	/* special member : member names must also be valid and have the same length as this object.
	   The model is rejected when EITHER condition fails; the type test comes first so the
	   length is only read from an object that passed it. */
	pElement = IMINER_LIST_NAMES_PTR(pModel);
	if(!IMinerVector_isString(pElement) || pElement->m_nLen != pModel->m_nLen)
		return 0L;

	return 1L;
}

/* Release the model by delegating to the generic object destructor;
   the destructor's status is handed back unchanged. */
long IMCSRC_STDCALL IMinerNeuralNetworkModel_destroy(IMinerObject* pModel )
{
	long nDestroyStatus;

	nDestroyStatus = IMinerObject_destroy(pModel);
	return nDestroyStatus;
}


/*
 Dump the model to stdout, one headed section per member.

 Sections are written in this order: meta data, classification flag, range
 matrix, dependent levels, input weights, each hidden weight matrix, output
 weights.  Returns IMINER_SUCCESS when everything printed, and
 IMINER_BAD_INPUT_ARGS when the model is invalid or any member printer
 reports a failure (sections already written stay on stdout).
*/
long IMCSRC_STDCALL IMinerNeuralNetworkModel_print(const IMinerObject* pModel)
{
	long nLayerCount, layer;

	if(!IMinerNeuralNetworkModel_isValid(pModel))
	{
		IMiner_error("%s(%d) : ", __FILE__, __LINE__);
		IMiner_error("Invalid pModel\n");
		return IMINER_BAD_INPUT_ARGS;
	}

	nLayerCount = IMINER_NNET_GET_LAYERS(pModel);

	printf("Meta Data:\n");
	if(IMinerMetaData_print(IMINER_NNET_META_DATA(pModel)) != IMINER_SUCCESS)
		return IMINER_BAD_INPUT_ARGS;

	printf("Is Classification Model:\n");
	if(IMinerVector_print(IMINER_NNET_TYPE(pModel)) != IMINER_SUCCESS)
		return IMINER_BAD_INPUT_ARGS;

	printf("Input Variables & Output Variable Range (output used for Regression ONLY):\n");
	if(IMinerDoubleMatrix_print(IMINER_NNET_RANGE(pModel)) != IMINER_SUCCESS)
		return IMINER_BAD_INPUT_ARGS;

	printf("Dependent Levels:\n");
	if(IMinerVector_print(IMINER_NNET_DEP_LEVELS_PTR(pModel)) != IMINER_SUCCESS)
		return IMINER_BAD_INPUT_ARGS;

	printf("Input Weights:\n");
	if(IMinerDoubleMatrix_print(IMINER_NNET_INPUT(pModel)) != IMINER_SUCCESS)
		return IMINER_BAD_INPUT_ARGS;

	/* one hidden weight matrix fewer than there are layers */
	printf("Hidden Weights:\n");
	layer = 0;
	while(layer < nLayerCount-1)
	{
		if(IMinerDoubleMatrix_print(IMINER_NNET_HIDDEN(pModel, layer)) != IMINER_SUCCESS)
			return IMINER_BAD_INPUT_ARGS;
		++layer;
	}

	printf("Output Weights:\n");
	if(IMinerDoubleMatrix_print(IMINER_NNET_OUTPUT_PTR(pModel)) != IMINER_SUCCESS)
		return IMINER_BAD_INPUT_ARGS;

	return IMINER_SUCCESS;
}

/* Smaller of two doubles; yields `two` when the comparison is false
   (equal values, or either operand NaN). */
static double imcrc_min(double one, double two) {
	return (one < two) ? one : two;
}
/* Larger of two doubles; yields `two` when the comparison is false
   (equal values, or either operand NaN). */
static double imcrc_max(double one, double two) {
	return (one > two) ? one : two;
}

/*
 Score a data set with a neural-network model (forward pass).

 The input is first converted with the model's meta data, then every
 converted column is rescaled with the stored min/max: columns flagged as
 numeric are mapped to 2*(v-min)/(max-min) - 1, the others to
 (v-min)/(max-min).  Each row is pushed through the input weights, the
 hidden weight matrices (logistic activation) and the output weights.

 Output data set (created in pOutput, all columns double):
   - classification: nOutputs+1 columns named "Pr(<level>)".  Column k
     (k < nOutputs) holds exp(o_k)/(1 + sum_j exp(o_j)) with each o clamped to
     [-15, 15]; the last column holds 1 minus the sum of the others.
   - regression: nOutputs columns holding the logistic of the raw output
     rescaled into the dependent variable's [min, max]; the first column is
     named "PREDICT.fit".

 Returns IMINER_SUCCESS, IMINER_BAD_INPUT_ARGS (NULL pOutput, invalid
 input/model, or a column mode that cannot be read), IMINER_FAIL (input
 conversion, scratch matrix creation or a memory allocation failed), or the
 status reported by the output data set constructor / column naming call.
 All temporaries are released on every path.

 NOTE(review): a column whose stored min equals its max yields a division
 by zero in the scaling step; the intended handling is not visible here.
 NOTE(review): a regression model with nOutputs > 1 supplies a single column
 name for nOutputs columns; confirm what the naming call expects.
 NOTE(review): on failure after pOutput has been created, pOutput is left
 as-is (same as before); confirm whether the caller destroys it.
*/
long IMCSRC_STDCALL IMinerNeuralNetworkModel_predict(
 IMinerObject* pOutput,      /* out: output rectangular data */
 const IMinerObject* input,  /* in: input rectangular data */
 IMinerObject* pDescr,		 /* in: input description (if NULL, description will be 
                                    created from input data) */
 const IMinerObject* pModel  /* in: the model */
)
{
	long nStatus=IMINER_SUCCESS, nInputRows, nOutputColumns, nConvertedColumns, nMode, layer;
	long i, row, node, output, bClassification, nInputs, nOutputs, nLayers, nNodesPerLayer;
	long bInputConverted=0L, bMatrixCreated=0L, nNames=0L, nNamesAllocated=0L;
	long *pnColumnsModes=NULL;
	double minVal, maxVal, dLess, dRatio, dTemp, normalize, dDepMinVal, dDepMaxVal, dLastLevel;
	double *hiddenVals=NULL, *tempHiddenVals=NULL, *outputStorage=NULL;
	IMinerObject pInput, *pnvNumericCols, *pvDepLevs, *pdmRangeVals, *pdmInputWeights, *pdmOutputWeights, *pdmHiddenWeights, A;
	char *pr_name, **columnNames=NULL;

	if(pOutput==NULL || !IMinerDataSet_isValid(input) || !IMinerNeuralNetworkModel_isValid(pModel))	
		return IMINER_BAD_INPUT_ARGS;

	/* convert input if needed*/
	nStatus = IMinerMetaData_InputConvert(&pInput, IMINER_NNET_META_DATA(pModel), input, pDescr);
	if(nStatus != IMINER_SUCCESS) {
		nStatus = IMINER_FAIL;
		goto cleanup;
	}
	bInputConverted = 1L;

	nLayers           = IMINER_NNET_GET_LAYERS(pModel);
	bClassification   = IMINER_LONG_VALUE(IMINER_NNET_TYPE(pModel), 0);
	pnvNumericCols	  = IMINER_NNET_NUMERIC(pModel);
	pdmRangeVals      = IMINER_NNET_RANGE(pModel);
	pdmInputWeights   = IMINER_NNET_INPUT(pModel);
	pdmOutputWeights  = IMINER_NNET_OUTPUT_PTR(pModel);
	pvDepLevs		  = IMINER_NNET_DEP_LEVELS_PTR(pModel);

	nConvertedColumns = IMINER_DATASET_NCOLUMNS(&pInput);
	nInputRows        = IMINER_DATASET_NROWS(&pInput);
	/* the last row of each weight matrix is the bias, hence the -1 */
	nInputs           = IMINER_MATRIX_NROWS(pdmInputWeights)-1L;
	nOutputs          = IMINER_MATRIX_NCOLUMNS(pdmOutputWeights);
	nNodesPerLayer    = IMINER_MATRIX_NCOLUMNS(pdmInputWeights);

	/* the dependent variable's range sits in the column after the inputs */
	dDepMinVal	      = IMINER_Aij(pdmRangeVals, 0, nInputs);
	dDepMaxVal	      = IMINER_Aij(pdmRangeVals, 1, nInputs);

	/* create a matrix double to store the input as double */
	nStatus = IMinerDoubleMatrix_create(&A, nInputRows, nConvertedColumns, NULL, NULL, NULL);
	if(nStatus != IMINER_SUCCESS) {
		nStatus = IMINER_FAIL;
		goto cleanup;
	}
	bMatrixCreated = 1L;

	/* Set the input data to the matrix
	TODO: apply coding factor expansion of categorical data */
	for(i=0L; i<nConvertedColumns; ++i) {
		/* nMode itself is not used; the call only verifies the column can be queried */
		nStatus = IMinerDataSet_getColumnMode(&nMode, &pInput, i);
		if(nStatus != IMINER_SUCCESS) {
			nStatus = IMINER_BAD_INPUT_ARGS;
			goto cleanup;
		}

		/* scale the input data: value*dRatio - dLess */
		minVal = IMINER_Aij(pdmRangeVals, 0, i);
		maxVal = IMINER_Aij(pdmRangeVals, 1, i);
		if (IMINER_LONG_VALUE(pnvNumericCols, i)) {
			dRatio = 2/(maxVal-minVal);
			dLess = 1 + dRatio*minVal;
		} else {
			dRatio = 1/(maxVal-minVal);
			dLess = dRatio*minVal;
		}
		
		for(row=0L; row<nInputRows; ++row) {
			dTemp = IMinerDataSet_getNonStringValue(&pInput, i, row);
			IMINER_Aij(&A, row, i) = dTemp*dRatio - dLess;
		}
	}

	/* create the output object: one column per output, plus one for the last level when classifying */
	nOutputColumns = nOutputs;
	if (bClassification) nOutputColumns++;
	pnColumnsModes = (long*) malloc(nOutputColumns*sizeof(long));
	if (pnColumnsModes == NULL && nOutputColumns > 0L) {
		nStatus = IMINER_FAIL;
		goto cleanup;
	}
	for (i=0L; i<nOutputColumns; i++) pnColumnsModes[i] = IMINER_MODE_DOUBLE;
	nStatus = IMinerDataSet_create(pOutput, nInputRows, nOutputColumns, pnColumnsModes);
	if(nStatus != IMINER_SUCCESS) goto cleanup;

	/* build the column names; nNamesAllocated tracks how many strings cleanup must free */
	nNames = (bClassification) ? nOutputs+1 : 1L;
	columnNames = (char**)malloc(nNames*sizeof(char*));
	if (columnNames == NULL) {
		nStatus = IMINER_FAIL;
		goto cleanup;
	}
	for (i=0L; i<nNames; i++) {
		if (bClassification) {
			pr_name = IMINER_STRING_VALUE(pvDepLevs, i);
			/* "Pr(" + name + ")" + terminating NUL = strlen + 5 */
			columnNames[i] = (char*)malloc((strlen(pr_name) + 5)*sizeof(char));
		} else {
			pr_name = NULL;
			columnNames[i] = (char*)malloc(15*sizeof(char));
		}
		if (columnNames[i] == NULL) {
			nStatus = IMINER_FAIL;
			goto cleanup;
		}
		nNamesAllocated++;
		if (bClassification) {
			sprintf(columnNames[i], "Pr(%s)", pr_name);
		} else {
			sprintf(columnNames[i], "PREDICT.fit");
		}
	}
	nStatus = IMinerDataSet_setColumnNamesFromStrings(pOutput, (const char**)columnNames);
	if(nStatus != IMINER_SUCCESS) goto cleanup;

	/* allocate space for hidden node values */
	tempHiddenVals = (double*)malloc(nNodesPerLayer*sizeof(double));
	hiddenVals = (double*)malloc(nNodesPerLayer*sizeof(double));
	outputStorage = (double*)malloc(nOutputs*sizeof(double));
	if ((nNodesPerLayer > 0L && (tempHiddenVals == NULL || hiddenVals == NULL)) ||
		(nOutputs > 0L && outputStorage == NULL)) {
		nStatus = IMINER_FAIL;
		goto cleanup;
	}

	/* for each row (object), run the forward pass */
	for (row=0L; row<nInputRows; row++) {
		/* input phase: weighted sum of the scaled inputs plus bias, through the logistic */
		for (node=0L; node<nNodesPerLayer; node++) {
			dTemp = 0.0;
			for (i=0L; i<nInputs; i++) {
				dTemp += IMINER_Aij(pdmInputWeights, i, node) * IMINER_Aij(&A, row, i);
			}
			dTemp += IMINER_Aij(pdmInputWeights, nInputs, node);

			hiddenVals[node] = 1.0 / (1.0 + exp(-dTemp));
		}
		
		/* hidden layer phase: each matrix maps the current activations to the next layer */
		for (layer=0L; layer<nLayers-1; layer++) {
			pdmHiddenWeights = IMINER_NNET_HIDDEN(pModel, layer);
			for (node=0L; node<nNodesPerLayer; node++) {
				dTemp = 0.0;
				for (i=0L; i<nNodesPerLayer; i++) {
					dTemp += IMINER_Aij(pdmHiddenWeights, i, node) * hiddenVals[i];
				}
				dTemp += IMINER_Aij(pdmHiddenWeights, nNodesPerLayer, node);

				tempHiddenVals[node] = 1.0 / (1.0 + exp(-dTemp));
			}

			/* computed into a scratch buffer so every node sees the previous layer's values */
			for (node=0L; node<nNodesPerLayer; node++) hiddenVals[node] = tempHiddenVals[node];
		}

		/* raw outputs and the normalisation constant 1 + sum(exp(clamped output)) */
		normalize = 1.0;
		for (output=0L; output<nOutputs; output++) {
			dTemp = 0.0;
			if (nLayers > 0) {
				for (i=0L; i<nNodesPerLayer; i++) {
					dTemp += IMINER_Aij(pdmOutputWeights, i, output) * hiddenVals[i];
				}
				dTemp += IMINER_Aij(pdmOutputWeights, nNodesPerLayer, output);
			} else {
				/* no layers: the input weights are applied directly to the scaled inputs */
				for (i=0L; i<nInputs; i++) {
					dTemp += IMINER_Aij(pdmInputWeights, i, output) * IMINER_Aij(&A, row, i);
				}
				dTemp += IMINER_Aij(pdmInputWeights, nInputs, output);
			}
			outputStorage[output] = dTemp;
			normalize += exp(imcrc_max(imcrc_min(dTemp, 15.0), -15.0));
		}

		/* output phase */
		dLastLevel = 1.0;
		for (output=0L; output<nOutputs; output++) {
			dTemp = outputStorage[output];
			if (bClassification) {
				dTemp = exp(imcrc_max(imcrc_min(dTemp, 15.0), -15.0)) / normalize;
				dLastLevel -= dTemp;
			} else {
				dTemp = 1.0 / (1.0 + exp(-dTemp));
				dTemp = dTemp*(dDepMaxVal-dDepMinVal) + dDepMinVal;
			}
			IMINER_DOUBLE_VALUE(IMINER_DATASET_COLUMN_PTR(pOutput, output), row) = dTemp;
		}
		/* the remaining probability mass goes to the extra (last) level column */
		if (bClassification && nOutputs > 0L) {
			IMINER_DOUBLE_VALUE(IMINER_DATASET_COLUMN_PTR(pOutput, nOutputs), row) = dLastLevel;
		}
	}

	nStatus = IMINER_SUCCESS;

cleanup:
	/* single exit: release whatever was acquired before the jump */
	if (columnNames != NULL) {
		for (i=0L; i<nNamesAllocated; i++) free(columnNames[i]);
		free(columnNames);
	}
	free(hiddenVals);
	free(outputStorage);
	free(pnColumnsModes);
	free(tempHiddenVals);

	if (bInputConverted) IMinerObject_destroy(&pInput);
	if (bMatrixCreated) IMinerObject_destroy(&A);

	return nStatus;
}

