/*
 Implementation of interfaces for IMinerNaiveBayesModel objects.
*/

#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <memory.h>

#include "IMNaiveBayes.h"


/*
 * Allocate and populate an IMinerNaiveBayesModel.
 *
 * The model is a list whose members are, in order of the names assigned below:
 * metaData, numDepLevels, numIndepLevels, initialProbs, probMatrix and
 * dmDependentLevels.  The probability matrix is created with
 * max(nIndepLevels)*nDepLevels rows and nIndepCols columns.
 *
 * Returns IMINER_BAD_INPUT_ARGS when pModel or nIndepLevels is NULL or when
 * nIndepCols < 1; otherwise the status of the first member creation that
 * fails, or the status of assigning the member names.
 *
 * NOTE(review): when a member creation fails, the members built so far are
 * not released here -- confirm whether callers are expected to destroy pModel
 * after a failed create.
 */
long IMCSRC_STDCALL IMinerNaiveBayesModel_create(
 IMinerObject* pModel,		    /* out: data object */
 IMinerObject* md,			    /* in:  meta data object */
 long nIndepCols,				/* in:  number of indep columns */
 long nDepLevels,				/* in:  number of levels output */
 const long *nIndepLevels,		/* in:  number of independent levels of length (nIndepCols) */
 const double *pInitialProb,	/* in:  initial probability of length (nDepLevels) */
 const double *pMatrix,			/* in:  probability of each indep&dep level combination of length (nIndepCols*max(nIndepLevels)*nDepLevels) */
 const char **pcDepLevels		/* in:  dependent levels if categorical (nDepLevels) */
)
{
	const char *ppszMemberNames[] = {"metaData", "numDepLevels","numIndepLevels", "initialProbs", "probMatrix","dmDependentLevels"};
	long nStatus=IMINER_SUCCESS, maxNumIndepLevel, col;

	/* nIndepLevels is read directly below, so it must not be NULL */
	if(pModel == NULL || nIndepLevels == NULL || nIndepCols < 1L)
		return IMINER_BAD_INPUT_ARGS;

	/* a NaiveBayesModel always has IMINER_NAIVE_BAYES_DEP_LEVS_NUM+1 elements as list */
	nStatus = IMinerList_create(pModel, IMINER_NAIVE_BAYES_DEP_LEVS_NUM+1, IMINER_MODE_MODEL);
	if(nStatus != IMINER_SUCCESS)
		return nStatus;

	/* member: meta data */
	nStatus = IMinerList_clone(IMINER_NAIVE_BAYES_META_DATA(pModel), md);
	if(nStatus != IMINER_SUCCESS)
		return nStatus;

	/* member: numDepLevels */
	nStatus = IMinerVector_create(IMINER_NAIVE_BAYES_DEPENDENT(pModel), 1, IMINER_MODE_LONG, &nDepLevels);
	if(nStatus != IMINER_SUCCESS)
		return nStatus;

	/* member: numIndepLevels */
	nStatus = IMinerVector_create(IMINER_NAIVE_BAYES_INDEPENDENT(pModel), nIndepCols, IMINER_MODE_LONG, nIndepLevels);
	if(nStatus != IMINER_SUCCESS)
		return nStatus;

	/* member: initialProbs */
	nStatus = IMinerVector_create(IMINER_NAIVE_BAYES_INIT_PROBS(pModel), nDepLevels, IMINER_MODE_DOUBLE, pInitialProb);
	if(nStatus != IMINER_SUCCESS)
		return nStatus;

	/* largest level count over all independent columns (nIndepCols >= 1 is guaranteed above) */
	maxNumIndepLevel = nIndepLevels[0];
	for (col=1; col<nIndepCols; col++) {
		if (maxNumIndepLevel<nIndepLevels[col]) maxNumIndepLevel=nIndepLevels[col];
	}

	/* member: prob matrix, one column per independent column */
	nStatus = IMinerDoubleMatrix_create(IMINER_NAIVE_BAYES_PROBABILITY(pModel), maxNumIndepLevel*nDepLevels, nIndepCols, pMatrix, NULL, NULL);
	if(nStatus != IMINER_SUCCESS)
		return nStatus;

	/* member: dependent levels */
	nStatus = IMinerVector_create(IMINER_NAIVE_BAYES_DEP_LEVELS(pModel), nDepLevels, IMINER_MODE_STRING, pcDepLevels);
	if(nStatus != IMINER_SUCCESS)
		return nStatus;

	/* special member: member names */
	nStatus = IMinerList_setNamesFromStrings(pModel, ppszMemberNames);

	/* set the class ID */
	pModel->m_nMode = IMINER_MODE_MODEL;
	return nStatus;
}

/*
 * Check that pModel is a structurally valid Naive Bayes model.
 *
 * Returns 1L when pModel is a valid list whose meta data member is a valid
 * list, whose numDepLevels, numIndepLevels, initialProbs and dependent-level
 * members are valid vectors, whose probability member is a valid double
 * matrix, and whose member-names vector is a string vector of the same
 * length as the model.  Returns 0L otherwise.
 */
long IMCSRC_STDCALL IMinerNaiveBayesModel_isValid(const IMinerObject* pModel)
{
	IMinerObject* pElement;

	if(pModel==NULL  || !IMinerList_isValid(pModel))
		return 0L;

	if (!IMinerList_isValid(IMINER_NAIVE_BAYES_META_DATA(pModel))) return 0L;

	pElement = IMINER_NAIVE_BAYES_DEPENDENT(pModel);
	if(!IMinerVector_isValid(pElement) )
		return 0L;

	pElement = IMINER_NAIVE_BAYES_INDEPENDENT(pModel);
	if(!IMinerVector_isValid(pElement) )
		return 0L;

	pElement = IMINER_NAIVE_BAYES_INIT_PROBS(pModel);
	if(!IMinerVector_isValid(pElement) )
		return 0L;

	pElement = IMINER_NAIVE_BAYES_PROBABILITY(pModel);
	if(!IMinerDoubleMatrix_isValid(pElement) )
		return 0L;

	pElement = IMINER_NAIVE_BAYES_DEP_LEVELS(pModel);
	if(!IMinerVector_isValid(pElement) )
		return 0L;

	/* special member : member names must also be valid and have the same length as this object.
	   Either failure alone makes the model invalid, and the length is only read
	   once the names object is known to be a string vector. */
	pElement = IMINER_LIST_NAMES_PTR(pModel);
	if(pElement == NULL || !IMinerVector_isString(pElement) || pElement->m_nLen != pModel->m_nLen)
		return 0L;

	return 1L;
}

/* Release the model by handing it to the generic object destructor;
   the destructor's status is returned unchanged. */
long IMCSRC_STDCALL IMinerNaiveBayesModel_destroy(IMinerObject* pModel )
{
	long nDestroyStatus;

	nDestroyStatus = IMinerObject_destroy(pModel);
	return nDestroyStatus;
}


/*
 * Print every member of the model to stdout, each preceded by a heading.
 *
 * Returns IMINER_BAD_INPUT_ARGS when the model fails validation (an error
 * message is reported first) or when printing any member fails; printing
 * stops at the first failure.  Returns IMINER_SUCCESS otherwise.
 */
long IMCSRC_STDCALL IMinerNaiveBayesModel_print(const IMinerObject* pModel)
{
	if(!IMinerNaiveBayesModel_isValid(pModel))
	{
		IMiner_error("%s(%d) : ", __FILE__, __LINE__);
		IMiner_error("Invalid pModel\n");
		return IMINER_BAD_INPUT_ARGS;
	}

	/* meta data */
	printf("Meta Data:\n");
	if(IMinerMetaData_print(IMINER_NAIVE_BAYES_META_DATA(pModel)) != IMINER_SUCCESS)
		return IMINER_BAD_INPUT_ARGS;

	/* number of dependent levels */
	printf("Num Dependent Levels:\n");
	if(IMinerVector_print(IMINER_NAIVE_BAYES_DEPENDENT(pModel)) != IMINER_SUCCESS)
		return IMINER_BAD_INPUT_ARGS;

	/* dependent level names */
	printf("Dependent Levels:\n");
	if(IMinerVector_print(IMINER_NAIVE_BAYES_DEP_LEVELS(pModel)) != IMINER_SUCCESS)
		return IMINER_BAD_INPUT_ARGS;

	/* number of levels per independent column */
	printf("Num Independent Levels:\n");
	if(IMinerVector_print(IMINER_NAIVE_BAYES_INDEPENDENT(pModel)) != IMINER_SUCCESS)
		return IMINER_BAD_INPUT_ARGS;

	/* initial probabilities */
	printf("Initial Probabilities:\n");
	if(IMinerVector_print(IMINER_NAIVE_BAYES_INIT_PROBS(pModel)) != IMINER_SUCCESS)
		return IMINER_BAD_INPUT_ARGS;

	/* probability matrix */
	printf("Probability Matrix:\n");
	if(IMinerDoubleMatrix_print(IMINER_NAIVE_BAYES_PROBABILITY(pModel)) != IMINER_SUCCESS)
		return IMINER_BAD_INPUT_ARGS;

	return IMINER_SUCCESS;
}

/*
 * Predict the probability of each dependent level for every input row.
 *
 * The input is first converted with the model's meta data.  pOutput is then
 * created with one row per converted input row and one double column per
 * dependent level, named "Pr(<level>)".  For each row and dependent level the
 * score is the level's initial probability multiplied by the probability
 * matrix entry (levelCode*nDepLevels + depLevel, column) of every input
 * column; the scores of a row are finally divided by their sum.
 *
 * Returns:
 *   IMINER_BAD_INPUT_ARGS  pOutput is NULL, input or pModel is invalid, or the
 *                          model/input dimensions are inconsistent;
 *   IMINER_FAIL            input conversion or a memory allocation failed;
 *   the failing status     when creating or naming the output data set fails;
 *   IMINER_SUCCESS         otherwise.
 *
 * All temporaries (converted input, mode array, name strings) are released on
 * every path.  pOutput is not destroyed here when a later step fails.
 */
long IMCSRC_STDCALL IMinerNaiveBayesModel_predict(
 IMinerObject* pOutput,      /* out: output rectangular data */
 const IMinerObject* input,  /* in: input rectangular data */
 IMinerObject* pDescr,		 /* in: input description (if NULL, description will be
                                    created from input data) */
 const IMinerObject* pModel  /* in: the model */
)
{
	long nStatus=IMINER_SUCCESS, nInputColumns, nInputRows, *pnColumnsModes=NULL;
	long nIndepCols, nDepLevels, row, depLev, i, ilevelNum;
	double outputProb, dTemp, rowTotal;
	IMinerObject pInput, *pmProb, *pvIndepLevels, *pvInitProbs, *pvDepLevs;
	const char *pr_name;
	char **columnNames=NULL;

	if(pOutput==NULL || !IMinerDataSet_isValid(input) || !IMinerNaiveBayesModel_isValid(pModel))
		return IMINER_BAD_INPUT_ARGS;

	/* convert input if needed; from here on pInput must be destroyed before returning */
	nStatus = IMinerMetaData_InputConvert(&pInput, IMINER_NAIVE_BAYES_META_DATA(pModel), input, pDescr);
	if(nStatus != IMINER_SUCCESS) return IMINER_FAIL;

	pmProb				= IMINER_NAIVE_BAYES_PROBABILITY(pModel);
	pvIndepLevels		= IMINER_NAIVE_BAYES_INDEPENDENT(pModel);
	pvInitProbs			= IMINER_NAIVE_BAYES_INIT_PROBS(pModel);
	pvDepLevs			= IMINER_NAIVE_BAYES_DEP_LEVELS(pModel);
	nIndepCols			= IMINER_OBJECT_LENGTH(pvIndepLevels);
	nDepLevels			= IMINER_LONG_VALUE(IMINER_NAIVE_BAYES_DEPENDENT(pModel), 0);

	nInputColumns		= IMINER_DATASET_NCOLUMNS(&pInput);
	nInputRows			= IMINER_DATASET_NROWS(&pInput);

	/* the loops below index the model vectors by dependent level and the
	   probability matrix by input column; refuse shapes that would read
	   past the end of those members */
	if(nDepLevels < 1L
		|| IMINER_OBJECT_LENGTH(pvInitProbs) < nDepLevels
		|| IMINER_OBJECT_LENGTH(pvDepLevs) < nDepLevels
		|| nInputColumns > nIndepCols)
	{
		nStatus = IMINER_BAD_INPUT_ARGS;
		goto cleanup;
	}

	/* create the output object: one double column per dependent level */
	pnColumnsModes = (long*) malloc(nDepLevels*sizeof(long));
	if(pnColumnsModes == NULL)
	{
		nStatus = IMINER_FAIL;
		goto cleanup;
	}
	for(i=0L; i<nDepLevels; ++i) pnColumnsModes[i] = IMINER_MODE_DOUBLE;
	nStatus = IMinerDataSet_create(pOutput, nInputRows, nDepLevels, pnColumnsModes);
	if(nStatus != IMINER_SUCCESS) goto cleanup;

	/* build the column names "Pr(<level>)"; calloc keeps unfilled slots NULL
	   so the cleanup code can free a partially filled array */
	columnNames = (char**)calloc((size_t)nDepLevels, sizeof(char*));
	if(columnNames == NULL)
	{
		nStatus = IMINER_FAIL;
		goto cleanup;
	}
	for (i=0; i<nDepLevels; i++) {
		pr_name = IMINER_STRING_VALUE(pvDepLevs, i);
		if(pr_name == NULL) pr_name = "";
		/* 5 = strlen("Pr(") + strlen(")") + terminating NUL */
		columnNames[i] = (char*)malloc((strlen(pr_name) + 5)*sizeof(char));
		if(columnNames[i] == NULL)
		{
			nStatus = IMINER_FAIL;
			goto cleanup;
		}
		sprintf(columnNames[i], "Pr(%s)", pr_name);
	}
	nStatus = IMinerDataSet_setColumnNamesFromStrings(pOutput, (const char**)columnNames);
	if(nStatus != IMINER_SUCCESS) goto cleanup;

	/* TODO: apply coding factor expansion of categorical data */

	/* for each row (object), find the score of every dependent level */
	for (row=0L; row<nInputRows; row++) {
		rowTotal=0.0;
		for (depLev=0L; depLev<nDepLevels; depLev++) {
			outputProb = IMINER_DOUBLE_VALUE(pvInitProbs, depLev);
			for (i=0L; i<nInputColumns; i++) {
				/* NOTE(review): the level code is used as a matrix row index without
				   a range check -- confirm how missing/unseen levels are encoded */
				ilevelNum = (long)IMINER_FACTOR_DATA_VALUE(IMINER_DATASET_COLUMN_PTR(&pInput, i), row);
				dTemp = IMINER_Aij(pmProb, ilevelNum*nDepLevels+depLev, i);
				outputProb *= dTemp;
			}
			IMINER_DOUBLE_VALUE(IMINER_DATASET_COLUMN_PTR(pOutput, depLev), row) = outputProb;
			rowTotal += outputProb;
		}

		/* normalize so the scores of the row sum to one
		   NOTE(review): a row whose scores are all zero divides by zero here */
		for (depLev=0L; depLev<nDepLevels; depLev++) {
			dTemp = IMINER_DOUBLE_VALUE(IMINER_DATASET_COLUMN_PTR(pOutput, depLev), row);
			dTemp /= rowTotal;

			IMINER_DOUBLE_VALUE(IMINER_DATASET_COLUMN_PTR(pOutput, depLev), row) = dTemp;
		}
	}

	nStatus = IMINER_SUCCESS;

cleanup:
	if(columnNames != NULL) {
		for (i=0; i<nDepLevels; i++) free(columnNames[i]);
		free(columnNames);
	}
	free(pnColumnsModes);
	IMinerObject_destroy(&pInput);

	return nStatus;
}

