/*
 imkmeans.c implementation of interfaces for IMinerKMeansClusterModel objects.
*/

#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <memory.h>

#include "IMKMeansCluster.h"

/*
 Build a k-means cluster model object in *pModel.

 The model is an IMiner list of IMINER_KMEANS_SIZE_MEMBER+1 elements whose
 members are named, in order, "metaData", "dmCenter", "dvScale" and "lvSize":
   - metaData : filled by IMinerList_clone() from md
   - dmCenter : nClusters x nColumns double matrix created from pdCenters
   - dvScale  : double vector of length nColumns created from pdScale
   - lvSize   : long vector of length nClusters created from pnSize

 Returns IMINER_BAD_INPUT_ARGS when pModel is NULL or either dimension is
 smaller than 1; otherwise the status of the first failing construction step,
 or the status of IMinerList_setNamesFromStrings() when all members were built.

 NOTE(review): when a step after IMinerList_create() fails, the function
 returns immediately and leaves *pModel partially built; whether the caller
 is expected to destroy it in that case is not visible here - confirm.
*/
long IMCSRC_STDCALL IMinerKMeansClusterModel_create(IMinerObject* pModel,  /* out: data object */
 IMinerObject* md,			   /* in:  meta data object */
 long nClusters,               /* in:  number of clusters (rows of the center matrix) */
 long nColumns,                /* in:  number of columns of the center matrix */ 
 const double* pdCenters,      /* in:  data as double of length (nClusters*nColumns) */
 const double* pdScale,        /* in:  scaling factors of length nColumns */
 const long*   pnSize          /* in:  cluster sizes of length nClusters */
)
{
	char* ppszMemberNames[] = {"metaData", "dmCenter", "dvScale", "lvSize"};
	long nStatus;

	/* reject a missing output object or an empty model */
	if(pModel == NULL)
		return IMINER_BAD_INPUT_ARGS;
	if(nClusters < 1L || nColumns < 1L)
		return IMINER_BAD_INPUT_ARGS;

	/* the list container itself: always IMINER_KMEANS_SIZE_MEMBER+1 elements */
	nStatus = IMinerList_create(pModel, IMINER_KMEANS_SIZE_MEMBER+1, IMINER_MODE_MODEL);

	/* each member is only built when everything before it succeeded */
	if(nStatus == IMINER_SUCCESS)	/* metaData */
		nStatus = IMinerList_clone(IMINER_KMEANS_META_DATA(pModel), md);

	if(nStatus == IMINER_SUCCESS)	/* dmCenter */
		nStatus = IMinerDoubleMatrix_create(IMINER_KMEANS_CENTER_PTR(pModel), nClusters, nColumns, pdCenters, NULL, NULL);

	if(nStatus == IMINER_SUCCESS)	/* dvScale */
		nStatus = IMinerVector_create(IMINER_KMEANS_SCALE_PTR(pModel), nColumns, IMINER_MODE_DOUBLE, pdScale);

	if(nStatus == IMINER_SUCCESS)	/* lvSize */
		nStatus = IMinerVector_create(IMINER_KMEANS_SIZE_PTR(pModel), nClusters, IMINER_MODE_LONG, pnSize);

	/* first failure (if any) is reported as is, nothing else is touched */
	if(nStatus != IMINER_SUCCESS)
		return nStatus;

	/* special member: the member names; its status is the function result */
	nStatus = IMinerList_setNamesFromStrings(pModel, (const char**)ppszMemberNames);

	/* tag the object with the model class ID, whatever the naming status was */
	pModel->m_nMode = IMINER_MODE_MODEL;
	return nStatus;
}

/*
 Check that pModel is a structurally consistent k-means cluster model.

 Returns 1L when all of the following hold, 0L otherwise:
   - pModel is non-NULL, is a valid list and has at least
     IMINER_KMEANS_SIZE_MEMBER+1 elements
   - the meta data member is a valid list
   - the center member is a valid double matrix
   - the scale member is a double vector
   - the size member is a long vector
   - the member-names vector is a string vector of the same length as the model
   - length(scale) equals the number of columns of the center matrix
   - length(size) equals the number of rows of the center matrix
*/
long IMCSRC_STDCALL IMinerKMeansClusterModel_isValid(const IMinerObject* pModel)
{
	long nRows, nColumns;
	IMinerObject* pElement;

	if(pModel == NULL || !IMinerList_isValid(pModel) || IMINER_OBJECT_LENGTH(pModel) < IMINER_KMEANS_SIZE_MEMBER+1)
		return 0L;

	/* member: meta data */
	if (!IMinerList_isValid(IMINER_KMEANS_META_DATA(pModel))) return 0L;

	/* member: cluster centers */
	pElement = IMINER_KMEANS_CENTER_PTR(pModel);
	if(!IMinerDoubleMatrix_isValid(pElement))
		return 0L;
	/* member: column scaling factors */
	pElement = IMINER_KMEANS_SCALE_PTR(pModel);
	if(!IMinerVector_isDouble(pElement) )
		return 0L;
	/* member: cluster sizes */
	pElement = IMINER_KMEANS_SIZE_PTR(pModel);
	if(!IMinerVector_isLong(pElement))
		return 0L;
	/* special member: member names must also be valid and have the same length
	   as this object. The model is rejected when EITHER condition fails; the
	   length is only read once the element is known to be a string vector. */
	pElement = IMINER_LIST_NAMES_PTR(pModel);
	if(!IMinerVector_isString(pElement) || pElement->m_nLen != pModel->m_nLen)
		return 0L;

	/* number of rows and columns must match the length of size and scale respectively */
	nColumns = IMINER_MATRIX_NCOLUMNS(IMINER_KMEANS_CENTER_PTR(pModel));
	nRows    = IMINER_MATRIX_NROWS(IMINER_KMEANS_CENTER_PTR(pModel));

	if(IMINER_OBJECT_LENGTH(IMINER_KMEANS_SCALE_PTR(pModel)) != nColumns)
		return 0L;
	if(IMINER_OBJECT_LENGTH(IMINER_KMEANS_SIZE_PTR(pModel)) != nRows)
		return 0L;
	return 1L;
}

/*
 Release a k-means cluster model.
 Delegates to IMinerObject_destroy() and hands its status back unchanged.
*/
long IMCSRC_STDCALL IMinerKMeansClusterModel_destroy(IMinerObject* pModel )
{
	long nStatus = IMinerObject_destroy(pModel);

	return nStatus;
}


/*
 Write a k-means cluster model to stdout, one titled section per member:
 meta data, cluster centers, column scaling factors and cluster sizes.

 Returns IMINER_SUCCESS when every section was printed. Returns
 IMINER_BAD_INPUT_ARGS when the model fails IMinerKMeansClusterModel_isValid()
 (an error is also reported through IMiner_error) or when any of the member
 print routines does not return IMINER_SUCCESS; sections printed before the
 failing one have already been emitted.
*/
long IMCSRC_STDCALL IMinerKMeansClusterModel_print(const IMinerObject* pModel)
{
	/* refuse to walk an object that is not a well-formed model */
	if(!IMinerKMeansClusterModel_isValid(pModel))
	{
		IMiner_error("%s(%d) : ", __FILE__, __LINE__);
		IMiner_error("Invalid pModel\n");
		return IMINER_BAD_INPUT_ARGS;
	}

	/* section 1: meta data */
	printf("Meta Data:\n");
	if(IMinerMetaData_print(IMINER_KMEANS_META_DATA(pModel)) != IMINER_SUCCESS)
		return IMINER_BAD_INPUT_ARGS;

	/* section 2: center matrix */
	printf("Cluster Centers:\n");
	if(IMinerDoubleMatrix_print(IMINER_KMEANS_CENTER_PTR(pModel)) != IMINER_SUCCESS)
		return IMINER_BAD_INPUT_ARGS;

	/* section 3: per-column scaling factors */
	printf("Column Scaling Factor:\n");
	if(IMinerVector_print(IMINER_KMEANS_SCALE_PTR(pModel)) != IMINER_SUCCESS)
		return IMINER_BAD_INPUT_ARGS;

	/* section 4: per-cluster sizes */
	printf("Cluster sizes:\n");
	if(IMinerVector_print(IMINER_KMEANS_SIZE_PTR(pModel)) != IMINER_SUCCESS)
		return IMINER_BAD_INPUT_ARGS;

	return IMINER_SUCCESS;
}

/*
 Predict cluster memberships.

 The input is first passed through IMinerMetaData_InputConvert() together
 with the model's meta data, then copied into a temporary double matrix.
 For every input row, each value is divided by the model's scaling factor for
 its column and the squared Euclidean distance to every cluster center is
 computed; the row is assigned to the closest center (the lowest-numbered
 center wins ties).

 On success pOutput is a data set with as many rows as the converted input
 and a single IMINER_MODE_INT column named "PREDICT.membership" holding
 1-based cluster ids.

 Returns:
   IMINER_SUCCESS         on success
   IMINER_BAD_INPUT_ARGS  pOutput is NULL, input or pModel is invalid, the
                          converted input has more columns than the center
                          matrix, or a column mode cannot be queried
   IMINER_FAIL            input conversion or creation of the work matrix failed
   (other status)         as returned by IMinerDataSet_create() or
                          IMinerDataSet_setColumnNamesFromStrings()

 The temporaries created here (converted input, work matrix) are released on
 every path once they exist. pOutput is never destroyed by this function.
 NOTE(review): if naming the output column fails, pOutput has already been
 created - confirm that callers destroy it on a non-success status.
*/
long IMCSRC_STDCALL IMinerKMeansClusterModel_predict(IMinerObject* pOutput,     /* out: output rectangular data */
 const IMinerObject* input,  /* in: input rectangular data */													 
 IMinerObject* pDescr,		 /* in: input description (if NULL, description will be 
                                    created from input data) */
 const IMinerObject* pModel  /* in: the model */
)
{
	long nStatus = IMINER_SUCCESS;
	long i, j, k, nRows, nInputColumns, nClusters, iMyCluster, nMode;
	/* the output has exactly one column, so its mode and name live on the
	   stack instead of in unchecked heap allocations */
	long nMembershipMode = IMINER_MODE_INT;
	const char* ppszOutputNames[1] = {"PREDICT.membership"};
	double d, dDistanceToKthCenterSquares, dShortestDistanceSquares;
	IMinerObject pInput, *pdmCenter, *pdvScale, *pvCol=NULL, A;

	if(pOutput==NULL || !IMinerDataSet_isValid(input) || !IMinerKMeansClusterModel_isValid(pModel))	
		return IMINER_BAD_INPUT_ARGS;

	/* convert input if needed; nothing to release yet if this fails */
	nStatus = IMinerMetaData_InputConvert(&pInput, IMINER_KMEANS_META_DATA(pModel), input, pDescr);
	if(nStatus != IMINER_SUCCESS) return IMINER_FAIL;

	/* from here on pInput must be destroyed on every exit path */
	nInputColumns = IMINER_DATASET_NCOLUMNS(&pInput);
	nRows         = IMINER_DATASET_NROWS(&pInput);
	nClusters     = IMINER_MATRIX_NROWS(IMINER_KMEANS_CENTER_PTR(pModel));

	pdmCenter = IMINER_KMEANS_CENTER_PTR(pModel);
	pdvScale  = IMINER_KMEANS_SCALE_PTR(pModel);

	/* the centers must provide a coordinate for every input column */
	if(IMINER_MATRIX_NCOLUMNS(pdmCenter) < nInputColumns )
	{
		nStatus = IMINER_BAD_INPUT_ARGS;
		goto cleanup_input;
	}

	/* create a matrix double to store the input as double */
	nStatus = IMinerDoubleMatrix_create(&A, nRows, nInputColumns, NULL, NULL, NULL);
	if(nStatus != IMINER_SUCCESS)
	{
		nStatus = IMINER_FAIL;
		goto cleanup_input;
	}

	/* Set the input data to the matrix
	TODO: apply coding factor expansion of categorical data */
	for(j=0L; j< nInputColumns; ++j)
	{
		/* the mode itself is not used; the call only has to succeed */
		nStatus = IMinerDataSet_getColumnMode(&nMode, &pInput, j);
		if(nStatus != IMINER_SUCCESS)
		{
			nStatus = IMINER_BAD_INPUT_ARGS;
			goto cleanup_matrix;
		}

		for(i=0L; i< nRows; ++i) {
			IMINER_Aij(&A, i, j) = IMinerDataSet_getNonStringValue(&pInput, j, i);
		}
	}

	/* create the output object: one IMINER_MODE_INT (type long) column of cluster ids */
	nStatus = IMinerDataSet_create(pOutput, nRows, 1L, &nMembershipMode);
	if(nStatus != IMINER_SUCCESS) goto cleanup_matrix;
	nStatus = IMinerDataSet_setColumnNamesFromStrings(pOutput, ppszOutputNames);
	if(nStatus != IMINER_SUCCESS) goto cleanup_matrix;

	/* the membership column is the same for every row: look it up once */
	pvCol = IMINER_DATASET_VECTOR_PTR(pOutput, 0L);

	/* for each row (object), find the closest cluster */
	for(i=0L; i<nRows; ++i)
	{		
		iMyCluster=0L;
		dShortestDistanceSquares=0.0;
		for(k=0L; k<nClusters; ++k)
		{
			dDistanceToKthCenterSquares = 0.0;
			for(j=0L; j<nInputColumns; ++j)
			{
				/* scale the input value, then measure against the kth center */
				d =  IMINER_Aij(&A, i,j)/IMINER_DOUBLE_PTR(pdvScale)[j];
				d -= IMINER_Aij(pdmCenter, k,j);
				dDistanceToKthCenterSquares += d*d;
			}
			/* k==0 seeds the running minimum; strict '<' keeps the first center on ties */
			if(dDistanceToKthCenterSquares < dShortestDistanceSquares || k==0)
			{
				dShortestDistanceSquares = dDistanceToKthCenterSquares;
				iMyCluster = k;
			}
		}
		/* cluster ids are reported 1-based */
		IMINER_LONG_VALUE(pvCol, i) = iMyCluster+1L;
	}

	nStatus = IMINER_SUCCESS;

	/* release temporaries in reverse order of creation */
cleanup_matrix:
	IMinerObject_destroy(&A);
cleanup_input:
	IMinerObject_destroy(&pInput);

	return nStatus;
}

