/*  Copyright (c) 2015-2016 Drew Schmidt
    All rights reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:

    1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

    2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

// Functions for computing cosine similarity on sparse data inputs
// TODO: covariance and (pearson) correlation

#include <math.h>
#include <stdlib.h>

#include "coop.h"
#include "utils/copy.h"
#include "utils/inverse.h"
#include "utils/fill.h"

#define TMP_VEC_SIZE 1024


// ---------------------------------------------
//  Static utils
// ---------------------------------------------

// NaN-out a row/column of cos matrix for numerical compatibility with dense methods
// Mark index j of the n-by-n array cos as undefined by writing NAN to
//   cos[i + n*j] for i = j, ..., n-1   and
//   cos[j + n*i] for i = 0, ..., j-1.
// Read as column-major storage (assumption, matching the other indexing in
// this file), that is column j from the diagonal down plus row j left of the
// diagonal.
static inline void set2nan(const int j, const int n, double *restrict cos)
{
  // entries (row, j) with row >= j
  int row = j;
  while (row < n)
  {
    cos[row + n*j] = NAN;
    row++;
  }
  
  // entries (j, col) with col < j
  int col = 0;
  while (col < j)
  {
    cos[j + n*col] = NAN;
    col++;
  }
}



// Sum of squares of a[vecstart], ..., a[vecend] (both ends inclusive),
// accumulated in increasing index order. Returns 0.0 when vecend < vecstart.
static inline double sparsedot_self(const int vecstart, const int vecend, const double * const a)
{
  double sumsq = 0.0;
  int pos = vecstart;
  
  while (pos <= vecend)
  {
    const double val = a[pos];
    sumsq += val*val;
    pos++;
  }
  
  return sumsq;
}



// get the first and last indices in the COO for column i
// Locate the run of entries in cols that equal ind, scanning from *col.
//   *vecstart <- position where the scan began (the incoming *col)
//   *col      <- first position whose entry is not ind (or len if exhausted)
//   *vecend   <- last position of the run, i.e. the new *col - 1
// When no entry matches at the starting position, *vecend == *vecstart - 1,
// which callers can use to detect an empty column.
static inline void get_startend(const int len, const int ind, int *col, int *vecstart, int *vecend, const int *cols)
{
  int pos = *col;
  *vecstart = pos;
  
  for (; pos < len; pos++)
  {
    if (cols[pos] != ind)
      break;
  }
  
  *col = pos;
  *vecend = pos - 1;
}



// Copy entries vecstart..vecend (both inclusive) of a/rows into the scratch
// buffers *b / *brows, growing both buffers first when they are too small.
//
//   tmplen            out: set to vecend - vecstart, i.e. the LAST valid index
//                     of the copied data (*tmplen + 1 elements are copied).
//   current_tmp_size  in/out: capacity, in elements, of *b and *brows.
//   b, brows          in/out: buffers obtained from malloc/realloc; they may
//                     be moved by this call.
//   a, rows           source value / row-index arrays.
//
// Returns 0 on success. On allocation failure both buffers are freed, *b and
// *brows are set to NULL, *current_tmp_size is set to 0, and -1 is returned.
static inline int get_array(int *tmplen, int *current_tmp_size,
  const int vecstart, const int vecend,
  double **restrict b, int **restrict brows,
  const double *const restrict a, const int *restrict rows)
{
  *tmplen = vecend - vecstart;
  
  // Number of elements the copy loop below writes. The capacity check must use
  // this count rather than *tmplen: comparing the last index against the
  // capacity lets a vector of exactly capacity+1 elements overflow by one.
  const int needed = *tmplen + 1;
  
  if (needed > *current_tmp_size)
  {
    void *realloc_ptr;
    
    realloc_ptr = realloc(*b, (size_t)needed * sizeof(**b));
    if (realloc_ptr == NULL)
      goto fail;
    *b = realloc_ptr;
    
    realloc_ptr = realloc(*brows, (size_t)needed * sizeof(**brows));
    if (realloc_ptr == NULL)
      goto fail;
    *brows = realloc_ptr;
    
    // only record the new capacity once both buffers really have it
    *current_tmp_size = needed;
  }
  
  for (int k=0; k<=*tmplen; k++)
  {
    (*b)[k] = a[k + vecstart];
    (*brows)[k] = rows[k + vecstart];
  }
  
  return 0;
  
fail:
  // realloc leaves the old block valid on failure, so whichever pointer is
  // currently stored in *b / *brows is still owned here and must be released.
  free(*b);
  free(*brows);
  *b = NULL;
  *brows = NULL;
  *current_tmp_size = 0;
  return -1;
}



// ---------------------------------------------
//  Cosine
// ---------------------------------------------

/**
 * @brief
 * Compute the cosine similarity matrix of a sparse, COO-stored
 * matrix.
 *
 * @details
 * The implementation assumes the data is sorted by column index,
 * i.e. the COO is "column-major". The inner merge loop additionally
 * walks the row indices of two columns in lock-step, advancing each
 * only forward, so the row indices within a column are expected to
 * be in increasing order as well (NOTE(review): confirm that callers
 * guarantee this).
 *
 * For every pair of columns j < i the entry cos[i + n*j] is computed;
 * the remaining entries are produced by the calls to diag2one() and
 * symmetrize() at the end. A column with no stored entries has its
 * row/column in cos set to NaN via set2nan().
 *
 * Note that if the number of rows times the number of columns of
 * the sparse matrix is equal to len, then your matrix is actually
 * dense, but stored in a stupid way.
 *
 * @param inv
 * If true, inv_sym_chol(n, cos) is called on the result before
 * symmetrize() (presumably an in-place inverse via a Cholesky
 * factorization -- confirm against utils/inverse.h).
 * @param index
 * The base of the indices stored in rows/cols: 0 for 0-based
 * indexing, 1 for 1-based indexing.
 * @param n
 * The total number of columns of sparsely-stored input matrix x,
 * i.e., the number of columns of the matrix if it were densely
 * stored.
 * @param len
 * The length of the a/rows/cols vectors.
 * @param a
 * The data for the input matrix, in COO (row, column, value) format.
 * @param rows/cols
 * The row/column index vectors.
 * @param cos
 * The output nxn matrix.
 *
 * @return
 * The function returns -1 if needed memory cannot be allocated, and
 * 0 otherwise. When inv is true, the result of inv_sym_chol() is passed
 * through CHECKRET, which presumably returns early with that value on
 * failure (macro defined elsewhere -- confirm).
*/
int coop_cosine_sparse_coo(const bool inv, const int index, const int n, const int len,
  const double * const restrict a, const int *restrict rows, const int *restrict cols,
  double *restrict cos)
{
  // len_colj is the LAST valid index of the cached column (element count - 1),
  // as set by get_array; hence the "<= len_colj" loop bounds below.
  int len_colj;
  int vec1start, vec2start, vec2end;
  // position in the COO arrays where the scan for the next column j begins
  int vec1end = 0;
  
  // scratch copies of the values / row indices of column j; grown on demand
  // by get_array
  int current_tmp_size = TMP_VEC_SIZE;
  double *a_colj = malloc(current_tmp_size * sizeof(*a_colj));
  int *rows_colj = malloc(current_tmp_size * sizeof(*rows_colj));
  if (a_colj == NULL || rows_colj == NULL)
  {
    FREE(a_colj);
    FREE(rows_colj);
    return -1;
  }
  
  
  set2zero(n*n, cos);
  
  for (int j=0; j<n; j++)
  {
    // col is the running cursor into the COO arrays; it is advanced past
    // column j here and past each later column i in the inner loop
    int col = vec1end;
    get_startend(len, j+index, &col, &vec1start, &vec1end, cols);
    
    // column j has no stored entries: NaN-out its row and column
    if (vec1end < vec1start)
    {
      // get_startend left vec1end == vec1start - 1; restore it so the next
      // iteration starts scanning at the same position
      vec1end++;
      set2nan(j, n, cos);
      continue;
    }
    
    // store j'th column of data/rows for better cache access
    int info = get_array(&len_colj, &current_tmp_size, vec1start, vec1end, &a_colj, &rows_colj, a, rows);
    // on failure get_array has already freed both scratch buffers
    if (info) return info;
    
    // xx / sqrt(xx) == sqrt(xx): after this line xx holds the Euclidean norm
    // of column j (NaN if the sum of squares is 0)
    double xx = sparsedot_self(0, len_colj, a_colj);
    xx /= sqrt(xx);
    
    // i'th column, etc.
    for (int i=j+1; i<n; i++)
    {
      get_startend(len, i+index, &col, &vec2start, &vec2end, cols);
      
      // k indexes the cached column j, l indexes column i inside a/rows;
      // xy accumulates the dot product, yy the sum of squares of column i
      int k = 0;
      int l = vec2start;
      double xy = 0.0;
      double yy = 0.0;
      
      // merge the two row-index lists
      while (k <= len_colj && l <= vec2end)
      {
        // catch up row of colj to row of coli
        while (k <= len_colj && rows_colj[k] < rows[l])
          k++;
        
        // dot products
        while (k <= len_colj && l <= vec2end && rows_colj[k] == rows[l])
        {
          double tmp = a[l];
          xy += a_colj[k] * tmp;
          yy += tmp*tmp;
          k++;
          l++;
        }
        
        // catch up row of coli to row of colj, self dot product along the way
        if (k <= len_colj)
        {
          while (l <= vec2end && rows_colj[k] > rows[l])
          {
            double tmp = a[l];
            yy += tmp*tmp;
            l++;
          }
        }
      }
      
      // column j is exhausted: add the remaining entries of column i to yy
      for (; l<=vec2end; l++)
      {
        double tmp = a[l];
        yy += tmp*tmp;
      }
      
      
      // otherwise the entry keeps whatever set2zero wrote above
      if (fabs(xy) > EPSILON && yy > EPSILON)
        cos[i + n*j] = xy / xx / sqrt(yy);
    }
    
    // step from the last entry of column j to the first entry after it
    vec1end++;
  }
  
  
  free(a_colj);
  free(rows_colj);
  
  diag2one(n, cos);
  if (inv)
  {
    int ret = inv_sym_chol(n, cos);
    CHECKRET(ret);
  }
  symmetrize(n, cos);
  
  return 0;
}
