;;;; Nearest neighbor program.  Designed for any combination of numerical
;;;; and discrete attributes.  Does not use a k-d tree, just a simple list.

#|
How to use this program:

(load "...-data")
(init-nearest-nbr)
(train-nearest-nbr)			;not really necessary
(test-nearest-nbr ...)
|#

(defvar *nearest-nbr-data*)

;;; User routines.

(defun init-nearest-nbr ()
  (convert-data-to-nnbr-form)
  'done)

(defun train-nearest-nbr ()		;provided solely to make user
  'done)				; interface consistent

(defun test-nearest-nbr (input &optional (nbr-neighbors 1))
  (when (check-att-vector input)
    (let ((nearest (nearest-n (normalize-input input) nbr-neighbors)))
      (show-nearest nearest)
      (format t "~%Using inverse distance squared weighting:")
      (prettify-output
       (collect-and-tally-output nearest)))))

;;; Auxiliaries

(defun convert-data-to-nnbr-form ()
  (let* ((inputs (mapcar #'first *training-data*))
	 (outputs (mapcar #'second *training-data*))
	 (normalized-inputs (mapcar #'normalize-input inputs)))
    (setq *nearest-nbr-data*
	  (mapcar #'list normalized-inputs outputs inputs))))

(defun nearest-n (x n)
  ;;; Returns list of elements of form (dist-sq norm-in out in) for n nearest
  ;;; points to x, with these exceptions: If there is one or more exact
  ;;; matches, only these are included, else if there is a tie for the nth
  ;;; nearest, all these are included.
  (let* ((sorted-data
	   (sort
	     (mapcar #'(lambda (triple)
			 (cons (distance-sq x (first triple)) triple))
		     (copy-list *nearest-nbr-data*))
	     #'< :key #'first))
	 (nth-lowest-d-sq (first (nth (1- n) sorted-data))))
    (cond ((zerop (first (first sorted-data))) ;exact match
	   (remove-if #'plusp sorted-data :key #'first))
	  ((numberp nth-lowest-d-sq)
	   (remove-if #'(lambda (x) (> x nth-lowest-d-sq))
		      sorted-data :key #'first))
	  (t sorted-data)
	  )))

(defun normalize-input (input)
  (mapcar #'(lambda (value-list value)
	      (if (num-range-spec-p value-list)
		  (scale-to-0-1 value value-list)
		  value))
	  *input-ranges*
	  input))

(defun distance-sq (v1 v2)
  (reduce #'+
	  (mapcar #'(lambda (x1 x2)
		      (cond ((and (numberp x1) (numberp x2))
			     (expt (- x1 x2) 2))
			    ((eql x1 x2) 0)
			    (t 1)))
		  v1 v2)))

(defun collect-and-tally-output (nearest-data)
  (let* ((sq-dists (mapcar #'first nearest-data))
	 (weights (convert-d-sq-to-wts sq-dists))
	 (outputs (mapcar #'third nearest-data))
	 (attribute-vals (transpose-list-of-lists outputs))
	 (val-score-pairs-list (mapcar
				 #'(lambda (vals)
				     (mapcar #'list vals weights))
				 attribute-vals)))
    (mapcar #'tally-and-sort-scores val-score-pairs-list)
    ))

(defun convert-d-sq-to-wts (sq-dists)
  (if (zerop (first sq-dists)) ;assumes any number = 0 => all are 0
      (let ((normalization (length sq-dists)))
	(make-list normalization :initial-element (/ (float normalization))))
      (let* ((unnormalized-weights (mapcar #'/ sq-dists))
	     (normalization (float (reduce #'+ unnormalized-weights))))
	(mapcar #'(lambda (x) (/ x normalization))
		unnormalized-weights))
      ))

(defun transpose-list-of-lists (l)
  (unless (null (first l))
	  (cons (mapcar #'first l)
		(transpose-list-of-lists
		  (mapcar #'rest l)))))

(defun show-nearest (nearest)
  ;;; Input is list of elements of form (dist-sq in out).
  (let* ((dist-sq-list (mapcar #'first nearest))
	 (input-list (mapcar #'fourth nearest))
	 (output-list (mapcar #'third nearest))
	 (io-list (mapcar #'list input-list output-list)))
    (format t "~%Neighbor~p used [and normalized distance squared]:"
	    (length nearest))
    (mapc #'(lambda (io d-sq) (format t "~%~a [~6,4f]" io d-sq))
	  io-list
	  dist-sq-list)
    (format t "~%")
    ))
