;;
;; FILE: test.lsp
;; AUTH: Michael John Radwin
;;
;; DATE: Wed Dec 11 18:59:17 EST 1996
;; $Id: test.lsp,v 1.5 1997/03/27 20:49:18 mjr Exp mjr $
;;

;; Load the learning code that lives next to this file.  *load-pathname*
;; is NIL when this form is evaluated outside of LOAD (e.g. from a REPL),
;; so fall back to *default-pathname-defaults* in that case.
(load (merge-pathnames (pathname "learning.lsp")
                       (or *load-pathname* *default-pathname-defaults*)))

;(defparameter *data-file*
;  (pathname "~mjr/course/cs297/data/breast-cancer-wisconsin.data"))
(defparameter *data-file*
  (pathname "~mjr/course/cs297/data/robot.data")
  "Data file used by the *-main-test drivers when their PATHNAME argument is NIL.")
;(defparameter *data-file*
;  (pathname "~mjr/course/cs297/data/bupa.data"))

(defparameter *data-files*
  '(("~mjr/course/cs297/data/bupa.data" nil)
    ("~mjr/course/cs297/data/robot.data" t)
    ("~mjr/course/cs297/data/breast-cancer-wisconsin.data" nil)
    ("~mjr/course/cs297/data/waveform.21.data" nil))
  "List of (PATH NUMERIC?) pairs consumed by MASSIVE-TEST-RUN / TEST-TO-FILE.
The first element is handed on as the data file pathname, the second as the
NUMERIC? flag of the test drivers.")

;; time() returns the value of time in seconds since 00:00:00
;; UTC, January 1, 1970.
;; (get-universal-time) returns time in seconds since 00:00:00
;; UTC, January 1, 1900.

(defun compute-error (nn-point observed-class numeric?)
  "Return the error of OBSERVED-CLASS against the class stored in NN-POINT.

When NUMERIC? is true the result is the relative error
  |(observed - actual) / actual|
where actual is (nn-point-class NN-POINT); there is no guard against an
actual value of zero.  Otherwise the result is :INCONCLUSIVE when
OBSERVED-CLASS is :INCONCLUSIVE, T when the two classes are not EQ, and
NIL when they are EQ."
  (cond (numeric?
         (abs (/ (- observed-class (nn-point-class nn-point))
                 (nn-point-class nn-point))))
        ((eq observed-class :inconclusive)
         :inconclusive)
        (t
         (not (eq observed-class (nn-point-class nn-point))))))

(defun total-error (error-list error-count numeric? len)
  "Return the overall error of one test run as a float.

When NUMERIC? is true this is MEAN applied to the elements of ERROR-LIST
(MEAN is defined outside this file); otherwise it is ERROR-COUNT / LEN.
LEN must be non-zero in the non-numeric case."
  (coerce (if numeric?
              (apply #'mean error-list)
              (/ error-count len))
          'float))

;; nondestructively tests test-data from train-data
(defun test-k-nearest (train-data test-data numeric? stream)
  "Run the k = 7, 5, 3, 1 nearest-neighbor test on TEST-DATA and log to STREAM.

For each sample in TEST-DATA, K-1357-NEAREST-NEIGHBORS is called against
TRAIN-DATA; its four return values are treated as the predictions for
k = 7, 5, 3 and 1 (in that order) and scored with COMPUTE-ERROR.
Per-sample errors and a per-k summary are written to STREAM.  Returns NIL."
  (format stream
          "~&;; k-nearest-neighbor test started at ~a (~a total cases)~%"
          (get-universal-time) (length test-data))
  ;; Slot 0..3 of each vector corresponds to k = 7, 5, 3, 1.
  (let ((error-vector (vector nil nil nil nil))       ; numeric errors, per k
        (error-count-vector (vector 0 0 0 0))         ; misclassifications, per k
        (inconclusive-vector (vector 0 0 0 0))        ; :inconclusive results, per k
        (len (length test-data))
        (i 1))
    (dolist (sample test-data)
      ;; some debugging info
      (format stream "~&;; [~a/~a] " i len)
      (incf i)
      (multiple-value-bind (mean7 mean5 mean3 mean1)
          (k-1357-nearest-neighbors sample train-data numeric?)
        (loop for mean in (list mean7 mean5 mean3 mean1)
              for idx from 0
              do (let ((err (compute-error sample mean numeric?)))
                   (cond (numeric?
                          (push err (aref error-vector idx)))
                         ((eq err :inconclusive)
                          (incf (aref inconclusive-vector idx)))
                         (err
                          (incf (aref error-count-vector idx))))
                   (format stream " error[~a]: ~a"
                           idx
                           (if numeric? (coerce err 'float) err))))))
    (format stream "~&;; test finished at ~a~%" (get-universal-time))
    (loop for k in '(7 5 3 1)
          for idx from 0
          do (format stream "~&;; k = ~a total error: ~a"
                     k
                     (total-error (aref error-vector idx)
                                  (aref error-count-vector idx)
                                  numeric? len))
             ;; Inconclusive results are only ever counted in the
             ;; non-numeric branch above, so that is the only case where
             ;; the rate carries information.
             (unless numeric?
               (format stream " (inconclusive: ~a)"
                       (/ (aref inconclusive-vector idx) len))))))

(defun test-bill (train-data test-data stream)
  "Run the numeric k = 7, 5, 3, 1 nearest-neighbor test and bucket the errors.

For each sample in TEST-DATA the absolute difference between the sample's
class and each of the four predictions from K-1357-NEAREST-NEIGHBORS
(called with numeric mode on) is computed.  For every k the number of
samples whose error is within 1, within 5 and within 10 is counted and
reported to STREAM together with the percentage of TEST-DATA.  TEST-DATA
must be non-empty for the percentages to be computable.  Returns NIL."
  (format stream
          "~&;; k-nearest-neighbor test-bill started at ~a (~a total cases)~%"
          (get-universal-time) (length test-data))
  ;; Each bucket is (LIMIT . COUNTS); slot 0..3 of COUNTS is k = 7, 5, 3, 1.
  (let ((buckets (list (cons 1 (vector 0 0 0 0))
                       (cons 5 (vector 0 0 0 0))
                       (cons 10 (vector 0 0 0 0))))
        (len (length test-data))
        (i 1))
    (dolist (sample test-data)
      ;; some debugging info
      (format stream "~&;; [~a/~a] " i len)
      (incf i)
      (multiple-value-bind (mean7 mean5 mean3 mean1)
          (k-1357-nearest-neighbors sample train-data t)
        (loop for mean in (list mean7 mean5 mean3 mean1)
              for idx from 0
              do (let ((err (abs (- (nn-point-class sample) mean))))
                   (loop for (limit . counts) in buckets
                         when (<= err limit)
                           do (incf (aref counts idx)))
                   (format stream " error[~a]: ~a" idx err)))))
    (format stream "~&;; test finished at ~a~%" (get-universal-time))
    (loop for k in '(7 5 3 1)
          for idx from 0
          do (format stream "~&;; k = ~a" k)
             (loop for (limit . counts) in buckets
                   do (format stream "~&;; Within ~a: ~a (~a%)"
                              limit
                              (aref counts idx)
                              (* (coerce (/ (aref counts idx) len) 'float)
                                 100))))))

(defun test-agglomerative (test-data stream)
  "Score AGGLOMERATIVE-PREDICT on every sample of TEST-DATA.

A sample counts as an error when the first value returned by
AGGLOMERATIVE-PREDICT is not EQ to the sample's class.  The start banner
goes to both STREAM and standard output, per-sample results go to STREAM,
and the final error rate (errors / number of samples, as a float) goes to
standard output only.  TEST-DATA must be non-empty."
  (format stream
          "~&;; test-agglomerative started at ~a (~a total cases)~%"
          (get-universal-time) (length test-data))
  (format t
          "~&;; test-agglomerative started at ~a (~a total cases)~%"
          (get-universal-time) (length test-data))
  (let ((error-count 0)
        (len (length test-data))
        (i 1))
    (dolist (sample test-data)
      ;; some debugging info
      (format stream "~&;; [~a/~a] " i len)
      (incf i)
      (multiple-value-bind (predicted-class cluster)
          (agglomerative-predict sample)
        ;; The second return value is not needed for scoring.
        (declare (ignore cluster))
        (if (not (eq predicted-class (nn-point-class sample)))
            (progn
              (incf error-count)
              (format stream " error: t~%"))
            (format stream " error: nil~%"))))
    (format t
            "~&;; test-agglomerative finished at ~a (total error: ~a)~%"
            (get-universal-time)
            (coerce (/ error-count len) 'float))))

(defun test-nearest (train-data test-data numeric? stream)
  "Run the single nearest-neighbor test on TEST-DATA and log to STREAM.

For each sample the first value returned by NAIVE-NEAREST-NEIGHBOR (called
against TRAIN-DATA) is scored with COMPUTE-ERROR.  With NUMERIC? true the
errors are averaged with MEAN; otherwise every non-NIL error is counted
and the count is divided by the number of samples.  The start banner goes
to both STREAM and standard output; everything else goes to STREAM.

NUMERIC? sits between TEST-DATA and STREAM because that is how
NEAREST-MAIN-TEST calls this function."
  (format stream "~&;; test started at ~a (~a total cases)~%"
          (get-universal-time) (length test-data))
  (format t "~&;; test started at ~a (~a total cases)~%"
          (get-universal-time) (length test-data))
  (let ((error-list '())
        (error-count 0)
        (len (length test-data))
        (i 1))
    (dolist (sample test-data)
      ;; some debugging info
      (format stream "~&;; [~a/~a] " i len)
      (incf i)
      (multiple-value-bind (neighbor point distance)
          (naive-nearest-neighbor sample train-data)
        ;; Only the first return value takes part in the scoring.
        (declare (ignore point distance))
        (let ((err (compute-error sample neighbor numeric?)))
          (if numeric?
              (push err error-list)
              (when err
                (incf error-count)))
          (format stream " error: ~a~%"
                  (if numeric? (coerce err 'float) err)))))
    (format stream "~&;; test finished at ~a (total error: ~a)~%"
            (get-universal-time)
            (coerce (if numeric?
                        (apply #'mean error-list)
                        (/ error-count len))
                    'float))))

;; (agglomerative-main-test "/u/mjr/course/cs297/data/breast-float.data" 1 t)
(defun agglomerative-main-test (pathname numtimes stream)
  "Parse a data file and train/test the agglomerative learner NUMTIMES times.

PATHNAME defaults to *DATA-FILE* when NIL.  Each iteration obtains a fresh
train/test pair from NN-POINT-LIST->TRAIN-AND-TEST, trains with
TRAIN-AGGLOMERATIVE and scores with TEST-AGGLOMERATIVE, logging to STREAM."
  (format t "~&parsing datafile ~a~%" (or pathname *data-file*))
  (let ((nn-point-list (parse-datafile (or pathname *data-file*))))
    (format t "~&training/testing on ~a randomly distrubted datasets~%"
            numtimes)
    (loop repeat numtimes
          do (multiple-value-bind (train-data test-data)
                 (nn-point-list->train-and-test nn-point-list)
               (train-agglomerative train-data stream)
               (test-agglomerative test-data stream)))))

(defun nearest-main-test (pathname numtimes numeric? stream)
  "Parse a data file and run TEST-NEAREST on NUMTIMES train/test pairs.

PATHNAME defaults to *DATA-FILE* when NIL.  NUMERIC? and STREAM are passed
through to TEST-NEAREST."
  (format t "~&parsing datafile ~a~%" (or pathname *data-file*))
  (let ((nn-point-list (parse-datafile (or pathname *data-file*))))
    (format t "~&testing on ~a randomly distrubted datasets~%" numtimes)
    (loop repeat numtimes
          do (multiple-value-bind (train-data test-data)
                 (nn-point-list->train-and-test nn-point-list)
               (test-nearest train-data test-data numeric? stream)))))

(defun k-nearest-main-test (pathname numtimes numeric? stream)
  "Parse a data file and run TEST-K-NEAREST on NUMTIMES train/test pairs.

PATHNAME defaults to *DATA-FILE* when NIL.  NUMERIC? and STREAM are passed
through to TEST-K-NEAREST."
  (format t "~&parsing datafile ~a~%" (or pathname *data-file*))
  (let ((nn-point-list (parse-datafile (or pathname *data-file*))))
    (format t "~&testing on ~a randomly distrubted datasets~%" numtimes)
    (loop repeat numtimes
          do (multiple-value-bind (train-data test-data)
                 (nn-point-list->train-and-test nn-point-list)
               (test-k-nearest train-data test-data numeric? stream)))))

(defun bill-main-test (pathname numtimes stream)
  "Parse a data file and run TEST-BILL on NUMTIMES train/test pairs.

PATHNAME defaults to *DATA-FILE* when NIL.  STREAM is passed through to
TEST-BILL."
  (format t "~&parsing datafile ~a~%" (or pathname *data-file*))
  (let ((nn-point-list (parse-datafile (or pathname *data-file*))))
    (format t "~&testing on ~a randomly distrubted datasets~%" numtimes)
    (loop repeat numtimes
          do (multiple-value-bind (train-data test-data)
                 (nn-point-list->train-and-test nn-point-list)
               (test-bill train-data test-data stream)))))

(defun test-to-file (output-path fileinfo-pair numiter k)
  "Run a nearest-neighbor test and append its log to the file OUTPUT-PATH.

FILEINFO-PAIR is a (PATH NUMERIC?) list as found in *DATA-FILES*.  NUMITER
is the number of train/test iterations.  K = 1 selects NEAREST-MAIN-TEST;
any other K selects K-NEAREST-MAIN-TEST.  The output file is created if
missing and appended to otherwise.  Returns T."
  (with-open-file (stream (pathname output-path)
                          :direction :output
                          :if-exists :append
                          :if-does-not-exist :create)
    (if (= k 1)
        (nearest-main-test (car fileinfo-pair) numiter
                           (cadr fileinfo-pair) stream)
        (k-nearest-main-test (car fileinfo-pair) numiter
                             (cadr fileinfo-pair) stream)))
  t)

(defun massive-test-run (pathname)
  "Run TEST-TO-FILE for every entry of *DATA-FILES*, appending to PATHNAME.

Each data file is tested with 5 iterations for K = 1, 3, 5 and 7.
Returns T."
  ;; NOTE(review): TEST-TO-FILE only distinguishes K = 1 from everything
  ;; else, so K = 3, 5 and 7 each run the same K-NEAREST-MAIN-TEST -- confirm
  ;; that the three repeated runs are intended.
  (dolist (file *data-files*)
    (dolist (k '(1 3 5 7))
      (test-to-file pathname file 5 k)))
  t)