pktools 2.6.7
Processing Kernel for geospatial data
svm.cpp
1#include <math.h>
2#include <stdio.h>
3#include <stdlib.h>
4#include <ctype.h>
5#include <float.h>
6#include <string.h>
7#include <stdarg.h>
8#include <limits.h>
9#include <locale.h>
10//test
11// #include <iostream>
12#include "svm.h"
13int libsvm_version = LIBSVM_VERSION;
14typedef float Qfloat;
15typedef signed char schar;
16#ifndef min
17template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
18#endif
19#ifndef max
20template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
21#endif
22template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
23template <class S, class T> static inline void clone(T*& dst, S* src, int n)
24{
25 dst = new T[n];
26 memcpy((void *)dst,(void *)src,sizeof(T)*n);
27}
28static inline double powi(double base, int times)
29{
30 double tmp = base, ret = 1.0;
31
32 for(int t=times; t>0; t/=2)
33 {
34 if(t%2==1) ret*=tmp;
35 tmp = tmp * tmp;
36 }
37 return ret;
38}
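// Editor's note (illustrative, not part of the original source): powi() above
// uses exponentiation by squaring, so it needs only O(log(times)) multiplications.
// For powi(2.0,10) the loop sees t = 10,5,2,1 and ret picks up tmp at t=5
// (tmp=4) and at t=1 (tmp=256), giving 4*256 = 1024; powi(x,0) returns 1.0
// because the loop body never runs.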
39#define INF HUGE_VAL
40#define TAU 1e-12
41#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
42
43static void print_string_stdout(const char *s)
44{
45 fputs(s,stdout);
46 fflush(stdout);
47}
48static void (*svm_print_string) (const char *) = &print_string_stdout;
49#if 1
50static void info(const char *fmt,...)
51{
52 char buf[BUFSIZ];
53 va_list ap;
54 va_start(ap,fmt);
55 vsprintf(buf,fmt,ap);
56 va_end(ap);
57 (*svm_print_string)(buf);
58}
59#else
60static void info(const char *fmt,...) {}
61#endif
62
63//
64// Kernel Cache
65//
66// l is the number of total data items
67// size is the cache size limit in bytes
68//
69class Cache
70{
71public:
72 Cache(int l,long int size);
73 ~Cache();
74
75 // request data [0,len)
76 // return some position p where [p,len) need to be filled
77 // (p >= len if nothing needs to be filled)
78 int get_data(const int index, Qfloat **data, int len);
79 void swap_index(int i, int j);
80private:
81 int l;
82 long int size;
83 struct head_t
84 {
85 head_t *prev, *next; // a circular list
86 Qfloat *data;
87 int len; // data[0,len) is cached in this entry
88 };
89
90 head_t *head;
91 head_t lru_head;
92 void lru_delete(head_t *h);
93 void lru_insert(head_t *h);
94};
95
96Cache::Cache(int l_,long int size_):l(l_),size(size_)
97{
98 head = (head_t *)calloc(l,sizeof(head_t)); // initialized to 0
99 size /= sizeof(Qfloat);
100 size -= l * sizeof(head_t) / sizeof(Qfloat);
101 size = max(size, 2 * (long int) l); // cache must be large enough for two columns
102 lru_head.next = lru_head.prev = &lru_head;
103}
104
105Cache::~Cache()
106{
107 for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
108 free(h->data);
109 free(head);
110}
111
112void Cache::lru_delete(head_t *h)
113{
114 // delete from current location
115 h->prev->next = h->next;
116 h->next->prev = h->prev;
117}
118
119void Cache::lru_insert(head_t *h)
120{
121 // insert to last position
122 h->next = &lru_head;
123 h->prev = lru_head.prev;
124 h->prev->next = h;
125 h->next->prev = h;
126}
127
128int Cache::get_data(const int index, Qfloat **data, int len)
129{
130 head_t *h = &head[index];
131 if(h->len) lru_delete(h);
132 int more = len - h->len;
133
134 if(more > 0)
135 {
136 // free old space
137 while(size < more)
138 {
139 head_t *old = lru_head.next;
140 lru_delete(old);
141 free(old->data);
142 size += old->len;
143 old->data = 0;
144 old->len = 0;
145 }
146
147 // allocate new space
148 h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
149 size -= more;
150 swap(h->len,len);
151 }
152
153 lru_insert(h);
154 *data = h->data;
155 return len;
156}
157
158void Cache::swap_index(int i, int j)
159{
160 if(i==j) return;
161
162 if(head[i].len) lru_delete(&head[i]);
163 if(head[j].len) lru_delete(&head[j]);
164 swap(head[i].data,head[j].data);
165 swap(head[i].len,head[j].len);
166 if(head[i].len) lru_insert(&head[i]);
167 if(head[j].len) lru_insert(&head[j]);
168
169 if(i>j) swap(i,j);
170 for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
171 {
172 if(h->len > i)
173 {
174 if(h->len > j)
175 swap(h->data[i],h->data[j]);
176 else
177 {
178 // give up
179 lru_delete(h);
180 free(h->data);
181 size += h->len;
182 h->data = 0;
183 h->len = 0;
184 }
185 }
186 }
187}
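// Editor's note (illustrative, not part of the original source): the Q-matrix
// classes further down create one Cache per problem and size it from
// param.cache_size, which is given in MB:
//
//   Cache *cache = new Cache(prob.l, (long int)(param.cache_size*(1<<20)));
//
// get_data(i,&data,len) then returns the first position of column i that still
// has to be filled with kernel values; least recently used columns are evicted
// when the byte budget is exhausted.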
188
189//
190// Kernel evaluation
191//
192// the static method k_function is for doing single kernel evaluation
193// the constructor of Kernel prepares to calculate the l*l kernel matrix
194// the member function get_Q is for getting one column from the Q Matrix
195//
196class QMatrix {
197public:
198 virtual Qfloat *get_Q(int column, int len) const = 0;
199 virtual double *get_QD() const = 0;
200 virtual void swap_index(int i, int j) const = 0;
201 virtual ~QMatrix() {}
202};
203
204class Kernel: public QMatrix {
205public:
206 Kernel(int l, svm_node * const * x, const svm_parameter& param);
207 virtual ~Kernel();
208
209 static double k_function(const svm_node *x, const svm_node *y,
210 const svm_parameter& param);
211 virtual Qfloat *get_Q(int column, int len) const = 0;
212 virtual double *get_QD() const = 0;
213 virtual void swap_index(int i, int j) const // not so const...
214 {
215 swap(x[i],x[j]);
216 if(x_square) swap(x_square[i],x_square[j]);
217 }
218protected:
219
220 double (Kernel::*kernel_function)(int i, int j) const;
221
222private:
223 const svm_node **x;
224 double *x_square;
225
226 // svm_parameter
227 const int kernel_type;
228 const int degree;
229 const double gamma;
230 const double coef0;
231
232 static double dot(const svm_node *px, const svm_node *py);
233 double kernel_linear(int i, int j) const
234 {
235 return dot(x[i],x[j]);
236 }
237 double kernel_poly(int i, int j) const
238 {
239 return powi(gamma*dot(x[i],x[j])+coef0,degree);
240 }
241 double kernel_rbf(int i, int j) const
242 {
243 return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
244 }
245 double kernel_sigmoid(int i, int j) const
246 {
247 return tanh(gamma*dot(x[i],x[j])+coef0);
248 }
249 double kernel_precomputed(int i, int j) const
250 {
251 return x[i][(int)(x[j][0].value)].value;
252 }
253};
254
255Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
256:kernel_type(param.kernel_type), degree(param.degree),
257 gamma(param.gamma), coef0(param.coef0)
258{
259 switch(kernel_type)
260 {
261 case LINEAR:
262 kernel_function = &Kernel::kernel_linear;
263 break;
264 case POLY:
265 kernel_function = &Kernel::kernel_poly;
266 break;
267 case RBF:
268 kernel_function = &Kernel::kernel_rbf;
269 break;
270 case SIGMOID:
271 kernel_function = &Kernel::kernel_sigmoid;
272 break;
273 case PRECOMPUTED:
274 kernel_function = &Kernel::kernel_precomputed;
275 break;
276 }
277
278 clone(x,x_,l);
279
280 if(kernel_type == RBF)
281 {
282 x_square = new double[l];
283 for(int i=0;i<l;i++)
284 x_square[i] = dot(x[i],x[i]);
285 }
286 else
287 x_square = 0;
288}
289
290Kernel::~Kernel()
291{
292 delete[] x;
293 delete[] x_square;
294}
295
296double Kernel::dot(const svm_node *px, const svm_node *py)
297{
298 double sum = 0;
299 while(px->index != -1 && py->index != -1)
300 {
301 if(px->index == py->index)
302 {
303 sum += px->value * py->value;
304 ++px;
305 ++py;
306 }
307 else
308 {
309 if(px->index > py->index)
310 ++py;
311 else
312 ++px;
313 }
314 }
315 return sum;
316}
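// Editor's note (illustrative, not part of the original source): svm_node
// arrays are sparse vectors with ascending indices, terminated by index = -1,
// and dot() above merges the two index lists like a sorted-list intersection.
// For x = {(1,1.0),(3,2.0)} and y = {(1,3.0),(2,4.0),(3,5.0)} only the shared
// indices 1 and 3 contribute, so dot(x,y) = 1*3 + 2*5 = 13.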
317
318double Kernel::k_function(const svm_node *x, const svm_node *y,
319 const svm_parameter& param)
320{
321 switch(param.kernel_type)
322 {
323 case LINEAR:
324 return dot(x,y);
325 case POLY:
326 return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
327 case RBF:
328 {
329 double sum = 0;
330 while(x->index != -1 && y->index !=-1)
331 {
332 if(x->index == y->index)
333 {
334 double d = x->value - y->value;
335 sum += d*d;
336 ++x;
337 ++y;
338 }
339 else
340 {
341 if(x->index > y->index)
342 {
343 sum += y->value * y->value;
344 ++y;
345 }
346 else
347 {
348 sum += x->value * x->value;
349 ++x;
350 }
351 }
352 }
353
354 while(x->index != -1)
355 {
356 sum += x->value * x->value;
357 ++x;
358 }
359
360 while(y->index != -1)
361 {
362 sum += y->value * y->value;
363 ++y;
364 }
365
366 return exp(-param.gamma*sum);
367 }
368 case SIGMOID:
369 return tanh(param.gamma*dot(x,y)+param.coef0);
370 case PRECOMPUTED: //x: test (validation), y: SV
371 return x[(int)(y->value)].value;
372 default:
373 return 0; // Unreachable
374 }
375}
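// Editor's note: a minimal sketch (not part of the original source) of how a
// single kernel value can be evaluated with the public k_function(), as done
// at prediction time in svm_predict_values() below. The helper name and the
// choice of filling only the fields the RBF branch reads are assumptions of
// this example, not part of the library API.
static double example_rbf_value(const svm_node *x, const svm_node *y, double gamma)
{
	svm_parameter param;
	memset(&param, 0, sizeof(param));	// only kernel_type and gamma matter here
	param.kernel_type = RBF;
	param.gamma = gamma;			// degree and coef0 are not used by the RBF branch
	return Kernel::k_function(x, y, param);	// exp(-gamma*||x-y||^2)
}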
376
377// An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
378// Solves:
379//
380// min 0.5(\alpha^T Q \alpha) + p^T \alpha
381//
382// y^T \alpha = \delta
383// y_i = +1 or -1
384// 0 <= alpha_i <= Cp for y_i = 1
385// 0 <= alpha_i <= Cn for y_i = -1
386//
387// Given:
388//
389// Q, p, y, Cp, Cn, and an initial feasible point \alpha
390// l is the size of vectors and matrices
391// eps is the stopping tolerance
392//
393// solution will be put in \alpha, objective value will be put in obj
394//
395class Solver {
396public:
397 Solver() {};
398 virtual ~Solver() {};
399
400 struct SolutionInfo {
401 double obj;
402 double rho;
403 double upper_bound_p;
404 double upper_bound_n;
405 double r; // for Solver_NU
406 };
407
408 void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
409 double *alpha_, double Cp, double Cn, double eps,
410 SolutionInfo* si, int shrinking, bool verbose=false);
411protected:
412 int active_size;
413 schar *y;
414 double *G; // gradient of objective function
415 enum { LOWER_BOUND, UPPER_BOUND, FREE };
416 char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE
417 double *alpha;
418 const QMatrix *Q;
419 const double *QD;
420 double eps;
421 double Cp,Cn;
422 double *p;
423 int *active_set;
424 double *G_bar; // gradient, if we treat free variables as 0
425 int l;
426 bool unshrink; // XXX
427
428 double get_C(int i)
429 {
430 return (y[i] > 0)? Cp : Cn;
431 }
432 void update_alpha_status(int i)
433 {
434 if(alpha[i] >= get_C(i))
435 alpha_status[i] = UPPER_BOUND;
436 else if(alpha[i] <= 0)
437 alpha_status[i] = LOWER_BOUND;
438 else alpha_status[i] = FREE;
439 }
440 bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
441 bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
442 bool is_free(int i) { return alpha_status[i] == FREE; }
443 void swap_index(int i, int j);
444 void reconstruct_gradient();
445 virtual int select_working_set(int &i, int &j);
446 virtual double calculate_rho();
447 virtual void do_shrinking();
448private:
449 bool be_shrunk(int i, double Gmax1, double Gmax2);
450};
451
452void Solver::swap_index(int i, int j)
453{
454 Q->swap_index(i,j);
455 swap(y[i],y[j]);
456 swap(G[i],G[j]);
457 swap(alpha_status[i],alpha_status[j]);
458 swap(alpha[i],alpha[j]);
459 swap(p[i],p[j]);
460 swap(active_set[i],active_set[j]);
461 swap(G_bar[i],G_bar[j]);
462}
463
464void Solver::reconstruct_gradient()
465{
466 // reconstruct inactive elements of G from G_bar and free variables
467
468 if(active_size == l) return;
469
470 int i,j;
471 int nr_free = 0;
472
473 for(j=active_size;j<l;j++)
474 G[j] = G_bar[j] + p[j];
475
476 for(j=0;j<active_size;j++)
477 if(is_free(j))
478 nr_free++;
479
480 if(2*nr_free < active_size)
481 info("\nWARNING: using -h 0 may be faster\n");
482
483 if (nr_free*l > 2*active_size*(l-active_size))
484 {
485 for(i=active_size;i<l;i++)
486 {
487 const Qfloat *Q_i = Q->get_Q(i,active_size);
488 for(j=0;j<active_size;j++)
489 if(is_free(j))
490 G[i] += alpha[j] * Q_i[j];
491 }
492 }
493 else
494 {
495 for(i=0;i<active_size;i++)
496 if(is_free(i))
497 {
498 const Qfloat *Q_i = Q->get_Q(i,l);
499 double alpha_i = alpha[i];
500 for(j=active_size;j<l;j++)
501 G[j] += alpha_i * Q_i[j];
502 }
503 }
504}
505
506void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
507 double *alpha_, double Cp, double Cn, double eps,
508 SolutionInfo* si, int shrinking,
509 bool verbose)//pk
510{
511 this->l = l;
512 this->Q = &Q;
513 QD=Q.get_QD();
514 clone(p, p_,l);
515 clone(y, y_,l);
516 clone(alpha,alpha_,l);
517 this->Cp = Cp;
518 this->Cn = Cn;
519 this->eps = eps;
520 unshrink = false;
521
522 // initialize alpha_status
523 {
524 alpha_status = new char[l];
525 for(int i=0;i<l;i++)
526 update_alpha_status(i);
527 }
528
529 // initialize active set (for shrinking)
530 {
531 active_set = new int[l];
532 for(int i=0;i<l;i++)
533 active_set[i] = i;
534 active_size = l;
535 }
536
537 // initialize gradient
538 {
539 G = new double[l];
540 G_bar = new double[l];
541 int i;
542 for(i=0;i<l;i++)
543 {
544 G[i] = p[i];
545 G_bar[i] = 0;
546 }
547 for(i=0;i<l;i++)
548 if(!is_lower_bound(i))
549 {
550 const Qfloat *Q_i = Q.get_Q(i,l);
551 double alpha_i = alpha[i];
552 int j;
553 for(j=0;j<l;j++)
554 G[j] += alpha_i*Q_i[j];
555 if(is_upper_bound(i))
556 for(j=0;j<l;j++)
557 G_bar[j] += get_C(i) * Q_i[j];
558 }
559 }
560
561 // optimization step
562
563 int iter = 0;
564 int max_iter = max(10000000, l>INT_MAX/100 ? INT_MAX : 100*l);
565 int counter = min(l,1000)+1;
566
567 while(iter < max_iter)
568 {
569 // show progress and do shrinking
570
571 if(--counter == 0)
572 {
573 counter = min(l,1000);
574 if(shrinking) do_shrinking();
575 if(verbose)//pk
576 info(".");
577 }
578
579 int i,j;
580 if(select_working_set(i,j)!=0)
581 {
582 // reconstruct the whole gradient
583 reconstruct_gradient();
584 // reset active set size and check
585 active_size = l;
586 if(verbose)//pk
587 info("*");
588 if(select_working_set(i,j)!=0)
589 break;
590 else
591 counter = 1; // do shrinking next iteration
592 }
593
594 ++iter;
595
596 // update alpha[i] and alpha[j], handle bounds carefully
597
598 const Qfloat *Q_i = Q.get_Q(i,active_size);
599 const Qfloat *Q_j = Q.get_Q(j,active_size);
600
601 double C_i = get_C(i);
602 double C_j = get_C(j);
603
604 double old_alpha_i = alpha[i];
605 double old_alpha_j = alpha[j];
606
607 if(y[i]!=y[j])
608 {
609 double quad_coef = QD[i]+QD[j]+2*Q_i[j];
610 if (quad_coef <= 0)
611 quad_coef = TAU;
612 double delta = (-G[i]-G[j])/quad_coef;
613 double diff = alpha[i] - alpha[j];
614 alpha[i] += delta;
615 alpha[j] += delta;
616
617 if(diff > 0)
618 {
619 if(alpha[j] < 0)
620 {
621 alpha[j] = 0;
622 alpha[i] = diff;
623 }
624 }
625 else
626 {
627 if(alpha[i] < 0)
628 {
629 alpha[i] = 0;
630 alpha[j] = -diff;
631 }
632 }
633 if(diff > C_i - C_j)
634 {
635 if(alpha[i] > C_i)
636 {
637 alpha[i] = C_i;
638 alpha[j] = C_i - diff;
639 }
640 }
641 else
642 {
643 if(alpha[j] > C_j)
644 {
645 alpha[j] = C_j;
646 alpha[i] = C_j + diff;
647 }
648 }
649 }
650 else
651 {
652 double quad_coef = QD[i]+QD[j]-2*Q_i[j];
653 if (quad_coef <= 0)
654 quad_coef = TAU;
655 double delta = (G[i]-G[j])/quad_coef;
656 double sum = alpha[i] + alpha[j];
657 alpha[i] -= delta;
658 alpha[j] += delta;
659
660 if(sum > C_i)
661 {
662 if(alpha[i] > C_i)
663 {
664 alpha[i] = C_i;
665 alpha[j] = sum - C_i;
666 }
667 }
668 else
669 {
670 if(alpha[j] < 0)
671 {
672 alpha[j] = 0;
673 alpha[i] = sum;
674 }
675 }
676 if(sum > C_j)
677 {
678 if(alpha[j] > C_j)
679 {
680 alpha[j] = C_j;
681 alpha[i] = sum - C_j;
682 }
683 }
684 else
685 {
686 if(alpha[i] < 0)
687 {
688 alpha[i] = 0;
689 alpha[j] = sum;
690 }
691 }
692 }
693
694 // update G
695
696 double delta_alpha_i = alpha[i] - old_alpha_i;
697 double delta_alpha_j = alpha[j] - old_alpha_j;
698
699 for(int k=0;k<active_size;k++)
700 {
701 G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
702 }
703
704 // update alpha_status and G_bar
705
706 {
707 bool ui = is_upper_bound(i);
708 bool uj = is_upper_bound(j);
709 update_alpha_status(i);
710 update_alpha_status(j);
711 int k;
712 if(ui != is_upper_bound(i))
713 {
714 Q_i = Q.get_Q(i,l);
715 if(ui)
716 for(k=0;k<l;k++)
717 G_bar[k] -= C_i * Q_i[k];
718 else
719 for(k=0;k<l;k++)
720 G_bar[k] += C_i * Q_i[k];
721 }
722
723 if(uj != is_upper_bound(j))
724 {
725 Q_j = Q.get_Q(j,l);
726 if(uj)
727 for(k=0;k<l;k++)
728 G_bar[k] -= C_j * Q_j[k];
729 else
730 for(k=0;k<l;k++)
731 G_bar[k] += C_j * Q_j[k];
732 }
733 }
734 }
735
736 if(iter >= max_iter)
737 {
738 if(active_size < l)
739 {
740 // reconstruct the whole gradient to calculate objective value
741 reconstruct_gradient();
742 active_size = l;
743 if(verbose)//pk
744 info("*");
745 }
746 info("\nWARNING: reaching max number of iterations");
747 }
748
749 // calculate rho
750
751 si->rho = calculate_rho();
752
753 // calculate objective value
754 {
755 double v = 0;
756 int i;
757 for(i=0;i<l;i++)
758 v += alpha[i] * (G[i] + p[i]);
759
760 si->obj = v/2;
761 }
762
763 // put back the solution
764 {
765 for(int i=0;i<l;i++)
766 alpha_[active_set[i]] = alpha[i];
767 }
768
769 // juggle everything back
770 /*{
771 for(int i=0;i<l;i++)
772 while(active_set[i] != i)
773 swap_index(i,active_set[i]);
774 // or Q.swap_index(i,active_set[i]);
775 }*/
776
777 si->upper_bound_p = Cp;
778 si->upper_bound_n = Cn;
779
780 if(verbose)//pk
781 info("\noptimization finished, #iter = %d\n",iter);
782
783 delete[] p;
784 delete[] y;
785 delete[] alpha;
786 delete[] alpha_status;
787 delete[] active_set;
788 delete[] G;
789 delete[] G_bar;
790}
791
792// return 1 if already optimal, return 0 otherwise
793int Solver::select_working_set(int &out_i, int &out_j)
794{
795 // return i,j such that
796 // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
797 // j: minimizes the decrease of obj value
798// (if quadratic coefficient <= 0, replace it with tau)
799 // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
800
801 double Gmax = -INF;
802 double Gmax2 = -INF;
803 int Gmax_idx = -1;
804 int Gmin_idx = -1;
805 double obj_diff_min = INF;
806
807 for(int t=0;t<active_size;t++)
808 if(y[t]==+1)
809 {
810 if(!is_upper_bound(t))
811 if(-G[t] >= Gmax)
812 {
813 Gmax = -G[t];
814 Gmax_idx = t;
815 }
816 }
817 else
818 {
819 if(!is_lower_bound(t))
820 if(G[t] >= Gmax)
821 {
822 Gmax = G[t];
823 Gmax_idx = t;
824 }
825 }
826
827 int i = Gmax_idx;
828 const Qfloat *Q_i = NULL;
829 if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
830 Q_i = Q->get_Q(i,active_size);
831
832 for(int j=0;j<active_size;j++)
833 {
834 if(y[j]==+1)
835 {
836 if (!is_lower_bound(j))
837 {
838 double grad_diff=Gmax+G[j];
839 if (G[j] >= Gmax2)
840 Gmax2 = G[j];
841 if (grad_diff > 0)
842 {
843 double obj_diff;
844 double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j];
845 if (quad_coef > 0)
846 obj_diff = -(grad_diff*grad_diff)/quad_coef;
847 else
848 obj_diff = -(grad_diff*grad_diff)/TAU;
849
850 if (obj_diff <= obj_diff_min)
851 {
852 Gmin_idx=j;
853 obj_diff_min = obj_diff;
854 }
855 }
856 }
857 }
858 else
859 {
860 if (!is_upper_bound(j))
861 {
862 double grad_diff= Gmax-G[j];
863 if (-G[j] >= Gmax2)
864 Gmax2 = -G[j];
865 if (grad_diff > 0)
866 {
867 double obj_diff;
868 double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j];
869 if (quad_coef > 0)
870 obj_diff = -(grad_diff*grad_diff)/quad_coef;
871 else
872 obj_diff = -(grad_diff*grad_diff)/TAU;
873
874 if (obj_diff <= obj_diff_min)
875 {
876 Gmin_idx=j;
877 obj_diff_min = obj_diff;
878 }
879 }
880 }
881 }
882 }
883
884 if(Gmax+Gmax2 < eps)
885 return 1;
886
887 out_i = Gmax_idx;
888 out_j = Gmin_idx;
889 return 0;
890}
891
892bool Solver::be_shrunk(int i, double Gmax1, double Gmax2)
893{
894 if(is_upper_bound(i))
895 {
896 if(y[i]==+1)
897 return(-G[i] > Gmax1);
898 else
899 return(-G[i] > Gmax2);
900 }
901 else if(is_lower_bound(i))
902 {
903 if(y[i]==+1)
904 return(G[i] > Gmax2);
905 else
906 return(G[i] > Gmax1);
907 }
908 else
909 return(false);
910}
911
912void Solver::do_shrinking()
913{
914 int i;
915 double Gmax1 = -INF; // max { -y_i * grad(f)_i | i in I_up(\alpha) }
916 double Gmax2 = -INF; // max { y_i * grad(f)_i | i in I_low(\alpha) }
917
918 // find maximal violating pair first
919 for(i=0;i<active_size;i++)
920 {
921 if(y[i]==+1)
922 {
923 if(!is_upper_bound(i))
924 {
925 if(-G[i] >= Gmax1)
926 Gmax1 = -G[i];
927 }
928 if(!is_lower_bound(i))
929 {
930 if(G[i] >= Gmax2)
931 Gmax2 = G[i];
932 }
933 }
934 else
935 {
936 if(!is_upper_bound(i))
937 {
938 if(-G[i] >= Gmax2)
939 Gmax2 = -G[i];
940 }
941 if(!is_lower_bound(i))
942 {
943 if(G[i] >= Gmax1)
944 Gmax1 = G[i];
945 }
946 }
947 }
948
949 if(unshrink == false && Gmax1 + Gmax2 <= eps*10)
950 {
951 unshrink = true;
952 reconstruct_gradient();
953 active_size = l;
954 info("*");
955 }
956
957 for(i=0;i<active_size;i++)
958 if (be_shrunk(i, Gmax1, Gmax2))
959 {
960 active_size--;
961 while (active_size > i)
962 {
963 if (!be_shrunk(active_size, Gmax1, Gmax2))
964 {
965 swap_index(i,active_size);
966 break;
967 }
968 active_size--;
969 }
970 }
971}
972
973double Solver::calculate_rho()
974{
975 double r;
976 int nr_free = 0;
977 double ub = INF, lb = -INF, sum_free = 0;
978 for(int i=0;i<active_size;i++)
979 {
980 double yG = y[i]*G[i];
981
982 if(is_upper_bound(i))
983 {
984 if(y[i]==-1)
985 ub = min(ub,yG);
986 else
987 lb = max(lb,yG);
988 }
989 else if(is_lower_bound(i))
990 {
991 if(y[i]==+1)
992 ub = min(ub,yG);
993 else
994 lb = max(lb,yG);
995 }
996 else
997 {
998 ++nr_free;
999 sum_free += yG;
1000 }
1001 }
1002
1003 if(nr_free>0)
1004 r = sum_free/nr_free;
1005 else
1006 r = (ub+lb)/2;
1007
1008 return r;
1009}
1010
1011//
1012// Solver for nu-svm classification and regression
1013//
1014// additional constraint: e^T \alpha = constant
1015//
1016class Solver_NU : public Solver
1017{
1018public:
1019 Solver_NU() {}
1020 void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
1021 double *alpha, double Cp, double Cn, double eps,
1022 SolutionInfo* si, int shrinking, bool verbose=false)
1023 {
1024 this->si = si;
1025 Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking,verbose);
1026 }
1027private:
1028 SolutionInfo *si;
1029 int select_working_set(int &i, int &j);
1030 double calculate_rho();
1031 bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
1032 void do_shrinking();
1033};
1034
1035// return 1 if already optimal, return 0 otherwise
1036int Solver_NU::select_working_set(int &out_i, int &out_j)
1037{
1038 // return i,j such that y_i = y_j and
1039 // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
1040 // j: minimizes the decrease of obj value
1041// (if quadratic coefficient <= 0, replace it with tau)
1042 // -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
1043
1044 double Gmaxp = -INF;
1045 double Gmaxp2 = -INF;
1046 int Gmaxp_idx = -1;
1047
1048 double Gmaxn = -INF;
1049 double Gmaxn2 = -INF;
1050 int Gmaxn_idx = -1;
1051
1052 int Gmin_idx = -1;
1053 double obj_diff_min = INF;
1054
1055 for(int t=0;t<active_size;t++)
1056 if(y[t]==+1)
1057 {
1058 if(!is_upper_bound(t))
1059 if(-G[t] >= Gmaxp)
1060 {
1061 Gmaxp = -G[t];
1062 Gmaxp_idx = t;
1063 }
1064 }
1065 else
1066 {
1067 if(!is_lower_bound(t))
1068 if(G[t] >= Gmaxn)
1069 {
1070 Gmaxn = G[t];
1071 Gmaxn_idx = t;
1072 }
1073 }
1074
1075 int ip = Gmaxp_idx;
1076 int in = Gmaxn_idx;
1077 const Qfloat *Q_ip = NULL;
1078 const Qfloat *Q_in = NULL;
1079 if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
1080 Q_ip = Q->get_Q(ip,active_size);
1081 if(in != -1)
1082 Q_in = Q->get_Q(in,active_size);
1083
1084 for(int j=0;j<active_size;j++)
1085 {
1086 if(y[j]==+1)
1087 {
1088 if (!is_lower_bound(j))
1089 {
1090 double grad_diff=Gmaxp+G[j];
1091 if (G[j] >= Gmaxp2)
1092 Gmaxp2 = G[j];
1093 if (grad_diff > 0)
1094 {
1095 double obj_diff;
1096 double quad_coef = QD[ip]+QD[j]-2*Q_ip[j];
1097 if (quad_coef > 0)
1098 obj_diff = -(grad_diff*grad_diff)/quad_coef;
1099 else
1100 obj_diff = -(grad_diff*grad_diff)/TAU;
1101
1102 if (obj_diff <= obj_diff_min)
1103 {
1104 Gmin_idx=j;
1105 obj_diff_min = obj_diff;
1106 }
1107 }
1108 }
1109 }
1110 else
1111 {
1112 if (!is_upper_bound(j))
1113 {
1114 double grad_diff=Gmaxn-G[j];
1115 if (-G[j] >= Gmaxn2)
1116 Gmaxn2 = -G[j];
1117 if (grad_diff > 0)
1118 {
1119 double obj_diff;
1120 double quad_coef = QD[in]+QD[j]-2*Q_in[j];
1121 if (quad_coef > 0)
1122 obj_diff = -(grad_diff*grad_diff)/quad_coef;
1123 else
1124 obj_diff = -(grad_diff*grad_diff)/TAU;
1125
1126 if (obj_diff <= obj_diff_min)
1127 {
1128 Gmin_idx=j;
1129 obj_diff_min = obj_diff;
1130 }
1131 }
1132 }
1133 }
1134 }
1135
1136 if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps)
1137 return 1;
1138
1139 if (y[Gmin_idx] == +1)
1140 out_i = Gmaxp_idx;
1141 else
1142 out_i = Gmaxn_idx;
1143 out_j = Gmin_idx;
1144
1145 return 0;
1146}
1147
1148bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
1149{
1150 if(is_upper_bound(i))
1151 {
1152 if(y[i]==+1)
1153 return(-G[i] > Gmax1);
1154 else
1155 return(-G[i] > Gmax4);
1156 }
1157 else if(is_lower_bound(i))
1158 {
1159 if(y[i]==+1)
1160 return(G[i] > Gmax2);
1161 else
1162 return(G[i] > Gmax3);
1163 }
1164 else
1165 return(false);
1166}
1167
1168void Solver_NU::do_shrinking()
1169{
1170 double Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
1171 double Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
1172 double Gmax3 = -INF; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
1173 double Gmax4 = -INF; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
1174
1175 // find maximal violating pair first
1176 int i;
1177 for(i=0;i<active_size;i++)
1178 {
1179 if(!is_upper_bound(i))
1180 {
1181 if(y[i]==+1)
1182 {
1183 if(-G[i] > Gmax1) Gmax1 = -G[i];
1184 }
1185 else if(-G[i] > Gmax4) Gmax4 = -G[i];
1186 }
1187 if(!is_lower_bound(i))
1188 {
1189 if(y[i]==+1)
1190 {
1191 if(G[i] > Gmax2) Gmax2 = G[i];
1192 }
1193 else if(G[i] > Gmax3) Gmax3 = G[i];
1194 }
1195 }
1196
1197 if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10)
1198 {
1199 unshrink = true;
1200 reconstruct_gradient();
1201 active_size = l;
1202 }
1203
1204 for(i=0;i<active_size;i++)
1205 if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
1206 {
1207 active_size--;
1208 while (active_size > i)
1209 {
1210 if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
1211 {
1212 swap_index(i,active_size);
1213 break;
1214 }
1215 active_size--;
1216 }
1217 }
1218}
1219
1220double Solver_NU::calculate_rho()
1221{
1222 int nr_free1 = 0,nr_free2 = 0;
1223 double ub1 = INF, ub2 = INF;
1224 double lb1 = -INF, lb2 = -INF;
1225 double sum_free1 = 0, sum_free2 = 0;
1226
1227 for(int i=0;i<active_size;i++)
1228 {
1229 if(y[i]==+1)
1230 {
1231 if(is_upper_bound(i))
1232 lb1 = max(lb1,G[i]);
1233 else if(is_lower_bound(i))
1234 ub1 = min(ub1,G[i]);
1235 else
1236 {
1237 ++nr_free1;
1238 sum_free1 += G[i];
1239 }
1240 }
1241 else
1242 {
1243 if(is_upper_bound(i))
1244 lb2 = max(lb2,G[i]);
1245 else if(is_lower_bound(i))
1246 ub2 = min(ub2,G[i]);
1247 else
1248 {
1249 ++nr_free2;
1250 sum_free2 += G[i];
1251 }
1252 }
1253 }
1254
1255 double r1,r2;
1256 if(nr_free1 > 0)
1257 r1 = sum_free1/nr_free1;
1258 else
1259 r1 = (ub1+lb1)/2;
1260
1261 if(nr_free2 > 0)
1262 r2 = sum_free2/nr_free2;
1263 else
1264 r2 = (ub2+lb2)/2;
1265
1266 si->r = (r1+r2)/2;
1267 return (r1-r2)/2;
1268}
1269
1270//
1271// Q matrices for various formulations
1272//
1273class SVC_Q: public Kernel
1274{
1275public:
1276 SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
1277 :Kernel(prob.l, prob.x, param)
1278 {
1279 clone(y,y_,prob.l);
1280 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
1281 QD = new double[prob.l];
1282 for(int i=0;i<prob.l;i++)
1283 QD[i] = (this->*kernel_function)(i,i);
1284 }
1285
1286 Qfloat *get_Q(int i, int len) const
1287 {
1288 Qfloat *data;
1289 int start, j;
1290 if((start = cache->get_data(i,&data,len)) < len)
1291 {
1292 for(j=start;j<len;j++)
1293 data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
1294 }
1295 return data;
1296 }
1297
1298 double *get_QD() const
1299 {
1300 return QD;
1301 }
1302
1303 void swap_index(int i, int j) const
1304 {
1305 cache->swap_index(i,j);
1306 Kernel::swap_index(i,j);
1307 swap(y[i],y[j]);
1308 swap(QD[i],QD[j]);
1309 }
1310
1311 ~SVC_Q()
1312 {
1313 delete[] y;
1314 delete cache;
1315 delete[] QD;
1316 }
1317private:
1318 schar *y;
1319 Cache *cache;
1320 double *QD;
1321};
1322
1323class ONE_CLASS_Q: public Kernel
1324{
1325public:
1326 ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
1327 :Kernel(prob.l, prob.x, param)
1328 {
1329 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
1330 QD = new double[prob.l];
1331 for(int i=0;i<prob.l;i++)
1332 QD[i] = (this->*kernel_function)(i,i);
1333 }
1334
1335 Qfloat *get_Q(int i, int len) const
1336 {
1337 Qfloat *data;
1338 int start, j;
1339 if((start = cache->get_data(i,&data,len)) < len)
1340 {
1341 for(j=start;j<len;j++)
1342 data[j] = (Qfloat)(this->*kernel_function)(i,j);
1343 }
1344 return data;
1345 }
1346
1347 double *get_QD() const
1348 {
1349 return QD;
1350 }
1351
1352 void swap_index(int i, int j) const
1353 {
1354 cache->swap_index(i,j);
1355 Kernel::swap_index(i,j);
1356 swap(QD[i],QD[j]);
1357 }
1358
1359 ~ONE_CLASS_Q()
1360 {
1361 delete cache;
1362 delete[] QD;
1363 }
1364private:
1365 Cache *cache;
1366 double *QD;
1367};
1368
1369class SVR_Q: public Kernel
1370{
1371public:
1372 SVR_Q(const svm_problem& prob, const svm_parameter& param)
1373 :Kernel(prob.l, prob.x, param)
1374 {
1375 l = prob.l;
1376 cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
1377 QD = new double[2*l];
1378 sign = new schar[2*l];
1379 index = new int[2*l];
1380 for(int k=0;k<l;k++)
1381 {
1382 sign[k] = 1;
1383 sign[k+l] = -1;
1384 index[k] = k;
1385 index[k+l] = k;
1386 QD[k] = (this->*kernel_function)(k,k);
1387 QD[k+l] = QD[k];
1388 }
1389 buffer[0] = new Qfloat[2*l];
1390 buffer[1] = new Qfloat[2*l];
1391 next_buffer = 0;
1392 }
1393
1394 void swap_index(int i, int j) const
1395 {
1396 swap(sign[i],sign[j]);
1397 swap(index[i],index[j]);
1398 swap(QD[i],QD[j]);
1399 }
1400
1401 Qfloat *get_Q(int i, int len) const
1402 {
1403 Qfloat *data;
1404 int j, real_i = index[i];
1405 if(cache->get_data(real_i,&data,l) < l)
1406 {
1407 for(j=0;j<l;j++)
1408 data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
1409 }
1410
1411 // reorder and copy
1412 Qfloat *buf = buffer[next_buffer];
1413 next_buffer = 1 - next_buffer;
1414 schar si = sign[i];
1415 for(j=0;j<len;j++)
1416 buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
1417 return buf;
1418 }
1419
1420 double *get_QD() const
1421 {
1422 return QD;
1423 }
1424
1425 ~SVR_Q()
1426 {
1427 delete cache;
1428 delete[] sign;
1429 delete[] index;
1430 delete[] buffer[0];
1431 delete[] buffer[1];
1432 delete[] QD;
1433 }
1434private:
1435 int l;
1436 Cache *cache;
1437 schar *sign;
1438 int *index;
1439 mutable int next_buffer;
1440 Qfloat *buffer[2];
1441 double *QD;
1442};
1443
1444//
1445// construct and solve various formulations
1446//
1447static void solve_c_svc(
1448 const svm_problem *prob, const svm_parameter* param,
1449 double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
1450{
1451 int l = prob->l;
1452 double *minus_ones = new double[l];
1453 schar *y = new schar[l];
1454
1455 int i;
1456
1457 for(i=0;i<l;i++)
1458 {
1459 alpha[i] = 0;
1460 minus_ones[i] = -1;
1461 if(prob->y[i] > 0) y[i] = +1; else y[i] = -1;
1462 }
1463
1464 Solver s;
1465 s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
1466 alpha, Cp, Cn, param->eps, si, param->shrinking, param->verbose);
1467
1468 double sum_alpha=0;
1469 for(i=0;i<l;i++)
1470 sum_alpha += alpha[i];
1471
1472 if (Cp==Cn)
1473 if(param->verbose)//pk
1474 info("nu = %f\n", sum_alpha/(Cp*prob->l));
1475
1476 for(i=0;i<l;i++)
1477 alpha[i] *= y[i];
1478
1479 delete[] minus_ones;
1480 delete[] y;
1481}
1482
1483static void solve_nu_svc(
1484 const svm_problem *prob, const svm_parameter *param,
1485 double *alpha, Solver::SolutionInfo* si)
1486{
1487 int i;
1488 int l = prob->l;
1489 double nu = param->nu;
1490
1491 schar *y = new schar[l];
1492
1493 for(i=0;i<l;i++)
1494 if(prob->y[i]>0)
1495 y[i] = +1;
1496 else
1497 y[i] = -1;
1498
1499 double sum_pos = nu*l/2;
1500 double sum_neg = nu*l/2;
1501
1502 for(i=0;i<l;i++)
1503 if(y[i] == +1)
1504 {
1505 alpha[i] = min(1.0,sum_pos);
1506 sum_pos -= alpha[i];
1507 }
1508 else
1509 {
1510 alpha[i] = min(1.0,sum_neg);
1511 sum_neg -= alpha[i];
1512 }
1513
1514 double *zeros = new double[l];
1515
1516 for(i=0;i<l;i++)
1517 zeros[i] = 0;
1518
1519 Solver_NU s;
1520 s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
1521 alpha, 1.0, 1.0, param->eps, si, param->shrinking, param->verbose);
1522 double r = si->r;
1523
1524 if(param->verbose)//pk
1525 info("C = %f\n",1/r);
1526
1527 for(i=0;i<l;i++)
1528 alpha[i] *= y[i]/r;
1529
1530 si->rho /= r;
1531 si->obj /= (r*r);
1532 si->upper_bound_p = 1/r;
1533 si->upper_bound_n = 1/r;
1534
1535 delete[] y;
1536 delete[] zeros;
1537}
1538
1539static void solve_one_class(
1540 const svm_problem *prob, const svm_parameter *param,
1541 double *alpha, Solver::SolutionInfo* si)
1542{
1543 int l = prob->l;
1544 double *zeros = new double[l];
1545 schar *ones = new schar[l];
1546 int i;
1547
1548 int n = (int)(param->nu*prob->l); // # of alpha's at upper bound
1549
1550 for(i=0;i<n;i++)
1551 alpha[i] = 1;
1552 if(n<prob->l)
1553 alpha[n] = param->nu * prob->l - n;
1554 for(i=n+1;i<l;i++)
1555 alpha[i] = 0;
1556
1557 for(i=0;i<l;i++)
1558 {
1559 zeros[i] = 0;
1560 ones[i] = 1;
1561 }
1562
1563 Solver s;
1564 s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
1565 alpha, 1.0, 1.0, param->eps, si, param->shrinking, param->verbose );
1566
1567 delete[] zeros;
1568 delete[] ones;
1569}
1570
1571static void solve_epsilon_svr(
1572 const svm_problem *prob, const svm_parameter *param,
1573 double *alpha, Solver::SolutionInfo* si)
1574{
1575 int l = prob->l;
1576 double *alpha2 = new double[2*l];
1577 double *linear_term = new double[2*l];
1578 schar *y = new schar[2*l];
1579 int i;
1580
1581 for(i=0;i<l;i++)
1582 {
1583 alpha2[i] = 0;
1584 linear_term[i] = param->p - prob->y[i];
1585 y[i] = 1;
1586
1587 alpha2[i+l] = 0;
1588 linear_term[i+l] = param->p + prob->y[i];
1589 y[i+l] = -1;
1590 }
1591
1592 Solver s;
1593 s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1594 alpha2, param->C, param->C, param->eps, si, param->shrinking, param->verbose);
1595
1596 double sum_alpha = 0;
1597 for(i=0;i<l;i++)
1598 {
1599 alpha[i] = alpha2[i] - alpha2[i+l];
1600 sum_alpha += fabs(alpha[i]);
1601 }
1602 if(param->verbose)//pk
1603 info("nu = %f\n",sum_alpha/(param->C*l));
1604
1605 delete[] alpha2;
1606 delete[] linear_term;
1607 delete[] y;
1608}
1609
1610static void solve_nu_svr(
1611 const svm_problem *prob, const svm_parameter *param,
1612 double *alpha, Solver::SolutionInfo* si)
1613{
1614 int l = prob->l;
1615 double C = param->C;
1616 double *alpha2 = new double[2*l];
1617 double *linear_term = new double[2*l];
1618 schar *y = new schar[2*l];
1619 int i;
1620
1621 double sum = C * param->nu * l / 2;
1622 for(i=0;i<l;i++)
1623 {
1624 alpha2[i] = alpha2[i+l] = min(sum,C);
1625 sum -= alpha2[i];
1626
1627 linear_term[i] = - prob->y[i];
1628 y[i] = 1;
1629
1630 linear_term[i+l] = prob->y[i];
1631 y[i+l] = -1;
1632 }
1633
1634 Solver_NU s;
1635 s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
1636 alpha2, C, C, param->eps, si, param->shrinking, param->verbose);
1637
1638 if(param->verbose)//pk
1639 info("epsilon = %f\n",-si->r);
1640
1641 for(i=0;i<l;i++)
1642 alpha[i] = alpha2[i] - alpha2[i+l];
1643
1644 delete[] alpha2;
1645 delete[] linear_term;
1646 delete[] y;
1647}
1648
1649//
1650// decision_function
1651//
1652struct decision_function
1653{
1654 double *alpha;
1655 double rho;
1656};
1657
1658static decision_function svm_train_one(
1659 const svm_problem *prob, const svm_parameter *param,
1660 double Cp, double Cn)
1661{
1662 double *alpha = Malloc(double,prob->l);
1663 Solver::SolutionInfo si;
1664 switch(param->svm_type)
1665 {
1666 case C_SVC:
1667 solve_c_svc(prob,param,alpha,&si,Cp,Cn);
1668 break;
1669 case NU_SVC:
1670 solve_nu_svc(prob,param,alpha,&si);
1671 break;
1672 case ONE_CLASS:
1673 solve_one_class(prob,param,alpha,&si);
1674 break;
1675 case EPSILON_SVR:
1676 solve_epsilon_svr(prob,param,alpha,&si);
1677 break;
1678 case NU_SVR:
1679 solve_nu_svr(prob,param,alpha,&si);
1680 break;
1681 }
1682
1683 if(param->verbose)//pk
1684 info("obj = %f, rho = %f\n",si.obj,si.rho);
1685
1686 // output SVs
1687
1688 int nSV = 0;
1689 int nBSV = 0;
1690 for(int i=0;i<prob->l;i++)
1691 {
1692 if(fabs(alpha[i]) > 0)
1693 {
1694 ++nSV;
1695 if(prob->y[i] > 0)
1696 {
1697 if(fabs(alpha[i]) >= si.upper_bound_p)
1698 ++nBSV;
1699 }
1700 else
1701 {
1702 if(fabs(alpha[i]) >= si.upper_bound_n)
1703 ++nBSV;
1704 }
1705 }
1706 }
1707
1708 if(param->verbose)//pk
1709 info("nSV = %d, nBSV = %d\n",nSV,nBSV);
1710
1711 decision_function f;
1712 f.alpha = alpha;
1713 f.rho = si.rho;
1714 return f;
1715}
1716
1717// Platt's binary SVM Probabilistic Output: an improvement from Lin et al.
1718static void sigmoid_train(
1719 int l, const double *dec_values, const double *labels,
1720 double& A, double& B)
1721{
1722 double prior1=0, prior0 = 0;
1723 int i;
1724
1725 for (i=0;i<l;i++)
1726 if (labels[i] > 0) prior1+=1;
1727 else prior0+=1;
1728
1729 int max_iter=100; // Maximal number of iterations
1730 double min_step=1e-10; // Minimal step taken in line search
1731 double sigma=1e-12; // For numerically strict PD of Hessian
1732 double eps=1e-5;
1733 double hiTarget=(prior1+1.0)/(prior1+2.0);
1734 double loTarget=1/(prior0+2.0);
1735 double *t=Malloc(double,l);
1736 double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
1737 double newA,newB,newf,d1,d2;
1738 int iter;
1739
1740 // Initial Point and Initial Fun Value
1741 A=0.0; B=log((prior0+1.0)/(prior1+1.0));
1742 double fval = 0.0;
1743
1744 for (i=0;i<l;i++)
1745 {
1746 if (labels[i]>0) t[i]=hiTarget;
1747 else t[i]=loTarget;
1748 fApB = dec_values[i]*A+B;
1749 if (fApB>=0)
1750 fval += t[i]*fApB + log(1+exp(-fApB));
1751 else
1752 fval += (t[i] - 1)*fApB +log(1+exp(fApB));
1753 }
1754 for (iter=0;iter<max_iter;iter++)
1755 {
1756 // Update Gradient and Hessian (use H' = H + sigma I)
1757 h11=sigma; // numerically ensures strict PD
1758 h22=sigma;
1759 h21=0.0;g1=0.0;g2=0.0;
1760 for (i=0;i<l;i++)
1761 {
1762 fApB = dec_values[i]*A+B;
1763 if (fApB >= 0)
1764 {
1765 p=exp(-fApB)/(1.0+exp(-fApB));
1766 q=1.0/(1.0+exp(-fApB));
1767 }
1768 else
1769 {
1770 p=1.0/(1.0+exp(fApB));
1771 q=exp(fApB)/(1.0+exp(fApB));
1772 }
1773 d2=p*q;
1774 h11+=dec_values[i]*dec_values[i]*d2;
1775 h22+=d2;
1776 h21+=dec_values[i]*d2;
1777 d1=t[i]-p;
1778 g1+=dec_values[i]*d1;
1779 g2+=d1;
1780 }
1781
1782 // Stopping Criteria
1783 if (fabs(g1)<eps && fabs(g2)<eps)
1784 break;
1785
1786 // Finding Newton direction: -inv(H') * g
1787 det=h11*h22-h21*h21;
1788 dA=-(h22*g1 - h21 * g2) / det;
1789 dB=-(-h21*g1+ h11 * g2) / det;
1790 gd=g1*dA+g2*dB;
1791
1792
1793 stepsize = 1; // Line Search
1794 while (stepsize >= min_step)
1795 {
1796 newA = A + stepsize * dA;
1797 newB = B + stepsize * dB;
1798
1799 // New function value
1800 newf = 0.0;
1801 for (i=0;i<l;i++)
1802 {
1803 fApB = dec_values[i]*newA+newB;
1804 if (fApB >= 0)
1805 newf += t[i]*fApB + log(1+exp(-fApB));
1806 else
1807 newf += (t[i] - 1)*fApB +log(1+exp(fApB));
1808 }
1809 // Check sufficient decrease
1810 if (newf<fval+0.0001*stepsize*gd)
1811 {
1812 A=newA;B=newB;fval=newf;
1813 break;
1814 }
1815 else
1816 stepsize = stepsize / 2.0;
1817 }
1818
1819 if (stepsize < min_step)
1820 {
1821 info("Line search fails in two-class probability estimates\n");
1822 break;
1823 }
1824 }
1825
1826 if (iter>=max_iter)
1827 info("Reaching maximal iterations in two-class probability estimates\n");
1828 free(t);
1829}
1830
1831static double sigmoid_predict(double decision_value, double A, double B)
1832{
1833 double fApB = decision_value*A+B;
1834 // 1-p used later; avoid catastrophic cancellation
1835 if (fApB >= 0)
1836 return exp(-fApB)/(1.0+exp(-fApB));
1837 else
1838 return 1.0/(1+exp(fApB)) ;
1839}
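// Editor's note (illustrative, not part of the original source): both branches
// above compute the same value p = 1/(1+exp(A*f+B)); branching on the sign of
// fApB only avoids calling exp() on a large positive argument and keeps 1-p
// accurate when p is close to 1. For example, with A=-1, B=0 and a decision
// value of 2, fApB = -2 and the returned probability is 1/(1+e^-2), about 0.88.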
1840
1841// Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
1842static void multiclass_probability(int k, double **r, double *p)
1843{
1844 int t,j;
1845 int iter = 0, max_iter=max(100,k);
1846 double **Q=Malloc(double *,k);
1847 double *Qp=Malloc(double,k);
1848 double pQp, eps=0.005/k;
1849
1850 for (t=0;t<k;t++)
1851 {
1852 p[t]=1.0/k; // Valid if k = 1
1853 Q[t]=Malloc(double,k);
1854 Q[t][t]=0;
1855 for (j=0;j<t;j++)
1856 {
1857 Q[t][t]+=r[j][t]*r[j][t];
1858 Q[t][j]=Q[j][t];
1859 }
1860 for (j=t+1;j<k;j++)
1861 {
1862 Q[t][t]+=r[j][t]*r[j][t];
1863 Q[t][j]=-r[j][t]*r[t][j];
1864 }
1865 }
1866 for (iter=0;iter<max_iter;iter++)
1867 {
1868 // stopping condition, recalculate QP,pQP for numerical accuracy
1869 pQp=0;
1870 for (t=0;t<k;t++)
1871 {
1872 Qp[t]=0;
1873 for (j=0;j<k;j++)
1874 Qp[t]+=Q[t][j]*p[j];
1875 pQp+=p[t]*Qp[t];
1876 }
1877 double max_error=0;
1878 for (t=0;t<k;t++)
1879 {
1880 double error=fabs(Qp[t]-pQp);
1881 if (error>max_error)
1882 max_error=error;
1883 }
1884 if (max_error<eps) break;
1885
1886 for (t=0;t<k;t++)
1887 {
1888 double diff=(-Qp[t]+pQp)/Q[t][t];
1889 p[t]+=diff;
1890 pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
1891 for (j=0;j<k;j++)
1892 {
1893 Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
1894 p[j]/=(1+diff);
1895 }
1896 }
1897 }
1898 if (iter>=max_iter)
1899 info("Exceeds max_iter in multiclass_prob\n");
1900 for(t=0;t<k;t++) free(Q[t]);
1901 free(Q);
1902 free(Qp);
1903}
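// Editor's note (illustrative, not part of the original source): given the
// pairwise estimates r[i][j] ~ P(y=i | y=i or j, x), the loop above iterates
// on p until Qp is nearly constant across classes (max_t |Qp[t]-pQp| < eps),
// while each update keeps p normalized to sum to 1. The resulting p[] is the
// per-class probability vector used by svm_predict_probability() further down.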
1904
1905// Cross-validation decision values for probability estimates
1906static void svm_binary_svc_probability(
1907 const svm_problem *prob, const svm_parameter *param,
1908 double Cp, double Cn, double& probA, double& probB)
1909{
1910 int i;
1911 int nr_fold = 5;
1912 int *perm = Malloc(int,prob->l);
1913 double *dec_values = Malloc(double,prob->l);
1914
1915 // random shuffle
1916 for(i=0;i<prob->l;i++) perm[i]=i;
1917 for(i=0;i<prob->l;i++)
1918 {
1919 int j = i+rand()%(prob->l-i);
1920 swap(perm[i],perm[j]);
1921 }
1922 for(i=0;i<nr_fold;i++)
1923 {
1924 int begin = i*prob->l/nr_fold;
1925 int end = (i+1)*prob->l/nr_fold;
1926 int j,k;
1927 struct svm_problem subprob;
1928
1929 subprob.l = prob->l-(end-begin);
1930 subprob.x = Malloc(struct svm_node*,subprob.l);
1931 subprob.y = Malloc(double,subprob.l);
1932
1933 k=0;
1934 for(j=0;j<begin;j++)
1935 {
1936 subprob.x[k] = prob->x[perm[j]];
1937 subprob.y[k] = prob->y[perm[j]];
1938 ++k;
1939 }
1940 for(j=end;j<prob->l;j++)
1941 {
1942 subprob.x[k] = prob->x[perm[j]];
1943 subprob.y[k] = prob->y[perm[j]];
1944 ++k;
1945 }
1946 int p_count=0,n_count=0;
1947 for(j=0;j<k;j++)
1948 if(subprob.y[j]>0)
1949 p_count++;
1950 else
1951 n_count++;
1952
1953 if(p_count==0 && n_count==0)
1954 for(j=begin;j<end;j++)
1955 dec_values[perm[j]] = 0;
1956 else if(p_count > 0 && n_count == 0)
1957 for(j=begin;j<end;j++)
1958 dec_values[perm[j]] = 1;
1959 else if(p_count == 0 && n_count > 0)
1960 for(j=begin;j<end;j++)
1961 dec_values[perm[j]] = -1;
1962 else
1963 {
1964 svm_parameter subparam = *param;
1965 subparam.probability=0;
1966 subparam.C=1.0;
1967 subparam.nr_weight=2;
1968 subparam.weight_label = Malloc(int,2);
1969 subparam.weight = Malloc(double,2);
1970 subparam.weight_label[0]=+1;
1971 subparam.weight_label[1]=-1;
1972 subparam.weight[0]=Cp;
1973 subparam.weight[1]=Cn;
1974 struct svm_model *submodel = svm_train(&subprob,&subparam);
1975 for(j=begin;j<end;j++)
1976 {
1977 svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]]));
1978 // ensure +1 -1 order; this is the reason for not using the CV subroutine
1979 dec_values[perm[j]] *= submodel->label[0];
1980 }
1981 svm_free_and_destroy_model(&submodel);
1982 svm_destroy_param(&subparam);
1983 }
1984 free(subprob.x);
1985 free(subprob.y);
1986 }
1987 sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
1988 free(dec_values);
1989 free(perm);
1990}
1991
1992// Return parameter of a Laplace distribution
1993static double svm_svr_probability(
1994 const svm_problem *prob, const svm_parameter *param)
1995{
1996 int i;
1997 int nr_fold = 5;
1998 double *ymv = Malloc(double,prob->l);
1999 double mae = 0;
2000
2001 svm_parameter newparam = *param;
2002 newparam.probability = 0;
2003 svm_cross_validation(prob,&newparam,nr_fold,ymv);
2004 for(i=0;i<prob->l;i++)
2005 {
2006 ymv[i]=prob->y[i]-ymv[i];
2007 mae += fabs(ymv[i]);
2008 }
2009 mae /= prob->l;
2010 double std=sqrt(2*mae*mae);
2011 int count=0;
2012 mae=0;
2013 for(i=0;i<prob->l;i++)
2014 if (fabs(ymv[i]) > 5*std)
2015 count=count+1;
2016 else
2017 mae+=fabs(ymv[i]);
2018 mae /= (prob->l-count);
2019 info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
2020 free(ymv);
2021 return mae;
2022}
2023
2024
2025// label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
2026// perm, length l, must be allocated before calling this subroutine
2027static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
2028{
2029 int l = prob->l;
2030 int max_nr_class = 16;
2031 int nr_class = 0;
2032 int *label = Malloc(int,max_nr_class);
2033 int *count = Malloc(int,max_nr_class);
2034 int *data_label = Malloc(int,l);
2035 int i;
2036
2037 for(i=0;i<l;i++)
2038 {
2039 int this_label = (int)prob->y[i];
2040 int j;
2041 for(j=0;j<nr_class;j++)
2042 {
2043 if(this_label == label[j])
2044 {
2045 ++count[j];
2046 break;
2047 }
2048 }
2049 data_label[i] = j;
2050 if(j == nr_class)
2051 {
2052 if(nr_class == max_nr_class)
2053 {
2054 max_nr_class *= 2;
2055 label = (int *)realloc(label,max_nr_class*sizeof(int));
2056 count = (int *)realloc(count,max_nr_class*sizeof(int));
2057 }
2058 label[nr_class] = this_label;
2059 count[nr_class] = 1;
2060 ++nr_class;
2061 }
2062 }
2063
2064 int *start = Malloc(int,nr_class);
2065 start[0] = 0;
2066 for(i=1;i<nr_class;i++)
2067 start[i] = start[i-1]+count[i-1];
2068 for(i=0;i<l;i++)
2069 {
2070 perm[start[data_label[i]]] = i;
2071 ++start[data_label[i]];
2072 }
2073 start[0] = 0;
2074 for(i=1;i<nr_class;i++)
2075 start[i] = start[i-1]+count[i-1];
2076
2077 *nr_class_ret = nr_class;
2078 *label_ret = label;
2079 *start_ret = start;
2080 *count_ret = count;
2081 free(data_label);
2082}
2083
2084//
2085// Interface functions
2086//
2087svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
2088{
2089 svm_model *model = Malloc(svm_model,1);
2090 model->param = *param;
2091 model->free_sv = 0; // XXX
2092
2093 if(param->svm_type == ONE_CLASS ||
2094 param->svm_type == EPSILON_SVR ||
2095 param->svm_type == NU_SVR)
2096 {
2097 // regression or one-class-svm
2098 model->nr_class = 2;
2099 model->label = NULL;
2100 model->nSV = NULL;
2101 model->probA = NULL; model->probB = NULL;
2102 model->sv_coef = Malloc(double *,1);
2103
2104 if(param->probability &&
2105 (param->svm_type == EPSILON_SVR ||
2106 param->svm_type == NU_SVR))
2107 {
2108 model->probA = Malloc(double,1);
2109 model->probA[0] = svm_svr_probability(prob,param);
2110 }
2111
2112 decision_function f = svm_train_one(prob,param,0,0);
2113 model->rho = Malloc(double,1);
2114 model->rho[0] = f.rho;
2115
2116 int nSV = 0;
2117 int i;
2118 for(i=0;i<prob->l;i++)
2119 if(fabs(f.alpha[i]) > 0) ++nSV;
2120 model->l = nSV;
2121 model->SV = Malloc(svm_node *,nSV);
2122 model->sv_coef[0] = Malloc(double,nSV);
2123 int j = 0;
2124 for(i=0;i<prob->l;i++)
2125 if(fabs(f.alpha[i]) > 0)
2126 {
2127 model->SV[j] = prob->x[i];
2128 model->sv_coef[0][j] = f.alpha[i];
2129 ++j;
2130 }
2131
2132 free(f.alpha);
2133 }
2134 else
2135 {
2136 // classification
2137 int l = prob->l;
2138 int nr_class;
2139 int *label = NULL;
2140 int *start = NULL;
2141 int *count = NULL;
2142 int *perm = Malloc(int,l);
2143
2144 // group training data of the same class
2145 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
2146 if(nr_class == 1)
2147 info("WARNING: training data in only one class. See README for details.\n");
2148
2149 svm_node **x = Malloc(svm_node *,l);
2150 int i;
2151 for(i=0;i<l;i++)
2152 x[i] = prob->x[perm[i]];
2153
2154 // calculate weighted C
2155
2156 double *weighted_C = Malloc(double, nr_class);
2157 for(i=0;i<nr_class;i++)
2158 weighted_C[i] = param->C;
2159 for(i=0;i<param->nr_weight;i++)
2160 {
2161 int j;
2162 for(j=0;j<nr_class;j++)
2163 if(param->weight_label[i] == label[j])
2164 break;
2165 if(j == nr_class)
2166 fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
2167 else
2168 weighted_C[j] *= param->weight[i];
2169 }
2170
2171 // train k*(k-1)/2 models
2172
2173 bool *nonzero = Malloc(bool,l);
2174 for(i=0;i<l;i++)
2175 nonzero[i] = false;
2176 decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
2177
2178 double *probA=NULL,*probB=NULL;
2179 if (param->probability)
2180 {
2181 probA=Malloc(double,nr_class*(nr_class-1)/2);
2182 probB=Malloc(double,nr_class*(nr_class-1)/2);
2183 }
2184
2185 int p = 0;
2186 for(i=0;i<nr_class;i++)
2187 for(int j=i+1;j<nr_class;j++)
2188 {
2189 svm_problem sub_prob;
2190 int si = start[i], sj = start[j];
2191 int ci = count[i], cj = count[j];
2192 sub_prob.l = ci+cj;
2193 sub_prob.x = Malloc(svm_node *,sub_prob.l);
2194 sub_prob.y = Malloc(double,sub_prob.l);
2195 int k;
2196 for(k=0;k<ci;k++)
2197 {
2198 sub_prob.x[k] = x[si+k];
2199 sub_prob.y[k] = +1;
2200 }
2201 for(k=0;k<cj;k++)
2202 {
2203 sub_prob.x[ci+k] = x[sj+k];
2204 sub_prob.y[ci+k] = -1;
2205 }
2206
2207 if(param->probability)
2208 svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
2209
2210 f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
2211 for(k=0;k<ci;k++)
2212 if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
2213 nonzero[si+k] = true;
2214 for(k=0;k<cj;k++)
2215 if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
2216 nonzero[sj+k] = true;
2217 free(sub_prob.x);
2218 free(sub_prob.y);
2219 ++p;
2220 }
2221
2222 // build output
2223
2224 model->nr_class = nr_class;
2225
2226 model->label = Malloc(int,nr_class);
2227 for(i=0;i<nr_class;i++)
2228 model->label[i] = label[i];
2229
2230 model->rho = Malloc(double,nr_class*(nr_class-1)/2);
2231 for(i=0;i<nr_class*(nr_class-1)/2;i++)
2232 model->rho[i] = f[i].rho;
2233
2234 if(param->probability)
2235 {
2236 model->probA = Malloc(double,nr_class*(nr_class-1)/2);
2237 model->probB = Malloc(double,nr_class*(nr_class-1)/2);
2238 for(i=0;i<nr_class*(nr_class-1)/2;i++)
2239 {
2240 model->probA[i] = probA[i];
2241 model->probB[i] = probB[i];
2242 }
2243 }
2244 else
2245 {
2246 model->probA=NULL;
2247 model->probB=NULL;
2248 }
2249
2250 int total_sv = 0;
2251 int *nz_count = Malloc(int,nr_class);
2252 model->nSV = Malloc(int,nr_class);
2253 for(i=0;i<nr_class;i++)
2254 {
2255 int nSV = 0;
2256 for(int j=0;j<count[i];j++)
2257 if(nonzero[start[i]+j])
2258 {
2259 ++nSV;
2260 ++total_sv;
2261 }
2262 model->nSV[i] = nSV;
2263 nz_count[i] = nSV;
2264 }
2265
2266 if(param->verbose)//pk
2267 info("Total nSV = %d\n",total_sv);
2268
2269 model->l = total_sv;
2270 model->SV = Malloc(svm_node *,total_sv);
2271 p = 0;
2272 for(i=0;i<l;i++)
2273 if(nonzero[i]) model->SV[p++] = x[i];
2274
2275 int *nz_start = Malloc(int,nr_class);
2276 nz_start[0] = 0;
2277 for(i=1;i<nr_class;i++)
2278 nz_start[i] = nz_start[i-1]+nz_count[i-1];
2279
2280 model->sv_coef = Malloc(double *,nr_class-1);
2281 for(i=0;i<nr_class-1;i++)
2282 model->sv_coef[i] = Malloc(double,total_sv);
2283
2284 p = 0;
2285 for(i=0;i<nr_class;i++)
2286 for(int j=i+1;j<nr_class;j++)
2287 {
2288 // classifier (i,j): coefficients with
2289 // i are in sv_coef[j-1][nz_start[i]...],
2290 // j are in sv_coef[i][nz_start[j]...]
2291
2292 int si = start[i];
2293 int sj = start[j];
2294 int ci = count[i];
2295 int cj = count[j];
2296
2297 int q = nz_start[i];
2298 int k;
2299 for(k=0;k<ci;k++)
2300 if(nonzero[si+k])
2301 model->sv_coef[j-1][q++] = f[p].alpha[k];
2302 q = nz_start[j];
2303 for(k=0;k<cj;k++)
2304 if(nonzero[sj+k])
2305 model->sv_coef[i][q++] = f[p].alpha[ci+k];
2306 ++p;
2307 }
2308
2309 free(label);
2310 free(probA);
2311 free(probB);
2312 free(count);
2313 free(perm);
2314 free(start);
2315 free(x);
2316 free(weighted_C);
2317 free(nonzero);
2318 for(i=0;i<nr_class*(nr_class-1)/2;i++)
2319 free(f[i].alpha);
2320 free(f);
2321 free(nz_count);
2322 free(nz_start);
2323 }
2324 return model;
2325}
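// Editor's note: a minimal training sketch (not part of the original source).
// The helper name is hypothetical; the caller owns prob.x and prob.y and must
// keep the training vectors alive while the model is used, because svm_train()
// stores pointers to them as support vectors (free_sv == 0).
static svm_model *example_train(svm_node **x, double *y, int l,
                                const svm_parameter *param)
{
	svm_problem prob;
	prob.l = l;	// number of training samples
	prob.x = x;	// x[i] is a sparse vector terminated by index = -1
	prob.y = y;	// class labels (classification) or target values (regression)
	return svm_train(&prob, param);	// release with svm_free_and_destroy_model()
}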
2326
2327// Stratified cross validation
2328void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
2329{
2330 int i;
2331 int *fold_start = Malloc(int,nr_fold+1);
2332 int l = prob->l;
2333 int *perm = Malloc(int,l);
2334 int nr_class;
2335
2336 // stratified CV may not give the leave-one-out rate:
2337 // splitting each class into l folds may leave some folds with zero elements
2338 if((param->svm_type == C_SVC ||
2339 param->svm_type == NU_SVC) && nr_fold < l)
2340 {
2341 int *start = NULL;
2342 int *label = NULL;
2343 int *count = NULL;
2344 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
2345
2346 // random shuffle and then data grouped by fold using the array perm
2347 int *fold_count = Malloc(int,nr_fold);
2348 int c;
2349 int *index = Malloc(int,l);
2350 for(i=0;i<l;i++)
2351 index[i]=perm[i];
2352 for (c=0; c<nr_class; c++)
2353 for(i=0;i<count[c];i++)
2354 {
2355 int j = i+rand()%(count[c]-i);
2356 swap(index[start[c]+j],index[start[c]+i]);
2357 }
2358 for(i=0;i<nr_fold;i++)
2359 {
2360 fold_count[i] = 0;
2361 for (c=0; c<nr_class;c++)
2362 fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
2363 }
2364 fold_start[0]=0;
2365 for (i=1;i<=nr_fold;i++)
2366 fold_start[i] = fold_start[i-1]+fold_count[i-1];
2367 for (c=0; c<nr_class;c++)
2368 for(i=0;i<nr_fold;i++)
2369 {
2370 int begin = start[c]+i*count[c]/nr_fold;
2371 int end = start[c]+(i+1)*count[c]/nr_fold;
2372 for(int j=begin;j<end;j++)
2373 {
2374 perm[fold_start[i]] = index[j];
2375 fold_start[i]++;
2376 }
2377 }
2378 fold_start[0]=0;
2379 for (i=1;i<=nr_fold;i++)
2380 fold_start[i] = fold_start[i-1]+fold_count[i-1];
2381 free(start);
2382 free(label);
2383 free(count);
2384 free(index);
2385 free(fold_count);
2386 }
2387 else
2388 {
2389 for(i=0;i<l;i++) perm[i]=i;
2390 for(i=0;i<l;i++)
2391 {
2392 int j = i+rand()%(l-i);
2393 swap(perm[i],perm[j]);
2394 }
2395 for(i=0;i<=nr_fold;i++)
2396 fold_start[i]=i*l/nr_fold;
2397 }
2398
2399 for(i=0;i<nr_fold;i++)
2400 {
2401 int begin = fold_start[i];
2402 int end = fold_start[i+1];
2403 int j,k;
2404 struct svm_problem subprob;
2405
2406 subprob.l = l-(end-begin);
2407 subprob.x = Malloc(struct svm_node*,subprob.l);
2408 subprob.y = Malloc(double,subprob.l);
2409
2410 k=0;
2411 for(j=0;j<begin;j++)
2412 {
2413 subprob.x[k] = prob->x[perm[j]];
2414 subprob.y[k] = prob->y[perm[j]];
2415 ++k;
2416 }
2417 for(j=end;j<l;j++)
2418 {
2419 subprob.x[k] = prob->x[perm[j]];
2420 subprob.y[k] = prob->y[perm[j]];
2421 ++k;
2422 }
2423 struct svm_model *submodel = svm_train(&subprob,param);
2424 if(param->probability &&
2425 (param->svm_type == C_SVC || param->svm_type == NU_SVC))
2426 {
2427 double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
2428 for(j=begin;j<end;j++)
2429 target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
2430 free(prob_estimates);
2431 }
2432 else
2433 for(j=begin;j<end;j++)
2434 target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
2435 svm_free_and_destroy_model(&submodel);
2436 free(subprob.x);
2437 free(subprob.y);
2438 }
2439 free(fold_start);
2440 free(perm);
2441}
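// Editor's note (illustrative, not part of the original source): a typical
// 5-fold cross-validation call; target[i] receives the prediction for sample i
// from a model trained without the fold that contains i.
//
//   double *target = Malloc(double, prob->l);
//   svm_cross_validation(prob, param, 5, target);
//   // compare target[i] with prob->y[i] to estimate accuracy or squared error
//   free(target);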
2442
2443
2444int svm_get_svm_type(const svm_model *model)
2445{
2446 return model->param.svm_type;
2447}
2448
2449int svm_get_nr_class(const svm_model *model)
2450{
2451 return model->nr_class;
2452}
2453
2454void svm_get_labels(const svm_model *model, int* label)
2455{
2456 if (model->label != NULL)
2457 for(int i=0;i<model->nr_class;i++)
2458 label[i] = model->label[i];
2459}
2460
2461double svm_get_svr_probability(const svm_model *model)
2462{
2463 if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
2464 model->probA!=NULL)
2465 return model->probA[0];
2466 else
2467 {
2468 fprintf(stderr,"Model doesn't contain information for SVR probability inference\n");
2469 return 0;
2470 }
2471}
2472
2473double svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
2474{
2475 int i;
2476 if(model->param.svm_type == ONE_CLASS ||
2477 model->param.svm_type == EPSILON_SVR ||
2478 model->param.svm_type == NU_SVR)
2479 {
2480 double *sv_coef = model->sv_coef[0];
2481 double sum = 0;
2482 for(i=0;i<model->l;i++)
2483 sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
2484 sum -= model->rho[0];
2485 *dec_values = sum;
2486
2487 if(model->param.svm_type == ONE_CLASS)
2488 return (sum>0)?1:-1;
2489 else
2490 return sum;
2491 }
2492 else
2493 {
2494 int nr_class = model->nr_class;
2495 int l = model->l;
2496
2497 double *kvalue = Malloc(double,l);
2498 for(i=0;i<l;i++)
2499 kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
2500
2501 int *start = Malloc(int,nr_class);
2502 start[0] = 0;
2503 for(i=1;i<nr_class;i++)
2504 start[i] = start[i-1]+model->nSV[i-1];
2505
2506 int *vote = Malloc(int,nr_class);
2507 for(i=0;i<nr_class;i++)
2508 vote[i] = 0;
2509
2510 int p=0;
2511 for(i=0;i<nr_class;i++)
2512 for(int j=i+1;j<nr_class;j++)
2513 {
2514 double sum = 0;
2515 int si = start[i];
2516 int sj = start[j];
2517 int ci = model->nSV[i];
2518 int cj = model->nSV[j];
2519
2520 int k;
2521 double *coef1 = model->sv_coef[j-1];
2522 double *coef2 = model->sv_coef[i];
2523 for(k=0;k<ci;k++)
2524 sum += coef1[si+k] * kvalue[si+k];
2525 for(k=0;k<cj;k++)
2526 sum += coef2[sj+k] * kvalue[sj+k];
2527 sum -= model->rho[p];
2528 dec_values[p] = sum;
2529
2530 if(dec_values[p] > 0)
2531 ++vote[i];
2532 else
2533 ++vote[j];
2534 p++;
2535 }
2536
2537 int vote_max_idx = 0;
2538 for(i=1;i<nr_class;i++)
2539 if(vote[i] > vote[vote_max_idx])
2540 vote_max_idx = i;
2541
2542 free(kvalue);
2543 free(start);
2544 free(vote);
2545 return model->label[vote_max_idx];
2546 }
2547}
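// --- Illustrative usage sketch (not part of the original svm.cpp) ---
// For C_SVC/NU_SVC models, svm_predict_values() writes the
// nr_class*(nr_class-1)/2 pairwise decision values in the same (i,j) loop
// order used above; the buffer size and the traversal below simply mirror
// that layout.
#if 0
#include "svm.h"
#include <stdio.h>
#include <stdlib.h>

static void example_decision_values(const struct svm_model *model,
                                    const struct svm_node *x)
{
	int nr_class = svm_get_nr_class(model);
	double *dec_values = (double *)malloc(sizeof(double) * nr_class * (nr_class - 1) / 2);
	double predicted = svm_predict_values(model, x, dec_values);

	int p = 0;
	for(int i = 0; i < nr_class; i++)
		for(int j = i + 1; j < nr_class; j++, p++)
			printf("class pair (%d,%d): decision value %g\n", i, j, dec_values[p]);
	printf("predicted label: %g\n", predicted);
	free(dec_values);
}
#endif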
2548
2549double svm_predict(const svm_model *model, const svm_node *x)
2550{
2551 int nr_class = model->nr_class;
2552 double *dec_values;
2553 if(model->param.svm_type == ONE_CLASS ||
2554 model->param.svm_type == EPSILON_SVR ||
2555 model->param.svm_type == NU_SVR)
2556 dec_values = Malloc(double, 1);
2557 else
2558 dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2559 double pred_result = svm_predict_values(model, x, dec_values);
2560 free(dec_values);
2561 return pred_result;
2562}
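// --- Illustrative usage sketch (not part of the original svm.cpp) ---
// svm_predict() expects a sparse feature vector terminated by index == -1;
// the feature indices and values below are made up for illustration.
#if 0
#include "svm.h"
#include <stdio.h>

static void example_predict(const struct svm_model *model)
{
	struct svm_node x[4];
	x[0].index = 1; x[0].value = 0.5;   // feature 1
	x[1].index = 3; x[1].value = -1.2;  // feature 3 (feature 2 implicitly zero)
	x[2].index = 7; x[2].value = 2.0;   // feature 7
	x[3].index = -1;                    // terminator
	printf("predicted label = %g\n", svm_predict(model, x));
}
#endif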
2563
2564double svm_predict_probability(
2565 const svm_model *model, const svm_node *x, double *prob_estimates)
2566{
2567 if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
2568 model->probA!=NULL && model->probB!=NULL)
2569 {
2570 int i;
2571 int nr_class = model->nr_class;
2572 double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2573 svm_predict_values(model, x, dec_values);
2574
2575 double min_prob=1e-7;
2576 double **pairwise_prob=Malloc(double *,nr_class);
2577 for(i=0;i<nr_class;i++)
2578 pairwise_prob[i]=Malloc(double,nr_class);
2579 int k=0;
2580 for(i=0;i<nr_class;i++)
2581 for(int j=i+1;j<nr_class;j++)
2582 {
2583 pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
2584 pairwise_prob[j][i]=1-pairwise_prob[i][j];
2585 k++;
2586 }
2587 multiclass_probability(nr_class,pairwise_prob,prob_estimates);
2588
2589 int prob_max_idx = 0;
2590 for(i=1;i<nr_class;i++)
2591 if(prob_estimates[i] > prob_estimates[prob_max_idx])
2592 prob_max_idx = i;
2593 for(i=0;i<nr_class;i++)
2594 free(pairwise_prob[i]);
2595 free(dec_values);
2596 free(pairwise_prob);
2597 return model->label[prob_max_idx];
2598 }
2599 else
2600 return svm_predict(model, x);
2601}
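// --- Illustrative usage sketch (not part of the original svm.cpp) ---
// Probability estimates are only available when the model was trained with
// param.probability == 1; calling svm_check_probability_model() first avoids
// the silent fallback to svm_predict() seen above.
#if 0
#include "svm.h"
#include <stdio.h>
#include <stdlib.h>

static void example_predict_probability(const struct svm_model *model,
                                        const struct svm_node *x)
{
	if(!svm_check_probability_model(model))
	{
		fprintf(stderr, "model contains no probability information\n");
		return;
	}
	int nr_class = svm_get_nr_class(model);
	int *labels = (int *)malloc(sizeof(int) * nr_class);
	double *prob = (double *)malloc(sizeof(double) * nr_class);
	svm_get_labels(model, labels);                 // prob[i] corresponds to labels[i]

	double predicted = svm_predict_probability(model, x, prob);
	for(int i = 0; i < nr_class; i++)
		printf("P(label = %d) = %g\n", labels[i], prob[i]);
	printf("predicted label: %g\n", predicted);

	free(labels);
	free(prob);
}
#endif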
2602
2603static const char *svm_type_table[] =
2604{
2605 "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
2606};
2607
2608static const char *kernel_type_table[]=
2609{
2610 "linear","polynomial","rbf","sigmoid","precomputed",NULL
2611};
2612
2613int svm_save_model(const char *model_file_name, const svm_model *model)
2614{
2615 FILE *fp = fopen(model_file_name,"w");
2616 if(fp==NULL) return -1;
2617
2618 char *old_locale = strdup(setlocale(LC_ALL, NULL));
2619 setlocale(LC_ALL, "C");
2620
2621 const svm_parameter& param = model->param;
2622
2623 fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
2624 fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
2625
2626 if(param.kernel_type == POLY)
2627 fprintf(fp,"degree %d\n", param.degree);
2628
2629 if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
2630 fprintf(fp,"gamma %g\n", param.gamma);
2631
2632 if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
2633 fprintf(fp,"coef0 %g\n", param.coef0);
2634
2635 int nr_class = model->nr_class;
2636 int l = model->l;
2637 fprintf(fp, "nr_class %d\n", nr_class);
2638 fprintf(fp, "total_sv %d\n",l);
2639
2640 {
2641 fprintf(fp, "rho");
2642 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2643 fprintf(fp," %g",model->rho[i]);
2644 fprintf(fp, "\n");
2645 }
2646
2647 if(model->label)
2648 {
2649 fprintf(fp, "label");
2650 for(int i=0;i<nr_class;i++)
2651 fprintf(fp," %d",model->label[i]);
2652 fprintf(fp, "\n");
2653 }
2654
2655	if(model->probA) // regression models store probA only (classification has probA and probB)

2656 {
2657 fprintf(fp, "probA");
2658 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2659 fprintf(fp," %g",model->probA[i]);
2660 fprintf(fp, "\n");
2661 }
2662 if(model->probB)
2663 {
2664 fprintf(fp, "probB");
2665 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
2666 fprintf(fp," %g",model->probB[i]);
2667 fprintf(fp, "\n");
2668 }
2669
2670 if(model->nSV)
2671 {
2672 fprintf(fp, "nr_sv");
2673 for(int i=0;i<nr_class;i++)
2674 fprintf(fp," %d",model->nSV[i]);
2675 fprintf(fp, "\n");
2676 }
2677
2678 fprintf(fp, "SV\n");
2679 const double * const *sv_coef = model->sv_coef;
2680 const svm_node * const *SV = model->SV;
2681
2682 for(int i=0;i<l;i++)
2683 {
2684 for(int j=0;j<nr_class-1;j++)
2685 fprintf(fp, "%.16g ",sv_coef[j][i]);
2686
2687 const svm_node *p = SV[i];
2688
2689 if(param.kernel_type == PRECOMPUTED)
2690 fprintf(fp,"0:%d ",(int)(p->value));
2691 else
2692 while(p->index != -1)
2693 {
2694 fprintf(fp,"%d:%.8g ",p->index,p->value);
2695 p++;
2696 }
2697 fprintf(fp, "\n");
2698 }
2699
2700 setlocale(LC_ALL, old_locale);
2701 free(old_locale);
2702
2703 if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
2704 else return 0;
2705}
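// --- Illustrative usage sketch (not part of the original svm.cpp) ---
// svm_save_model() returns 0 on success and -1 on any I/O error, so the
// return value should be checked; the file name below is an assumption.
#if 0
#include "svm.h"
#include <stdio.h>

static void example_save(const struct svm_model *model)
{
	if(svm_save_model("example.model", model) != 0)
		fprintf(stderr, "cannot save model to example.model\n");
}
#endif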
2706
2707static char *line = NULL;
2708static int max_line_len;
2709
2710static char* readline(FILE *input)
2711{
2712 int len;
2713
2714 if(fgets(line,max_line_len,input) == NULL)
2715 return NULL;
2716
2717 while(strrchr(line,'\n') == NULL)
2718 {
2719 max_line_len *= 2;
2720 line = (char *) realloc(line,max_line_len);
2721 len = (int) strlen(line);
2722 if(fgets(line+len,max_line_len-len,input) == NULL)
2723 break;
2724 }
2725 return line;
2726}
2727
2728svm_model *svm_load_model(const char *model_file_name)
2729{
2730 FILE *fp = fopen(model_file_name,"rb");
2731 if(fp==NULL) return NULL;
2732
2733 char *old_locale = strdup(setlocale(LC_ALL, NULL));
2734 setlocale(LC_ALL, "C");
2735
2736 // read parameters
2737
2738 svm_model *model = Malloc(svm_model,1);
2739 svm_parameter& param = model->param;
2740 model->rho = NULL;
2741 model->probA = NULL;
2742 model->probB = NULL;
2743 model->label = NULL;
2744 model->nSV = NULL;
2745
2746 char cmd[81];
2747 while(1)
2748 {
2749 fscanf(fp,"%80s",cmd);
2750
2751 if(strcmp(cmd,"svm_type")==0)
2752 {
2753 fscanf(fp,"%80s",cmd);
2754 int i;
2755 for(i=0;svm_type_table[i];i++)
2756 {
2757 if(strcmp(svm_type_table[i],cmd)==0)
2758 {
2759 param.svm_type=i;
2760 break;
2761 }
2762 }
2763 if(svm_type_table[i] == NULL)
2764 {
2765 fprintf(stderr,"unknown svm type.\n");
2766
2767 setlocale(LC_ALL, old_locale);
2768 free(old_locale);
2769 free(model->rho);
2770 free(model->label);
2771 free(model->nSV);
2772 free(model);
2773 return NULL;
2774 }
2775 }
2776 else if(strcmp(cmd,"kernel_type")==0)
2777 {
2778 fscanf(fp,"%80s",cmd);
2779 int i;
2780 for(i=0;kernel_type_table[i];i++)
2781 {
2782 if(strcmp(kernel_type_table[i],cmd)==0)
2783 {
2784 param.kernel_type=i;
2785 break;
2786 }
2787 }
2788 if(kernel_type_table[i] == NULL)
2789 {
2790 fprintf(stderr,"unknown kernel function.\n");
2791
2792 setlocale(LC_ALL, old_locale);
2793 free(old_locale);
2794 free(model->rho);
2795 free(model->label);
2796 free(model->nSV);
2797 free(model);
2798 return NULL;
2799 }
2800 }
2801 else if(strcmp(cmd,"degree")==0)
2802 fscanf(fp,"%d",&param.degree);
2803 else if(strcmp(cmd,"gamma")==0)
2804 fscanf(fp,"%lf",&param.gamma);
2805 else if(strcmp(cmd,"coef0")==0)
2806 fscanf(fp,"%lf",&param.coef0);
2807 else if(strcmp(cmd,"nr_class")==0)
2808 fscanf(fp,"%d",&model->nr_class);
2809 else if(strcmp(cmd,"total_sv")==0)
2810 fscanf(fp,"%d",&model->l);
2811 else if(strcmp(cmd,"rho")==0)
2812 {
2813 int n = model->nr_class * (model->nr_class-1)/2;
2814 model->rho = Malloc(double,n);
2815 for(int i=0;i<n;i++)
2816 fscanf(fp,"%lf",&model->rho[i]);
2817 }
2818 else if(strcmp(cmd,"label")==0)
2819 {
2820 int n = model->nr_class;
2821 model->label = Malloc(int,n);
2822 for(int i=0;i<n;i++)
2823 fscanf(fp,"%d",&model->label[i]);
2824 }
2825 else if(strcmp(cmd,"probA")==0)
2826 {
2827 int n = model->nr_class * (model->nr_class-1)/2;
2828 model->probA = Malloc(double,n);
2829 for(int i=0;i<n;i++)
2830 fscanf(fp,"%lf",&model->probA[i]);
2831 }
2832 else if(strcmp(cmd,"probB")==0)
2833 {
2834 int n = model->nr_class * (model->nr_class-1)/2;
2835 model->probB = Malloc(double,n);
2836 for(int i=0;i<n;i++)
2837 fscanf(fp,"%lf",&model->probB[i]);
2838 }
2839 else if(strcmp(cmd,"nr_sv")==0)
2840 {
2841 int n = model->nr_class;
2842 model->nSV = Malloc(int,n);
2843 for(int i=0;i<n;i++)
2844 fscanf(fp,"%d",&model->nSV[i]);
2845 }
2846 else if(strcmp(cmd,"SV")==0)
2847 {
2848 while(1)
2849 {
2850 int c = getc(fp);
2851 if(c==EOF || c=='\n') break;
2852 }
2853 break;
2854 }
2855 else
2856 {
2857 fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
2858
2859 setlocale(LC_ALL, old_locale);
2860 free(old_locale);
2861 free(model->rho);
2862 free(model->label);
2863 free(model->nSV);
2864 free(model);
2865 return NULL;
2866 }
2867 }
2868
2869 // read sv_coef and SV
2870
2871 int elements = 0;
2872 long pos = ftell(fp);
2873
2874 max_line_len = 1024;
2875 line = Malloc(char,max_line_len);
2876 char *p,*endptr,*idx,*val;
2877
2878 while(readline(fp)!=NULL)
2879 {
2880 p = strtok(line,":");
2881 while(1)
2882 {
2883 p = strtok(NULL,":");
2884 if(p == NULL)
2885 break;
2886 ++elements;
2887 }
2888 }
2889 elements += model->l;
2890
2891 fseek(fp,pos,SEEK_SET);
2892
2893 int m = model->nr_class - 1;
2894 int l = model->l;
2895 model->sv_coef = Malloc(double *,m);
2896 int i;
2897 for(i=0;i<m;i++)
2898 model->sv_coef[i] = Malloc(double,l);
2899 model->SV = Malloc(svm_node*,l);
2900 svm_node *x_space = NULL;
2901 if(l>0) x_space = Malloc(svm_node,elements);
2902
2903 int j=0;
2904 for(i=0;i<l;i++)
2905 {
2906 readline(fp);
2907 model->SV[i] = &x_space[j];
2908
2909 p = strtok(line, " \t");
2910 model->sv_coef[0][i] = strtod(p,&endptr);
2911 for(int k=1;k<m;k++)
2912 {
2913 p = strtok(NULL, " \t");
2914 model->sv_coef[k][i] = strtod(p,&endptr);
2915 }
2916
2917 while(1)
2918 {
2919 idx = strtok(NULL, ":");
2920 val = strtok(NULL, " \t");
2921
2922 if(val == NULL)
2923 break;
2924 x_space[j].index = (int) strtol(idx,&endptr,10);
2925 x_space[j].value = strtod(val,&endptr);
2926
2927 ++j;
2928 }
2929 x_space[j++].index = -1;
2930 }
2931 free(line);
2932
2933 setlocale(LC_ALL, old_locale);
2934 free(old_locale);
2935
2936 if (ferror(fp) != 0 || fclose(fp) != 0)
2937 return NULL;
2938
2939	model->free_sv = 1;	// model owns x_space; freed later in svm_free_model_content()
2940 return model;
2941}
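// --- Illustrative usage sketch (not part of the original svm.cpp) ---
// Loading a model file written by svm_save_model(): the caller owns the
// returned model and releases it with svm_free_and_destroy_model(). The file
// name and feature vector here are assumptions for illustration.
#if 0
#include "svm.h"
#include <stdio.h>

static void example_load_and_predict(void)
{
	struct svm_model *model = svm_load_model("example.model");
	if(model == NULL)
	{
		fprintf(stderr, "cannot load example.model\n");
		return;
	}
	struct svm_node x[2];
	x[0].index = 1; x[0].value = 0.25;
	x[1].index = -1;                    // terminator
	printf("label = %g (svm_type %d, %d classes)\n",
	       svm_predict(model, x),
	       svm_get_svm_type(model), svm_get_nr_class(model));
	svm_free_and_destroy_model(&model); // also frees the SV data owned by the model
}
#endif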
2942
2943void svm_free_model_content(svm_model* model_ptr)
2944{
2945 if(model_ptr->free_sv && model_ptr->l > 0 && model_ptr->SV != NULL)
2946 free((void *)(model_ptr->SV[0]));
2947 if(model_ptr->sv_coef)
2948 {
2949 for(int i=0;i<model_ptr->nr_class-1;i++)
2950 free(model_ptr->sv_coef[i]);
2951 }
2952
2953 free(model_ptr->SV);
2954 model_ptr->SV = NULL;
2955
2956 free(model_ptr->sv_coef);
2957 model_ptr->sv_coef = NULL;
2958
2959 free(model_ptr->rho);
2960 model_ptr->rho = NULL;
2961
2962 free(model_ptr->label);
2963 model_ptr->label= NULL;
2964
2965 free(model_ptr->probA);
2966 model_ptr->probA = NULL;
2967
2968 free(model_ptr->probB);
2969 model_ptr->probB= NULL;
2970
2971 free(model_ptr->nSV);
2972 model_ptr->nSV = NULL;
2973}
2974
2975void svm_free_and_destroy_model(svm_model** model_ptr_ptr)
2976{
2977 if(model_ptr_ptr != NULL && *model_ptr_ptr != NULL)
2978 {
2979 svm_free_model_content(*model_ptr_ptr);
2980 free(*model_ptr_ptr);
2981 *model_ptr_ptr = NULL;
2982 }
2983}
2984
2985void svm_destroy_param(svm_parameter* param)
2986{
2987 free(param->weight_label);
2988 free(param->weight);
2989}
2990
2991const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
2992{
2993 // svm_type
2994
2995 int svm_type = param->svm_type;
2996 if(svm_type != C_SVC &&
2997 svm_type != NU_SVC &&
2998 svm_type != ONE_CLASS &&
2999 svm_type != EPSILON_SVR &&
3000 svm_type != NU_SVR)
3001 return "unknown svm type";
3002
3003 // kernel_type, degree
3004
3005 int kernel_type = param->kernel_type;
3006 if(kernel_type != LINEAR &&
3007 kernel_type != POLY &&
3008 kernel_type != RBF &&
3009 kernel_type != SIGMOID &&
3010 kernel_type != PRECOMPUTED)
3011 return "unknown kernel type";
3012
3013 if(param->gamma < 0)
3014 return "gamma < 0";
3015
3016 if(param->degree < 0)
3017 return "degree of polynomial kernel < 0";
3018
3019 // cache_size,eps,C,nu,p,shrinking
3020
3021 if(param->cache_size <= 0)
3022 return "cache_size <= 0";
3023
3024 if(param->eps <= 0)
3025 return "eps <= 0";
3026
3027 if(svm_type == C_SVC ||
3028 svm_type == EPSILON_SVR ||
3029 svm_type == NU_SVR)
3030 if(param->C <= 0)
3031 return "C <= 0";
3032
3033 if(svm_type == NU_SVC ||
3034 svm_type == ONE_CLASS ||
3035 svm_type == NU_SVR)
3036 if(param->nu <= 0 || param->nu > 1)
3037 return "nu <= 0 or nu > 1";
3038
3039 if(svm_type == EPSILON_SVR)
3040 if(param->p < 0)
3041 return "p < 0";
3042
3043 if(param->shrinking != 0 &&
3044 param->shrinking != 1)
3045 return "shrinking != 0 and shrinking != 1";
3046
3047 if(param->probability != 0 &&
3048 param->probability != 1)
3049 return "probability != 0 and probability != 1";
3050
3051 if(param->probability == 1 &&
3052 svm_type == ONE_CLASS)
3053 return "one-class SVM probability output not supported yet";
3054
3055
3056 // check whether nu-svc is feasible
3057
3058 if(svm_type == NU_SVC)
3059 {
3060 int l = prob->l;
3061 int max_nr_class = 16;
3062 int nr_class = 0;
3063 int *label = Malloc(int,max_nr_class);
3064 int *count = Malloc(int,max_nr_class);
3065
3066 int i;
3067 for(i=0;i<l;i++)
3068 {
3069 int this_label = (int)prob->y[i];
3070 int j;
3071 for(j=0;j<nr_class;j++)
3072 if(this_label == label[j])
3073 {
3074 ++count[j];
3075 break;
3076 }
3077 if(j == nr_class)
3078 {
3079 if(nr_class == max_nr_class)
3080 {
3081 max_nr_class *= 2;
3082 label = (int *)realloc(label,max_nr_class*sizeof(int));
3083 count = (int *)realloc(count,max_nr_class*sizeof(int));
3084 }
3085 label[nr_class] = this_label;
3086 count[nr_class] = 1;
3087 ++nr_class;
3088 }
3089 }
3090
3091 for(i=0;i<nr_class;i++)
3092 {
3093 int n1 = count[i];
3094 for(int j=i+1;j<nr_class;j++)
3095 {
3096 int n2 = count[j];
3097 if(param->nu*(n1+n2)/2 > min(n1,n2))
3098 {
3099 free(label);
3100 free(count);
3101 return "specified nu is infeasible";
3102 }
3103 }
3104 }
3105 free(label);
3106 free(count);
3107 }
3108
3109 return NULL;
3110}
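// --- Illustrative usage sketch (not part of the original svm.cpp) ---
// svm_check_parameter() returns NULL when the parameters are usable and an
// error string otherwise, so it should be called before svm_train(); the
// parameter values below are assumptions chosen only for illustration.
#if 0
#include "svm.h"
#include <stdio.h>

static struct svm_model *example_checked_train(const struct svm_problem *prob)
{
	struct svm_parameter param;
	param.svm_type = C_SVC;
	param.kernel_type = RBF;
	param.degree = 3;
	param.gamma = 0.5;
	param.coef0 = 0;
	param.cache_size = 100;   // kernel cache in MB
	param.eps = 1e-3;
	param.C = 1;
	param.nr_weight = 0;
	param.weight_label = NULL;
	param.weight = NULL;
	param.nu = 0.5;
	param.p = 0.1;
	param.shrinking = 1;
	param.probability = 0;

	const char *err = svm_check_parameter(prob, &param);
	if(err != NULL)
	{
		fprintf(stderr, "parameter error: %s\n", err);
		return NULL;
	}
	return svm_train(prob, &param);
}
#endif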
3111
3112int svm_check_probability_model(const svm_model *model)
3113{
3114 return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
3115 model->probA!=NULL && model->probB!=NULL) ||
3116 ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
3117 model->probA!=NULL);
3118}
3119
3120void svm_set_print_string_function(void (*print_func)(const char *))
3121{
3122 if(print_func == NULL)
3123 svm_print_string = &print_string_stdout;
3124 else
3125 svm_print_string = print_func;
3126}
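// --- Illustrative usage sketch (not part of the original svm.cpp) ---
// Redirecting the library's training output; passing NULL restores the
// default stdout printer, exactly as implemented above.
#if 0
#include "svm.h"

static void quiet_print(const char *s) { (void)s; }  // swallow all messages

static void example_silence_output(void)
{
	svm_set_print_string_function(&quiet_print);  // silence info() output during training
	// ... train models here ...
	svm_set_print_string_function(NULL);          // restore default printing to stdout
}
#endif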