001    /*
002    NGramJ - n-gram based text classification
003    Copyright (C) 2001- Frank S. Nestel (frank at spieleck.de)
004    
005    This program is free software; you can redistribute it and/or modify
006    it under the terms of the GNU Lesser General Public License as published 
007    by the Free Software Foundation; either version 2.1 of the License, or
008    (at your option) any later version.
009    
010    This program is distributed in the hope that it will be useful,
011    but WITHOUT ANY WARRANTY; without even the implied warranty of
012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
013    GNU General Public License for more details.
014    
015    You should have received a copy of the GNU Lesser General Public License
016    along with this program (lesser.txt); if not, write to the Free Software
017    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
018    */
019    package de.spieleck.app.cngram;
020    
021    import java.util.Iterator;
022    
023    /**
024     * Cosine Metric
025     * This is nicely valued between zero and one
026     * @author frank nestel
027     * @author $Author: nestefan $
028     * @version $Revision: 2 $ $Date: 2006-03-27 23:00:21 +0200 (Mo, 27 Mrz 2006) $ $Author: nestefan $
029     */
030    public class CosMetric
031      implements NGramMetric
032    {
033      public double diff(NGramProfile p1, NGramProfile p2)
034      {
035        double sum = 0.0;
036        double s1  = 0.0;
037        double s2  = 0.0;
038    
039        // Treat all NGrams contained in p1;
040        Iterator i = p1.getSorted();
041        while (i.hasNext())
042        {
043          NGram ng1 = (NGram) i.next();
044          NGram ng2 = p2.get(ng1);
045          double c1 = ng1.getCount();
046          s1  += c1 * c1;
047          if ( ng2 != null )
048          {
049            double c2 = ng2.getCount();
050            sum += c1 * c2;
051            s2  += c2 * c2;
052          }
053        }
054        // Treat NGrams contained ONLY in p2
055        i = p2.getSorted();
056        while (i.hasNext())
057        {
058          NGram ng2 = (NGram) i.next();
059          if ( p1.get(ng2) == null )
060          {
061            double c2 = ng2.getCount();
062            s2  += c2 * c2;
063          }
064        }
065        return 1.0 - sum / Math.sqrt(s1*s2);
066      }
067    }