001    /*
002    NGramJ - n-gram based text classification
003    Copyright (C) 2001- Frank S. Nestel (frank at spieleck.de)
004    
005    This program is free software; you can redistribute it and/or modify
006    it under the terms of the GNU Lesser General Public License as published 
007    by the Free Software Foundation; either version 2.1 of the License, or
008    (at your option) any later version.
009    
010    This program is distributed in the hope that it will be useful,
011    but WITHOUT ANY WARRANTY; without even the implied warranty of
012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
013    GNU General Public License for more details.
014    
015    You should have received a copy of the GNU Lesser General Public License
016    along with this program (lesser.txt); if not, write to the Free Software
017    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
018    */
019    package de.spieleck.app.cngram;
020    
021    import java.util.Iterator;
022    
023    /**
024     * Squared raw metric for distance between profiles.
025     * @author frank nestel
026     * @author $Author: nestefan $
027     * @version $Revision: 2 $ $Date: 2006-03-27 23:00:21 +0200 (Mo, 27 Mrz 2006) $ $Author: nestefan $
028     */
029    public class SqMetric
030      implements NGramMetric
031    {
032      public double diff(NGramProfile p1, NGramProfile p2)
033      {
034        double sum = 0.0;
035        int norm1 = p1.getNormalization();
036        int norm2 = p2.getNormalization();
037    
038        // Treat all NGrams contained in p1;
039        Iterator i = p1.getSorted();
040        while (i.hasNext())
041        {
042          NGram ng1 = (NGram) i.next();
043          NGram ng2 = p2.get(ng1);
044          double c1 = (double) ng1.getCount() / norm1;
045          if ( ng2 != null )
046          {
047            double c2 = (double) ng2.getCount() / norm2;
048            sum += (c1 - c2)*(c1-c2);
049          } else {
050            sum += c1*c1;
051          }
052        }
053    
054        // Treat NGrams contained ONLY in p2
055        i = p2.getSorted();
056        while (i.hasNext())
057        {
058          NGram ng2 = (NGram) i.next();
059          if ( p1.get(ng2) == null )
060          {
061            double c2 = (double) ng2.getCount() / norm2;
062            sum += c2*c2;
063          }
064        }
065    
066        return Math.sqrt(sum);
067      }
068    }