001    /*
002    NGramJ - n-gram based text classification
003    Copyright (C) 2001- Frank S. Nestel (frank at spieleck.de)
004    
005    This program is free software; you can redistribute it and/or modify
006    it under the terms of the GNU Lesser General Public License as published 
007    by the Free Software Foundation; either version 2.1 of the License, or
008    (at your option) any later version.
009    
010    This program is distributed in the hope that it will be useful,
011    but WITHOUT ANY WARRANTY; without even the implied warranty of
012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
013    GNU General Public License for more details.
014    
015    You should have received a copy of the GNU Lesser General Public License
016    along with this program (lesser.txt); if not, write to the Free Software
017    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
018    */
019    package de.spieleck.app.cngram;
020    
021    import java.util.Iterator;
022    
023    /**
024     * Raw (Delta-count) based difference between profiles.
025     * Calculate a score how well NGramProfiles match each other.
026     *
027     * @author frank nestel
028     * @author $Author: nestefan $
029     * @version $Revision: 2 $ $Date: 2006-03-27 23:00:21 +0200 (Mo, 27 Mrz 2006) $ $Author: nestefan $
030     */
031    public class RawMetric
032      implements NGramMetric
033    {
034      public double diff(NGramProfile p1, NGramProfile p2)
035      {
036        double sum = 0.0;
037        int norm1 = p1.getNormalization();
038        int norm2 = p2.getNormalization();
039    // System.out.println(". "+norm1+" "+norm2);
040    
041        // Treat all NGrams contained in p1;
042        Iterator i = p1.getSorted();
043        while (i.hasNext())
044        {
045          NGram ng1 = (NGram) i.next();
046          NGram ng2 = p2.get(ng1);
047          double c1 = (double) ng1.getCount() / norm1;
048          if ( ng2 != null )
049          {
050    // System.out.println("-"+ng1+" "+ng1.getCount()+" "+ng2.getCount());        
051            double c2 = (double) ng2.getCount() / norm2;
052            sum += Math.abs(c1 - c2);
053          } else {
054    // System.out.println("="+ng1+" "+ng1.getCount());        
055            sum += c1;
056          }
057        }
058    
059        // Treat NGrams contained ONLY in p2
060        i = p2.getSorted();
061        while (i.hasNext())
062        {
063          NGram ng2 = (NGram) i.next();
064          if ( p1.get(ng2) == null )
065          {
066    // System.out.println(">"+ng2+" "+ng2.getCount());        
067            sum += (double) ng2.getCount() / norm2;
068          }
069        }
070    
071        return sum;
072      }
073    }