001 /*
002 NGramJ - n-gram based text classification
003 Copyright (C) 2001 Frank S. Nestel (frank at spieleck.de)
004
005 This program is free software; you can redistribute it and/or modify
006 it under the terms of the GNU Lesser General Public License as published
007 by the Free Software Foundation; either version 2.1 of the License, or
008 (at your option) any later version.
009
010 This program is distributed in the hope that it will be useful,
011 but WITHOUT ANY WARRANTY; without even the implied warranty of
012 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
013 GNU General Public License for more details.
014
015 You should have received a copy of the GNU Lesser General Public License
016 along with this program (lesser.txt); if not, write to the Free Software
017 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
018 */
019
020 package de.spieleck.app.ngramj.phoner;
021
022 import java.io.*;
023
024 import java.util.Comparator;
025 import java.util.Iterator;
026
027 import de.spieleck.app.ngramj.*;
028 import de.spieleck.util.PriorityQueue;
029 import de.spieleck.app.ngramj.lm.LMDataProfile;
030
031 /**
032 * Mini-Application to create a "useful" word from
033 * a phonenumber.
034 * <P>
035 * This uses a brute force approach to evaluate various options
036 * to convert a number in a dialable string. To make bf feasible,
037 * one had to be slightly careful...
038 * </P>
039 */
040 public class Phoner
041 implements Comparator
042 {
043 public final static int LIMIT = 200;
044
045 public final static String DEF_RESOURCE = "frank.lm";
046
047 protected String pnumber;
048 protected PriorityQueue order;
049 protected double min, max;
050 protected int count = 0;
051
052 public Phoner(IterableProfile prof, String pnumber)
053 throws IOException, NGramException
054 {
055 this.pnumber = pnumber;
056 min = Double.MAX_VALUE;
057 max = Double.MIN_VALUE;
058 PhonerProfileEnum pen = new PhonerProfileEnum(pnumber);
059 //
060 order = new PriorityQueue(LIMIT, wrongComp);
061 Profile ep;
062 while ( ( ep = pen.next() ) != null )
063 {
064 double dr = deltaRank(prof, ep);
065 if ( ++count < LIMIT+LIMIT )
066 order.put(new ScoredRes(pen.getRes(), dr));
067 else if ( dr < ((ScoredRes)order.top()).getVal() )
068 order.setTop(new ScoredRes(pen.getRes(), dr));
069 if ( dr < min )
070 min = dr;
071 if ( dr > max )
072 max = dr;
073 }
074 }
075
076 public static Comparator wrongComp = new Comparator()
077 {
078 public int compare(Object a, Object b)
079 {
080 double h = ((ScoredRes)a).getVal()-((ScoredRes)b).getVal();
081 if ( h > 0.0 )
082 return -1;
083 else if ( h < 0.0 )
084 return +1;
085 else
086 return 0;
087 }
088 };
089
090 public void show()
091 {
092 PrintWriter pw = new PrintWriter(System.out, true);
093 show(pw);
094 }
095
096 public void show(PrintWriter pw)
097 {
098 pw.println(pnumber+" ... "+count+" combinations considered."
099 +" score range ["+min+","+max+"]");
100 ScoredRes[] ress = new ScoredRes[order.getSize()];
101 int i = order.getSize();
102 while ( order.getSize() > 0 )
103 ress[--i] = (ScoredRes)order.pop();
104 for (i = 0; i < ress.length; i++)
105 pw.println((i+1)+". "+ress[i]+" "+ress[i].getVal());
106 }
107
108 public double deltaRank(IterableProfile prof1, Profile prof2)
109 {
110 double delta = 0.0;
111 Iterator iter = prof1.ngrams();
112 int j = 0;
113 while ( iter.hasNext() )
114 {
115 j++;
116 double rank = prof2.getRank((NGram)iter.next());
117 if ( rank != 0.0 )
118 delta += Math.abs(rank - j );
119 else
120 delta += 401; // XXX fixed!
121 }
122 return delta;
123 }
124
125 public int compare(Object a, Object b)
126 {
127 double h = ((ScoredRes)a).getVal() - ((ScoredRes)b).getVal();
128 if ( h < 0.0 )
129 return -1;
130 else if ( h > 0.0 )
131 return +1;
132 else
133 return 0;
134 }
135
136 public double getMin()
137 {
138 return min;
139 }
140
141 public double getMax()
142 {
143 return max;
144 }
145
146 /**
147 * Sample commandline implementation
148 */
149 public static void main(String[] args)
150 throws Exception
151 {
152 if ( args.length == 1 )
153 {
154 InputStream in = Phoner.class.getResourceAsStream(DEF_RESOURCE);
155 IterableProfile prof = new LMDataProfile(DEF_RESOURCE, in);
156 Phoner p = new Phoner(prof, args[0]);
157 p.show();
158 System.err.println("**** #ngram="+NGramImpl.getKnownCount());
159 }
160 else
161 {
162 File ifi = new File(args[0]);
163 InputStream in = new FileInputStream(ifi);
164 IterableProfile prof = new LMDataProfile(args[0], in);
165 //
166 for (int i = 1; i < args.length; i++ )
167 {
168 Phoner p = new Phoner(prof, args[i]);
169 p.show();
170 System.err.println("**** #ngram="+NGramImpl.getKnownCount());
171 }
172 }
173 }
174 }