001 /* 002 NGramJ - n-gram based text classification 003 Copyright (C) 2001 Frank S. Nestel (frank at spieleck.de) 004 005 This program is free software; you can redistribute it and/or modify 006 it under the terms of the GNU Lesser General Public License as published 007 by the Free Software Foundation; either version 2.1 of the License, or 008 (at your option) any later version. 009 010 This program is distributed in the hope that it will be useful, 011 but WITHOUT ANY WARRANTY; without even the implied warranty of 012 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 013 GNU General Public License for more details. 014 015 You should have received a copy of the GNU Lesser General Public License 016 along with this program (lesser.txt); if not, write to the Free Software 017 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 018 */ 019 020 package de.spieleck.app.ngramj.phoner; 021 022 import java.io.*; 023 024 import java.util.Comparator; 025 import java.util.Iterator; 026 027 import de.spieleck.app.ngramj.*; 028 import de.spieleck.util.PriorityQueue; 029 import de.spieleck.app.ngramj.lm.LMDataProfile; 030 031 /** 032 * Mini-Application to create a "useful" word from 033 * a phonenumber. 034 * <P> 035 * This uses a brute force approach to evaluate various options 036 * to convert a number in a dialable string. To make bf feasible, 037 * one had to be slightly careful... 038 * </P> 039 */ 040 public class Phoner 041 implements Comparator 042 { 043 public final static int LIMIT = 200; 044 045 public final static String DEF_RESOURCE = "frank.lm"; 046 047 protected String pnumber; 048 protected PriorityQueue order; 049 protected double min, max; 050 protected int count = 0; 051 052 public Phoner(IterableProfile prof, String pnumber) 053 throws IOException, NGramException 054 { 055 this.pnumber = pnumber; 056 min = Double.MAX_VALUE; 057 max = Double.MIN_VALUE; 058 PhonerProfileEnum pen = new PhonerProfileEnum(pnumber); 059 // 060 order = new PriorityQueue(LIMIT, wrongComp); 061 Profile ep; 062 while ( ( ep = pen.next() ) != null ) 063 { 064 double dr = deltaRank(prof, ep); 065 if ( ++count < LIMIT+LIMIT ) 066 order.put(new ScoredRes(pen.getRes(), dr)); 067 else if ( dr < ((ScoredRes)order.top()).getVal() ) 068 order.setTop(new ScoredRes(pen.getRes(), dr)); 069 if ( dr < min ) 070 min = dr; 071 if ( dr > max ) 072 max = dr; 073 } 074 } 075 076 public static Comparator wrongComp = new Comparator() 077 { 078 public int compare(Object a, Object b) 079 { 080 double h = ((ScoredRes)a).getVal()-((ScoredRes)b).getVal(); 081 if ( h > 0.0 ) 082 return -1; 083 else if ( h < 0.0 ) 084 return +1; 085 else 086 return 0; 087 } 088 }; 089 090 public void show() 091 { 092 PrintWriter pw = new PrintWriter(System.out, true); 093 show(pw); 094 } 095 096 public void show(PrintWriter pw) 097 { 098 pw.println(pnumber+" ... "+count+" combinations considered." 099 +" score range ["+min+","+max+"]"); 100 ScoredRes[] ress = new ScoredRes[order.getSize()]; 101 int i = order.getSize(); 102 while ( order.getSize() > 0 ) 103 ress[--i] = (ScoredRes)order.pop(); 104 for (i = 0; i < ress.length; i++) 105 pw.println((i+1)+". "+ress[i]+" "+ress[i].getVal()); 106 } 107 108 public double deltaRank(IterableProfile prof1, Profile prof2) 109 { 110 double delta = 0.0; 111 Iterator iter = prof1.ngrams(); 112 int j = 0; 113 while ( iter.hasNext() ) 114 { 115 j++; 116 double rank = prof2.getRank((NGram)iter.next()); 117 if ( rank != 0.0 ) 118 delta += Math.abs(rank - j ); 119 else 120 delta += 401; // XXX fixed! 121 } 122 return delta; 123 } 124 125 public int compare(Object a, Object b) 126 { 127 double h = ((ScoredRes)a).getVal() - ((ScoredRes)b).getVal(); 128 if ( h < 0.0 ) 129 return -1; 130 else if ( h > 0.0 ) 131 return +1; 132 else 133 return 0; 134 } 135 136 public double getMin() 137 { 138 return min; 139 } 140 141 public double getMax() 142 { 143 return max; 144 } 145 146 /** 147 * Sample commandline implementation 148 */ 149 public static void main(String[] args) 150 throws Exception 151 { 152 if ( args.length == 1 ) 153 { 154 InputStream in = Phoner.class.getResourceAsStream(DEF_RESOURCE); 155 IterableProfile prof = new LMDataProfile(DEF_RESOURCE, in); 156 Phoner p = new Phoner(prof, args[0]); 157 p.show(); 158 System.err.println("**** #ngram="+NGramImpl.getKnownCount()); 159 } 160 else 161 { 162 File ifi = new File(args[0]); 163 InputStream in = new FileInputStream(ifi); 164 IterableProfile prof = new LMDataProfile(args[0], in); 165 // 166 for (int i = 1; i < args.length; i++ ) 167 { 168 Phoner p = new Phoner(prof, args[i]); 169 p.show(); 170 System.err.println("**** #ngram="+NGramImpl.getKnownCount()); 171 } 172 } 173 } 174 }