package ps6.tigerdb;

import java.io.*;
import java.util.*;
import java.util.zip.ZipFile;
import java.util.zip.ZipEntry;

/**
 * Reads TigerRT1 and TigerRT6 records out of ZIP archives and groups them,
 * by TLID, into {@link GeoChain}s.
 *
 * Two modes are offered: {@link #readZipFile(String)} / {@link #readRecords}
 * load everything into memory and {@link #geoChains()} iterates over the
 * result, while {@link #geoChains(String)} / {@link #geoChains(File)} load
 * only the type 6 records up front and then stream the type 1 records.
 */
public class DatabaseReader {

    /** When true, {@link #readZipFile(String)} reports each finished archive on stdout. */
    public static boolean INFO = false;

    private final Map<Integer, TigerRT1> type1 = new TreeMap<Integer, TigerRT1>(); // TLID -> TigerRT1
    private final ToBag<Integer, TigerRT6> type6 = new ToBag<Integer, TigerRT6>(); // TLID ->* TigerRT6

    // tracks tlids that we've thrown out as useless for our purposes
    // (b/c their TigerRT1's weren't Roads, they were Rails or
    // something...)
    private final Set<Integer> trashTLIDS = new HashSet<Integer>();

    /**
     * One TigerRT1 record together with the TigerRT6 records that share
     * its TLID.
     */
    public class GeoChain {
        private final TigerRT1 rt1;
        private final Collection<TigerRT6> rt6s; // the TigerRT6 records for rt1's TLID

        private IntSet leftSet = null;  // cached street numbers on the left
        private IntSet rightSet = null; // cached street numbers on the right

        GeoChain(TigerRT1 rt1, Collection<TigerRT6> rt6s) {
            this.rt1 = rt1;
            this.rt6s = rt6s;
        }

        /**
         * @return the overall Route information (the RT1)
         */
        public TigerRT1 getRT1() {
            return rt1;
        }

        /**
         * @return the small pieces of Route information, as an
         *         unmodifiable view of the RT6 collection
         */
        public Collection<TigerRT6> getRT6s() {
            return Collections.unmodifiableCollection(rt6s);
        }

        /**
         * Get the set of addresses on the left, as produced by
         * IntSet.unparse() on the union of the RT1 and RT6 left ranges.
         */
        public String getLeftAddresses() {
            cacheLeftAddresses();
            return leftSet.unparse();
        }

        /**
         * Get the set of addresses on the right, as produced by
         * IntSet.unparse() on the union of the RT1 and RT6 right ranges.
         */
        public String getRightAddresses() {
            cacheRightAddresses();
            return rightSet.unparse();
        }

        // store the addresses on the right, if they haven't yet been stored
        private void cacheRightAddresses() {
            if (rightSet == null) {
                IntSet acc = getRT1().getRightRange().getSet();
                for (TigerRT6 rt6 : getRT6s()) {
                    acc = acc.union(rt6.getRightRange().getSet());
                }
                rightSet = acc;
            }
        }

        // store the addresses on the left, if they haven't yet been stored
        private void cacheLeftAddresses() {
            if (leftSet == null) {
                IntSet acc = getRT1().getLeftRange().getSet();
                for (TigerRT6 rt6 : getRT6s()) {
                    acc = acc.union(rt6.getLeftRange().getSet());
                }
                leftSet = acc;
            }
        }

        /**
         * @return true iff the street numbers on the sides of this
         * chain contain no common elements.
         */
        public boolean sidesDisjoint() {
            cacheLeftAddresses();
            cacheRightAddresses();
            return leftSet.isDisjoint(rightSet);
        }
    }

    /** Creates a reader with no records loaded. */
    public DatabaseReader() {
    }

    /**
     * @effects Returns an Iterator[GeoChain] over the GeoChains currently
     *          stored in this
     * @requires this is not modified while the returned iterator is
     *           in use
     */
    public Iterator<GeoChain> geoChains() {
        return new ImmIterator<GeoChain>() {
            private final Iterator<Integer> tlids = type1.keySet().iterator();

            public boolean hasNext() {
                return tlids.hasNext();
            }

            public GeoChain next() {
                Integer tlid = tlids.next();
                TigerRT1 rt1 = type1.get(tlid);

                Collection<TigerRT6> rt6s = type6.get(tlid);
                rt6s = Collections.unmodifiableCollection(rt6s);

                return new GeoChain(rt1, rt6s);
            }
        };
    }

    /**
     * Streams the GeoChains of the given ZIP archive.
     *
     * @param zf the archive to open
     * @return an iterator that reads type 1 records lazily; the archive is
     *         closed once the iterator has been run to its end
     * @throws IOException if the archive cannot be opened
     */
    public Iterator<GeoChain> geoChains(File zf) throws IOException {
        return geoChains(new ZipFile(zf));
    }

    /**
     * Streams the GeoChains of the ZIP archive with the given path.
     *
     * @param zf path of the archive to open
     * @return an iterator that reads type 1 records lazily; the archive is
     *         closed once the iterator has been run to its end
     * @throws IOException if the archive cannot be opened
     */
    public Iterator<GeoChain> geoChains(String zf) throws IOException {
        return geoChains(new ZipFile(zf));
    }

    /**
     * Loads the first "rt6" entry of the archive into this reader, then
     * returns an iterator that parses the first "rt1" entry line by line.
     * Takes ownership of {@code zf}: it is closed if no iterator over it is
     * handed out, or when that iterator reaches the end of its input.
     *
     * @throws RuntimeException wrapping any IOException raised while
     *         preparing the iterator
     */
    private Iterator<GeoChain> geoChains(final ZipFile zf) {
        boolean handedOff = false;
        try {
            // prep by reading type6 records first
            ZipEntry rt6Entry = findEntry(zf, "rt6");
            if (rt6Entry != null) {
                InputStream rt6Stream = zf.getInputStream(rt6Entry);
                try {
                    this.readRecords(rt6Stream);
                } finally {
                    closeQuietly(rt6Stream);
                }
            }

            ZipEntry mainEntry = findEntry(zf, "rt1");
            if (mainEntry == null) {
                return Collections.<GeoChain>emptyList().iterator();
            }
            final LineNumberReader lnr =
                new LineNumberReader(
                    new InputStreamReader(zf.getInputStream(mainEntry)));

            // now return the nifty read-and-throw-away GeoChain iterator
            Iterator<GeoChain> chains = new ImmIterator<GeoChain>() {
                private TigerRT1 rt1 = null;        // next record to hand out, if any
                private boolean exhausted = false;  // true once the input has ended

                // Reads lines until a usable type 1 record is pending or
                // the input ends.
                private void advance() {
                    while (rt1 == null && !exhausted) {
                        String line;
                        try {
                            line = lnr.readLine();
                        } catch (IOException ioe) {
                            // NOTE(review): a read failure is treated as
                            // end of input, as before; the iterator
                            // interface leaves no room for a checked
                            // exception here.
                            line = null;
                        }
                        if (line == null) {
                            exhausted = true;
                            closeQuietly(lnr);
                            closeZipQuietly(zf);
                            return;
                        }
                        try {
                            TigerRT1 candidate = new TigerRT1(line);
                            if (isRoadClass(candidate.getCfc().charAt(0))) {
                                rt1 = candidate;
                            } else {
                                discardTlid(Integer.valueOf(candidate.getTLID()));
                            }
                        } catch (BadRecordException bre) {
                            // unparseable line: skip it and keep reading
                        }
                    }
                }

                public boolean hasNext() {
                    advance();
                    return (rt1 != null);
                }

                public GeoChain next() {
                    // Work even when the caller did not call hasNext() first.
                    advance();
                    if (rt1 == null) {
                        throw new NoSuchElementException();
                    }
                    GeoChain gc =
                        new GeoChain(rt1, type6.get(Integer.valueOf(rt1.getTLID())));
                    rt1 = null;
                    advance();
                    return gc;
                }
            };
            handedOff = true;
            return chains;
        } catch (IOException ioe) {
            throw new RuntimeException(ioe.getMessage(), ioe);
        } finally {
            if (!handedOff) {
                closeZipQuietly(zf);
            }
        }
    }

    // Returns the first entry whose lower-cased name ends with suffix, or null.
    private static ZipEntry findEntry(ZipFile zf, String suffix) {
        Enumeration<? extends ZipEntry> entries = zf.entries();
        while (entries.hasMoreElements()) {
            ZipEntry entry = entries.nextElement();
            if (entry.getName().toLowerCase(Locale.ENGLISH).endsWith(suffix)) {
                return entry;
            }
        }
        return null;
    }

    // True iff a CFC starting with this character marks a record we keep.
    private static boolean isRoadClass(char cfcFirst) {
        return cfcFirst == 'a' || cfcFirst == 'A';
    }

    // Remembers tlid as thrown out and drops any type 6 records stored for it.
    private void discardTlid(Integer tlid) {
        trashTLIDS.add(tlid);
        type6.remove(tlid);
    }

    // Closes c, ignoring any failure; used only on cleanup paths.
    private static void closeQuietly(Closeable c) {
        try {
            c.close();
        } catch (IOException ignored) {
            // nothing useful can be done about a failed close here
        }
    }

    // Closes zf, ignoring any failure; used only on cleanup paths.
    private static void closeZipQuietly(ZipFile zf) {
        try {
            zf.close();
        } catch (IOException ignored) {
            // nothing useful can be done about a failed close here
        }
    }

    // checks internal state of this to increase confidence that the
    // data set isn't screwy
    public void checkMappingInv() {
        checkOneMappingInv(type6);
    }

    // checks that every key in typeT has an RT1 in type1
    private void checkOneMappingInv(Map<Integer, ?> typeT) {
        for (Integer tlid : typeT.keySet()) {
            assert type1.containsKey(tlid) : "No record found for tlid: " + tlid +
                " with records: +" + typeT.get(tlid);
        }
    }

    /**
     * Loads every ZIP file named on the command line into one reader and
     * then runs {@link #checkMappingInv()}.
     *
     * @param args paths of ZIP files
     */
    public static void main(String[] args) {
        DatabaseReader db = new DatabaseReader();

        // each arg is a ZIP file
        for (String zfstr : args) {
            try {
                db.readZipFile(zfstr);
            } catch (IOException e) {
                System.out.println("IOEXCEPTION?");

            } catch (OutOfMemoryError e) {

                System.out.println("OUT OF MEMORY");

                System.out.println("1 "+db.type1.size());
                System.out.println("6 "+db.type6.size());

                e.printStackTrace();
                System.exit(-1);
            }
        }
        db.checkMappingInv();
    }

    /**
     * Loads every entry of the given ZIP archive into this reader.
     *
     * @param zf the archive to read
     * @throws IOException if the archive cannot be opened or read
     */
    public void readZipFile(File zf) throws IOException {
        readZipFile(new ZipFile(zf));
    }

    /**
     * Loads every entry of the ZIP archive with the given path into this
     * reader, printing a completion line when {@link #INFO} is set.
     *
     * @param zstr path of the archive to read
     * @throws IOException if the archive cannot be opened or read
     */
    public void readZipFile(String zstr) throws IOException {
        readZipFile(new ZipFile(zstr));
        if (INFO) {
            System.out.println("finished with "+zstr);
        }
    }

    // Feeds every entry of zf to readRecords, then closes zf -- also when
    // reading fails part-way through.
    private void readZipFile(ZipFile zf) throws IOException {
        try {
            Enumeration<? extends ZipEntry> entries = zf.entries();
            while (entries.hasMoreElements()) {
                ZipEntry entry = entries.nextElement();
                this.readRecords(zf.getInputStream(entry));
            }
        } finally {
            zf.close();
        }
    }

    /**
     * Reads records line by line from {@code is} and stores them in this
     * reader. Lines starting with '1' are parsed as TigerRT1, lines
     * starting with '6' as TigerRT6; blank lines, other lines, and lines
     * rejected with BadRecordException are skipped. The stream is left
     * open for the caller to close.
     *
     * @param is source of record lines
     * @throws IOException if reading from {@code is} fails
     * @throws RuntimeException if a second type 1 record arrives for a
     *         TLID that is already stored
     */
    public void readRecords(InputStream is) throws IOException {
        LineNumberReader lnr = new LineNumberReader(new InputStreamReader(is));
        for (String line = lnr.readLine(); line != null; line = lnr.readLine()) {
            if (line.length() == 0) {
                // nothing to dispatch on; charAt(0) would throw
                continue;
            }
            try {
                switch (line.charAt(0)) {
                case '1': {
                    TigerRT1 rt1 = new TigerRT1(line);
                    Integer tlid = Integer.valueOf(rt1.getTLID());

                    if (!isRoadClass(rt1.getCfc().charAt(0))) {
                        // Skip record
                        discardTlid(tlid);
                    } else {
                        if (type1.containsKey(tlid)) {
                            throw new RuntimeException("1: SOMETHING'S WRONG");
                        }
                        // Add record
                        type1.put(tlid, rt1);
                    }
                    break;
                }
                case '6': {
                    TigerRT6 rt6 = new TigerRT6(line);
                    Integer tlid = Integer.valueOf(rt6.getTLID());
                    if (!trashTLIDS.contains(tlid)) {
                        // Add record
                        // (presumably ToBag.get hands back the live
                        // collection for tlid -- verify against ToBag)
                        type6.get(tlid).add(rt6);
                    }
                    break;
                }
                default:
                    // Skip record
                    break;
                }
            } catch (BadRecordException e) {
                // Skip record
            }
        }
    }
}