package ps6.tigerdb;

import java.io.*;
import java.util.*;
import java.util.zip.ZipFile;
import java.util.zip.ZipEntry;

/**
 * Loads type-1 and type-6 records out of ZIP archives and groups them, by
 * TLID, into {@link GeoChain}s.
 *
 * <p>When reading records, a line whose first character is {@code '1'} is
 * parsed as a {@link TigerRT1} and a line whose first character is
 * {@code '6'} is parsed as a {@link TigerRT6}; every other line is ignored.
 * A type-1 record whose CFC does not begin with {@code 'a'} or {@code 'A'}
 * is discarded, its TLID is remembered, and any type-6 records carrying the
 * same TLID are discarded as well.
 *
 * <p>Two access styles are offered: {@link #readZipFile(String)} /
 * {@link #readRecords(InputStream)} followed by {@link #geoChains()} keeps
 * everything in memory, while {@link #geoChains(File)} /
 * {@link #geoChains(String)} stream the type-1 records one at a time.
 */
public class DatabaseReader {

  /** When true, {@link #readZipFile(String)} reports each finished archive on standard output. */
  public static boolean INFO = false;

  // TLID -> TigerRT1
  private final Map<Integer, TigerRT1> type1 = new TreeMap<Integer, TigerRT1>();
  // TLID ->* TigerRT6
  private final Map<Integer, List<TigerRT6>> type6 = new HashMap<Integer, List<TigerRT6>>();

  // tracks tlids that we've thrown out as useless for our purposes
  // (b/c their TigerRT1's weren't Roads, they were Rails or
  // something...)
  private final Set<Integer> trashTLIDS = new HashSet<Integer>();

  /**
   * One type-1 record together with the type-6 records that share its TLID.
   * The left/right address sets are computed on first use and then cached.
   */
  public class GeoChain {
    private final TigerRT1 rt1;
    private final Collection<TigerRT6> rt6s; // Collection[TigerRT6]

    private /*@LazyNonNull*/ IntSet leftSet = null;  // cached street numbers on the left
    private /*@LazyNonNull*/ IntSet rightSet = null; // cached street numbers on the right

    GeoChain(TigerRT1 rt1, Collection<TigerRT6> rt6s) {
      this.rt1 = rt1;
      this.rt6s = rt6s;
    }

    /**
     * @return the overall Route information (the RT1)
     */
    public TigerRT1 getRT1() {
      return rt1;
    }

    /**
     * @return an unmodifiable view of the RT6 records attached to this chain
     */
    public Collection<TigerRT6> getRT6s() {
      return Collections.unmodifiableCollection(rt6s);
    }

    /**
     * Get the set of addresses on the left
     *
     * @return the result of {@code unparse()} on the union of the left
     *         ranges of the RT1 and of every RT6
     */
    public String getLeftAddresses() {
      cacheLeftAddresses();
      assert leftSet != null : "@SuppressWarnings(nullness)"; //assume cacheLeftAddresses() works
      return leftSet.unparse();
    }

    /**
     * Get the set of addresses on the right
     *
     * @return the result of {@code unparse()} on the union of the right
     *         ranges of the RT1 and of every RT6
     */
    public String getRightAddresses() {
      cacheRightAddresses();
      assert rightSet != null : "@SuppressWarnings(nullness)"; //assume cacheRightAddresses() works
      return rightSet.unparse();
    }

    // store the addresses on the right, if they haven't yet been stored
    private void cacheRightAddresses() {
      if (rightSet != null) {
        return;
      }
      // Build in a local and publish once, so a failure part-way through the
      // loop cannot leave a half-built set behind as the cached value.
      IntSet merged = getRT1().getRightRange().getSet();
      for (TigerRT6 rt6 : getRT6s()) {
        merged = merged.union(rt6.getRightRange().getSet());
      }
      rightSet = merged;
    }

    // store the addresses on the left, if they haven't yet been stored
    private void cacheLeftAddresses() {
      if (leftSet != null) {
        return;
      }
      // Same publish-once approach as cacheRightAddresses().
      IntSet merged = getRT1().getLeftRange().getSet();
      for (TigerRT6 rt6 : getRT6s()) {
        merged = merged.union(rt6.getLeftRange().getSet());
      }
      leftSet = merged;
    }

    /**
     * @return true iff the street numbers on the sides of this
     * chain contain no common elements.
     */
    public boolean sidesDisjoint() {
      cacheLeftAddresses();
      cacheRightAddresses();
      assert leftSet != null : "@SuppressWarnings(nullness)"; //assume cacheLeftAddresses() works
      assert rightSet != null : "@SuppressWarnings(nullness)"; //assume cacheRightAddresses() works
      return leftSet.isDisjoint(rightSet);
    }
  }

  /**
   * @effects Returns an Iterator[GeoChain] over the GeoChains currently
   * stored in this
   * @requires this is not modified while the returned iterator is
   * in use
   */
  public Iterator<GeoChain> geoChains() {
    return new ImmIterator<GeoChain>() {
      Iterator</*@KeyFor("type1")*/ Integer> tlids = type1.keySet().iterator();

      public boolean hasNext() {
        return tlids.hasNext();
      }

      public GeoChain next() {
        Integer tlid = tlids.next();
        TigerRT1 rt1 = type1.get(tlid);
        assert rt1 != null : "@SuppressWarnings(nullness)"; //Guaranteed since tlids is the keyset of type1

        // A chain with no type-6 records gets an empty collection, never null.
        Collection<TigerRT6> rt6s;
        if (type6.containsKey(tlid)) {
          rt6s = type6.get(tlid);
        } else {
          rt6s = new HashSet<TigerRT6>();
        }
        rt6s = Collections.unmodifiableCollection(rt6s);

        return new GeoChain(rt1, rt6s);
      }
    };
  }

  /**
   * Streams the GeoChains of one ZIP archive without retaining the type-1
   * records in this reader.
   *
   * @param zf the ZIP archive to read
   * @return an iterator over the retained chains; the archive is closed once
   *         the iterator has been run to exhaustion
   * @throws IOException if the archive cannot be opened
   * @throws RuntimeException wrapping any IOException raised while the
   *         archive's entries are being prepared
   */
  public Iterator<GeoChain> geoChains(File zf) throws IOException {
    return geoChains(new ZipFile(zf));
  }

  /**
   * Streams the GeoChains of one ZIP archive without retaining the type-1
   * records in this reader.
   *
   * @param zf path of the ZIP archive to read
   * @return an iterator over the retained chains; the archive is closed once
   *         the iterator has been run to exhaustion
   * @throws IOException if the archive cannot be opened
   * @throws RuntimeException wrapping any IOException raised while the
   *         archive's entries are being prepared
   */
  public Iterator<GeoChain> geoChains(String zf) throws IOException {
    return geoChains(new ZipFile(zf));
  }

  // Loads the first "rt6" entry into type6, then returns a lazy iterator over
  // the first "rt1" entry. Takes ownership of zf: it is closed here on every
  // early exit, and by the iterator when the rt1 entry has been consumed. An
  // iterator abandoned before exhaustion leaves the archive open.
  private Iterator<GeoChain> geoChains(final ZipFile zf) {
    boolean handedOff = false;
    try {
      // prep by reading type6 records first
      Enumeration<? extends ZipEntry> entries = zf.entries();
      while (entries.hasMoreElements()) {
        ZipEntry entry = entries.nextElement();
        if (entry.getName().toLowerCase().endsWith("rt6")) {
          InputStream in = zf.getInputStream(entry);
          try {
            this.readRecords(in);
          } finally {
            in.close();
          }
          break;
        }
      }

      entries = zf.entries();
      ZipEntry mainEntry = null;
      while (entries.hasMoreElements()) {
        ZipEntry entry = entries.nextElement();
        if (entry.getName().toLowerCase().endsWith("rt1")) {
          mainEntry = entry;
          break;
        }
      }
      if (mainEntry == null) {
        return Collections.<GeoChain>emptyList().iterator();
      }
      final LineNumberReader lnr =
        new LineNumberReader
        (new InputStreamReader(zf.getInputStream(mainEntry)));

      // now return the nifty read-and-throw-away GeoChain iterator
      Iterator<GeoChain> chains = new ImmIterator<GeoChain>() {
        // One-record lookahead; null means "nothing buffered".
        /*@Nullable*/ TigerRT1 rt1 = null;
        // Set once the underlying reader has been drained and closed.
        boolean exhausted = false;

        // Reads forward until a retained type-1 record is buffered in rt1 or
        // the input runs out.
        private void advance() {
          while (rt1 == null && !exhausted) {
            String line;
            try {
              line = lnr.readLine();
            } catch (IOException ioe) {
              // Kept from the original: a read failure ends the iteration
              // rather than propagating out of hasNext()/next().
              line = null;
            }
            if (line == null) {
              exhausted = true;
              closeQuietly(lnr);
              closeZipQuietly(zf);
              return;
            }
            try {
              TigerRT1 candidate = new TigerRT1(line);
              if (isRoadCfc(candidate.getCfc())) {
                rt1 = candidate;
              } else {
                // Not a road: remember the TLID and drop its type-6 records.
                Integer tlid = Integer.valueOf(candidate.getTLID());
                trashTLIDS.add(tlid);
                type6.remove(tlid);
              }
            } catch (BadRecordException bre) {
              // Skip the malformed record and keep reading.
            }
          }
        }

        public boolean hasNext() {
          advance();
          return (rt1 != null);
        }

        public GeoChain next() {
          // Fill the lookahead here too, so next() works without a preceding
          // hasNext() call as the Iterator contract requires.
          advance();
          if (rt1 == null) {
            throw new NoSuchElementException();
          }
          int tlid = rt1.getTLID();

          List<TigerRT6> pieces = type6.get(tlid);
          GeoChain gc = new GeoChain(rt1,
              pieces != null ? pieces : new ArrayList<TigerRT6>());

          rt1 = null;
          advance();
          return gc;
        }
      };
      handedOff = true; // from here on the iterator is responsible for zf
      return chains;
    } catch (IOException ioe) {
      // Keep the original exception type and message, but preserve the cause.
      throw new RuntimeException(ioe.getMessage(), ioe);
    } finally {
      if (!handedOff) {
        closeZipQuietly(zf);
      }
    }
  }

  /** Creates a reader holding no records. */
  public DatabaseReader() {

  }

  // checks internal state of this to increase confidence that the
  // data set isn't screwy
  public void checkMappingInv() {
    checkOneMappingInv(type6);
  }

  // checks that every key in typeT has an RT1 in type1
  private void checkOneMappingInv(Map<Integer,?> typeT) {
    for (Integer tlid : typeT.keySet()) {
      assert type1.containsKey(tlid) : "No record found for tlid: " + tlid +
        " with records: +" + typeT.get(tlid);
    }
  }

  /**
   * Reads every ZIP file named on the command line into one reader, then
   * runs {@link #checkMappingInv()}.
   *
   * @param args paths of the ZIP files to load
   */
  public static void main(String[] args) {
    DatabaseReader db = new DatabaseReader();

    // each arg is a ZIP file
    for (String zfstr : args) {
      try {
        db.readZipFile(zfstr);
      } catch (IOException e) {
        System.out.println("IOEXCEPTION?");
      } catch (OutOfMemoryError e) {
        System.out.println("OUT OF MEMORY");

        System.out.println("1 "+db.type1.size());
        System.out.println("6 "+db.type6.size());

        e.printStackTrace();
        System.exit(-1);
      }
    }
    db.checkMappingInv();
  }

  /**
   * Reads the records of every entry of a ZIP archive into this reader.
   *
   * @param zf the ZIP archive to read
   * @throws IOException if the archive cannot be opened or read
   */
  public void readZipFile(File zf) throws IOException {
    readZipFile(new ZipFile(zf));
  }

  /**
   * Reads the records of every entry of a ZIP archive into this reader and,
   * when {@link #INFO} is set, reports completion on standard output.
   *
   * @param zstr path of the ZIP archive to read
   * @throws IOException if the archive cannot be opened or read
   */
  public void readZipFile(String zstr) throws IOException {
    readZipFile(new ZipFile(zstr));
    if (INFO) {
      System.out.println("finished with "+zstr);
    }
  }

  // Feeds every entry of zf through readRecords. The archive and each entry
  // stream are closed even when a read fails part-way through.
  private void readZipFile(ZipFile zf) throws IOException {
    try {
      Enumeration<? extends ZipEntry> entries = zf.entries();
      while (entries.hasMoreElements()) {
        ZipEntry entry = entries.nextElement();
        InputStream in = zf.getInputStream(entry);
        try {
          this.readRecords(in);
        } finally {
          in.close();
        }
      }
    } finally {
      zf.close();
    }
  }

  /**
   * Reads records line by line from {@code is} into this reader. Lines that
   * are empty, that start with anything other than {@code '1'} or
   * {@code '6'}, or that fail to parse are skipped. The stream is not closed
   * by this method.
   *
   * @param is the stream to read records from
   * @throws IOException if reading from the stream fails
   * @throws RuntimeException if a retained type-1 record repeats a TLID that
   *         is already stored
   */
  public void readRecords(InputStream is) throws IOException {
    LineNumberReader lnr = new LineNumberReader(new InputStreamReader(is));
    for (String line = lnr.readLine(); line != null; line = lnr.readLine()) {
      if (line.length() == 0) {
        // An empty line has no record-type character; charAt(0) would throw.
        continue;
      }
      try {
        switch (line.charAt(0)) {
        case '1': {
          TigerRT1 rt1 = new TigerRT1(line);
          Integer tlid = Integer.valueOf(rt1.getTLID());

          if (!isRoadCfc(rt1.getCfc())) {
            // Skip record, and forget any type-6 records already seen for it
            trashTLIDS.add(tlid);
            type6.remove(tlid);
          } else {
            if (type1.containsKey(tlid)) {
              throw new RuntimeException("1: SOMETHING'S WRONG");
            }
            //Add record
            type1.put(tlid, rt1);
          }
          break;
        }
        case '6': {
          TigerRT6 rt6 = new TigerRT6(line);
          Integer tlid = Integer.valueOf(rt6.getTLID());
          if (!trashTLIDS.contains(tlid)) {
            //Add record
            List<TigerRT6> pieces = type6.get(tlid);
            if (pieces == null) {
              pieces = new ArrayList<TigerRT6>();
              type6.put(tlid, pieces);
            }
            pieces.add(rt6);
          }
          // else: Skip record
          break;
        }
        default:
          // Skip record
        }
      } catch (BadRecordException e) {
        // Skip record
      }
    }
  }

  // True iff cfc starts with 'a' or 'A', the test both readers use to decide
  // whether a type-1 record is kept. A null or empty cfc counts as "not kept"
  // instead of raising an exception.
  // NOTE(review): assumes a missing CFC should be discarded like any other
  // non-matching one - confirm against TigerRT1's guarantees.
  private static boolean isRoadCfc(CharSequence cfc) {
    if (cfc == null || cfc.length() == 0) {
      return false;
    }
    char first = cfc.charAt(0);
    return first == 'a' || first == 'A';
  }

  // Best-effort close for readers/streams whose content has already been consumed.
  private static void closeQuietly(Closeable c) {
    try {
      c.close();
    } catch (IOException ignored) {
      // Nothing useful can be done about a failed close during cleanup.
    }
  }

  // Best-effort close for an archive this reader opened itself.
  private static void closeZipQuietly(ZipFile zf) {
    try {
      zf.close();
    } catch (IOException ignored) {
      // Nothing useful can be done about a failed close during cleanup.
    }
  }
}