001 package ps6.tigerdb;
002
003 import java.io.*;
004 import java.util.*;
005 import java.util.zip.ZipFile;
006 import java.util.zip.ZipEntry;
007
008 public class DatabaseReader {
009
010 public static boolean INFO = false;
011
012 // private Map type1 = new HashMap(); // TLID -> TigerRT1
013 private Map<Integer, TigerRT1> type1 = new TreeMap<Integer, TigerRT1>(); // TLID -> TigerRT1
014 private Map<Integer, List<TigerRT6>> type6 = new HashMap<Integer, List<TigerRT6>>(); // TLID ->* TigerRT6
015
016 // tracks tlids that we've thrown out as useless for our purposes
017 // (b/c their TigerRT1's weren't Roads, they were Rails or
018 // something...)
019 private Set<Integer> trashTLIDS = new HashSet<Integer>();
020
021 public class GeoChain {
022 private final TigerRT1 rt1;
023 private final Collection<TigerRT6> rt6s; // Set[TigerRT2]
024
025 private /*@LazyNonNull*/ IntSet leftSet = null; //cached street numbers on the left
026 private /*@LazyNonNull*/ IntSet rightSet = null; //cached street numbers on the right
027
028 GeoChain(TigerRT1 rt1, Collection<TigerRT6> rt6s) {
029 this.rt1 = rt1;
030 this.rt6s = rt6s;
031 }
032 /**
033 * @return the overall Route information (the RT1)
034 */
035 public TigerRT1 getRT1() {
036 return rt1;
037 }
038
039 /**
040 * @return the small pieces of Route information in a list of RT6s
041 */
042 public Collection<TigerRT6> getRT6s() {
043 return Collections.unmodifiableCollection(rt6s);
044 }
045
046 /**
047 * Get the set of addresses on the left
048 */
049 public String getLeftAddresses() {
050 cacheLeftAddresses();
051 assert leftSet != null : "@SuppressWarnings(nullness)"; //assume cacheLeftAddresses() works
052 return leftSet.unparse();
053 }
054
055 /**
056 * Get the set of addresses on the right
057 */
058 public String getRightAddresses() {
059 cacheRightAddresses();
060 assert rightSet != null : "@SuppressWarnings(nullness)"; //assume cacheRightAddresses() works
061 return rightSet.unparse();
062 }
063
064 //store the addresses on the right, if they havn't yet been stored
065 private void cacheRightAddresses() {
066 if (rightSet == null) {
067 rightSet = getRT1().getRightRange().getSet();
068 for (TigerRT6 rt6 : getRT6s()) {
069 rightSet = rightSet.union(rt6.getRightRange().getSet());
070 }
071 }
072 }
073
074 //store the addresses on the left, if they havn't yet been stored
075 private void cacheLeftAddresses() {
076 if (leftSet == null) {
077 leftSet = getRT1().getLeftRange().getSet();
078 for (TigerRT6 rt6 : getRT6s()) {
079 leftSet = leftSet.union(rt6.getLeftRange().getSet());
080 }
081 }
082 }
083
084 /**
085 * @return true iff the street numbers on the sides of this
086 * chain contain no common elements.
087 */
088 public boolean sidesDisjoint() {
089 cacheLeftAddresses();
090 cacheRightAddresses();
091 assert leftSet != null : "@SuppressWarnings(nullness)"; //assume cacheLeftAddresses() works
092 assert rightSet != null : "@SuppressWarnings(nullness)"; //assume cacheRightAddresses() works
093 return leftSet.isDisjoint(rightSet);
094 }
095 }
096
097 /**
098 * @effects Returns an Iterator[GeoChain] over the GeoChains currently
099 * stored in this
100 * @requires this is not modified while the returned iterator is
101 * in use
102 */
103 public Iterator<GeoChain> geoChains() {
104 return new ImmIterator<GeoChain>() {
105 Iterator</*@KeyFor("type1")*/ Integer> tlids = type1.keySet().iterator();
106 public boolean hasNext() {
107 return tlids.hasNext();
108 }
109 public GeoChain next() {
110 Integer tlid = tlids.next();
111 TigerRT1 rt1 = type1.get(tlid);
112 assert rt1 != null : "@SuppressWarnings(nullness)"; //Guaranteed since tlids is the keyset of type1
113
114 Collection<TigerRT6> rt6s = type6.containsKey(tlid) ? type6.get(tlid) : new HashSet<TigerRT6>();
115 rt6s = Collections.unmodifiableCollection(rt6s);
116
117 return new GeoChain(rt1, rt6s);
118 }
119 };
120 }
121
122 public Iterator<GeoChain> geoChains(File zf) throws IOException {
123 return geoChains(new ZipFile(zf));
124 }
125 public Iterator<GeoChain> geoChains(String zf) throws IOException {
126 return geoChains(new ZipFile(zf));
127 }
128 private Iterator<GeoChain> geoChains(ZipFile zf) {
129 // prep by reading type6 records first
130 try {
131 Enumeration<? extends ZipEntry> entries = zf.entries();
132 while (entries.hasMoreElements()) {
133 ZipEntry entry = entries.nextElement();
134 if (entry.getName().toLowerCase().endsWith("rt6")) {
135 this.readRecords(zf.getInputStream(entry));
136 break;
137 }
138 }
139
140 entries = zf.entries();
141 ZipEntry mainEntry = null;
142 while (entries.hasMoreElements()) {
143 ZipEntry entry = entries.nextElement();
144 if (entry.getName().toLowerCase().endsWith("rt1")) {
145 mainEntry = entry;
146 break;
147 }
148 }
149 if (mainEntry == null) {
150 return Collections.<GeoChain>emptyList().iterator();
151 }
152 final LineNumberReader lnr =
153 new LineNumberReader
154 (new InputStreamReader(zf.getInputStream(mainEntry)));
155
156 // now return the nifty read-and-throw-away GeoChain iterator
157 return new ImmIterator<GeoChain>() {
158 /*@Nullable*/ TigerRT1 rt1 = null;
159 private void advance() {
160 while (rt1 == null) {
161 String line;
162 try {
163 line = lnr.readLine();
164 } catch (IOException ioe) {
165 line = null;
166 }
167 if (line == null) {
168 return;
169 }
170 try {
171 rt1 = new TigerRT1(line);
172 assert rt1 != null : "@SuppressWarnings(nullness)";//Guaranteed by constructor
173
174 if (rt1.getCfc().charAt(0) != 'a' &&
175 rt1.getCfc().charAt(0) != 'A') {
176 trashTLIDS.add(new Integer(rt1.getTLID()));
177 type6.remove(new Integer(rt1.getTLID()));
178 // System.out.println("1: ("+type1.size()+") Skipping "+rt1);
179 rt1 = null;
180 }
181 } catch (BadRecordException bre) {
182 // System.out.println("bad record: " + bre.getMessage());
183 rt1 = null;
184 }
185 }
186 }
187 public boolean hasNext() {
188 advance();
189 return (rt1 != null);
190 }
191
192 public GeoChain next() {
193 if (rt1 == null) {
194 throw new NoSuchElementException();
195 } else {
196 int tlid = rt1.getTLID();
197
198 GeoChain gc = type6.containsKey(tlid) ? new GeoChain(rt1,type6.get(tlid)) :
199 new GeoChain(rt1,new ArrayList<TigerRT6>());
200
201 rt1 = null;
202 advance();
203 return gc;
204 }
205 }
206 };
207 } catch (IOException ioe) {
208 throw new RuntimeException(ioe.getMessage());
209 }
210 }
211
/** Creates a reader that holds no records yet. */
public DatabaseReader() {
    super();
}
215
216 // checks internal state of this to increase confidence that the
217 // data set isn't screwy
218 public void checkMappingInv() {
219 checkOneMappingInv(type6);
220 }
221
222 // checks that every key in typeT has an RT1 in type1
223 private void checkOneMappingInv(Map<Integer,?> typeT) {
224 for (Integer tlid : typeT.keySet()) {
225
226 assert type1.containsKey(tlid) : "No record found for tlid: " + tlid +
227 " with records: +" + typeT.get(tlid);
228 }
229 }
230
231 public static void main(String[] args) {
232 DatabaseReader db = new DatabaseReader();
233
234 // each arg is a ZIP file
235 for (int i=0; i<args.length; i++) {
236 String zfstr = args[i];
237 try {
238 db.readZipFile(zfstr);
239 } catch (IOException e) {
240 System.out.println("IOEXCEPTION?");
241
242 } catch (OutOfMemoryError e) {
243
244 System.out.println("OUT OF MEMORY");
245
246 System.out.println("1 "+db.type1.size());
247 System.out.println("6 "+db.type6.size());
248
249 e.printStackTrace();
250 System.exit(-1);
251 }
252 }
253 db.checkMappingInv();
254 }
255
256 public void readZipFile(File zf) throws IOException {
257 readZipFile(new ZipFile(zf));
258 }
259
260 public void readZipFile(String zstr) throws IOException {
261 readZipFile(new ZipFile(zstr));
262 if (INFO)
263 System.out.println("finished with "+zstr);
264 }
265
266 private void readZipFile(ZipFile zf) throws IOException {
267 Enumeration<? extends ZipEntry> entries = zf.entries();
268 // build the type6 map for
269 while (entries.hasMoreElements()) {
270 ZipEntry entry = entries.nextElement();
271 this.readRecords(zf.getInputStream(entry));
272 }
273 zf.close();
274 }
275
276 public void readRecords(InputStream is) throws IOException {
277 LineNumberReader lnr = new LineNumberReader(new InputStreamReader(is));
278 for (String line=lnr.readLine(); line!=null; line=lnr.readLine() ) {
279 // System.out.print("*");
280 try {
281 switch(line.charAt(0)) {
282 case '1':
283 TigerRT1 rt1 = new TigerRT1(line);
284
285 if (rt1.getCfc().charAt(0) != 'a' &&
286 rt1.getCfc().charAt(0) != 'A') {
287 trashTLIDS.add(new Integer(rt1.getTLID()));
288 type6.remove(new Integer(rt1.getTLID()));
289 // Skip record
290 } else {
291 if (type1.containsKey(new Integer(rt1.getTLID())))
292 throw new RuntimeException("1: SOMETHING'S WRONG");
293 //Add record
294 type1.put(new Integer(rt1.getTLID()), rt1);
295 }
296 break;
297 case '6':
298 TigerRT6 rt6 = new TigerRT6(line);
299 if (!trashTLIDS.contains(new Integer(rt6.getTLID()))) {
300 //Add record
301 int tlid = rt6.getTLID();
302 if (type6.containsKey(tlid)) {
303 type6.get(tlid).add(rt6);
304 } else {
305 List<TigerRT6> x = new ArrayList<TigerRT6>();
306 x.add(rt6);
307 type6.put(tlid,x);
308 }
309 } else {
310 // Skip record
311 }
312 break;
313 default:
314 // Skip record
315 }
316 } catch (BadRecordException e) {
317 // Skip record
318 }
319 }
320 }
321 }