001 package ps6.tigerdb;
002
003 import java.io.*;
004 import java.util.*;
005 import java.util.zip.ZipFile;
006 import java.util.zip.ZipEntry;
007
008 public class DatabaseReader {
009
010 public static boolean INFO = false;
011
012 // private Map type1 = new HashMap(); // TLID -> TigerRT1
013 private Map<Integer, TigerRT1> type1 = new TreeMap<Integer, TigerRT1>(); // TLID -> TigerRT1
014 private ToBag<Integer, TigerRT6> type6 = new ToBag<Integer, TigerRT6>(); // TLID ->* TigerRT6
015
016 // tracks tlids that we've thrown out as useless for our purposes
017 // (b/c their TigerRT1's weren't Roads, they were Rails or
018 // something...)
019 private Set<Integer> trashTLIDS = new HashSet<Integer>();
020
021 public class GeoChain {
022 private final TigerRT1 rt1;
023 private final Collection<TigerRT6> rt6s; // Set[TigerRT2]
024
025 private IntSet leftSet = null; //cached street numbers on the left
026 private IntSet rightSet = null; //cached street numbers on the right
027
028 GeoChain(TigerRT1 rt1, Collection<TigerRT6> rt6s) {
029 this.rt1 = rt1;
030 this.rt6s = rt6s;
031 }
032 /**
033 * @return the overall Route information (the RT1)
034 */
035 public TigerRT1 getRT1() {
036 return rt1;
037 }
038
039 /**
040 * @return the small pieces of Route information in a list of RT6s
041 */
042 public Collection<TigerRT6> getRT6s() {
043 return Collections.unmodifiableCollection(rt6s);
044 }
045
046 /**
047 * Get the set of addresses on the left
048 */
049 public String getLeftAddresses() {
050 cacheLeftAddresses();
051 return leftSet.unparse();
052 }
053
054 /**
055 * Get the set of addresses on the right
056 */
057 public String getRightAddresses() {
058 cacheRightAddresses();
059 return rightSet.unparse();
060 }
061
062 //store the addresses on the right, if they havn't yet been stored
063 private void cacheRightAddresses() {
064 if (rightSet == null) {
065 rightSet = getRT1().getRightRange().getSet();
066 for (TigerRT6 rt6 : getRT6s()) {
067 rightSet = rightSet.union(rt6.getRightRange().getSet());
068 }
069 }
070 }
071
072 //store the addresses on the left, if they havn't yet been stored
073 private void cacheLeftAddresses() {
074 if (leftSet == null) {
075 leftSet = getRT1().getLeftRange().getSet();
076 for (TigerRT6 rt6 : getRT6s()) {
077 leftSet = leftSet.union(rt6.getLeftRange().getSet());
078 }
079 }
080 }
081
082 /**
083 * @return true iff the street numbers on the sides of this
084 * chain contain no common elements.
085 */
086 public boolean sidesDisjoint() {
087 cacheLeftAddresses();
088 cacheRightAddresses();
089 return leftSet.isDisjoint(rightSet);
090 }
091 }
092
093 /**
094 * @effects Returns an Iterator[GeoChain] over the GeoChains currently
095 * stored in this
096 * @requires this is not modified while the returned iterator is
097 * in use
098 */
099 public Iterator<GeoChain> geoChains() {
100 return new ImmIterator<GeoChain>() {
101 Iterator<Integer> tlids = type1.keySet().iterator();
102 public boolean hasNext() {
103 return tlids.hasNext();
104 }
105 public GeoChain next() {
106 Integer tlid = tlids.next();
107 TigerRT1 rt1 = type1.get(tlid);
108
109 Collection<TigerRT6> rt6s = type6.get(tlid);
110 rt6s = Collections.unmodifiableCollection(rt6s);
111
112 return new GeoChain(rt1, rt6s);
113 }
114 };
115 }
116
117 public Iterator<GeoChain> geoChains(File zf) throws IOException {
118 return geoChains(new ZipFile(zf));
119 }
120 public Iterator<GeoChain> geoChains(String zf) throws IOException {
121 return geoChains(new ZipFile(zf));
122 }
123 private Iterator<GeoChain> geoChains(ZipFile zf) {
124 // prep by reading type6 records first
125 try {
126 Enumeration<? extends ZipEntry> entries = zf.entries();
127 while(entries.hasMoreElements()) {
128 ZipEntry entry = entries.nextElement();
129 if (entry.getName().toLowerCase().endsWith("rt6")) {
130 this.readRecords(zf.getInputStream(entry));
131 break;
132 }
133 }
134
135 entries = zf.entries();
136 ZipEntry mainEntry = null;
137 while(entries.hasMoreElements()) {
138 ZipEntry entry = entries.nextElement();
139 if (entry.getName().toLowerCase().endsWith("rt1")) {
140 mainEntry = entry;
141 break;
142 }
143 }
144 if (mainEntry == null) {
145 return Collections.<GeoChain>emptyList().iterator();
146 }
147 final LineNumberReader lnr =
148 new LineNumberReader
149 (new InputStreamReader(zf.getInputStream(mainEntry)));
150
151 // now return the nifty read-and-throw-away GeoChain iterator
152 return new ImmIterator<GeoChain>() {
153 TigerRT1 rt1 = null;
154 private void advance() {
155 while (rt1 == null) {
156 String line;
157 try {
158 line = lnr.readLine();
159 } catch (IOException ioe) {
160 line = null;
161 }
162 if (line == null) {
163 return;
164 }
165 try {
166 rt1 = new TigerRT1(line);
167 if (rt1.getCfc().charAt(0) != 'a' &&
168 rt1.getCfc().charAt(0) != 'A') {
169 trashTLIDS.add(new Integer(rt1.getTLID()));
170 type6.remove(new Integer(rt1.getTLID()));
171 // System.out.println("1: ("+type1.size()+") Skipping "+rt1);
172 rt1 = null;
173 }
174 } catch (BadRecordException bre) {
175 // System.out.println("bad record: " + bre.getMessage());
176 rt1 = null;
177 }
178 }
179 }
180 public boolean hasNext() {
181 advance();
182 return (rt1 != null);
183 }
184 public GeoChain next() {
185 GeoChain gc =
186 new GeoChain(rt1,type6.get(new Integer(rt1.getTLID())));
187 rt1 = null;
188 advance();
189 return gc;
190 }
191 };
192 } catch (IOException ioe) {
193 throw new RuntimeException(ioe.getMessage());
194 }
195 }
196
197 public DatabaseReader() {
198
199 }
200
201 // checks internal state of this to increase confidence that the
202 // data set isn't screwy
203 public void checkMappingInv() {
204 checkOneMappingInv(type6);
205 }
206
207 // checks that every key in typeT has an RT1 in type1
208 private void checkOneMappingInv(Map<Integer,?> typeT) {
209 for (Iterator<Integer> tki = typeT.keySet().iterator();tki.hasNext();) {
210 Integer tlid = (Integer) tki.next();
211
212 assert type1.containsKey(tlid) : "No record found for tlid: " + tlid +
213 " with records: +" + typeT.get(tlid);
214 }
215 }
216
217 public static void main(String[] args) {
218 DatabaseReader db = new DatabaseReader();
219
220 // each arg is a ZIP file
221 for (int i=0; i<args.length; i++) {
222 String zfstr = args[i];
223 try {
224 db.readZipFile(zfstr);
225 } catch (IOException e) {
226 System.out.println("IOEXCEPTION?");
227
228 } catch (OutOfMemoryError e) {
229
230 System.out.println("OUT OF MEMORY");
231
232 System.out.println("1 "+db.type1.size());
233 System.out.println("6 "+db.type6.size());
234
235 e.printStackTrace();
236 System.exit(-1);
237 }
238 }
239 db.checkMappingInv();
240 }
241
242 public void readZipFile(File zf) throws IOException {
243 readZipFile(new ZipFile(zf));
244 }
245
246 public void readZipFile(String zstr) throws IOException {
247 readZipFile(new ZipFile(zstr));
248 if (INFO)
249 System.out.println("finished with "+zstr);
250 }
251
252 private void readZipFile(ZipFile zf) throws IOException {
253 Enumeration<? extends ZipEntry> entries = zf.entries();
254 // build the type6 map for
255 while(entries.hasMoreElements()) {
256 ZipEntry entry = entries.nextElement();
257 this.readRecords(zf.getInputStream(entry));
258 }
259 zf.close();
260 }
261
262 public void readRecords(InputStream is) throws IOException {
263 LineNumberReader lnr = new LineNumberReader(new InputStreamReader(is));
264 for (String line=lnr.readLine(); line!=null; line=lnr.readLine() ) {
265 // System.out.print("*");
266 try {
267 switch(line.charAt(0)) {
268 case '1':
269 TigerRT1 rt1 = new TigerRT1(line);
270
271 if (rt1.getCfc().charAt(0) != 'a' &&
272 rt1.getCfc().charAt(0) != 'A') {
273 trashTLIDS.add(new Integer(rt1.getTLID()));
274 type6.remove(new Integer(rt1.getTLID()));
275 // Skip record
276 } else {
277 if (type1.containsKey(new Integer(rt1.getTLID())))
278 throw new RuntimeException("1: SOMETHING'S WRONG");
279 //Add record
280 type1.put(new Integer(rt1.getTLID()), rt1);
281 }
282 break;
283 case '6':
284 TigerRT6 rt6 = new TigerRT6(line);
285 if (!trashTLIDS.contains(new Integer(rt6.getTLID()))) {
286 //Add record
287 type6.get(new Integer(rt6.getTLID())).add(rt6);
288 } else {
289 // Skip record
290 }
291 break;
292 default:
293 // Skip record
294 }
295 } catch (BadRecordException e) {
296 // Skip record
297 }
298 }
299 }
300 }