001    package ps6.tigerdb;
002    
003    import java.io.*;
004    import java.util.*;
005    import java.util.zip.ZipFile;
006    import java.util.zip.ZipEntry;
007    
008    public class DatabaseReader {
009    
010        public static boolean INFO = false;
011    
012        // private Map type1 = new HashMap(); // TLID -> TigerRT1
013        private Map<Integer, TigerRT1> type1 = new TreeMap<Integer, TigerRT1>(); // TLID -> TigerRT1
014        private Map<Integer, List<TigerRT6>> type6 = new HashMap<Integer, List<TigerRT6>>(); // TLID ->* TigerRT6
015    
016        // tracks tlids that we've thrown out as useless for our purposes
017        // (b/c their TigerRT1's weren't Roads, they were Rails or
018        // something...)
019        private Set<Integer> trashTLIDS = new HashSet<Integer>();
020    
021        public class GeoChain {
022            private final TigerRT1 rt1;
023            private final Collection<TigerRT6> rt6s; // Set[TigerRT2]
024    
025            private /*@LazyNonNull*/ IntSet leftSet = null; //cached street numbers on the left
026            private /*@LazyNonNull*/ IntSet rightSet = null; //cached street numbers on the right
027    
028            GeoChain(TigerRT1 rt1, Collection<TigerRT6> rt6s) {
029                this.rt1  = rt1;
030                this.rt6s = rt6s;
031            }
032            /**
033             * @return the overall Route information (the RT1)
034             */
035            public TigerRT1 getRT1() {
036                return rt1;
037            }
038    
039            /**
040             * @return the small pieces of Route information in a list of RT6s
041             */
042            public Collection<TigerRT6> getRT6s() {
043                return Collections.unmodifiableCollection(rt6s);
044            }
045    
046            /**
047             * Get the set of addresses on the left
048             */
049            public String getLeftAddresses() {
050                cacheLeftAddresses();
051                assert leftSet != null : "@SuppressWarnings(nullness)"; //assume cacheLeftAddresses() works
052                return leftSet.unparse();
053            }
054    
055            /**
056             * Get the set of addresses on the right
057             */
058            public String getRightAddresses() {
059                cacheRightAddresses();
060                assert rightSet != null : "@SuppressWarnings(nullness)"; //assume cacheRightAddresses() works
061                return rightSet.unparse();
062            }
063    
064            //store the addresses on the right, if they havn't yet been stored
065            private void cacheRightAddresses() {
066                if (rightSet == null) {
067                    rightSet = getRT1().getRightRange().getSet();
068                    for (TigerRT6 rt6 : getRT6s()) {
069                        rightSet = rightSet.union(rt6.getRightRange().getSet());
070                    }
071                }
072            }
073    
074            //store the addresses on the left, if they havn't yet been stored
075            private void cacheLeftAddresses() {
076                if (leftSet == null) {
077                    leftSet = getRT1().getLeftRange().getSet();
078                    for (TigerRT6 rt6 : getRT6s()) {
079                        leftSet = leftSet.union(rt6.getLeftRange().getSet());
080                    }
081                }
082            }
083    
084            /**
085             * @return true iff the street numbers on the sides of this
086             * chain contain no common elements.
087             */
088            public boolean sidesDisjoint() {
089                cacheLeftAddresses();
090                cacheRightAddresses();
091                assert leftSet != null : "@SuppressWarnings(nullness)"; //assume cacheLeftAddresses() works
092                assert rightSet != null : "@SuppressWarnings(nullness)"; //assume cacheRightAddresses() works
093                return leftSet.isDisjoint(rightSet);
094            }
095        }
096    
097        /**
098         * @effects  Returns an Iterator[GeoChain] over the GeoChains currently
099         *           stored in this
100         * @requires this is not modified while the returned iterator is
101         *          in use
102         */
103        public Iterator<GeoChain> geoChains() {
104            return new ImmIterator<GeoChain>() {
105                    Iterator</*@KeyFor("type1")*/ Integer> tlids = type1.keySet().iterator();
106                    public boolean hasNext() {
107                        return tlids.hasNext();
108                    }
109                    public GeoChain next() {
110                        Integer tlid = tlids.next();
111                        TigerRT1 rt1 = type1.get(tlid);
112                        assert rt1 != null : "@SuppressWarnings(nullness)"; //Guaranteed since tlids is the keyset of type1
113    
114                        Collection<TigerRT6> rt6s = type6.containsKey(tlid) ? type6.get(tlid) : new HashSet<TigerRT6>();
115                        rt6s = Collections.unmodifiableCollection(rt6s);
116    
117                        return new GeoChain(rt1, rt6s);
118                    }
119                };
120        }
121    
122        public Iterator<GeoChain> geoChains(File zf) throws IOException {
123            return geoChains(new ZipFile(zf));
124        }
125        public Iterator<GeoChain> geoChains(String zf) throws IOException {
126            return geoChains(new ZipFile(zf));
127        }
128        private Iterator<GeoChain> geoChains(ZipFile zf) {
129            // prep by reading type6 records first
130            try {
131                Enumeration<? extends ZipEntry> entries = zf.entries();
132                while (entries.hasMoreElements()) {
133                    ZipEntry entry = entries.nextElement();
134                    if (entry.getName().toLowerCase().endsWith("rt6")) {
135                        this.readRecords(zf.getInputStream(entry));
136                        break;
137                    }
138                }
139    
140                entries = zf.entries();
141                ZipEntry mainEntry = null;
142                while (entries.hasMoreElements()) {
143                    ZipEntry entry = entries.nextElement();
144                    if (entry.getName().toLowerCase().endsWith("rt1")) {
145                        mainEntry = entry;
146                        break;
147                    }
148                }
149                if (mainEntry == null) {
150                    return Collections.<GeoChain>emptyList().iterator();
151                }
152                final LineNumberReader lnr =
153                    new LineNumberReader
154                    (new InputStreamReader(zf.getInputStream(mainEntry)));
155    
156                // now return the nifty read-and-throw-away GeoChain iterator
157                return new ImmIterator<GeoChain>() {
158                        /*@Nullable*/ TigerRT1 rt1 = null;
159                        private void advance() {
160                            while (rt1 == null) {
161                                String line;
162                                try {
163                                    line = lnr.readLine();
164                                } catch (IOException ioe) {
165                                    line = null;
166                                }
167                                if (line == null) {
168                                    return;
169                                }
170                                try {
171                                    rt1 = new TigerRT1(line);
172                                    assert rt1 != null : "@SuppressWarnings(nullness)";//Guaranteed by constructor
173    
174                                    if (rt1.getCfc().charAt(0) != 'a' &&
175                                        rt1.getCfc().charAt(0) != 'A') {
176                                        trashTLIDS.add(new Integer(rt1.getTLID()));
177                                        type6.remove(new Integer(rt1.getTLID()));
178                                        // System.out.println("1: ("+type1.size()+") Skipping "+rt1);
179                                        rt1 = null;
180                                    }
181                                } catch (BadRecordException bre) {
182                                    // System.out.println("bad record: " + bre.getMessage());
183                                    rt1 = null;
184                                }
185                            }
186                        }
187                        public boolean hasNext() {
188                            advance();
189                            return (rt1 != null);
190                        }
191    
192                        public GeoChain next() {
193                            if (rt1 == null) {
194                                throw new NoSuchElementException();
195                            } else {
196                                int tlid = rt1.getTLID();
197    
198                                GeoChain gc = type6.containsKey(tlid) ? new GeoChain(rt1,type6.get(tlid)) :
199                                    new GeoChain(rt1,new ArrayList<TigerRT6>());
200    
201                                rt1 = null;
202                                advance();
203                                return gc;
204                            }
205                        }
206                    };
207            } catch (IOException ioe) {
208                throw new RuntimeException(ioe.getMessage());
209            }
210        }
211    
212        public DatabaseReader() {
213    
214        }
215    
216        // checks internal state of this to increase confidence that the
217        // data set isn't screwy
218        public void checkMappingInv() {
219            checkOneMappingInv(type6);
220        }
221    
222        // checks that every key in typeT has an RT1 in type1
223        private void checkOneMappingInv(Map<Integer,?> typeT) {
224            for (Integer tlid : typeT.keySet()) {
225    
226                assert type1.containsKey(tlid) : "No record found for tlid: " + tlid +
227                    " with records: +" + typeT.get(tlid);
228            }
229        }
230    
231        public static void main(String[] args) {
232            DatabaseReader db = new DatabaseReader();
233    
234            // each arg is a ZIP file
235            for (int i=0; i<args.length; i++) {
236                String zfstr = args[i];
237                try {
238                    db.readZipFile(zfstr);
239                } catch (IOException e) {
240                    System.out.println("IOEXCEPTION?");
241    
242                } catch (OutOfMemoryError e) {
243    
244                    System.out.println("OUT OF MEMORY");
245    
246                    System.out.println("1 "+db.type1.size());
247                    System.out.println("6 "+db.type6.size());
248    
249                    e.printStackTrace();
250                    System.exit(-1);
251                }
252            }
253            db.checkMappingInv();
254        }
255    
256        public void readZipFile(File zf) throws IOException {
257            readZipFile(new ZipFile(zf));
258        }
259    
260        public void readZipFile(String zstr) throws IOException {
261            readZipFile(new ZipFile(zstr));
262            if (INFO)
263                System.out.println("finished with "+zstr);
264        }
265    
266        private void readZipFile(ZipFile zf) throws IOException {
267            Enumeration<? extends ZipEntry> entries = zf.entries();
268            // build the type6 map for
269            while (entries.hasMoreElements()) {
270                ZipEntry entry = entries.nextElement();
271                this.readRecords(zf.getInputStream(entry));
272            }
273            zf.close();
274        }
275    
276        public void readRecords(InputStream is) throws IOException {
277            LineNumberReader lnr = new LineNumberReader(new InputStreamReader(is));
278            for (String line=lnr.readLine(); line!=null; line=lnr.readLine() ) {
279                // System.out.print("*");
280                try {
281                    switch(line.charAt(0)) {
282                    case '1':
283                        TigerRT1 rt1 = new TigerRT1(line);
284    
285                        if (rt1.getCfc().charAt(0) != 'a' &&
286                            rt1.getCfc().charAt(0) != 'A') {
287                            trashTLIDS.add(new Integer(rt1.getTLID()));
288                            type6.remove(new Integer(rt1.getTLID()));
289                         // Skip record
290                        } else {
291                            if (type1.containsKey(new Integer(rt1.getTLID())))
292                                throw new RuntimeException("1: SOMETHING'S WRONG");
293                            //Add record
294                            type1.put(new Integer(rt1.getTLID()), rt1);
295                        }
296                        break;
297                    case '6':
298                        TigerRT6 rt6 = new TigerRT6(line);
299                        if (!trashTLIDS.contains(new Integer(rt6.getTLID()))) {
300                            //Add record
301                            int tlid = rt6.getTLID();
302                            if (type6.containsKey(tlid)) {
303                                type6.get(tlid).add(rt6);
304                            } else {
305                                List<TigerRT6> x = new ArrayList<TigerRT6>();
306                                x.add(rt6);
307                                type6.put(tlid,x);
308                            }
309                        } else {
310                            // Skip record
311                        }
312                        break;
313                    default:
314                        // Skip record
315                    }
316                } catch (BadRecordException e) {
317                    // Skip record
318                }
319            }
320        }
321    }