001    package ps6.tigerdb;
002    
003    import java.io.*;
004    import java.util.*;
005    import java.util.zip.ZipFile;
006    import java.util.zip.ZipEntry;
007    
008    public class DatabaseReader {
009    
010        public static boolean INFO = false;
011    
012        // private Map type1 = new HashMap(); // TLID -> TigerRT1
013        private Map<Integer, TigerRT1> type1 = new TreeMap<Integer, TigerRT1>(); // TLID -> TigerRT1
014        private ToBag<Integer, TigerRT6> type6 = new ToBag<Integer, TigerRT6>(); // TLID ->* TigerRT6
015    
016        // tracks tlids that we've thrown out as useless for our purposes
017        // (b/c their TigerRT1's weren't Roads, they were Rails or
018        // something...)
019        private Set<Integer> trashTLIDS = new HashSet<Integer>(); 
020    
021        public class GeoChain {
022            private final TigerRT1 rt1;
023            private final Collection<TigerRT6> rt6s; // Set[TigerRT2]
024    
025            private IntSet leftSet = null; //cached street numbers on the left
026            private IntSet rightSet = null; //cached street numbers on the right
027    
028            GeoChain(TigerRT1 rt1, Collection<TigerRT6> rt6s) {
029                this.rt1  = rt1;
030                this.rt6s = rt6s;
031            }
032            /**
033             * @return the overall Route information (the RT1)
034             */
035            public TigerRT1 getRT1() {
036                return rt1;
037            }
038    
039            /**
040             * @return the small pieces of Route information in a list of RT6s
041             */
042            public Collection<TigerRT6> getRT6s() {
043                return Collections.unmodifiableCollection(rt6s);
044            }
045    
046            /**
047             * Get the set of addresses on the left
048             */
049            public String getLeftAddresses() {
050                cacheLeftAddresses();
051                return leftSet.unparse();
052            }
053    
054            /**
055             * Get the set of addresses on the right
056             */
057            public String getRightAddresses() {
058                cacheRightAddresses();
059                return rightSet.unparse();
060            }
061            
062            //store the addresses on the right, if they havn't yet been stored
063            private void cacheRightAddresses() {
064                if (rightSet == null) {
065                    rightSet = getRT1().getRightRange().getSet();
066                    for (TigerRT6 rt6 : getRT6s()) {
067                        rightSet = rightSet.union(rt6.getRightRange().getSet());
068                    }
069                }
070            }
071    
072            //store the addresses on the left, if they havn't yet been stored
073            private void cacheLeftAddresses() {
074                if (leftSet == null) {
075                    leftSet = getRT1().getLeftRange().getSet();
076                    for (TigerRT6 rt6 : getRT6s()) {
077                        leftSet = leftSet.union(rt6.getLeftRange().getSet());
078                    }
079                }
080            }
081            
082            /**
083             * @return true iff the street numbers on the sides of this
084             * chain contain no common elements.
085             */
086            public boolean sidesDisjoint() {
087                cacheLeftAddresses();
088                cacheRightAddresses();
089                return leftSet.isDisjoint(rightSet);
090            }
091        }
092    
093        /**
094         * @effects  Returns an Iterator[GeoChain] over the GeoChains currently
095         *           stored in this
096         * @requires this is not modified while the returned iterator is
097         *          in use
098         */
099        public Iterator<GeoChain> geoChains() {
100            return new ImmIterator<GeoChain>() {
101                    Iterator<Integer> tlids = type1.keySet().iterator();
102                    public boolean hasNext() {
103                        return tlids.hasNext();
104                    }
105                    public GeoChain next() {
106                        Integer tlid = tlids.next();
107                        TigerRT1 rt1 = type1.get(tlid);
108    
109                        Collection<TigerRT6> rt6s = type6.get(tlid);
110                        rt6s = Collections.unmodifiableCollection(rt6s);
111    
112                        return new GeoChain(rt1, rt6s);
113                    }
114                };
115        }
116    
117        public Iterator<GeoChain> geoChains(File zf) throws IOException { 
118            return geoChains(new ZipFile(zf));
119        }
120        public Iterator<GeoChain> geoChains(String zf) throws IOException { 
121            return geoChains(new ZipFile(zf));
122        }
123        private Iterator<GeoChain> geoChains(ZipFile zf) {
124            // prep by reading type6 records first
125            try {
126                Enumeration<? extends ZipEntry> entries = zf.entries();
127                while(entries.hasMoreElements()) {
128                    ZipEntry entry = entries.nextElement();
129                    if (entry.getName().toLowerCase().endsWith("rt6")) {
130                        this.readRecords(zf.getInputStream(entry));
131                        break;
132                    }
133                }
134            
135                entries = zf.entries();
136                ZipEntry mainEntry = null;
137                while(entries.hasMoreElements()) {
138                    ZipEntry entry = entries.nextElement();
139                    if (entry.getName().toLowerCase().endsWith("rt1")) {
140                        mainEntry = entry;
141                        break;
142                    }
143                }
144                if (mainEntry == null) {
145                    return Collections.<GeoChain>emptyList().iterator();
146                }
147                final LineNumberReader lnr = 
148                    new LineNumberReader
149                    (new InputStreamReader(zf.getInputStream(mainEntry))); 
150            
151                // now return the nifty read-and-throw-away GeoChain iterator 
152                return new ImmIterator<GeoChain>() {
153                        TigerRT1 rt1 = null;
154                        private void advance() {
155                            while (rt1 == null) {
156                                String line;
157                                try {
158                                    line = lnr.readLine();
159                                } catch (IOException ioe) {
160                                    line = null;
161                                }
162                                if (line == null) {
163                                    return;
164                                }
165                                try {
166                                    rt1 = new TigerRT1(line);
167                                    if (rt1.getCfc().charAt(0) != 'a' &&
168                                        rt1.getCfc().charAt(0) != 'A') {
169                                        trashTLIDS.add(new Integer(rt1.getTLID()));
170                                        type6.remove(new Integer(rt1.getTLID()));
171                                        // System.out.println("1: ("+type1.size()+") Skipping "+rt1);
172                                        rt1 = null;
173                                    }
174                                } catch (BadRecordException bre) {
175                                    // System.out.println("bad record: " + bre.getMessage());
176                                    rt1 = null;
177                                }
178                            }
179                        }
180                        public boolean hasNext() {
181                            advance(); 
182                            return (rt1 != null);
183                        }
184                        public GeoChain next() {
185                            GeoChain gc = 
186                                new GeoChain(rt1,type6.get(new Integer(rt1.getTLID())));
187                            rt1 = null;
188                            advance();
189                            return gc;
190                        }
191                    };
192            } catch (IOException ioe) {
193                throw new RuntimeException(ioe.getMessage());
194            }
195        }
196    
197        public DatabaseReader() {
198    
199        }
200    
201        // checks internal state of this to increase confidence that the
202        // data set isn't screwy 
203        public void checkMappingInv() {
204            checkOneMappingInv(type6);
205        }
206    
207        // checks that every key in typeT has an RT1 in type1
208        private void checkOneMappingInv(Map<Integer,?> typeT) {
209            for (Iterator<Integer> tki = typeT.keySet().iterator();tki.hasNext();) {
210                Integer tlid = (Integer) tki.next();
211                
212                assert type1.containsKey(tlid) : "No record found for tlid: " + tlid +
213                    " with records: +" + typeT.get(tlid);
214            }
215        }
216    
217        public static void main(String[] args) {
218            DatabaseReader db = new DatabaseReader();
219    
220            // each arg is a ZIP file
221            for (int i=0; i<args.length; i++) {
222                String zfstr = args[i];
223                try {
224                    db.readZipFile(zfstr);
225                } catch (IOException e) {
226                    System.out.println("IOEXCEPTION?");
227            
228                } catch (OutOfMemoryError e) {
229            
230                    System.out.println("OUT OF MEMORY");
231    
232                    System.out.println("1 "+db.type1.size());
233                    System.out.println("6 "+db.type6.size());
234            
235                    e.printStackTrace();
236                    System.exit(-1);
237                }
238            }
239            db.checkMappingInv();
240        }
241    
242        public void readZipFile(File zf) throws IOException {
243            readZipFile(new ZipFile(zf));
244        }
245    
246        public void readZipFile(String zstr) throws IOException {
247            readZipFile(new ZipFile(zstr));
248            if (INFO) 
249                System.out.println("finished with "+zstr);
250        }
251        
252        private void readZipFile(ZipFile zf) throws IOException {
253            Enumeration<? extends ZipEntry> entries = zf.entries();
254            // build the type6 map for 
255            while(entries.hasMoreElements()) {
256                ZipEntry entry = entries.nextElement();
257                this.readRecords(zf.getInputStream(entry));
258            }
259            zf.close();
260        }
261    
262        public void readRecords(InputStream is) throws IOException {
263            LineNumberReader lnr = new LineNumberReader(new InputStreamReader(is));
264            for (String line=lnr.readLine(); line!=null; line=lnr.readLine() ) {
265                // System.out.print("*");
266                try {
267                    switch(line.charAt(0)) {
268                    case '1': 
269                        TigerRT1 rt1 = new TigerRT1(line);
270                
271                        if (rt1.getCfc().charAt(0) != 'a' && 
272                            rt1.getCfc().charAt(0) != 'A') {
273                            trashTLIDS.add(new Integer(rt1.getTLID()));
274                            type6.remove(new Integer(rt1.getTLID()));
275                         // Skip record
276                        } else {
277                            if (type1.containsKey(new Integer(rt1.getTLID()))) 
278                                throw new RuntimeException("1: SOMETHING'S WRONG");
279                            //Add record
280                            type1.put(new Integer(rt1.getTLID()), rt1);
281                        }
282                        break;
283                    case '6': 
284                        TigerRT6 rt6 = new TigerRT6(line);
285                        if (!trashTLIDS.contains(new Integer(rt6.getTLID()))) {
286                            //Add record
287                            type6.get(new Integer(rt6.getTLID())).add(rt6);               
288                        } else {
289                            // Skip record
290                        }
291                        break; 
292                    default:
293                            // Skip record
294                    }
295                } catch (BadRecordException e) {
296                    // Skip record
297                }
298            }
299        }
300    }