package ps6;

import java.io.*;
import java.util.*;

import ps2.GeoPoint;

import ps4.*;
import ps6.tigerdb.*;
import ps6.tigerdb.DatabaseReader.GeoChain;


/**
 * Functions as an iterator over the set of StreetSegments represented
 * by the contents of the .zip files in a directory.
 *
 * <p>Files are opened one at a time, only when the chains of the
 * previous file have been exhausted. Each chain is converted into a
 * StreetSegment; chains that yield no segment, and segments rejected
 * by the filter, are skipped.
 **/
public class StreetSegIterator
    extends ImmIterator<StreetSegment>
{
    /** When true, zero-length street segments will be removed.
     *  Default is true. */
    private boolean filter_zero_length = true;

    /** When true, filtering will be performed as determined by the
     *  killfile.  Default is true. */
    private boolean filter_killfile = true;

    /** When true, progress messages are sent to System.err as
     *  segments are read.  Default is false. */
    private boolean mention_progress = false;

    /** When true, warning messages are sent to System.err if segments
     *  are filtered.  Default is false. */
    private boolean mention_filter = false;

    /** When true, warning messages are sent to System.err if numbers
     *  are not disjoint.  Default is false. */
    private boolean mention_non_disjoint = false;

    /** Shared instance used for every empty address string. */
    private static final StreetNumberSet EMPTY_SNS = new StreetNumberSet("");

    /** filter consulted for each candidate segment while filter_killfile is true */
    private final StreetSegmentFilter filter;

    /** .zip files to be read */
    private final Iterator<File> files;

    /** chains from the current file; null until the first file is opened */
    private Iterator<GeoChain> chains;

    /** true once the first segment has been looked up */
    private boolean initialized = false;

    /** next segment to be returned or null if there are no more */
    private StreetSegment next;

    /** number of segments returned so far */
    private long total = 0;

    /**
     * @requires files != null &&
     *           elements of files are of type java.io.File &&
     *           elements of files are .zip files
     *
     * @effects creates a new iterator that produces all segments
     *          from the given files that are accepted by the filter
     *
     * @throws NullPointerException if files is null
     **/
    public StreetSegIterator(Iterator<File> files, StreetSegmentFilter filter) {
        // Fail at construction rather than on the first hasNext() call.
        if (files == null) {
            throw new NullPointerException("files");
        }
        this.files = files;
        this.filter = filter;
    }

    /**
     * Reports whether another segment is available. The first call
     * performs the initial lookup, which may open and read a file.
     *
     * @return true iff a further call to next() will return a segment
     */
    public boolean hasNext() {
        if (!initialized) {
            initialized = true;
            next = nextSegment();
        }

        return (next != null);
    }

    /**
     * Returns the pending segment and looks up the one after it.
     *
     * @return the next accepted segment
     * @throws NoSuchElementException if no segments remain
     */
    public StreetSegment next() {
        // standard iterator behavior
        if (!hasNext()) {
            throw new NoSuchElementException();
        }

        // grab the segment to be returned, then advance to the next one
        StreetSegment result = next;
        next = nextSegment();

        // instrument reading process, because it's a bit slow
        total++;
        if (mention_progress && ((total % 10000) == 0)) {
            System.err.println("Returning "+total+"th StreetSegment");
            System.err.flush();
        }

        return result;
    }

    /**
     * @return the next segment from the files (post-filtering), or null if none exist
     */
    private StreetSegment nextSegment() {
        // Loop instead of recursing: each rejected chain would otherwise
        // add a stack frame, and a long run of rejections could overflow
        // the stack.
        for (GeoChain chain = nextChain(); chain != null; chain = nextChain()) {
            StreetSegment candidate = makeSegment(chain);

            // no segment could be made from this chain; move on
            if (candidate == null) {
                continue;
            }

            // segment isn't accepted by the filter; move on
            if (filter_killfile && !filter.apply(candidate)) {
                if (mention_filter) {
                    System.err.println("Filtered out: " + candidate);
                }
                continue;
            }

            // otherwise, it was a good segment
            return candidate;
        }

        // ran out of chains
        return null;
    }


    /**
     * Retrieve the next GeoChain contained in the file(s)
     *
     * @return the next GeoChain contained in the file(s),
     *         or null if there are no more files left
     * @throws RuntimeException wrapping the IOException raised while
     *         opening a file
     */
    private GeoChain nextChain() {
        // Keep opening files until one supplies a chain or none are left.
        while (chains == null || !chains.hasNext()) {
            if (!files.hasNext()) {
                return null;
            }

            File fileToRead = files.next();
            if (mention_progress) {
                System.err.println("Reading from " + fileToRead);
                System.err.flush();
            }

            try {
                DatabaseReader dr = new DatabaseReader();
                chains = dr.geoChains(fileToRead);
            } catch (IOException ioe) {
                // Attach the cause so the original stack trace is not lost.
                throw new RuntimeException("IOException: " + ioe.getMessage(), ioe);
            }
        }

        return chains.next();
    }

    /**
     * Create a Street Segment from a Geo Chain
     * @param chain the chain to convert
     * @return a segment created from the chain,
     *         or null if the segment is not desirable
     */
    private StreetSegment makeSegment(GeoChain chain) {
        GeoPoint p1 = chain.getRT1().getStart();
        GeoPoint p2 = chain.getRT1().getEnd();
        String name = chain.getRT1().getFeature().fullName();

        if (filter_zero_length && p1.equals(p2)) {
            if (mention_filter) {
                System.err.println("Filtered out zero-length segment named " + name);
                System.err.flush();
            }
            return null;
        }

        String lftAddr = chain.getLeftAddresses();
        String rgtAddr = chain.getRightAddresses();
        if (!chain.sidesDisjoint()) {
            if (mention_non_disjoint) {
                System.err.println("Numbers on " + name + " were not disjoint, so were changed to empty sets");
                System.err.flush();
            }
            // overlapping sides: discard the numbers on both sides
            lftAddr = "";
            rgtAddr = "";
        }

        StreetNumberSet leftSns = makeSNS(lftAddr);
        StreetNumberSet rightSns = makeSNS(rgtAddr);

        String leftZip = chain.getRT1().getLeftZip();
        String rightZip = chain.getRT1().getRightZip();

        StreetClassification streetClass = getStreetClass(chain);
        boolean incAddr = areAddressesIncreasing(chain);

        return new StreetSegment(p1, p2, name.intern(), leftSns, rightSns,
                                 leftZip, rightZip, streetClass, incAddr);
    }


    /**
     * @return EMPTY_SNS when s is empty, otherwise a new StreetNumberSet
     *         built from s
     */
    private static StreetNumberSet makeSNS(String s)
    {
        if (s.length() == 0) {
            return EMPTY_SNS;
        }
        return new StreetNumberSet(s);
    }

    /**
     * Maps the chain's CFC string to a StreetClassification.
     * Codes beginning with 'A' or 'a' are classified by their second
     * character: '1' and '2' give PRIM_HWY, '3' gives SEC_HWY, '4'
     * gives LOCAL_ROAD.
     *
     * @return the classification, or UNKNOWN for any other code,
     *         including a null code or one shorter than two characters
     */
    private static StreetClassification getStreetClass(GeoChain gc) {
        String cfc = gc.getRT1().getCfc();

        // Both positions 0 and 1 are read below, so guard the length
        // instead of letting charAt throw.
        if (cfc == null || cfc.length() < 2) {
            return StreetClassification.UNKNOWN;
        }

        char family = cfc.charAt(0);
        if (family != 'a' && family != 'A') {
            return StreetClassification.UNKNOWN;
        }

        switch (cfc.charAt(1)) {
        case '1':
        case '2':
            return StreetClassification.PRIM_HWY;
        case '3':
            return StreetClassification.SEC_HWY;
        case '4':
            return StreetClassification.LOCAL_ROAD;
        default:
            return StreetClassification.UNKNOWN;
        }
    }

    /**
     * @return the result of couldBeLowToHigh() on the chain's left
     *         address range
     */
    private static boolean areAddressesIncreasing(GeoChain gc) {
        return gc.getRT1().getLeftRange().couldBeLowToHigh();
    }
}