1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6import logging 

7import os 

8from collections import namedtuple 

9import os.path as op 

10 

11import numpy as num 

12 

13from pyrocko import util, config 

14from pyrocko import orthodrome as od 

15from pyrocko.util import urlopen 

16 

17from itertools import chain 

18 

19 

20logger = logging.getLogger('pyrocko.dataset.geonames') 

21 

22GeoName = namedtuple('GeoName', ''' 

23geonameid 

24name 

25asciiname 

26alt_names 

27lat 

28lon 

29feature_class 

30feature_code 

31country_code 

32alt_country_code 

33admin1_code 

34admin2_code 

35admin3_code 

36admin4_code 

37population 

38elevation 

39dem 

40timezone 

41modification_date'''.split()) 

42 

43GeoName2 = namedtuple('GeoName2', ''' 

44name 

45asciiname 

46lat 

47lon 

48population 

49feature_code 

50'''.split()) 

51 

52 

53Country = namedtuple('Country', ''' 

54ISO 

55name 

56capital 

57population 

58area 

59feature_code 

60lat 

61lon 

62'''.split()) 

63 

64 

65base_url = 'https://mirror.pyrocko.org/download.geonames.org/export/dump/' 

66fallback_url = 'https://download.geonames.org/export/dump/' 

67 

68 

69def download_file(fn, dirpath): 

70 url = base_url + fn 

71 fpath = op.join(dirpath, fn) 

72 logger.info('starting download of %s' % url) 

73 

74 util.ensuredirs(fpath) 

75 try: 

76 f = urlopen(url) 

77 except OSError: 

78 logger.info('File not found in pyrocko mirror, ' 

79 'falling back to original source.') 

80 url = fallback_url + fn 

81 f = urlopen(url) 

82 

83 fpath_tmp = fpath + '.%i.temp' % os.getpid() 

84 g = open(fpath_tmp, 'wb') 

85 while True: 

86 data = f.read(1024) 

87 if not data: 

88 break 

89 g.write(data) 

90 

91 g.close() 

92 f.close() 

93 

94 os.rename(fpath_tmp, fpath) 

95 

96 logger.info('finished download of %s' % url) 

97 

98 

99def positive_region(region): 

100 west, east, south, north = [float(x) for x in region] 

101 

102 assert -180. - 360. <= west < 180. 

103 assert -180. < east <= 180. + 360. 

104 assert -90. <= south < 90. 

105 assert -90. < north <= 90. 

106 

107 if east < west: 

108 east += 360. 

109 

110 if west < -180.: 

111 west += 360. 

112 east += 360. 

113 

114 return (west, east, south, north) 

115 

116 

117def ascii_str(u): 

118 return u.encode('ascii', 'replace') 

119 

120 

121def get_file(zfn, fn): 

122 

123 geonames_dir = config.config().geonames_dir 

124 filepath = op.join(geonames_dir, zfn or fn) 

125 if not os.path.exists(filepath): 

126 download_file(zfn or fn, geonames_dir) 

127 

128 if zfn is not None: 

129 import zipfile 

130 z = zipfile.ZipFile(filepath, 'r') 

131 f = z.open(fn, 'r') 

132 else: 

133 z = None 

134 f = open(filepath, 'rb') 

135 

136 return f, z 

137 

138 

139def load_country_info(zfn, fn, minpop=1000000, minarea=10000): 

140 

141 f, z = get_file(zfn, fn) 

142 

143 countries = {} 

144 for line in f: 

145 t = line.split(b'\t') 

146 start = t[0].decode('utf8')[0] 

147 if start != '#': 

148 population = int(t[7]) 

149 area = float(t[6]) 

150 if minpop <= population and minarea <= area: 

151 iso = t[0].decode('utf8') 

152 name = t[4].decode('utf8') 

153 capital = t[5].decode('utf8') 

154 feature_code = str(t[16].decode('ascii')) 

155 countries[feature_code] = ( 

156 iso, name, capital, population, area, feature_code) 

157 

158 f.close() 

159 if z is not None: 

160 z.close() 

161 

162 return countries 

163 

164 

165def load_country_shapes_json(zfn, fn): 

166 

167 f, z = get_file(zfn, fn) 

168 

169 import json 

170 

171 data = json.load(f) 

172 fts = data["features"] 

173 

174 mid_points_countries = {} 

175 for ft in fts: 

176 geonameid = ft["properties"]["geoNameId"] 

177 coords = ft["geometry"]["coordinates"] 

178 coords_arr = num.vstack( 

179 [num.array(sco) for co in chain(coords) for sco in chain(co)]) 

180 latlon = od.geographic_midpoint(coords_arr[:, 1], coords_arr[:, 0]) 

181 mid_points_countries[geonameid] = latlon 

182 

183 return mid_points_countries 

184 

185 

186def get_countries_region(minpop=0, minarea=0, region=None): 

187 

188 country_infos = load_country_info( 

189 None, "countryInfo.txt", minpop=minpop, minarea=minarea) 

190 mid_points = load_country_shapes_json( 

191 "shapes_simplified_low.json.zip", "shapes_simplified_low.json") 

192 

193 if region: 

194 w, e, s, n = positive_region(region) 

195 

196 for geonameid, infos in country_infos.items(): 

197 try: 

198 lat, lon = mid_points[geonameid] 

199 except KeyError: 

200 lat = lon = None 

201 

202 if not region or ( 

203 (w <= lon <= e or w <= lon + 360. <= e) 

204 and (s <= lat <= n)): 

205 yield Country(*infos, lat, lon) 

206 

207 

208def load_cities(zfn, fn, minpop=1000000, region=None): 

209 

210 f, z = get_file(zfn, fn) 

211 

212 if region: 

213 w, e, s, n = positive_region(region) 

214 

215 for line in f: 

216 t = line.split(b'\t') 

217 pop = int(t[14]) 

218 if minpop <= pop: 

219 lat = float(t[4]) 

220 lon = float(t[5]) 

221 feature_code = str(t[7].decode('ascii')) 

222 if not region or ( 

223 (w <= lon <= e or w <= lon + 360. <= e) 

224 and (s <= lat <= n)): 

225 

226 yield GeoName2( 

227 t[1].decode('utf8'), 

228 str(t[2].decode('utf8').encode('ascii', 'replace') 

229 .decode('ascii')), 

230 lat, lon, pop, feature_code) 

231 

232 f.close() 

233 if z is not None: 

234 z.close() 

235 

236 

237g_cities = {} 

238 

239 

240def load_all_keep(zfn, fn, minpop=1000000, region=None, exclude=()): 

241 if (zfn, fn) not in g_cities: 

242 g_cities[zfn, fn] = list(load_cities(zfn, fn, minpop=0)) 

243 

244 if region: 

245 w, e, s, n = positive_region(region) 

246 return [c for c in g_cities[zfn, fn] 

247 if (minpop <= c.population and 

248 ((w <= c.lon <= e or w <= c.lon + 360. <= e) 

249 and (s <= c.lat <= n)) and 

250 c.feature_code not in exclude)] 

251 

252 else: 

253 return [c for c in g_cities[zfn, fn] if ( 

254 minpop <= c.population and c.feature_code not in exclude)] 

255 

256 

257def get_cities_region(region=None, minpop=0): 

258 return load_all_keep( 

259 'cities1000.zip', 'cities1000.txt', 

260 region=region, 

261 minpop=minpop, 

262 exclude=('PPLX',)) 

263 

264 

265def get_cities_by_name(name): 

266 cities = get_cities_region() 

267 candidates = [] 

268 for c in cities: 

269 if c.asciiname.lower() == name.lower(): 

270 candidates.append(c) 

271 

272 return candidates 

273 

274 

275def get_cities(lat, lon, radius, minpop=0): 

276 region = od.radius_to_region(lat, lon, radius) 

277 cities = get_cities_region(region, minpop=minpop) 

278 

279 clats = num.array([c.lat for c in cities]) 

280 clons = num.array([c.lon for c in cities]) 

281 

282 dists = od.distance_accurate50m_numpy(lat, lon, clats, clons) 

283 order = num.argsort(dists) 

284 cities_sorted = [cities[i] for i in order if dists[i] < radius] 

285 return cities_sorted