Coverage for /usr/local/lib/python3.11/dist-packages/pyrocko/dataset/geonames.py: 91%

152 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2024-01-15 12:05 +0000

1# http://pyrocko.org - GPLv3 

2# 

3# The Pyrocko Developers, 21st Century 

4# ---|P------/S----------~Lg---------- 

5 

6''' 

7Access to the cities and population data of 

8`GeoNames <https://www.geonames.org/>`_. 

9 

10The GeoNames geographical database covers all countries and contains over 

11eleven million placenames. 

12 

13This module provides quick access to a subset of GeoNames containing the cities 

14with a population size exceeding 1000. 

15''' 

16 

17import logging 

18import os 

19from collections import namedtuple 

20import os.path as op 

21 

22import numpy as num 

23 

24from pyrocko import util, config 

25from pyrocko import orthodrome as od 

26from pyrocko.util import urlopen 

27 

28from itertools import chain 

29 

30 

31logger = logging.getLogger('pyrocko.dataset.geonames') 

32 

33GeoName = namedtuple('GeoName', ''' 

34geonameid 

35name 

36asciiname 

37alt_names 

38lat 

39lon 

40feature_class 

41feature_code 

42country_code 

43alt_country_code 

44admin1_code 

45admin2_code 

46admin3_code 

47admin4_code 

48population 

49elevation 

50dem 

51timezone 

52modification_date'''.split()) 

53 

54GeoName2 = namedtuple('GeoName2', ''' 

55name 

56asciiname 

57lat 

58lon 

59population 

60feature_code 

61'''.split()) 

62 

63 

64Country = namedtuple('Country', ''' 

65ISO 

66name 

67capital 

68population 

69area 

70feature_code 

71lat 

72lon 

73'''.split()) 

74 

75 

76base_url = 'https://mirror.pyrocko.org/download.geonames.org/export/dump/' 

77fallback_url = 'https://download.geonames.org/export/dump/' 

78 

79 

80def download_file(fn, dirpath): 

81 url = base_url + fn 

82 fpath = op.join(dirpath, fn) 

83 logger.info('starting download of %s' % url) 

84 

85 util.ensuredirs(fpath) 

86 try: 

87 f = urlopen(url) 

88 except OSError: 

89 logger.info('File not found in pyrocko mirror, ' 

90 'falling back to original source.') 

91 url = fallback_url + fn 

92 f = urlopen(url) 

93 

94 fpath_tmp = fpath + '.%i.temp' % os.getpid() 

95 g = open(fpath_tmp, 'wb') 

96 while True: 

97 data = f.read(1024) 

98 if not data: 

99 break 

100 g.write(data) 

101 

102 g.close() 

103 f.close() 

104 

105 os.rename(fpath_tmp, fpath) 

106 

107 logger.info('finished download of %s' % url) 

108 

109 

110def positive_region(region): 

111 west, east, south, north = [float(x) for x in region] 

112 

113 assert -180. - 360. <= west < 180. 

114 assert -180. < east <= 180. + 360. 

115 assert -90. <= south < 90. 

116 assert -90. < north <= 90. 

117 

118 if east < west: 

119 east += 360. 

120 

121 if west < -180.: 

122 west += 360. 

123 east += 360. 

124 

125 return (west, east, south, north) 

126 

127 

128def ascii_str(u): 

129 return u.encode('ascii', 'replace') 

130 

131 

132def get_file(zfn, fn): 

133 

134 geonames_dir = config.config().geonames_dir 

135 filepath = op.join(geonames_dir, zfn or fn) 

136 if not os.path.exists(filepath): 

137 download_file(zfn or fn, geonames_dir) 

138 

139 if zfn is not None: 

140 import zipfile 

141 z = zipfile.ZipFile(filepath, 'r') 

142 f = z.open(fn, 'r') 

143 else: 

144 z = None 

145 f = open(filepath, 'rb') 

146 

147 return f, z 

148 

149 

150def load_country_info(zfn, fn, minpop=1000000, minarea=10000): 

151 

152 f, z = get_file(zfn, fn) 

153 

154 countries = {} 

155 for line in f: 

156 t = line.split(b'\t') 

157 start = t[0].decode('utf8')[0] 

158 if start != '#': 

159 population = int(t[7]) 

160 area = float(t[6]) 

161 if minpop <= population and minarea <= area: 

162 iso = t[0].decode('utf8') 

163 name = t[4].decode('utf8') 

164 capital = t[5].decode('utf8') 

165 feature_code = str(t[16].decode('ascii')) 

166 countries[feature_code] = ( 

167 iso, name, capital, population, area, feature_code) 

168 

169 f.close() 

170 if z is not None: 

171 z.close() 

172 

173 return countries 

174 

175 

176def load_country_shapes_json(zfn, fn): 

177 

178 f, z = get_file(zfn, fn) 

179 

180 import json 

181 

182 data = json.load(f) 

183 fts = data["features"] 

184 

185 mid_points_countries = {} 

186 for ft in fts: 

187 geonameid = ft["properties"]["geoNameId"] 

188 coords = ft["geometry"]["coordinates"] 

189 coords_arr = num.vstack( 

190 [num.array(sco) for co in chain(coords) for sco in chain(co)]) 

191 latlon = od.geographic_midpoint(coords_arr[:, 1], coords_arr[:, 0]) 

192 mid_points_countries[geonameid] = latlon 

193 

194 return mid_points_countries 

195 

196 

197def load_all_countries_keep(minpop=0, minarea=0, region=None): 

198 

199 country_infos = load_country_info( 

200 None, "countryInfo.txt", minpop=minpop, minarea=minarea) 

201 mid_points = load_country_shapes_json( 

202 "shapes_simplified_low.json.zip", "shapes_simplified_low.json") 

203 

204 if region: 

205 w, e, s, n = positive_region(region) 

206 

207 for geonameid, infos in country_infos.items(): 

208 try: 

209 lat, lon = mid_points[geonameid] 

210 except KeyError: 

211 lat = lon = None 

212 

213 if not region or ( 

214 (w <= lon <= e or w <= lon + 360. <= e) 

215 and (s <= lat <= n)): 

216 yield Country(*infos, lat, lon) 

217 

218 

219def get_countries_region(minpop=0, minarea=0, region=None): 

220 return list(load_all_countries_keep( 

221 minpop=minpop, minarea=minarea, region=region)) 

222 

223 

224def load_cities(zfn, fn, minpop=1000000, region=None): 

225 

226 f, z = get_file(zfn, fn) 

227 

228 if region: 

229 w, e, s, n = positive_region(region) 

230 

231 for line in f: 

232 t = line.split(b'\t') 

233 pop = int(t[14]) 

234 if minpop <= pop: 

235 lat = float(t[4]) 

236 lon = float(t[5]) 

237 feature_code = str(t[7].decode('ascii')) 

238 if not region or ( 

239 (w <= lon <= e or w <= lon + 360. <= e) 

240 and (s <= lat <= n)): 

241 

242 yield GeoName2( 

243 t[1].decode('utf8'), 

244 str(t[2].decode('utf8').encode('ascii', 'replace') 

245 .decode('ascii')), 

246 lat, lon, pop, feature_code) 

247 

248 f.close() 

249 if z is not None: 

250 z.close() 

251 

252 

253g_cities = {} 

254 

255 

256def load_all_keep(zfn, fn, minpop=1000000, region=None, exclude=()): 

257 if (zfn, fn) not in g_cities: 

258 g_cities[zfn, fn] = list(load_cities(zfn, fn, minpop=0)) 

259 

260 if region: 

261 w, e, s, n = positive_region(region) 

262 return [c for c in g_cities[zfn, fn] 

263 if (minpop <= c.population and 

264 ((w <= c.lon <= e or w <= c.lon + 360. <= e) 

265 and (s <= c.lat <= n)) and 

266 c.feature_code not in exclude)] 

267 

268 else: 

269 return [c for c in g_cities[zfn, fn] if ( 

270 minpop <= c.population and c.feature_code not in exclude)] 

271 

272 

273def get_cities_region(region=None, minpop=0): 

274 return load_all_keep( 

275 'cities1000.zip', 'cities1000.txt', 

276 region=region, 

277 minpop=minpop, 

278 exclude=('PPLX',)) 

279 

280 

281def get_cities_by_name(name): 

282 ''' 

283 Lookup city by name. 

284 

285 The comparison is done case-insensitive. 

286 

287 :param name: 

288 Name of the city to look for. 

289 :type name: 

290 str 

291 

292 :returns: 

293 Zero or more matching city entries. 

294 :rtype: 

295 :py:class:`list` of :py:class:`GeoName2` 

296 ''' 

297 cities = get_cities_region() 

298 candidates = [] 

299 for c in cities: 

300 if c.asciiname.lower() == name.lower(): 

301 candidates.append(c) 

302 

303 return candidates 

304 

305 

306def get_cities(lat, lon, radius, minpop=0): 

307 ''' 

308 Get cities in a given circular area. 

309 

310 :param lat: 

311 Latitude [deg]. 

312 :type lat: 

313 float 

314 

315 :param lon: 

316 Longitude [deg]. 

317 :type lon: 

318 float 

319 

320 :param radius: 

321 Search radius [m]. 

322 :type radius: 

323 float 

324 

325 :param minpop: 

326 Skip entries with population lower than this. 

327 :type minpop: 

328 int 

329 

330 :returns: 

331 Matching city entries. 

332 :rtype: 

333 :py:class:`list` of :py:class:`GeoName2` 

334 ''' 

335 region = od.radius_to_region(lat, lon, radius) 

336 cities = get_cities_region(region, minpop=minpop) 

337 

338 clats = num.array([c.lat for c in cities]) 

339 clons = num.array([c.lon for c in cities]) 

340 

341 dists = od.distance_accurate50m_numpy(lat, lon, clats, clons) 

342 order = num.argsort(dists) 

343 cities_sorted = [cities[i] for i in order if dists[i] < radius] 

344 return cities_sorted