Coverage for /usr/local/lib/python3.11/dist-packages/pyrocko/dataset/geonames.py: 91%
152 statements
« prev ^ index » next coverage.py v6.5.0, created at 2024-01-15 12:05 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2024-01-15 12:05 +0000
1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6'''
7Access to the cities and population data of
8`GeoNames <https://www.geonames.org/>`_.
10The GeoNames geographical database covers all countries and contains over
11eleven million placenames.
13This module provides quick access to a subset of GeoNames containing the cities
14with a population size exceeding 1000.
15'''
17import logging
18import os
19from collections import namedtuple
20import os.path as op
22import numpy as num
24from pyrocko import util, config
25from pyrocko import orthodrome as od
26from pyrocko.util import urlopen
28from itertools import chain
31logger = logging.getLogger('pyrocko.dataset.geonames')
33GeoName = namedtuple('GeoName', '''
34geonameid
35name
36asciiname
37alt_names
38lat
39lon
40feature_class
41feature_code
42country_code
43alt_country_code
44admin1_code
45admin2_code
46admin3_code
47admin4_code
48population
49elevation
50dem
51timezone
52modification_date'''.split())
54GeoName2 = namedtuple('GeoName2', '''
55name
56asciiname
57lat
58lon
59population
60feature_code
61'''.split())
64Country = namedtuple('Country', '''
65ISO
66name
67capital
68population
69area
70feature_code
71lat
72lon
73'''.split())
76base_url = 'https://mirror.pyrocko.org/download.geonames.org/export/dump/'
77fallback_url = 'https://download.geonames.org/export/dump/'
80def download_file(fn, dirpath):
81 url = base_url + fn
82 fpath = op.join(dirpath, fn)
83 logger.info('starting download of %s' % url)
85 util.ensuredirs(fpath)
86 try:
87 f = urlopen(url)
88 except OSError:
89 logger.info('File not found in pyrocko mirror, '
90 'falling back to original source.')
91 url = fallback_url + fn
92 f = urlopen(url)
94 fpath_tmp = fpath + '.%i.temp' % os.getpid()
95 g = open(fpath_tmp, 'wb')
96 while True:
97 data = f.read(1024)
98 if not data:
99 break
100 g.write(data)
102 g.close()
103 f.close()
105 os.rename(fpath_tmp, fpath)
107 logger.info('finished download of %s' % url)
110def positive_region(region):
111 west, east, south, north = [float(x) for x in region]
113 assert -180. - 360. <= west < 180.
114 assert -180. < east <= 180. + 360.
115 assert -90. <= south < 90.
116 assert -90. < north <= 90.
118 if east < west:
119 east += 360.
121 if west < -180.:
122 west += 360.
123 east += 360.
125 return (west, east, south, north)
128def ascii_str(u):
129 return u.encode('ascii', 'replace')
132def get_file(zfn, fn):
134 geonames_dir = config.config().geonames_dir
135 filepath = op.join(geonames_dir, zfn or fn)
136 if not os.path.exists(filepath):
137 download_file(zfn or fn, geonames_dir)
139 if zfn is not None:
140 import zipfile
141 z = zipfile.ZipFile(filepath, 'r')
142 f = z.open(fn, 'r')
143 else:
144 z = None
145 f = open(filepath, 'rb')
147 return f, z
150def load_country_info(zfn, fn, minpop=1000000, minarea=10000):
152 f, z = get_file(zfn, fn)
154 countries = {}
155 for line in f:
156 t = line.split(b'\t')
157 start = t[0].decode('utf8')[0]
158 if start != '#':
159 population = int(t[7])
160 area = float(t[6])
161 if minpop <= population and minarea <= area:
162 iso = t[0].decode('utf8')
163 name = t[4].decode('utf8')
164 capital = t[5].decode('utf8')
165 feature_code = str(t[16].decode('ascii'))
166 countries[feature_code] = (
167 iso, name, capital, population, area, feature_code)
169 f.close()
170 if z is not None:
171 z.close()
173 return countries
176def load_country_shapes_json(zfn, fn):
178 f, z = get_file(zfn, fn)
180 import json
182 data = json.load(f)
183 fts = data["features"]
185 mid_points_countries = {}
186 for ft in fts:
187 geonameid = ft["properties"]["geoNameId"]
188 coords = ft["geometry"]["coordinates"]
189 coords_arr = num.vstack(
190 [num.array(sco) for co in chain(coords) for sco in chain(co)])
191 latlon = od.geographic_midpoint(coords_arr[:, 1], coords_arr[:, 0])
192 mid_points_countries[geonameid] = latlon
194 return mid_points_countries
197def load_all_countries_keep(minpop=0, minarea=0, region=None):
199 country_infos = load_country_info(
200 None, "countryInfo.txt", minpop=minpop, minarea=minarea)
201 mid_points = load_country_shapes_json(
202 "shapes_simplified_low.json.zip", "shapes_simplified_low.json")
204 if region:
205 w, e, s, n = positive_region(region)
207 for geonameid, infos in country_infos.items():
208 try:
209 lat, lon = mid_points[geonameid]
210 except KeyError:
211 lat = lon = None
213 if not region or (
214 (w <= lon <= e or w <= lon + 360. <= e)
215 and (s <= lat <= n)):
216 yield Country(*infos, lat, lon)
219def get_countries_region(minpop=0, minarea=0, region=None):
220 return list(load_all_countries_keep(
221 minpop=minpop, minarea=minarea, region=region))
224def load_cities(zfn, fn, minpop=1000000, region=None):
226 f, z = get_file(zfn, fn)
228 if region:
229 w, e, s, n = positive_region(region)
231 for line in f:
232 t = line.split(b'\t')
233 pop = int(t[14])
234 if minpop <= pop:
235 lat = float(t[4])
236 lon = float(t[5])
237 feature_code = str(t[7].decode('ascii'))
238 if not region or (
239 (w <= lon <= e or w <= lon + 360. <= e)
240 and (s <= lat <= n)):
242 yield GeoName2(
243 t[1].decode('utf8'),
244 str(t[2].decode('utf8').encode('ascii', 'replace')
245 .decode('ascii')),
246 lat, lon, pop, feature_code)
248 f.close()
249 if z is not None:
250 z.close()
253g_cities = {}
256def load_all_keep(zfn, fn, minpop=1000000, region=None, exclude=()):
257 if (zfn, fn) not in g_cities:
258 g_cities[zfn, fn] = list(load_cities(zfn, fn, minpop=0))
260 if region:
261 w, e, s, n = positive_region(region)
262 return [c for c in g_cities[zfn, fn]
263 if (minpop <= c.population and
264 ((w <= c.lon <= e or w <= c.lon + 360. <= e)
265 and (s <= c.lat <= n)) and
266 c.feature_code not in exclude)]
268 else:
269 return [c for c in g_cities[zfn, fn] if (
270 minpop <= c.population and c.feature_code not in exclude)]
273def get_cities_region(region=None, minpop=0):
274 return load_all_keep(
275 'cities1000.zip', 'cities1000.txt',
276 region=region,
277 minpop=minpop,
278 exclude=('PPLX',))
281def get_cities_by_name(name):
282 '''
283 Lookup city by name.
285 The comparison is done case-insensitive.
287 :param name:
288 Name of the city to look for.
289 :type name:
290 str
292 :returns:
293 Zero or more matching city entries.
294 :rtype:
295 :py:class:`list` of :py:class:`GeoName2`
296 '''
297 cities = get_cities_region()
298 candidates = []
299 for c in cities:
300 if c.asciiname.lower() == name.lower():
301 candidates.append(c)
303 return candidates
306def get_cities(lat, lon, radius, minpop=0):
307 '''
308 Get cities in a given circular area.
310 :param lat:
311 Latitude [deg].
312 :type lat:
313 float
315 :param lon:
316 Longitude [deg].
317 :type lon:
318 float
320 :param radius:
321 Search radius [m].
322 :type radius:
323 float
325 :param minpop:
326 Skip entries with population lower than this.
327 :type minpop:
328 int
330 :returns:
331 Matching city entries.
332 :rtype:
333 :py:class:`list` of :py:class:`GeoName2`
334 '''
335 region = od.radius_to_region(lat, lon, radius)
336 cities = get_cities_region(region, minpop=minpop)
338 clats = num.array([c.lat for c in cities])
339 clons = num.array([c.lon for c in cities])
341 dists = od.distance_accurate50m_numpy(lat, lon, clats, clons)
342 order = num.argsort(dists)
343 cities_sorted = [cities[i] for i in order if dists[i] < radius]
344 return cities_sorted