1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6import logging
7import os
8from collections import namedtuple
9import os.path as op
11import numpy as num
13from pyrocko import util, config
14from pyrocko import orthodrome as od
15from pyrocko.util import urlopen
17from itertools import chain
20logger = logging.getLogger('pyrocko.dataset.geonames')
22GeoName = namedtuple('GeoName', '''
23geonameid
24name
25asciiname
26alt_names
27lat
28lon
29feature_class
30feature_code
31country_code
32alt_country_code
33admin1_code
34admin2_code
35admin3_code
36admin4_code
37population
38elevation
39dem
40timezone
41modification_date'''.split())
43GeoName2 = namedtuple('GeoName2', '''
44name
45asciiname
46lat
47lon
48population
49feature_code
50'''.split())
53Country = namedtuple('Country', '''
54ISO
55name
56capital
57population
58area
59feature_code
60lat
61lon
62'''.split())
65base_url = 'https://mirror.pyrocko.org/download.geonames.org/export/dump/'
66fallback_url = 'https://download.geonames.org/export/dump/'
69def download_file(fn, dirpath):
70 url = base_url + fn
71 fpath = op.join(dirpath, fn)
72 logger.info('starting download of %s' % url)
74 util.ensuredirs(fpath)
75 try:
76 f = urlopen(url)
77 except OSError:
78 logger.info('File not found in pyrocko mirror, '
79 'falling back to original source.')
80 url = fallback_url + fn
81 f = urlopen(url)
83 fpath_tmp = fpath + '.%i.temp' % os.getpid()
84 g = open(fpath_tmp, 'wb')
85 while True:
86 data = f.read(1024)
87 if not data:
88 break
89 g.write(data)
91 g.close()
92 f.close()
94 os.rename(fpath_tmp, fpath)
96 logger.info('finished download of %s' % url)
99def positive_region(region):
100 west, east, south, north = [float(x) for x in region]
102 assert -180. - 360. <= west < 180.
103 assert -180. < east <= 180. + 360.
104 assert -90. <= south < 90.
105 assert -90. < north <= 90.
107 if east < west:
108 east += 360.
110 if west < -180.:
111 west += 360.
112 east += 360.
114 return (west, east, south, north)
117def ascii_str(u):
118 return u.encode('ascii', 'replace')
121def get_file(zfn, fn):
123 geonames_dir = config.config().geonames_dir
124 filepath = op.join(geonames_dir, zfn or fn)
125 if not os.path.exists(filepath):
126 download_file(zfn or fn, geonames_dir)
128 if zfn is not None:
129 import zipfile
130 z = zipfile.ZipFile(filepath, 'r')
131 f = z.open(fn, 'r')
132 else:
133 z = None
134 f = open(filepath, 'rb')
136 return f, z
139def load_country_info(zfn, fn, minpop=1000000, minarea=10000):
141 f, z = get_file(zfn, fn)
143 countries = {}
144 for line in f:
145 t = line.split(b'\t')
146 start = t[0].decode('utf8')[0]
147 if start != '#':
148 population = int(t[7])
149 area = float(t[6])
150 if minpop <= population and minarea <= area:
151 iso = t[0].decode('utf8')
152 name = t[4].decode('utf8')
153 capital = t[5].decode('utf8')
154 feature_code = str(t[16].decode('ascii'))
155 countries[feature_code] = (
156 iso, name, capital, population, area, feature_code)
158 f.close()
159 if z is not None:
160 z.close()
162 return countries
165def load_country_shapes_json(zfn, fn):
167 f, z = get_file(zfn, fn)
169 import json
171 data = json.load(f)
172 fts = data["features"]
174 mid_points_countries = {}
175 for ft in fts:
176 geonameid = ft["properties"]["geoNameId"]
177 coords = ft["geometry"]["coordinates"]
178 coords_arr = num.vstack(
179 [num.array(sco) for co in chain(coords) for sco in chain(co)])
180 latlon = od.geographic_midpoint(coords_arr[:, 1], coords_arr[:, 0])
181 mid_points_countries[geonameid] = latlon
183 return mid_points_countries
186def get_countries_region(minpop=0, minarea=0, region=None):
188 country_infos = load_country_info(
189 None, "countryInfo.txt", minpop=minpop, minarea=minarea)
190 mid_points = load_country_shapes_json(
191 "shapes_simplified_low.json.zip", "shapes_simplified_low.json")
193 if region:
194 w, e, s, n = positive_region(region)
196 for geonameid, infos in country_infos.items():
197 try:
198 lat, lon = mid_points[geonameid]
199 except KeyError:
200 lat = lon = None
202 if not region or (
203 (w <= lon <= e or w <= lon + 360. <= e)
204 and (s <= lat <= n)):
205 yield Country(*infos, lat, lon)
208def load_cities(zfn, fn, minpop=1000000, region=None):
210 f, z = get_file(zfn, fn)
212 if region:
213 w, e, s, n = positive_region(region)
215 for line in f:
216 t = line.split(b'\t')
217 pop = int(t[14])
218 if minpop <= pop:
219 lat = float(t[4])
220 lon = float(t[5])
221 feature_code = str(t[7].decode('ascii'))
222 if not region or (
223 (w <= lon <= e or w <= lon + 360. <= e)
224 and (s <= lat <= n)):
226 yield GeoName2(
227 t[1].decode('utf8'),
228 str(t[2].decode('utf8').encode('ascii', 'replace')
229 .decode('ascii')),
230 lat, lon, pop, feature_code)
232 f.close()
233 if z is not None:
234 z.close()
237g_cities = {}
240def load_all_keep(zfn, fn, minpop=1000000, region=None, exclude=()):
241 if (zfn, fn) not in g_cities:
242 g_cities[zfn, fn] = list(load_cities(zfn, fn, minpop=0))
244 if region:
245 w, e, s, n = positive_region(region)
246 return [c for c in g_cities[zfn, fn]
247 if (minpop <= c.population and
248 ((w <= c.lon <= e or w <= c.lon + 360. <= e)
249 and (s <= c.lat <= n)) and
250 c.feature_code not in exclude)]
252 else:
253 return [c for c in g_cities[zfn, fn] if (
254 minpop <= c.population and c.feature_code not in exclude)]
257def get_cities_region(region=None, minpop=0):
258 return load_all_keep(
259 'cities1000.zip', 'cities1000.txt',
260 region=region,
261 minpop=minpop,
262 exclude=('PPLX',))
265def get_cities_by_name(name):
266 cities = get_cities_region()
267 candidates = []
268 for c in cities:
269 if c.asciiname.lower() == name.lower():
270 candidates.append(c)
272 return candidates
275def get_cities(lat, lon, radius, minpop=0):
276 region = od.radius_to_region(lat, lon, radius)
277 cities = get_cities_region(region, minpop=minpop)
279 clats = num.array([c.lat for c in cities])
280 clons = num.array([c.lon for c in cities])
282 dists = od.distance_accurate50m_numpy(lat, lon, clats, clons)
283 order = num.argsort(dists)
284 cities_sorted = [cities[i] for i in order if dists[i] < radius]
285 return cities_sorted