1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6'''
7Interface to the GSHHG (coastlines, rivers and borders) database.
9The Global Self-consistent Hierarchical High-resolution Geography Database
10(GSHHG) is a collection of polygons representing land, lakes, rivers and
11political borders.
13If the database is not already available, it will be downloaded
14automatically on first use.
16For more information about GSHHG, see
17http://www.soest.hawaii.edu/pwessel/gshhg/.
19.. note::
21 **If you use this dataset, please cite:**
23 Wessel, P., and W. H. F.
24 Smith, A Global Self-consistent, Hierarchical, High-resolution
25 Shoreline Database, J. Geophys. Res., 101, #B4, pp. 8741-8743, 1996.
26'''
28import logging
29import io
30import struct
31import time
32import numpy as num
34from os import path
36from pyrocko import config, orthodrome
37from .util import get_download_callback
40logger = logging.getLogger('pyrocko.dataset.gshhg')
41config = config.config()
43km = 1e3
44micro_deg = 1e-6
47def split_region_0_360(wesn):
48 west, east, south, north = wesn
49 if west < 0.:
50 if east <= 0:
51 return [(west+360., east+360., south, north)]
52 else:
53 return [(west+360., 360., south, north),
54 (0., east, south, north)]
55 else:
56 return [wesn]
59def is_valid_bounding_box(wesn):
60 '''
61 Check if a given bounding box meets the GSHHG conventions.
63 :param wesn: bounding box as (west, east, south, north) in [deg]
64 '''
66 w, e, s, n = wesn
68 return (
69 w <= e
70 and s <= n
71 and -90.0 <= s <= 90.
72 and -90. <= n <= 90.
73 and -180. <= w < 360.
74 and -180. <= e < 360.)
77def is_valid_polygon(points):
78 '''
79 Check if polygon points meet the GSHHG conventions.
81 :param points: Array of (lat, lon) pairs, shape (N, 2).
82 '''
84 lats = points[:, 0]
85 lons = points[:, 1]
87 return (
88 num.all(-90. <= lats)
89 and num.all(lats <= 90.)
90 and num.all(-180. <= lons)
91 and num.all(lons < 360.))
94def points_in_bounding_box(points, wesn, tolerance=0.1):
95 '''
96 Check which points are contained in a given bounding box.
98 :param points: Array of (lat lon) pairs, shape (N, 2) [deg].
99 :param wesn: Region tuple (west, east, south, north) [deg]
100 :param tolerance: increase the size of the test bounding box by
101 *tolerance* [deg] on every side (Some GSHHG polygons have a too tight
102 bounding box).
104 :returns: Bool array of shape (N,).
105 '''
106 points_wrap = points.copy()
107 points_wrap[:, 1] %= 360.
109 mask = num.zeros(points_wrap.shape[0], dtype=bool)
110 for w, e, s, n in split_region_0_360(wesn):
111 mask = num.logical_or(
112 mask,
113 num.logical_and(
114 num.logical_and(
115 w-tolerance <= points_wrap[:, 1],
116 points_wrap[:, 1] <= e+tolerance),
117 num.logical_and(
118 s-tolerance <= points_wrap[:, 0],
119 points_wrap[:, 0] <= n+tolerance)))
121 return mask
124def point_in_bounding_box(point, wesn, tolerance=0.1):
125 '''
126 Check whether point is contained in a given bounding box.
128 :param points: Array of (lat lon) pairs, shape (N, 2) [deg].
129 :param wesn: Region tuple (west, east, south, north) [deg]
130 :param tolerance: increase the size of the test bounding box by
131 *tolerance* [deg] on every side (Some GSHHG polygons have a too tight
132 bounding box).
134 :rtype: bool
135 '''
137 lat, lon = point
138 lon %= 360.
139 for w, e, s, n in split_region_0_360(wesn):
140 if (w-tolerance <= lon
141 and lon <= e+tolerance
142 and s-tolerance <= lat
143 and lat <= n+tolerance):
145 return True
147 return False
150def bounding_boxes_overlap(wesn1, wesn2):
151 '''
152 Check whether two bounding boxes intersect.
154 :param wesn1, wesn2: Region tuples (west, east, south, north) [deg]
156 :rtype: bool
157 '''
158 for w1, e1, s1, n1 in split_region_0_360(wesn1):
159 for w2, e2, s2, n2 in split_region_0_360(wesn2):
160 if w2 <= e1 and w1 <= e2 and s2 <= n1 and s1 <= n2:
161 return True
163 return False
166def is_polygon_in_bounding_box(points, wesn, tolerance=0.1):
167 return num.all(points_in_bounding_box(points, wesn, tolerance=tolerance))
170def bounding_box_covering_points(points):
171 lats = points[:, 0]
172 lat_min, lat_max = num.min(lats), num.max(lats)
174 lons = points[:, 1]
175 lons = lons % 360.
176 lon_min, lon_max = num.min(lons), num.max(lons)
177 if lon_max - lon_min < 180.:
178 return lon_min, lon_max, lat_min, lat_max
180 lons = (lons - 180.) % 360. - 180.
181 lon_min, lon_max = num.min(lons), num.max(lons)
182 if lon_max - lon_min < 180.:
183 return lon_min, lon_max, lat_min, lat_max
185 return (-180., 180., lat_min, lat_max)
188class Polygon(object):
189 '''
190 Representation of a GSHHG polygon.
191 '''
193 RIVER_NOT_SET = 0
195 LEVELS = ['LAND', 'LAKE', 'ISLAND_IN_LAKE', 'POND_IN_ISLAND_IN_LAKE',
196 'ANTARCTIC_ICE_FRONT', 'ANTARCTIC_GROUNDING_LINE']
198 SOURCE = ['CIA_WDBII', 'WVS', 'AC']
200 def __init__(self, gshhg_file, offset, *attr):
201 '''
202 Initialise a GSHHG polygon
204 :param gshhg_file: GSHHG binary file
205 :type gshhg_file: str
206 :param offset: This polygon's offset in binary file
207 :type offset: int
208 :param attr: Polygon attributes
209 ``(pid, npoints, _flag, west, east, south, north,
210 area, area_full, container, ancestor)``.
211 See :file:`gshhg.h` for details.
212 :type attr: tuple
213 '''
214 (self.pid, self.npoints, self._flag,
215 self.west, self.east, self.south, self.north,
216 self.area, self.area_full, self.container, self.ancestor) = attr
218 self.west *= micro_deg
219 self.east *= micro_deg
220 self.south *= micro_deg
221 self.north *= micro_deg
223 self.level_no = (self._flag & 255)
224 self.level = self.LEVELS[self.level_no - 1]
225 self.version = (self._flag >> 8) & 255
227 cross = (self._flag >> 16) & 3
228 self.greenwhich_crossed = True if cross == 1 or cross == 3 else False
229 self.dateline_crossed = True if cross == 2 or cross == 3 else False
231 self.source = self.SOURCE[(self._flag >> 24) & 1]
232 if self.level_no >= 5:
233 self.source = self.SOURCE[2]
235 self.river = (self._flag >> 25) & 1
237 scale = 10.**(self._flag >> 26)
238 self.area /= scale
239 self.area_full /= scale
241 self._points = None
242 self._file = gshhg_file
243 self._offset = offset
245 @property
246 def points(self):
247 '''
248 Points of the polygon.
250 Array of (lat, lon) pairs, shape (N, 2).
252 :rtype: :class:`numpy.ndarray`
253 '''
254 if self._points is None:
255 with open(self._file) as db:
256 db.seek(self._offset)
257 self._points = num.fromfile(
258 db, dtype='>i4', count=self.npoints*2)\
259 .astype(num.float32)\
260 .reshape(self.npoints, 2)
262 self._points = num.fliplr(self._points)
263 if self.level_no in (2, 4):
264 self._points = self._points[::-1, :]
266 self._points *= micro_deg
267 return self._points
269 @property
270 def lats(self):
271 return self.points[:, 0]
273 @property
274 def lons(self):
275 return self.points[:, 1]
277 def _is_level(self, level):
278 if self.level is self.LEVELS[level]:
279 return True
280 return False
282 def is_land(self):
283 '''
284 Check if the polygon is land.
286 :rtype: bool
287 '''
288 return self._is_level(0)
290 def is_lake(self):
291 '''
292 Check if the polygon is a lake.
294 :rtype: bool
295 '''
296 return self._is_level(1)
298 def is_island_in_lake(self):
299 '''
300 Check if the polygon is an island in a lake.
302 :rtype: bool
303 '''
304 return self._is_level(2)
306 def is_pond_in_island_in_lake(self):
307 '''
308 Check if the polygon is pond on an island in a lake.
310 :rtype: bool
311 '''
312 return self._is_level(3)
314 def is_antarctic_icefront(self):
315 '''
316 Check if the polygon is antarctic icefront.
318 :rtype: bool
319 '''
320 return self._is_level(4)
322 def is_antarctic_grounding_line(self):
323 '''
324 Check if the polygon is antarctic grounding line.
326 :rtype: bool
327 '''
328 return self._is_level(5)
330 def contains_point(self, point):
331 '''
332 Check if point lies in polygon.
334 :param point: (lat, lon) [deg]
335 :type point: tuple
336 :rtype: bool
338 See :py:func:`pyrocko.orthodrome.contains_points`.
339 '''
340 return bool(
341 self.contains_points(num.asarray(point)[num.newaxis, :])[0])
343 def contains_points(self, points):
344 '''
345 Check if points lie in polygon.
347 :param points: Array of (lat lon) pairs, shape (N, 2) [deg].
348 :type points: :class:`numpy.ndarray`
350 See :py:func:`pyrocko.orthodrome.contains_points`.
352 :returns: Bool array of shape (N,)
353 '''
354 mask = points_in_bounding_box(points, self.get_bounding_box())
355 if num.any(mask):
356 mask[mask] = orthodrome.contains_points(
357 self.points, points[mask, :])
359 return mask
361 def get_bounding_box(self):
362 return (self.west, self.east, self.south, self.north)
364 def __lt__(self, polygon):
365 return self.level_no < polygon.level_no
367 def __str__(self):
368 rstr = '''Polygon id: {p.pid}
369-------------------
370Points: {p.npoints}
371Level: {p.level}
372Area: {p.area} km**2
373Area Full: {p.area_full} km**2
374Extent: {p.west} W, {p.east} E, {p.south} S, {p.north} N
375Source: {p.source}
376Greenwhich crossed: {p.greenwhich_crossed}
377Dateline crossed: {p.dateline_crossed}
378 '''.format(p=self)
379 return rstr
382class GSHHG(object):
383 '''
384 GSHHG database access.
386 This class provides methods to select relevant polygons (land, lakes, etc.)
387 for given locations or regions. It also provides robust high-level
388 functions to test if the Earth is dry or wet at given coordinates.
389 '''
391 gshhg_url = 'https://mirror.pyrocko.org/www.soest.hawaii.edu/pwessel/gshhg/gshhg-bin-2.3.7.zip' # noqa
392 _header_struct = struct.Struct('>IIIiiiiIIii')
394 def __init__(self, gshhg_file):
395 ''' Initialise the database from GSHHG binary.
397 :param gshhg_file: Path to file
398 :type gshhg_file: str
399 :
400 '''
401 t0 = time.time()
402 self._file = gshhg_file
404 self.polygons = []
405 self._read_database()
406 logger.debug('Initialised GSHHG database from %s in [%.4f s]'
407 % (gshhg_file, time.time()-t0))
409 def _read_database(self):
410 with open(self._file, mode='rb') as db:
411 while db:
412 buf = db.read(self._header_struct.size)
413 if not buf:
414 break
415 header = self._header_struct.unpack_from(buf)
416 p = Polygon(
417 self._file,
418 db.tell(),
419 *header)
420 self.polygons.append(p)
422 offset = 8 * header[1]
423 db.seek(offset, io.SEEK_CUR)
425 @classmethod
426 def _get_database(cls, filename):
427 file = path.join(config.gshhg_dir, filename)
428 if not path.exists(file):
429 from pyrocko import util
430 import zipfile
432 archive_path = path.join(config.gshhg_dir,
433 path.basename(cls.gshhg_url))
434 util.download_file(
435 cls.gshhg_url, archive_path,
436 status_callback=get_download_callback(
437 'Downloading GSHHG database...'))
438 if not zipfile.is_zipfile(archive_path):
439 raise util.DownloadError('GSHHG file is corrupted!')
440 logger.info('Unzipping GSHHG database...')
441 zipf = zipfile.ZipFile(archive_path)
442 zipf.extractall(config.gshhg_dir)
443 else:
444 logger.debug('Using cached %s' % filename)
445 return file
447 def get_polygons_at(self, lat, lon):
448 '''
449 Get all polygons whose bounding boxes contain point.
451 :param lat: Latitude in [deg]
452 :type lat: float
453 :param lon: Longitude in [deg]
454 :type lon: float
455 :returns: List of :class:`~pyrocko.dataset.gshhg.Polygon`
456 :rtype: list
457 '''
458 rp = []
459 for p in self.polygons:
460 if point_in_bounding_box((lat, lon), p.get_bounding_box()):
461 rp.append(p)
462 return rp
464 def get_polygons_within(self, west, east, south, north):
465 '''
466 Get all polygons whose bounding boxes intersect with a bounding box.
468 :param west: Western boundary in decimal degree
469 :type west: float
470 :param east: Eastern boundary in decimal degree
471 :type east: float
472 :param north: Northern boundary in decimal degree
473 :type north: float
474 :param south: Southern boundary in decimal degree
475 :type south: float
476 :returns: List of :class:`~pyrocko.dataset.gshhg.Polygon`
477 :rtype: list
478 '''
480 assert is_valid_bounding_box((west, east, south, north))
482 rp = []
483 for p in self.polygons:
484 if bounding_boxes_overlap(
485 p.get_bounding_box(), (west, east, south, north)):
487 rp.append(p)
488 return rp
490 def is_point_on_land(self, lat, lon):
491 '''
492 Check whether a point is on land.
494 Lakes are considered not land.
496 :param lat: Latitude in [deg]
497 :type lat: float
498 :param lon: Longitude in [deg]
499 :type lon: float
501 :rtype: bool
502 '''
504 relevant_polygons = self.get_polygons_at(lat, lon)
505 relevant_polygons.sort()
507 land = False
508 for p in relevant_polygons:
509 if (p.is_land() or p.is_antarctic_grounding_line() or
510 p.is_island_in_lake()):
511 if p.contains_point((lat, lon)):
512 land = True
513 elif (p.is_lake() or p.is_antarctic_icefront() or
514 p.is_pond_in_island_in_lake()):
515 if p.contains_point((lat, lon)):
516 land = False
517 return land
519 def get_land_mask(self, points):
520 '''
521 Check whether given points are on land.
523 Lakes are considered not land.
525 :param points: Array of (lat, lon) pairs, shape (N, 2).
526 :type points: :class:`numpy.ndarray`
527 :return: Boolean land mask
528 :rtype: :class:`numpy.ndarray` of shape (N,)
529 '''
531 west, east, south, north = bounding_box_covering_points(points)
533 relevant_polygons = self.get_polygons_within(west, east, south, north)
534 relevant_polygons.sort()
536 mask = num.zeros(points.shape[0], dtype=bool)
537 for p in relevant_polygons:
538 if (p.is_land() or p.is_antarctic_grounding_line() or
539 p.is_island_in_lake()):
540 land = p.contains_points(points)
541 mask[land] = True
542 elif p.is_lake() or p.is_pond_in_island_in_lake():
543 water = p.contains_points(points)
544 mask[water] = False
545 return mask
547 @classmethod
548 def full(cls):
549 '''
550 Return the full-resolution GSHHG database.
551 '''
552 return cls(cls._get_database('gshhs_f.b'))
554 @classmethod
555 def high(cls):
556 '''
557 Return the high-resolution GSHHG database.
558 '''
559 return cls(cls._get_database('gshhs_h.b'))
561 @classmethod
562 def intermediate(cls):
563 '''
564 Return the intermediate-resolution GSHHG database.
565 '''
566 return cls(cls._get_database('gshhs_i.b'))
568 @classmethod
569 def low(cls):
570 '''
571 Return the low-resolution GSHHG database.
572 '''
573 return cls(cls._get_database('gshhs_l.b'))
575 @classmethod
576 def crude(cls):
577 '''
578 Return the crude-resolution GSHHG database.
579 '''
580 return cls(cls._get_database('gshhs_c.b'))