1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6'''
7Interface to the GSHHG (coastlines, rivers and borders) database.
9The Global Self-consistent Hierarchical High-resolution Geography Database
10(GSHHG) is a collection of polygons representing land, lakes, rivers and
11political borders.
13If the database is not already available, it will be downloaded
14automatically on first use.
16For more information about GSHHG, see
17http://www.soest.hawaii.edu/pwessel/gshhg/.
19.. note::
21 **If you use this dataset, please cite:**
23 Wessel, P., and W. H. F.
24 Smith, A Global Self-consistent, Hierarchical, High-resolution
25 Shoreline Database, J. Geophys. Res., 101, #B4, pp. 8741-8743, 1996.
26'''
28from __future__ import absolute_import
30import logging
31import io
32import struct
33import time
34import numpy as num
36from os import path
38from pyrocko import config, orthodrome
39from .util import get_download_callback
42logger = logging.getLogger('pyrocko.dataset.gshhg')
43config = config.config()
45km = 1e3
46micro_deg = 1e-6
49def split_region_0_360(wesn):
50 west, east, south, north = wesn
51 if west < 0.:
52 if east <= 0:
53 return [(west+360., east+360., south, north)]
54 else:
55 return [(west+360., 360., south, north),
56 (0., east, south, north)]
57 else:
58 return [wesn]
61def is_valid_bounding_box(wesn):
62 '''
63 Check if a given bounding box meets the GSHHG conventions.
65 :param wesn: bounding box as (west, east, south, north) in [deg]
66 '''
68 w, e, s, n = wesn
70 return (
71 w <= e
72 and s <= n
73 and -90.0 <= s <= 90.
74 and -90. <= n <= 90.
75 and -180. <= w < 360.
76 and -180. <= e < 360.)
79def is_valid_polygon(points):
80 '''
81 Check if polygon points meet the GSHHG conventions.
83 :param points: Array of (lat, lon) pairs, shape (N, 2).
84 '''
86 lats = points[:, 0]
87 lons = points[:, 1]
89 return (
90 num.all(-90. <= lats)
91 and num.all(lats <= 90.)
92 and num.all(-180. <= lons)
93 and num.all(lons < 360.))
96def points_in_bounding_box(points, wesn, tolerance=0.1):
97 '''
98 Check which points are contained in a given bounding box.
100 :param points: Array of (lat lon) pairs, shape (N, 2) [deg].
101 :param wesn: Region tuple (west, east, south, north) [deg]
102 :param tolerance: increase the size of the test bounding box by
103 *tolerance* [deg] on every side (Some GSHHG polygons have a too tight
104 bounding box).
106 :returns: Bool array of shape (N,).
107 '''
108 points_wrap = points.copy()
109 points_wrap[:, 1] %= 360.
111 mask = num.zeros(points_wrap.shape[0], dtype=bool)
112 for w, e, s, n in split_region_0_360(wesn):
113 mask = num.logical_or(
114 mask,
115 num.logical_and(
116 num.logical_and(
117 w-tolerance <= points_wrap[:, 1],
118 points_wrap[:, 1] <= e+tolerance),
119 num.logical_and(
120 s-tolerance <= points_wrap[:, 0],
121 points_wrap[:, 0] <= n+tolerance)))
123 return mask
126def point_in_bounding_box(point, wesn, tolerance=0.1):
127 '''
128 Check whether point is contained in a given bounding box.
130 :param points: Array of (lat lon) pairs, shape (N, 2) [deg].
131 :param wesn: Region tuple (west, east, south, north) [deg]
132 :param tolerance: increase the size of the test bounding box by
133 *tolerance* [deg] on every side (Some GSHHG polygons have a too tight
134 bounding box).
136 :rtype: bool
137 '''
139 lat, lon = point
140 lon %= 360.
141 for w, e, s, n in split_region_0_360(wesn):
142 if (w-tolerance <= lon
143 and lon <= e+tolerance
144 and s-tolerance <= lat
145 and lat <= n+tolerance):
147 return True
149 return False
152def bounding_boxes_overlap(wesn1, wesn2):
153 '''
154 Check whether two bounding boxes intersect.
156 :param wesn1, wesn2: Region tuples (west, east, south, north) [deg]
158 :rtype: bool
159 '''
160 for w1, e1, s1, n1 in split_region_0_360(wesn1):
161 for w2, e2, s2, n2 in split_region_0_360(wesn2):
162 if w2 <= e1 and w1 <= e2 and s2 <= n1 and s1 <= n2:
163 return True
165 return False
168def is_polygon_in_bounding_box(points, wesn, tolerance=0.1):
169 return num.all(points_in_bounding_box(points, wesn, tolerance=tolerance))
172def bounding_box_covering_points(points):
173 lats = points[:, 0]
174 lat_min, lat_max = num.min(lats), num.max(lats)
176 lons = points[:, 1]
177 lons = lons % 360.
178 lon_min, lon_max = num.min(lons), num.max(lons)
179 if lon_max - lon_min < 180.:
180 return lon_min, lon_max, lat_min, lat_max
182 lons = (lons - 180.) % 360. - 180.
183 lon_min, lon_max = num.min(lons), num.max(lons)
184 if lon_max - lon_min < 180.:
185 return lon_min, lon_max, lat_min, lat_max
187 return (-180., 180., lat_min, lat_max)
190class Polygon(object):
191 '''
192 Representation of a GSHHG polygon.
193 '''
195 RIVER_NOT_SET = 0
197 LEVELS = ['LAND', 'LAKE', 'ISLAND_IN_LAKE', 'POND_IN_ISLAND_IN_LAKE',
198 'ANTARCTIC_ICE_FRONT', 'ANTARCTIC_GROUNDING_LINE']
200 SOURCE = ['CIA_WDBII', 'WVS', 'AC']
202 def __init__(self, gshhg_file, offset, *attr):
203 '''
204 Initialise a GSHHG polygon
206 :param gshhg_file: GSHHG binary file
207 :type gshhg_file: str
208 :param offset: This polygon's offset in binary file
209 :type offset: int
210 :param attr: Polygon attributes
211 ``(pid, npoints, _flag, west, east, south, north,
212 area, area_full, container, ancestor)``.
213 See :file:`gshhg.h` for details.
214 :type attr: tuple
215 '''
216 (self.pid, self.npoints, self._flag,
217 self.west, self.east, self.south, self.north,
218 self.area, self.area_full, self.container, self.ancestor) = attr
220 self.west *= micro_deg
221 self.east *= micro_deg
222 self.south *= micro_deg
223 self.north *= micro_deg
225 self.level_no = (self._flag & 255)
226 self.level = self.LEVELS[self.level_no - 1]
227 self.version = (self._flag >> 8) & 255
229 cross = (self._flag >> 16) & 3
230 self.greenwhich_crossed = True if cross == 1 or cross == 3 else False
231 self.dateline_crossed = True if cross == 2 or cross == 3 else False
233 self.source = self.SOURCE[(self._flag >> 24) & 1]
234 if self.level_no >= 5:
235 self.source = self.SOURCE[2]
237 self.river = (self._flag >> 25) & 1
239 scale = 10.**(self._flag >> 26)
240 self.area /= scale
241 self.area_full /= scale
243 self._points = None
244 self._file = gshhg_file
245 self._offset = offset
247 @property
248 def points(self):
249 '''
250 Points of the polygon.
252 Array of (lat, lon) pairs, shape (N, 2).
254 :rtype: :class:`numpy.ndarray`
255 '''
256 if self._points is None:
257 with open(self._file) as db:
258 db.seek(self._offset)
259 self._points = num.fromfile(
260 db, dtype='>i4', count=self.npoints*2)\
261 .astype(num.float32)\
262 .reshape(self.npoints, 2)
264 self._points = num.fliplr(self._points)
265 if self.level_no in (2, 4):
266 self._points = self._points[::-1, :]
268 self._points *= micro_deg
269 return self._points
271 @property
272 def lats(self):
273 return self.points[:, 0]
275 @property
276 def lons(self):
277 return self.points[:, 1]
279 def _is_level(self, level):
280 if self.level is self.LEVELS[level]:
281 return True
282 return False
284 def is_land(self):
285 '''
286 Check if the polygon is land.
288 :rtype: bool
289 '''
290 return self._is_level(0)
292 def is_lake(self):
293 '''
294 Check if the polygon is a lake.
296 :rtype: bool
297 '''
298 return self._is_level(1)
300 def is_island_in_lake(self):
301 '''
302 Check if the polygon is an island in a lake.
304 :rtype: bool
305 '''
306 return self._is_level(2)
308 def is_pond_in_island_in_lake(self):
309 '''
310 Check if the polygon is pond on an island in a lake.
312 :rtype: bool
313 '''
314 return self._is_level(3)
316 def is_antarctic_icefront(self):
317 '''
318 Check if the polygon is antarctic icefront.
320 :rtype: bool
321 '''
322 return self._is_level(4)
324 def is_antarctic_grounding_line(self):
325 '''
326 Check if the polygon is antarctic grounding line.
328 :rtype: bool
329 '''
330 return self._is_level(5)
332 def contains_point(self, point):
333 '''
334 Check if point lies in polygon.
336 :param point: (lat, lon) [deg]
337 :type point: tuple
338 :rtype: bool
340 See :py:func:`pyrocko.orthodrome.contains_points`.
341 '''
342 return bool(
343 self.contains_points(num.asarray(point)[num.newaxis, :])[0])
345 def contains_points(self, points):
346 '''
347 Check if points lie in polygon.
349 :param points: Array of (lat lon) pairs, shape (N, 2) [deg].
350 :type points: :class:`numpy.ndarray`
352 See :py:func:`pyrocko.orthodrome.contains_points`.
354 :returns: Bool array of shape (N,)
355 '''
356 mask = points_in_bounding_box(points, self.get_bounding_box())
357 if num.any(mask):
358 mask[mask] = orthodrome.contains_points(
359 self.points, points[mask, :])
361 return mask
363 def get_bounding_box(self):
364 return (self.west, self.east, self.south, self.north)
366 def __lt__(self, polygon):
367 return self.level_no < polygon.level_no
369 def __str__(self):
370 rstr = '''Polygon id: {p.pid}
371-------------------
372Points: {p.npoints}
373Level: {p.level}
374Area: {p.area} km**2
375Area Full: {p.area_full} km**2
376Extent: {p.west} W, {p.east} E, {p.south} S, {p.north} N
377Source: {p.source}
378Greenwhich crossed: {p.greenwhich_crossed}
379Dateline crossed: {p.dateline_crossed}
380 '''.format(p=self)
381 return rstr
384class GSHHG(object):
385 '''
386 GSHHG database access.
388 This class provides methods to select relevant polygons (land, lakes, etc.)
389 for given locations or regions. It also provides robust high-level
390 functions to test if the Earth is dry or wet at given coordinates.
391 '''
393 gshhg_url = 'https://mirror.pyrocko.org/www.soest.hawaii.edu/pwessel/gshhg/gshhg-bin-2.3.7.zip' # noqa
394 _header_struct = struct.Struct('>IIIiiiiIIii')
396 def __init__(self, gshhg_file):
397 ''' Initialise the database from GSHHG binary.
399 :param gshhg_file: Path to file
400 :type gshhg_file: str
401 :
402 '''
403 t0 = time.time()
404 self._file = gshhg_file
406 self.polygons = []
407 self._read_database()
408 logger.debug('Initialised GSHHG database from %s in [%.4f s]'
409 % (gshhg_file, time.time()-t0))
411 def _read_database(self):
412 with open(self._file, mode='rb') as db:
413 while db:
414 buf = db.read(self._header_struct.size)
415 if not buf:
416 break
417 header = self._header_struct.unpack_from(buf)
418 p = Polygon(
419 self._file,
420 db.tell(),
421 *header)
422 self.polygons.append(p)
424 offset = 8 * header[1]
425 db.seek(offset, io.SEEK_CUR)
427 @classmethod
428 def _get_database(cls, filename):
429 file = path.join(config.gshhg_dir, filename)
430 if not path.exists(file):
431 from pyrocko import util
432 import zipfile
434 archive_path = path.join(config.gshhg_dir,
435 path.basename(cls.gshhg_url))
436 util.download_file(
437 cls.gshhg_url, archive_path,
438 status_callback=get_download_callback(
439 'Downloading GSHHG database...'))
440 if not zipfile.is_zipfile(archive_path):
441 raise util.DownloadError('GSHHG file is corrupted!')
442 logger.info('Unzipping GSHHG database...')
443 zipf = zipfile.ZipFile(archive_path)
444 zipf.extractall(config.gshhg_dir)
445 else:
446 logger.debug('Using cached %s' % filename)
447 return file
449 def get_polygons_at(self, lat, lon):
450 '''
451 Get all polygons whose bounding boxes contain point.
453 :param lat: Latitude in [deg]
454 :type lat: float
455 :param lon: Longitude in [deg]
456 :type lon: float
457 :returns: List of :class:`~pyrocko.dataset.gshhg.Polygon`
458 :rtype: list
459 '''
460 rp = []
461 for p in self.polygons:
462 if point_in_bounding_box((lat, lon), p.get_bounding_box()):
463 rp.append(p)
464 return rp
466 def get_polygons_within(self, west, east, south, north):
467 '''
468 Get all polygons whose bounding boxes intersect with a bounding box.
470 :param west: Western boundary in decimal degree
471 :type west: float
472 :param east: Eastern boundary in decimal degree
473 :type east: float
474 :param north: Northern boundary in decimal degree
475 :type north: float
476 :param south: Southern boundary in decimal degree
477 :type south: float
478 :returns: List of :class:`~pyrocko.dataset.gshhg.Polygon`
479 :rtype: list
480 '''
482 assert is_valid_bounding_box((west, east, south, north))
484 rp = []
485 for p in self.polygons:
486 if bounding_boxes_overlap(
487 p.get_bounding_box(), (west, east, south, north)):
489 rp.append(p)
490 return rp
492 def is_point_on_land(self, lat, lon):
493 '''
494 Check whether a point is on land.
496 Lakes are considered not land.
498 :param lat: Latitude in [deg]
499 :type lat: float
500 :param lon: Longitude in [deg]
501 :type lon: float
503 :rtype: bool
504 '''
506 relevant_polygons = self.get_polygons_at(lat, lon)
507 relevant_polygons.sort()
509 land = False
510 for p in relevant_polygons:
511 if (p.is_land() or p.is_antarctic_grounding_line() or
512 p.is_island_in_lake()):
513 if p.contains_point((lat, lon)):
514 land = True
515 elif (p.is_lake() or p.is_antarctic_icefront() or
516 p.is_pond_in_island_in_lake()):
517 if p.contains_point((lat, lon)):
518 land = False
519 return land
521 def get_land_mask(self, points):
522 '''
523 Check whether given points are on land.
525 Lakes are considered not land.
527 :param points: Array of (lat, lon) pairs, shape (N, 2).
528 :type points: :class:`numpy.ndarray`
529 :return: Boolean land mask
530 :rtype: :class:`numpy.ndarray` of shape (N,)
531 '''
533 west, east, south, north = bounding_box_covering_points(points)
535 relevant_polygons = self.get_polygons_within(west, east, south, north)
536 relevant_polygons.sort()
538 mask = num.zeros(points.shape[0], dtype=bool)
539 for p in relevant_polygons:
540 if (p.is_land() or p.is_antarctic_grounding_line() or
541 p.is_island_in_lake()):
542 land = p.contains_points(points)
543 mask[land] = True
544 elif p.is_lake() or p.is_pond_in_island_in_lake():
545 water = p.contains_points(points)
546 mask[water] = False
547 return mask
549 @classmethod
550 def full(cls):
551 '''
552 Return the full-resolution GSHHG database.
553 '''
554 return cls(cls._get_database('gshhs_f.b'))
556 @classmethod
557 def high(cls):
558 '''
559 Return the high-resolution GSHHG database.
560 '''
561 return cls(cls._get_database('gshhs_h.b'))
563 @classmethod
564 def intermediate(cls):
565 '''
566 Return the intermediate-resolution GSHHG database.
567 '''
568 return cls(cls._get_database('gshhs_i.b'))
570 @classmethod
571 def low(cls):
572 '''
573 Return the low-resolution GSHHG database.
574 '''
575 return cls(cls._get_database('gshhs_l.b'))
577 @classmethod
578 def crude(cls):
579 '''
580 Return the crude-resolution GSHHG database.
581 '''
582 return cls(cls._get_database('gshhs_c.b'))