1# http://pyrocko.org - GPLv3
2#
3# The Pyrocko Developers, 21st Century
4# ---|P------/S----------~Lg----------
6'''
7Interface to the GSHHG (coastlines, rivers and borders) database.
9The Global Self-consistent Hierarchical High-resolution Geography Database
10(GSHHG) is a collection of polygons representing land, lakes, rivers and
11political borders.
13If the database is not already available, it will be downloaded
14automatically on first use.
16For more information about GSHHG, see
17http://www.soest.hawaii.edu/pwessel/gshhg/.
19.. note::
21 **If you use this dataset, please cite:**
23 Wessel, P., and W. H. F.
24 Smith, A Global Self-consistent, Hierarchical, High-resolution
25 Shoreline Database, J. Geophys. Res., 101, #B4, pp. 8741-8743, 1996.
26'''
28import logging
29import io
30import struct
31import time
32import numpy as num
34from os import path
36from pyrocko import config, orthodrome
37from .util import get_download_callback
40logger = logging.getLogger('pyrocko.dataset.gshhg')
41config = config.config()
43km = 1e3
44micro_deg = 1e-6
47def split_region_0_360(wesn):
48 west, east, south, north = wesn
49 if west < 0.:
50 if east <= 0:
51 return [(west+360., east+360., south, north)]
52 else:
53 return [(west+360., 360., south, north),
54 (0., east, south, north)]
55 else:
56 return [wesn]
59def is_valid_bounding_box(wesn):
60 '''
61 Check if a given bounding box meets the GSHHG conventions.
63 :param wesn: bounding box as (west, east, south, north) in [deg]
64 '''
66 w, e, s, n = wesn
68 return (
69 w <= e
70 and s <= n
71 and -90.0 <= s <= 90.
72 and -90. <= n <= 90.
73 and -180. <= w < 360.
74 and -180. <= e < 360.)
77def is_valid_polygon(points):
78 '''
79 Check if polygon points meet the GSHHG conventions.
81 :param points: Array of (lat, lon) pairs, shape (N, 2).
82 '''
84 lats = points[:, 0]
85 lons = points[:, 1]
87 return (
88 num.all(-90. <= lats)
89 and num.all(lats <= 90.)
90 and num.all(-180. <= lons)
91 and num.all(lons < 360.))
94def points_in_bounding_box(points, wesn, tolerance=0.1):
95 '''
96 Check which points are contained in a given bounding box.
98 :param points: Array of (lat lon) pairs, shape (N, 2) [deg].
99 :param wesn: Region tuple (west, east, south, north) [deg]
100 :param tolerance: increase the size of the test bounding box by
101 *tolerance* [deg] on every side (Some GSHHG polygons have a too tight
102 bounding box).
104 :returns: Bool array of shape (N,).
105 '''
106 points_wrap = points.copy()
107 points_wrap[:, 1] %= 360.
109 mask = num.zeros(points_wrap.shape[0], dtype=bool)
110 for w, e, s, n in split_region_0_360(wesn):
111 mask = num.logical_or(
112 mask,
113 num.logical_and(
114 num.logical_and(
115 w-tolerance <= points_wrap[:, 1],
116 points_wrap[:, 1] <= e+tolerance),
117 num.logical_and(
118 s-tolerance <= points_wrap[:, 0],
119 points_wrap[:, 0] <= n+tolerance)))
121 return mask
124def point_in_bounding_box(point, wesn, tolerance=0.1):
125 '''
126 Check whether point is contained in a given bounding box.
128 :param points: Array of (lat lon) pairs, shape (N, 2) [deg].
129 :param wesn: Region tuple (west, east, south, north) [deg]
130 :param tolerance: increase the size of the test bounding box by
131 *tolerance* [deg] on every side (Some GSHHG polygons have a too tight
132 bounding box).
134 :rtype: bool
135 '''
137 lat, lon = point
138 lon %= 360.
139 for w, e, s, n in split_region_0_360(wesn):
140 if (w-tolerance <= lon
141 and lon <= e+tolerance
142 and s-tolerance <= lat
143 and lat <= n+tolerance):
145 return True
147 return False
150def bounding_boxes_overlap(wesn1, wesn2):
151 '''
152 Check whether two bounding boxes intersect.
154 :param wesn1, wesn2: Region tuples (west, east, south, north) [deg]
156 :rtype: bool
157 '''
158 for w1, e1, s1, n1 in split_region_0_360(wesn1):
159 for w2, e2, s2, n2 in split_region_0_360(wesn2):
160 if w2 <= e1 and w1 <= e2 and s2 <= n1 and s1 <= n2:
161 return True
163 return False
166def is_polygon_in_bounding_box(points, wesn, tolerance=0.1):
167 return num.all(points_in_bounding_box(points, wesn, tolerance=tolerance))
170def bounding_box_covering_points(points):
171 lats = points[:, 0]
172 lat_min, lat_max = num.min(lats), num.max(lats)
174 lons = points[:, 1]
175 lons = lons % 360.
176 lon_min, lon_max = num.min(lons), num.max(lons)
177 if lon_max - lon_min < 180.:
178 return lon_min, lon_max, lat_min, lat_max
180 lons = (lons - 180.) % 360. - 180.
181 lon_min, lon_max = num.min(lons), num.max(lons)
182 if lon_max - lon_min < 180.:
183 return lon_min, lon_max, lat_min, lat_max
185 return (-180., 180., lat_min, lat_max)
188class Polygon(object):
189 '''
190 Representation of a GSHHG polygon.
191 '''
193 RIVER_NOT_SET = 0
195 LEVELS = ['LAND', 'LAKE', 'ISLAND_IN_LAKE', 'POND_IN_ISLAND_IN_LAKE',
196 'ANTARCTIC_ICE_FRONT', 'ANTARCTIC_GROUNDING_LINE']
198 SOURCE = ['CIA_WDBII', 'WVS', 'AC']
200 def __init__(self, gshhg_file, offset, *attr):
201 '''
202 Initialise a GSHHG polygon
204 :param gshhg_file: GSHHG binary file
205 :type gshhg_file: str
206 :param offset: This polygon's offset in binary file
207 :type offset: int
208 :param attr: Polygon attributes
209 ``(pid, npoints, _flag, west, east, south, north,
210 area, area_full, container, ancestor)``.
211 See :file:`gshhg.h` for details.
212 :type attr: tuple
213 '''
214 (self.pid, self.npoints, self._flag,
215 self.west, self.east, self.south, self.north,
216 self.area, self.area_full, self.container, self.ancestor) = attr
218 self.west *= micro_deg
219 self.east *= micro_deg
220 self.south *= micro_deg
221 self.north *= micro_deg
223 self.level_no = (self._flag & 255)
224 print(self.level_no, self.LEVELS)
225 self.level = self.LEVELS[self.level_no - 1]
226 self.version = (self._flag >> 8) & 255
228 cross = (self._flag >> 16) & 3
229 self.greenwhich_crossed = True if cross == 1 or cross == 3 else False
230 self.dateline_crossed = True if cross == 2 or cross == 3 else False
232 self.source = self.SOURCE[(self._flag >> 24) & 1]
233 if self.level_no >= 5:
234 self.source = self.SOURCE[2]
236 self.river = (self._flag >> 25) & 1
238 scale = 10.**(self._flag >> 26)
239 self.area /= scale
240 self.area_full /= scale
242 self._points = None
243 self._file = gshhg_file
244 self._offset = offset
246 @property
247 def points(self):
248 '''
249 Points of the polygon.
251 Array of (lat, lon) pairs, shape (N, 2).
253 :rtype: :class:`numpy.ndarray`
254 '''
255 if self._points is None:
256 with open(self._file) as db:
257 db.seek(self._offset)
258 self._points = num.fromfile(
259 db, dtype='>i4', count=self.npoints*2)\
260 .astype(num.float32)\
261 .reshape(self.npoints, 2)
263 self._points = num.fliplr(self._points)
264 if self.level_no in (2, 4):
265 self._points = self._points[::-1, :]
267 self._points *= micro_deg
268 return self._points
270 @property
271 def lats(self):
272 return self.points[:, 0]
274 @property
275 def lons(self):
276 return self.points[:, 1]
278 def _is_level(self, level):
279 if self.level is self.LEVELS[level]:
280 return True
281 return False
283 def is_land(self):
284 '''
285 Check if the polygon is land.
287 :rtype: bool
288 '''
289 return self._is_level(0)
291 def is_lake(self):
292 '''
293 Check if the polygon is a lake.
295 :rtype: bool
296 '''
297 return self._is_level(1)
299 def is_island_in_lake(self):
300 '''
301 Check if the polygon is an island in a lake.
303 :rtype: bool
304 '''
305 return self._is_level(2)
307 def is_pond_in_island_in_lake(self):
308 '''
309 Check if the polygon is pond on an island in a lake.
311 :rtype: bool
312 '''
313 return self._is_level(3)
315 def is_antarctic_icefront(self):
316 '''
317 Check if the polygon is antarctic icefront.
319 :rtype: bool
320 '''
321 return self._is_level(4)
323 def is_antarctic_grounding_line(self):
324 '''
325 Check if the polygon is antarctic grounding line.
327 :rtype: bool
328 '''
329 return self._is_level(5)
331 def contains_point(self, point):
332 '''
333 Check if point lies in polygon.
335 :param point: (lat, lon) [deg]
336 :type point: tuple
337 :rtype: bool
339 See :py:func:`pyrocko.orthodrome.contains_points`.
340 '''
341 return bool(
342 self.contains_points(num.asarray(point)[num.newaxis, :])[0])
344 def contains_points(self, points):
345 '''
346 Check if points lie in polygon.
348 :param points: Array of (lat lon) pairs, shape (N, 2) [deg].
349 :type points: :class:`numpy.ndarray`
351 See :py:func:`pyrocko.orthodrome.contains_points`.
353 :returns: Bool array of shape (N,)
354 '''
355 mask = points_in_bounding_box(points, self.get_bounding_box())
356 if num.any(mask):
357 mask[mask] = orthodrome.contains_points(
358 self.points, points[mask, :])
360 return mask
362 def get_bounding_box(self):
363 return (self.west, self.east, self.south, self.north)
365 def __lt__(self, polygon):
366 return self.level_no < polygon.level_no
368 def __str__(self):
369 rstr = '''Polygon id: {p.pid}
370-------------------
371Points: {p.npoints}
372Level: {p.level}
373Area: {p.area} km**2
374Area Full: {p.area_full} km**2
375Extent: {p.west} W, {p.east} E, {p.south} S, {p.north} N
376Source: {p.source}
377Greenwhich crossed: {p.greenwhich_crossed}
378Dateline crossed: {p.dateline_crossed}
379 '''.format(p=self)
380 return rstr
383class RiverPolygon(Polygon):
385 LEVELS = [
386 'DOUBLE_LINED_RIVERS',
387 'PERMANENT_MAJOR_RIVERS',
388 'ADDITIONAL_MAJOR_RIVERS',
389 'ADDITIONAL_RIVERS',
390 'MINOR_RIVERS',
391 'INTERMITTENT_RIVERS-MAJOR',
392 'INTERMITTENT_RIVERS-ADDITIONAL',
393 'INTERMITTENT_RIVERS-MINOR',
394 'MAJOR_CANALS',
395 'MINOR_CANALS',
396 'IRRIGATION_CANALS',
397 'DOCS_MISS',
398 'DOCS_MISS', 'DOCS_MISS', 'DOCS_MISS', 'DOCS_MISS', 'DOCS_MISS']
401class GSHHGBase(object):
402 '''
403 GSHHG database access.
405 This class provides methods to select relevant polygons (land, lakes, etc.)
406 for given locations or regions. It also provides robust high-level
407 functions to test if the Earth is dry or wet at given coordinates.
408 '''
410 gshhg_url = 'https://mirror.pyrocko.org/www.soest.hawaii.edu/pwessel/gshhg/gshhg-bin-2.3.7.zip' # noqa
411 _header_struct = struct.Struct('>IIIiiiiIIii')
413 def __init__(self, gshhg_file):
414 ''' Initialise the database from GSHHG binary.
416 :param gshhg_file: Path to file
417 :type gshhg_file: str
418 :
419 '''
420 t0 = time.time()
421 self._file = gshhg_file
423 self.polygons = []
424 self._read_database()
425 logger.debug('Initialised GSHHG database from %s in [%.4f s]'
426 % (gshhg_file, time.time()-t0))
428 def _read_database(self):
429 with open(self._file, mode='rb') as db:
430 while db:
431 buf = db.read(self._header_struct.size)
432 if not buf:
433 break
434 header = self._header_struct.unpack_from(buf)
435 dataset = path.basename(self._file).split("_")[1]
436 print(dataset)
437 if dataset == 'rivers':
438 LoadPolygon = RiverPolygon
439 else:
440 LoadPolygon = Polygon
442 p = LoadPolygon(
443 self._file,
444 db.tell(),
445 *header)
446 self.polygons.append(p)
448 offset = 8 * header[1]
449 db.seek(offset, io.SEEK_CUR)
451 @classmethod
452 def _get_database(cls, filename):
453 file = path.join(config.gshhg_dir, filename)
454 if not path.exists(file):
455 from pyrocko import util
456 import zipfile
458 archive_path = path.join(config.gshhg_dir,
459 path.basename(cls.gshhg_url))
460 util.download_file(
461 cls.gshhg_url, archive_path,
462 status_callback=get_download_callback(
463 'Downloading GSHHG database...'))
464 if not zipfile.is_zipfile(archive_path):
465 raise util.DownloadError('GSHHG file is corrupted!')
466 logger.info('Unzipping GSHHG database...')
467 zipf = zipfile.ZipFile(archive_path)
468 zipf.extractall(config.gshhg_dir)
469 else:
470 logger.debug('Using cached %s' % filename)
471 return file
473 def get_polygons_at(self, lat, lon):
474 '''
475 Get all polygons whose bounding boxes contain point.
477 :param lat: Latitude in [deg]
478 :type lat: float
479 :param lon: Longitude in [deg]
480 :type lon: float
481 :returns: List of :class:`~pyrocko.dataset.gshhg.Polygon`
482 :rtype: list
483 '''
484 rp = []
485 for p in self.polygons:
486 if point_in_bounding_box((lat, lon), p.get_bounding_box()):
487 rp.append(p)
488 return rp
490 def get_polygons_within(self, west, east, south, north):
491 '''
492 Get all polygons whose bounding boxes intersect with a bounding box.
494 :param west: Western boundary in decimal degree
495 :type west: float
496 :param east: Eastern boundary in decimal degree
497 :type east: float
498 :param north: Northern boundary in decimal degree
499 :type north: float
500 :param south: Southern boundary in decimal degree
501 :type south: float
502 :returns: List of :class:`~pyrocko.dataset.gshhg.Polygon`
503 :rtype: list
504 '''
506 assert is_valid_bounding_box((west, east, south, north))
508 rp = []
509 for p in self.polygons:
510 if bounding_boxes_overlap(
511 p.get_bounding_box(), (west, east, south, north)):
513 rp.append(p)
514 return rp
517class Coastlines(GSHHGBase):
519 def is_point_on_land(self, lat, lon):
520 '''
521 Check whether a point is on land.
523 Lakes are considered not land.
525 :param lat: Latitude in [deg]
526 :type lat: float
527 :param lon: Longitude in [deg]
528 :type lon: float
530 :rtype: bool
531 '''
533 relevant_polygons = self.get_polygons_at(lat, lon)
534 relevant_polygons.sort()
536 land = False
537 for p in relevant_polygons:
538 if (p.is_land() or p.is_antarctic_grounding_line() or
539 p.is_island_in_lake()):
540 if p.contains_point((lat, lon)):
541 land = True
542 elif (p.is_lake() or p.is_antarctic_icefront() or
543 p.is_pond_in_island_in_lake()):
544 if p.contains_point((lat, lon)):
545 land = False
546 return land
548 def get_land_mask(self, points):
549 '''
550 Check whether given points are on land.
552 Lakes are considered not land.
554 :param points: Array of (lat, lon) pairs, shape (N, 2).
555 :type points: :class:`numpy.ndarray`
556 :return: Boolean land mask
557 :rtype: :class:`numpy.ndarray` of shape (N,)
558 '''
560 west, east, south, north = bounding_box_covering_points(points)
562 relevant_polygons = self.get_polygons_within(west, east, south, north)
563 relevant_polygons.sort()
565 mask = num.zeros(points.shape[0], dtype=bool)
566 for p in relevant_polygons:
567 if (p.is_land() or p.is_antarctic_grounding_line() or
568 p.is_island_in_lake()):
569 land = p.contains_points(points)
570 mask[land] = True
571 elif p.is_lake() or p.is_pond_in_island_in_lake():
572 water = p.contains_points(points)
573 mask[water] = False
574 return mask
576 @classmethod
577 def full(cls):
578 '''
579 Return the full-resolution GSHHG database.
580 '''
581 return cls(cls._get_database('gshhs_f.b'))
583 @classmethod
584 def high(cls):
585 '''
586 Return the high-resolution GSHHG database.
587 '''
588 return cls(cls._get_database('gshhs_h.b'))
590 @classmethod
591 def intermediate(cls):
592 '''
593 Return the intermediate-resolution GSHHG database.
594 '''
595 return cls(cls._get_database('gshhs_i.b'))
597 @classmethod
598 def low(cls):
599 '''
600 Return the low-resolution GSHHG database.
601 '''
602 return cls(cls._get_database('gshhs_l.b'))
604 @classmethod
605 def crude(cls):
606 '''
607 Return the crude-resolution GSHHG database.
608 '''
609 return cls(cls._get_database('gshhs_c.b'))
612class GSHHG(Coastlines):
613 pass
616class Borders(GSHHGBase):
618 @classmethod
619 def full(cls):
620 '''
621 Return the full-resolution GSHHG database.
622 '''
623 return cls(cls._get_database('wdb_borders_f.b'))
625 @classmethod
626 def high(cls):
627 '''
628 Return the high-resolution GSHHG database.
629 '''
630 return cls(cls._get_database('wdb_borders_h.b'))
632 @classmethod
633 def intermediate(cls):
634 '''
635 Return the intermediate-resolution GSHHG database.
636 '''
637 return cls(cls._get_database('wdb_borders_.b'))
639 @classmethod
640 def low(cls):
641 '''
642 Return the low-resolution GSHHG database.
643 '''
644 return cls(cls._get_database('wdb_borders_l.b'))
646 @classmethod
647 def crude(cls):
648 '''
649 Return the crude-resolution GSHHG database.
650 '''
651 return cls(cls._get_database('wdb_borders_c.b'))
654class Rivers(GSHHGBase):
656 @classmethod
657 def full(cls):
658 '''
659 Return the full-resolution GSHHG database.
660 '''
661 return cls(cls._get_database('wdb_rivers_f.b'))
663 @classmethod
664 def high(cls):
665 '''
666 Return the high-resolution GSHHG database.
667 '''
668 return cls(cls._get_database('wdb_rivers_h.b'))
670 @classmethod
671 def intermediate(cls):
672 '''
673 Return the intermediate-resolution GSHHG database.
674 '''
675 return cls(cls._get_database('wdb_rivers_.b'))
677 @classmethod
678 def low(cls):
679 '''
680 Return the low-resolution GSHHG database.
681 '''
682 return cls(cls._get_database('wdb_rivers_l.b'))
684 @classmethod
685 def crude(cls):
686 '''
687 Return the crude-resolution GSHHG database.
688 '''
689 return cls(cls._get_database('wdb_rivers_c.b'))