Coverage for /usr/local/lib/python3.11/dist-packages/grond/clustering/dbscan.py: 90%
90 statements
« prev ^ index » next coverage.py v6.5.0, created at 2024-11-27 15:15 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2024-11-27 15:15 +0000
1# https://pyrocko.org/grond - GPLv3
2#
3# The Grond Developers, 21st Century
4'''This module provides basic cluster processing for seismic events.'''
6import collections
7import numpy as num
9epsilon = 1e-6
10km = 1000.
13class DBEvent:
14 '''
15 Event condition with respect to the DBSCAN clustering.
17 :param event_id: event index (sequential number)
18 :param event_type: type of the event according to clustering
19 :param directly_reachable: list of ids of events directly reachable
20 '''
22 def __init__(self, event_id, event_type, directly_reachables):
23 self.event_id = event_id
24 self.event_type = event_type
25 self.directly_reachables = directly_reachables
28class ClusterError(Exception):
29 pass
32def dbscan(simmat, nmin, eps, ncluster_limit):
33 '''
34 Apply DBSCAN algorithm, reading a similarity matrix and returning a list of
35 events clusters
37 :param simmat: similarity matrix (numpy matrix)
38 :param nmin: minimum number of neighbours to define a cluster
39 :param eps: maximum distance to search for neighbours
40 '''
41 CORE = 1
42 NOT_CORE = 2
43 EDGE = 3
44 ISLE = 4
45 REACHED = 5
47 nev = len(simmat)
48 clusterevents = []
49 for i in range(nev):
50 event_id = i
51 directly_reachables = []
52 for j in range(nev):
53 if simmat[i, j] <= eps:
54 if (i != j):
55 directly_reachables.append(j)
57 if len(directly_reachables) >= nmin:
58 event_type = CORE
59 else:
60 event_type = NOT_CORE
62 clusterevents.append(
63 DBEvent(event_id, event_type, directly_reachables))
65 for i in range(nev):
66 for j in range(nev):
67 if (i in clusterevents[j].directly_reachables) \
68 and (clusterevents[j].event_type == CORE):
70 if clusterevents[i].event_type == NOT_CORE:
71 clusterevents[i].event_type = EDGE
73 if clusterevents[i].event_type == NOT_CORE:
74 clusterevents[i].event_type = ISLE
76 eventsclusters = num.zeros((nev), dtype=int)
77 actualcluster = -1
78 for i in range(nev):
79 if clusterevents[i].event_type == ISLE:
80 eventsclusters[i] = -1
81 else:
82 if clusterevents[i].event_type == CORE:
83 actualcluster = actualcluster + 1
84 reachedevents = []
85 pointingevents = []
87 eventsclusters[i] = actualcluster
88 reachedevents.append(i)
89 pointingevents.append(i)
91 while len(pointingevents) > 0:
92 newpointingevents = []
93 for j in pointingevents:
94 for k in clusterevents[j].directly_reachables:
95 if clusterevents[k].event_type == CORE:
96 reachedevents.append(k)
97 newpointingevents.append(k)
98 eventsclusters[k] = actualcluster
99 clusterevents[k].event_type = REACHED
100 elif clusterevents[k].event_type == EDGE:
101 reachedevents.append(k)
102 eventsclusters[k] = actualcluster
103 clusterevents[k].event_type = REACHED
105 pointingevents = []
106 for newpointingevent in newpointingevents:
107 pointingevents.append(newpointingevent)
109 n_clusters = actualcluster + 1
110# resorting data clusters (noise remains as -1)
112 clustersizes = []
113 for icl in range(n_clusters):
114 lencl = len([ev for ev in eventsclusters if ev == icl])
115 clustersizes.append([icl, lencl])
117 resorted_clustersizes = sorted(
118 clustersizes, key=lambda tup: tup[1], reverse=True)
120 resorting_dict = {}
121 resorting_dict[-1] = -1
123 for icl in range(n_clusters):
124 if ncluster_limit is None or icl < ncluster_limit:
125 resorting_dict[resorted_clustersizes[icl][0]] = icl
126 else:
127 resorting_dict[resorted_clustersizes[icl][0]] = -1
129 for ievcl, evcl in enumerate(eventsclusters):
130 eventsclusters[ievcl] = resorting_dict[evcl]
132 return eventsclusters
135def get_clusters(events, eventsclusters):
136 '''
137 Provided a list of events and a list of cluster labels of equal size
138 returns a dictionary of clusters with all their events
140 i.e.
142 cluster -1 -> [event1, event4, eventx]
143 cluster 0 -> [event2, event3, eventn]
144 '''
146 origclusters = {}
147 if len(events) != len(eventsclusters):
148 raise ClusterError(
149 'number of events different from number of cluster labels: %s, %s'
150 % len(events), len(eventsclusters))
152 for iev, evcl in enumerate(eventsclusters):
153 if evcl not in origclusters:
154 origclusters[evcl] = [events[iev]]
155 else:
156 origclusters[evcl].append(events[iev])
158 return collections.OrderedDict(sorted(origclusters.items()))