Coverage for /usr/local/lib/python3.11/dist-packages/grond/clustering/dbscan.py: 90%
90 statements
« prev ^ index » next coverage.py v6.5.0, created at 2023-10-26 16:25 +0000
« prev ^ index » next coverage.py v6.5.0, created at 2023-10-26 16:25 +0000
1'''This module provides basic cluster processing for seismic events.'''
3from __future__ import print_function
4import collections
5import numpy as num
7epsilon = 1e-6
8km = 1000.
11class DBEvent:
12 '''
13 Event condition with respect to the DBSCAN clustering.
15 :param event_id: event index (sequential number)
16 :param event_type: type of the event according to clustering
17 :param directly_reachable: list of ids of events directly reachable
18 '''
20 def __init__(self, event_id, event_type, directly_reachables):
21 self.event_id = event_id
22 self.event_type = event_type
23 self.directly_reachables = directly_reachables
26class ClusterError(Exception):
27 pass
30def dbscan(simmat, nmin, eps, ncluster_limit):
31 '''
32 Apply DBSCAN algorithm, reading a similarity matrix and returning a list of
33 events clusters
35 :param simmat: similarity matrix (numpy matrix)
36 :param nmin: minimum number of neighbours to define a cluster
37 :param eps: maximum distance to search for neighbours
38 '''
39 CORE = 1
40 NOT_CORE = 2
41 EDGE = 3
42 ISLE = 4
43 REACHED = 5
45 nev = len(simmat)
46 clusterevents = []
47 for i in range(nev):
48 event_id = i
49 directly_reachables = []
50 for j in range(nev):
51 if simmat[i, j] <= eps:
52 if (i != j):
53 directly_reachables.append(j)
55 if len(directly_reachables) >= nmin:
56 event_type = CORE
57 else:
58 event_type = NOT_CORE
60 clusterevents.append(
61 DBEvent(event_id, event_type, directly_reachables))
63 for i in range(nev):
64 for j in range(nev):
65 if (i in clusterevents[j].directly_reachables) \
66 and (clusterevents[j].event_type == CORE):
68 if clusterevents[i].event_type == NOT_CORE:
69 clusterevents[i].event_type = EDGE
71 if clusterevents[i].event_type == NOT_CORE:
72 clusterevents[i].event_type = ISLE
74 eventsclusters = num.zeros((nev), dtype=int)
75 actualcluster = -1
76 for i in range(nev):
77 if clusterevents[i].event_type == ISLE:
78 eventsclusters[i] = -1
79 else:
80 if clusterevents[i].event_type == CORE:
81 actualcluster = actualcluster + 1
82 reachedevents = []
83 pointingevents = []
85 eventsclusters[i] = actualcluster
86 reachedevents.append(i)
87 pointingevents.append(i)
89 while len(pointingevents) > 0:
90 newpointingevents = []
91 for j in pointingevents:
92 for k in clusterevents[j].directly_reachables:
93 if clusterevents[k].event_type == CORE:
94 reachedevents.append(k)
95 newpointingevents.append(k)
96 eventsclusters[k] = actualcluster
97 clusterevents[k].event_type = REACHED
98 elif clusterevents[k].event_type == EDGE:
99 reachedevents.append(k)
100 eventsclusters[k] = actualcluster
101 clusterevents[k].event_type = REACHED
103 pointingevents = []
104 for newpointingevent in newpointingevents:
105 pointingevents.append(newpointingevent)
107 n_clusters = actualcluster + 1
108# resorting data clusters (noise remains as -1)
110 clustersizes = []
111 for icl in range(n_clusters):
112 lencl = len([ev for ev in eventsclusters if ev == icl])
113 clustersizes.append([icl, lencl])
115 resorted_clustersizes = sorted(
116 clustersizes, key=lambda tup: tup[1], reverse=True)
118 resorting_dict = {}
119 resorting_dict[-1] = -1
121 for icl in range(n_clusters):
122 if ncluster_limit is None or icl < ncluster_limit:
123 resorting_dict[resorted_clustersizes[icl][0]] = icl
124 else:
125 resorting_dict[resorted_clustersizes[icl][0]] = -1
127 for ievcl, evcl in enumerate(eventsclusters):
128 eventsclusters[ievcl] = resorting_dict[evcl]
130 return eventsclusters
133def get_clusters(events, eventsclusters):
134 '''
135 Provided a list of events and a list of cluster labels of equal size
136 returns a dictionary of clusters with all their events
138 i.e.
140 cluster -1 -> [event1, event4, eventx]
141 cluster 0 -> [event2, event3, eventn]
142 '''
144 origclusters = {}
145 if len(events) != len(eventsclusters):
146 raise ClusterError(
147 'number of events different from number of cluster labels: %s, %s'
148 % len(events), len(eventsclusters))
150 for iev, evcl in enumerate(eventsclusters):
151 if evcl not in origclusters:
152 origclusters[evcl] = [events[iev]]
153 else:
154 origclusters[evcl].append(events[iev])
156 return collections.OrderedDict(sorted(origclusters.items()))