Coverage for /usr/local/lib/python3.11/dist-packages/grond/clustering/dbscan.py: 90%

90 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2024-11-27 15:15 +0000

1# https://pyrocko.org/grond - GPLv3 

2# 

3# The Grond Developers, 21st Century 

4'''This module provides basic cluster processing for seismic events.''' 

5 

6import collections 

7import numpy as num 

8 

9epsilon = 1e-6 

10km = 1000. 

11 

12 

13class DBEvent: 

14 ''' 

15 Event condition with respect to the DBSCAN clustering. 

16 

17 :param event_id: event index (sequential number) 

18 :param event_type: type of the event according to clustering 

19 :param directly_reachable: list of ids of events directly reachable 

20 ''' 

21 

22 def __init__(self, event_id, event_type, directly_reachables): 

23 self.event_id = event_id 

24 self.event_type = event_type 

25 self.directly_reachables = directly_reachables 

26 

27 

28class ClusterError(Exception): 

29 pass 

30 

31 

32def dbscan(simmat, nmin, eps, ncluster_limit): 

33 ''' 

34 Apply DBSCAN algorithm, reading a similarity matrix and returning a list of 

35 events clusters 

36 

37 :param simmat: similarity matrix (numpy matrix) 

38 :param nmin: minimum number of neighbours to define a cluster 

39 :param eps: maximum distance to search for neighbours 

40 ''' 

41 CORE = 1 

42 NOT_CORE = 2 

43 EDGE = 3 

44 ISLE = 4 

45 REACHED = 5 

46 

47 nev = len(simmat) 

48 clusterevents = [] 

49 for i in range(nev): 

50 event_id = i 

51 directly_reachables = [] 

52 for j in range(nev): 

53 if simmat[i, j] <= eps: 

54 if (i != j): 

55 directly_reachables.append(j) 

56 

57 if len(directly_reachables) >= nmin: 

58 event_type = CORE 

59 else: 

60 event_type = NOT_CORE 

61 

62 clusterevents.append( 

63 DBEvent(event_id, event_type, directly_reachables)) 

64 

65 for i in range(nev): 

66 for j in range(nev): 

67 if (i in clusterevents[j].directly_reachables) \ 

68 and (clusterevents[j].event_type == CORE): 

69 

70 if clusterevents[i].event_type == NOT_CORE: 

71 clusterevents[i].event_type = EDGE 

72 

73 if clusterevents[i].event_type == NOT_CORE: 

74 clusterevents[i].event_type = ISLE 

75 

76 eventsclusters = num.zeros((nev), dtype=int) 

77 actualcluster = -1 

78 for i in range(nev): 

79 if clusterevents[i].event_type == ISLE: 

80 eventsclusters[i] = -1 

81 else: 

82 if clusterevents[i].event_type == CORE: 

83 actualcluster = actualcluster + 1 

84 reachedevents = [] 

85 pointingevents = [] 

86 

87 eventsclusters[i] = actualcluster 

88 reachedevents.append(i) 

89 pointingevents.append(i) 

90 

91 while len(pointingevents) > 0: 

92 newpointingevents = [] 

93 for j in pointingevents: 

94 for k in clusterevents[j].directly_reachables: 

95 if clusterevents[k].event_type == CORE: 

96 reachedevents.append(k) 

97 newpointingevents.append(k) 

98 eventsclusters[k] = actualcluster 

99 clusterevents[k].event_type = REACHED 

100 elif clusterevents[k].event_type == EDGE: 

101 reachedevents.append(k) 

102 eventsclusters[k] = actualcluster 

103 clusterevents[k].event_type = REACHED 

104 

105 pointingevents = [] 

106 for newpointingevent in newpointingevents: 

107 pointingevents.append(newpointingevent) 

108 

109 n_clusters = actualcluster + 1 

110# resorting data clusters (noise remains as -1) 

111 

112 clustersizes = [] 

113 for icl in range(n_clusters): 

114 lencl = len([ev for ev in eventsclusters if ev == icl]) 

115 clustersizes.append([icl, lencl]) 

116 

117 resorted_clustersizes = sorted( 

118 clustersizes, key=lambda tup: tup[1], reverse=True) 

119 

120 resorting_dict = {} 

121 resorting_dict[-1] = -1 

122 

123 for icl in range(n_clusters): 

124 if ncluster_limit is None or icl < ncluster_limit: 

125 resorting_dict[resorted_clustersizes[icl][0]] = icl 

126 else: 

127 resorting_dict[resorted_clustersizes[icl][0]] = -1 

128 

129 for ievcl, evcl in enumerate(eventsclusters): 

130 eventsclusters[ievcl] = resorting_dict[evcl] 

131 

132 return eventsclusters 

133 

134 

135def get_clusters(events, eventsclusters): 

136 ''' 

137 Provided a list of events and a list of cluster labels of equal size 

138 returns a dictionary of clusters with all their events 

139 

140 i.e. 

141 

142 cluster -1 -> [event1, event4, eventx] 

143 cluster 0 -> [event2, event3, eventn] 

144 ''' 

145 

146 origclusters = {} 

147 if len(events) != len(eventsclusters): 

148 raise ClusterError( 

149 'number of events different from number of cluster labels: %s, %s' 

150 % len(events), len(eventsclusters)) 

151 

152 for iev, evcl in enumerate(eventsclusters): 

153 if evcl not in origclusters: 

154 origclusters[evcl] = [events[iev]] 

155 else: 

156 origclusters[evcl].append(events[iev]) 

157 

158 return collections.OrderedDict(sorted(origclusters.items()))