Coverage for /usr/local/lib/python3.11/dist-packages/grond/clustering/dbscan.py: 90%

90 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-10-26 16:25 +0000

1'''This module provides basic cluster processing for seismic events.''' 

2 

3from __future__ import print_function 

4import collections 

5import numpy as num 

6 

7epsilon = 1e-6 

8km = 1000. 

9 

10 

11class DBEvent: 

12 ''' 

13 Event condition with respect to the DBSCAN clustering. 

14 

15 :param event_id: event index (sequential number) 

16 :param event_type: type of the event according to clustering 

17 :param directly_reachable: list of ids of events directly reachable 

18 ''' 

19 

20 def __init__(self, event_id, event_type, directly_reachables): 

21 self.event_id = event_id 

22 self.event_type = event_type 

23 self.directly_reachables = directly_reachables 

24 

25 

26class ClusterError(Exception): 

27 pass 

28 

29 

30def dbscan(simmat, nmin, eps, ncluster_limit): 

31 ''' 

32 Apply DBSCAN algorithm, reading a similarity matrix and returning a list of 

33 events clusters 

34 

35 :param simmat: similarity matrix (numpy matrix) 

36 :param nmin: minimum number of neighbours to define a cluster 

37 :param eps: maximum distance to search for neighbours 

38 ''' 

39 CORE = 1 

40 NOT_CORE = 2 

41 EDGE = 3 

42 ISLE = 4 

43 REACHED = 5 

44 

45 nev = len(simmat) 

46 clusterevents = [] 

47 for i in range(nev): 

48 event_id = i 

49 directly_reachables = [] 

50 for j in range(nev): 

51 if simmat[i, j] <= eps: 

52 if (i != j): 

53 directly_reachables.append(j) 

54 

55 if len(directly_reachables) >= nmin: 

56 event_type = CORE 

57 else: 

58 event_type = NOT_CORE 

59 

60 clusterevents.append( 

61 DBEvent(event_id, event_type, directly_reachables)) 

62 

63 for i in range(nev): 

64 for j in range(nev): 

65 if (i in clusterevents[j].directly_reachables) \ 

66 and (clusterevents[j].event_type == CORE): 

67 

68 if clusterevents[i].event_type == NOT_CORE: 

69 clusterevents[i].event_type = EDGE 

70 

71 if clusterevents[i].event_type == NOT_CORE: 

72 clusterevents[i].event_type = ISLE 

73 

74 eventsclusters = num.zeros((nev), dtype=int) 

75 actualcluster = -1 

76 for i in range(nev): 

77 if clusterevents[i].event_type == ISLE: 

78 eventsclusters[i] = -1 

79 else: 

80 if clusterevents[i].event_type == CORE: 

81 actualcluster = actualcluster + 1 

82 reachedevents = [] 

83 pointingevents = [] 

84 

85 eventsclusters[i] = actualcluster 

86 reachedevents.append(i) 

87 pointingevents.append(i) 

88 

89 while len(pointingevents) > 0: 

90 newpointingevents = [] 

91 for j in pointingevents: 

92 for k in clusterevents[j].directly_reachables: 

93 if clusterevents[k].event_type == CORE: 

94 reachedevents.append(k) 

95 newpointingevents.append(k) 

96 eventsclusters[k] = actualcluster 

97 clusterevents[k].event_type = REACHED 

98 elif clusterevents[k].event_type == EDGE: 

99 reachedevents.append(k) 

100 eventsclusters[k] = actualcluster 

101 clusterevents[k].event_type = REACHED 

102 

103 pointingevents = [] 

104 for newpointingevent in newpointingevents: 

105 pointingevents.append(newpointingevent) 

106 

107 n_clusters = actualcluster + 1 

108# resorting data clusters (noise remains as -1) 

109 

110 clustersizes = [] 

111 for icl in range(n_clusters): 

112 lencl = len([ev for ev in eventsclusters if ev == icl]) 

113 clustersizes.append([icl, lencl]) 

114 

115 resorted_clustersizes = sorted( 

116 clustersizes, key=lambda tup: tup[1], reverse=True) 

117 

118 resorting_dict = {} 

119 resorting_dict[-1] = -1 

120 

121 for icl in range(n_clusters): 

122 if ncluster_limit is None or icl < ncluster_limit: 

123 resorting_dict[resorted_clustersizes[icl][0]] = icl 

124 else: 

125 resorting_dict[resorted_clustersizes[icl][0]] = -1 

126 

127 for ievcl, evcl in enumerate(eventsclusters): 

128 eventsclusters[ievcl] = resorting_dict[evcl] 

129 

130 return eventsclusters 

131 

132 

133def get_clusters(events, eventsclusters): 

134 ''' 

135 Provided a list of events and a list of cluster labels of equal size 

136 returns a dictionary of clusters with all their events 

137 

138 i.e. 

139 

140 cluster -1 -> [event1, event4, eventx] 

141 cluster 0 -> [event2, event3, eventn] 

142 ''' 

143 

144 origclusters = {} 

145 if len(events) != len(eventsclusters): 

146 raise ClusterError( 

147 'number of events different from number of cluster labels: %s, %s' 

148 % len(events), len(eventsclusters)) 

149 

150 for iev, evcl in enumerate(eventsclusters): 

151 if evcl not in origclusters: 

152 origclusters[evcl] = [events[iev]] 

153 else: 

154 origclusters[evcl].append(events[iev]) 

155 

156 return collections.OrderedDict(sorted(origclusters.items()))