1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

""" 

Module that allows plotting of string "category" data, e.g.

``plot(['d', 'f', 'a'],[1, 2, 3])`` will plot three points with x-axis 

values of 'd', 'f', 'a'. 

 

See :doc:`/gallery/lines_bars_and_markers/categorical_variables` for an 

example. 

 

The module uses Matplotlib's `matplotlib.units` mechanism to convert from 

strings to integers, provides a tick locator and formatter, and the 

:class:`.UnitData` that creates and stores the string-to-integer mapping.

""" 

 

from collections import OrderedDict 

import itertools 

 

import numpy as np 

 

import matplotlib.units as units 

import matplotlib.ticker as ticker 

 

 

class StrCategoryConverter(units.ConversionInterface):
    """Unit converter that turns string categories into numeric positions.

    All methods are static; the string-to-integer mapping itself is kept in
    the :class:`.UnitData` object that is passed in as *unit* (or stored as
    ``axis.units``).
    """

    @staticmethod
    def convert(value, unit, axis):
        """Convert strings in *value* to floats using the mapping in *unit*.

        Parameters
        ----------
        value : string or iterable
            value or list of values to be converted
        unit : :class:`.UnitData`
            object string unit information for value
        axis : :class:`~matplotlib.Axis.axis`
            axis on which the converted value is plotted

        Returns
        -------
        mapped_value : ndarray[float]
            Array with at least one dimension holding the numeric position
            of every entry of *value*.

        Raises
        ------
        ValueError
            If *value* contains non-numeric entries and *unit* is None.
        TypeError
            Propagated from ``unit.update`` when an entry is neither a
            number nor a string/bytes value.

        .. note:: axis is not used in this function
        """
        # dtype = object preserves numerical pass throughs
        values = np.atleast_1d(np.array(value, dtype=object))

        # pass through sequence of non binary numbers
        if all((units.ConversionInterface.is_numlike(v) and
                not isinstance(v, (str, bytes))) for v in values):
            return np.asarray(values, dtype=float)

        # Without a UnitData object there is no mapping to look values up
        # in; fail with an explicit message instead of an AttributeError
        # on None further down.
        if unit is None:
            raise ValueError(
                "Missing category information for StrCategoryConverter; "
                "this might be caused by unintendedly mixing categorical "
                "and numeric data")

        # force an update so it also does type checking
        unit.update(values)

        str2idx = np.vectorize(unit._mapping.__getitem__,
                               otypes=[float])

        mapped_value = str2idx(values)
        return mapped_value

    @staticmethod
    def axisinfo(unit, axis):
        """Set the default axis ticks and labels.

        Parameters
        ----------
        unit : :class:`.UnitData`
            object string unit information for value
        axis : :class:`~matplotlib.Axis.axis`
            axis for which information is being set

        Returns
        -------
        :class:`~matplotlib.units.AxisInfo`
            Information to support default tick labeling

        .. note:: axis is not used
        """
        # locator and formatter take mapping dict because
        # args need to be pass by reference for updates
        majloc = StrCategoryLocator(unit._mapping)
        majfmt = StrCategoryFormatter(unit._mapping)
        return units.AxisInfo(majloc=majloc, majfmt=majfmt)

    @staticmethod
    def default_units(data, axis):
        """Set and update the :class:`~matplotlib.Axis.axis` units.

        Parameters
        ----------
        data : string or iterable of strings
        axis : :class:`~matplotlib.Axis.axis`
            axis on which the data is plotted

        Returns
        -------
        :class:`.UnitData`
            object storing string to integer mapping
        """
        # the conversion call stack is supposed to be
        # default_units->axis_info->convert
        if axis.units is None:
            # first categorical data on this axis: start a new mapping
            axis.set_units(UnitData(data))
        else:
            # extend the mapping already attached to the axis
            axis.units.update(data)
        return axis.units

 

 

class StrCategoryLocator(ticker.Locator):
    """Locator that puts a tick on every integer assigned to a category."""

    def __init__(self, units_mapping):
        """
        Parameters
        ----------
        units_mapping : Dict[str, int]
            string:integer mapping; the object is stored as given,
            not copied
        """
        self._units = units_mapping

    def __call__(self):
        """Return a list of the integer positions held in the mapping."""
        return [position for position in self._units.values()]

    def tick_values(self, vmin, vmax):
        """Return all mapped positions; *vmin* and *vmax* are ignored."""
        positions = self()
        return positions

 

 

class StrCategoryFormatter(ticker.Formatter):
    """Formatter that labels each tick with the category mapped to it."""

    def __init__(self, units_mapping):
        """
        Parameters
        ----------
        units_mapping : Dict[str, int]
            string:integer mapping; the object is stored as given,
            not copied
        """
        self._units = units_mapping

    def __call__(self, x, pos=None):
        """Return the label for position *x*.

        An empty string is returned when *pos* is None or when no
        category is mapped to *x* rounded to the nearest integer.
        """
        if pos is None:
            return ""

        # Invert the category -> integer mapping on every call so that
        # categories added after construction are picked up.
        labels = {}
        for category, index in self._units.items():
            labels[index] = StrCategoryFormatter._text(category)

        nearest = int(np.round(x))
        return labels.get(nearest, '')

    @staticmethod
    def _text(value):
        """Return *value* as ``str``.

        Bytes are decoded as UTF-8, ``str`` is returned unchanged and
        anything else goes through ``str()``.
        """
        if isinstance(value, bytes):
            return value.decode(encoding='utf-8')
        if isinstance(value, str):
            return value
        return str(value)

 

 

class UnitData(object):
    """Ordered record of the integer id handed out to each category."""

    def __init__(self, data=None):
        """
        Create mapping between unique categorical values and integer ids.

        Parameters
        ----------
        data : iterable, optional
            sequence of string values; when given it is passed straight
            to `update`
        """
        self._counter = itertools.count()
        self._mapping = OrderedDict()
        if data is None:
            return
        self.update(data)

    def update(self, data):
        """Assign the next free integer id to every not-yet-seen value.

        Parameters
        ----------
        data : iterable
            sequence of string values

        Raises
        ------
        TypeError
            If a value in data is neither ``str`` nor ``bytes``.  Values
            that precede the offending one have already been added.
        """
        values = np.atleast_1d(np.array(data, dtype=object))

        # OrderedDict.fromkeys drops duplicates while keeping first-seen
        # order.
        distinct = OrderedDict.fromkeys(values)
        for item in distinct:
            if isinstance(item, (str, bytes)):
                if item not in self._mapping:
                    self._mapping[item] = next(self._counter)
            else:
                raise TypeError("{val!r} is not a string".format(val=item))

 

 

# Hook the converter into matplotlib's unit registry: every string-like
# type (Python and NumPy, text and bytes) gets its own converter instance.
for _str_type in (str, np.str_, bytes, np.bytes_):
    units.registry[_str_type] = StrCategoryConverter()
del _str_type  # do not leave the loop variable behind as a module attribute