1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

""" 

All of the Enums that are used throughout the chardet package. 

 

:author: Dan Blanchard (dan.blanchard@gmail.com) 

""" 

 

 

class InputState(object):
    """
    This enum represents the different states a universal detector can be in.

    The members are plain integer class attributes (not ``enum.Enum``
    members), so they compare equal to ordinary ints.
    """

    # NOTE(review): meanings below are inferred from the names only --
    # confirm against the detector that consumes them.
    PURE_ASCII = 0  # presumably: no escape or high-bit bytes seen
    ESC_ASCII = 1   # presumably: an escape sequence was seen in ASCII input
    HIGH_BYTE = 2   # presumably: a byte with the high bit set was seen

 

 

class LanguageFilter(object):
    """
    This enum represents the different language filters we can apply to a
    ``UniversalDetector``.

    Each base filter occupies its own bit, so filters can be combined with
    ``|`` and tested with ``&``. The members are plain ints.
    """

    # Single-bit base filters.
    CHINESE_SIMPLIFIED = 0x01
    CHINESE_TRADITIONAL = 0x02
    JAPANESE = 0x04
    KOREAN = 0x08
    NON_CJK = 0x10

    # Every base-filter bit set (0x01 | 0x02 | 0x04 | 0x08 | 0x10).
    ALL = 0x1F

    # Composite filters built from the base bits above.
    CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL
    CJK = CHINESE | JAPANESE | KOREAN

 

 

class ProbingState(object):
    """
    This enum represents the different states a prober can be in.

    The members are plain integer class attributes, so they compare equal
    to ordinary ints.
    """

    # NOTE(review): meanings below are inferred from the names only --
    # confirm against the probers that use them.
    DETECTING = 0  # presumably: no verdict reached yet
    FOUND_IT = 1   # presumably: the prober is confident of a match
    NOT_ME = 2     # presumably: the prober has ruled itself out

 

 

class MachineState(object):
    """
    This enum represents the different states a state machine can be in.

    The members are plain integer class attributes, so they compare equal
    to ordinary ints.
    """

    START = 0
    ERROR = 1
    ITS_ME = 2

 

 

class SequenceLikelihood(object):
    """
    This enum represents the likelihood of a character following the previous one.

    The members are plain ints numbered 0 through 3.
    """

    NEGATIVE = 0
    UNLIKELY = 1
    LIKELY = 2
    POSITIVE = 3

    @classmethod
    def get_num_categories(cls):
        """:returns: The number of likelihood categories in the enum."""
        # Hard-coded to match the four members defined above; keep the two
        # in sync if a category is ever added or removed.
        return 4

 

 

class CharacterCategory(object):
    """
    This enum represents the different categories language models for
    ``SingleByteCharsetProber`` put characters into.

    Anything less than CONTROL is considered a letter.
    """

    # Non-letter categories take the top of the 0-255 range, counting
    # down from 255; CONTROL (251) is the lowest of them.
    UNDEFINED = 255
    LINE_BREAK = 254
    SYMBOL = 253
    DIGIT = 252
    CONTROL = 251