1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

######################## BEGIN LICENSE BLOCK ######################## 

# The Original Code is mozilla.org code. 

# 

# The Initial Developer of the Original Code is 

# Netscape Communications Corporation. 

# Portions created by the Initial Developer are Copyright (C) 1998 

# the Initial Developer. All Rights Reserved. 

# 

# Contributor(s): 

# Mark Pilgrim - port to Python 

# 

# This library is free software; you can redistribute it and/or 

# modify it under the terms of the GNU Lesser General Public 

# License as published by the Free Software Foundation; either 

# version 2.1 of the License, or (at your option) any later version. 

# 

# This library is distributed in the hope that it will be useful, 

# but WITHOUT ANY WARRANTY; without even the implied warranty of 

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 

# Lesser General Public License for more details. 

# 

# You should have received a copy of the GNU Lesser General Public 

# License along with this library; if not, write to the Free Software 

# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 

# 02110-1301 USA 

######################### END LICENSE BLOCK ######################### 

 

from .mbcharsetprober import MultiByteCharSetProber 

from .codingstatemachine import CodingStateMachine 

from .chardistribution import EUCTWDistributionAnalysis 

from .mbcssm import EUCTW_SM_MODEL 

 

class EUCTWProber(MultiByteCharSetProber):
    """Multi-byte charset prober specialised for the EUC-TW encoding.

    The prober pairs a ``CodingStateMachine`` built from ``EUCTW_SM_MODEL``
    with an ``EUCTWDistributionAnalysis`` instance and exposes the charset
    name and language label as read-only properties.
    """

    def __init__(self):
        """Initialise the base prober, attach the EUC-TW helpers, and reset."""
        super(EUCTWProber, self).__init__()

        # Build the two EUC-TW specific helpers first, then attach them.
        state_machine = CodingStateMachine(EUCTW_SM_MODEL)
        analyzer = EUCTWDistributionAnalysis()
        self.coding_sm = state_machine
        self.distribution_analyzer = analyzer

        # reset() is invoked last, once both helpers are in place.
        self.reset()

    @property
    def charset_name(self):
        """Return the name of the charset this prober reports: ``"EUC-TW"``."""
        return "EUC-TW"

    @property
    def language(self):
        """Return the language label this prober reports: ``"Taiwan"``."""
        return "Taiwan"