Coverage for /usr/local/lib/python3.7/dist-packages/chardet/escprober.py: 20%

100

101

######################## BEGIN LICENSE BLOCK ########################

# The Original Code is mozilla.org code.

# The Initial Developer of the Original Code is

# Netscape Communications Corporation.

# Contributor(s):

# Mark Pilgrim - port to Python

# This library is free software; you can redistribute it and/or

# modify it under the terms of the GNU Lesser General Public

# License as published by the Free Software Foundation; either

# version 2.1 of the License, or (at your option) any later version.

# This library is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

# Lesser General Public License for more details.

# You should have received a copy of the GNU Lesser General Public

# License along with this library; if not, write to the Free Software

# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA

# 02110-1301 USA

######################### END LICENSE BLOCK #########################

from .charsetprober import CharSetProber

from .codingstatemachine import CodingStateMachine

from .enums import LanguageFilter, ProbingState, MachineState

from .escsm import (HZ_SM_MODEL, ISO2022CN_SM_MODEL, ISO2022JP_SM_MODEL,

ISO2022KR_SM_MODEL)

class EscCharSetProber(CharSetProber):
    """
    This CharSetProber uses a "code scheme" approach for detecting encodings,
    whereby easily recognizable escape or shift sequences are relied on to
    identify these encodings.

    One ``CodingStateMachine`` is created per candidate model selected by the
    language filter; every byte given to :meth:`feed` is pushed through each
    machine that is still active until one of them reports ``ITS_ME`` or all
    of them have reported ``ERROR``.
    """

    def __init__(self, lang_filter=None):
        """Create the state machines enabled by *lang_filter* and reset.

        :param lang_filter: forwarded unchanged to the base-class
            initializer; the bits tested below decide which models are used.
        """
        super(EscCharSetProber, self).__init__(lang_filter=lang_filter)
        # NOTE(review): ``self.lang_filter`` is presumably set by the base
        # initializer and must support ``&`` with LanguageFilter members;
        # confirm how the ``None`` default is handled there.
        self.coding_sm = []
        if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED:
            self.coding_sm.append(CodingStateMachine(HZ_SM_MODEL))
            self.coding_sm.append(CodingStateMachine(ISO2022CN_SM_MODEL))
        if self.lang_filter & LanguageFilter.JAPANESE:
            self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL))
        if self.lang_filter & LanguageFilter.KOREAN:
            self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL))
        # Declare every attribute up front; reset() assigns the counter and
        # the detection results their working values.
        self.active_sm_count = None
        self._detected_charset = None
        self._detected_language = None
        self._state = None
        self.reset()

    def reset(self):
        """Re-activate every state machine and forget any detection result."""
        super(EscCharSetProber, self).reset()
        for coding_sm in self.coding_sm:
            if not coding_sm:
                continue
            coding_sm.active = True
            coding_sm.reset()
        self.active_sm_count = len(self.coding_sm)
        self._detected_charset = None
        self._detected_language = None

    @property
    def charset_name(self):
        """Value recorded from the matching machine, or ``None`` if none."""
        return self._detected_charset

    @property
    def language(self):
        """Language of the matching state machine, or ``None`` if none."""
        return self._detected_language

    def get_confidence(self):
        """Return 0.99 once a charset has been detected, otherwise 0.00."""
        if self._detected_charset:
            return 0.99
        return 0.00

    def feed(self, byte_str):
        """Run *byte_str* through every active state machine.

        :param byte_str: iterable of bytes to examine; each element is passed
            to ``next_state`` of every machine that is still active.
        :returns: ``self.state`` -- after this call sets ``FOUND_IT`` (a
            machine reported ``ITS_ME``) or ``NOT_ME`` (the last active
            machine reported ``ERROR``), or unchanged when the input ends
            without a verdict.
        """
        for c in byte_str:
            for coding_sm in self.coding_sm:
                if not coding_sm or not coding_sm.active:
                    continue
                coding_state = coding_sm.next_state(c)
                if coding_state == MachineState.ERROR:
                    # This machine rejected the input; retire it and give up
                    # entirely once no machine is left.
                    coding_sm.active = False
                    self.active_sm_count -= 1
                    if self.active_sm_count <= 0:
                        self._state = ProbingState.NOT_ME
                        return self.state
                elif coding_state == MachineState.ITS_ME:
                    self._state = ProbingState.FOUND_IT
                    self._detected_charset = coding_sm.get_coding_state_machine()
                    self._detected_language = coding_sm.language
                    return self.state

        # Input exhausted without a verdict: still report the current state
        # rather than falling off the end and returning None.
        return self.state

Coverage for /usr/local/lib/python3.7/dist-packages/chardet/escprober.py : 20%

56 statements 11 run 45 missing 0 excluded