1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

# -*- coding: utf-8 -*- 

""" 

This module offers a parser for ISO-8601 strings 

 

It is intended to support all valid date, time and datetime formats per the 

ISO-8601 specification. 

 

.. versionadded:: 2.7.0

""" 

from datetime import datetime, timedelta, time, date 

import calendar 

from dateutil import tz 

 

from functools import wraps 

 

import re 

import six 

 

# Names exported by a star-import of this module.
__all__ = ["isoparse", "isoparser"]

 

 

def _takes_ascii(f):
    """
    Decorate a parser method so that it is always handed a byte string.

    The first argument after ``self`` may be either a string or a stream
    (any object exposing a ``read`` attribute, which is called with no
    arguments to obtain the full contents). Text strings are encoded to
    ASCII before ``f`` is invoked; any other value is forwarded unchanged.

    :param f:
        The method to wrap; called as ``f(self, <bytes>, *args, **kwargs)``.

    :raises ValueError:
        From the wrapper, if a text string contains non-ASCII characters.
    """
    @wraps(f)
    def ascii_wrapper(self, str_in, *args, **kwargs):
        # Streams are consumed in full; anything without ``read`` is taken
        # to be the string itself.
        try:
            reader = str_in.read
        except AttributeError:
            payload = str_in
        else:
            payload = reader()

        # Only text needs converting; other values go straight through.
        if not isinstance(payload, six.text_type):
            return f(self, payload, *args, **kwargs)

        # ISO-8601 only covers ASCII, and ASCII is identical in UTF-8, so a
        # plain ASCII encode is sufficient here.
        try:
            encoded = payload.encode('ascii')
        except UnicodeEncodeError as e:
            msg = 'ISO-8601 strings should contain only ASCII characters'
            six.raise_from(ValueError(msg), e)

        return f(self, encoded, *args, **kwargs)

    return ascii_wrapper

 

 

class isoparser(object):
    """
    Parser for ISO-8601 date, time, datetime and UTC offset strings.

    The public entry points (:meth:`isoparse`, :meth:`parse_isodate`,
    :meth:`parse_isotime` and :meth:`parse_tzstr`) are decorated with
    ``_takes_ascii``. The private ``_parse_*`` helpers slice their input and
    compare the slices against ``bytes`` constants.
    """

    def __init__(self, sep=None):
        """
        :param sep:
            A single character that separates date and time portions. If
            ``None``, the parser will accept any single character.
            For strict ISO-8601 adherence, pass ``'T'``.

        :raises ValueError:
            If ``sep`` is not a single, non-numeric ASCII character.
        """
        if sep is not None:
            if (len(sep) != 1 or ord(sep) >= 128 or sep in '0123456789'):
                raise ValueError('Separator must be a single, non-numeric ' +
                                 'ASCII character')

            # Stored as bytes because it is compared against byte slices
            sep = sep.encode('ascii')

        self._sep = sep

    @_takes_ascii
    def isoparse(self, dt_str):
        """
        Parse an ISO-8601 datetime string into a :class:`datetime.datetime`.

        An ISO-8601 datetime string consists of a date portion, followed
        optionally by a time portion - the date and time portions are
        separated by a single character separator, which is ``T`` in the
        official standard.

        Supported date formats are:

        Common:

        - ``YYYY``
        - ``YYYY-MM`` (the undelimited ``YYYYMM`` form is *not* accepted)
        - ``YYYY-MM-DD`` or ``YYYYMMDD``

        Uncommon:

        - ``YYYY-Www`` or ``YYYYWww`` - ISO week (day defaults to 1, Monday)
        - ``YYYY-Www-D`` or ``YYYYWwwD`` - ISO week and day
        - ``YYYY-DDD`` or ``YYYYDDD`` - ordinal day of the year

        The ISO week and day numbering follows the same logic as
        :func:`datetime.date.isocalendar`.

        Supported time formats are:

        - ``hh``
        - ``hh:mm`` or ``hhmm``
        - ``hh:mm:ss`` or ``hhmmss``
        - ``hh:mm:ss.sss`` or ``hh:mm:ss.ssssss`` (1-6 sub-second digits)

        Midnight is a special case for ``hh``, as the standard supports both
        00:00 and 24:00 as a representation. ``24:00`` denotes the end of the
        given day and is therefore returned as ``00:00`` of the following
        day.

        .. caution::

            Support for fractional components other than seconds is part of
            the ISO-8601 standard, but is not currently implemented in this
            parser.

        Supported time zone offset formats are:

        - ``Z`` (UTC)
        - ``±HH:MM``
        - ``±HHMM``
        - ``±HH``

        Offsets will be represented as :class:`dateutil.tz.tzoffset` objects,
        with the exception of UTC, which will be represented as
        :class:`dateutil.tz.tzutc`. Time zone offsets equivalent to UTC (such
        as ``+00:00``) will also be represented as
        :class:`dateutil.tz.tzutc`.

        :param dt_str:
            A string or stream containing only an ISO-8601 datetime string

        :return:
            Returns a :class:`datetime.datetime` representing the string.
            Unspecified components default to their lowest value.

        :raises ValueError:
            If the string cannot be parsed as one of the formats above.

        .. warning::

            As of version 2.7.0, the strictness of the parser should not be
            considered a stable part of the contract. Any valid ISO-8601
            string that parses correctly with the default settings will
            continue to parse correctly in future versions, but invalid
            strings that currently fail (e.g. ``2017-01-01T00:00+00:00:00``)
            are not guaranteed to continue failing in future versions if they
            encode a valid date.

        .. versionadded:: 2.7.0
        """
        components, pos = self._parse_isodate(dt_str)

        if len(dt_str) > pos:
            if self._sep is None or dt_str[pos:pos + 1] == self._sep:
                # Skip the one-character separator and parse the remainder
                components += self._parse_isotime(dt_str[pos + 1:])
            else:
                raise ValueError('String contains unknown ISO components')

        if len(components) > 3 and components[3] == 24:
            # 24:00 is the *end* of the given day, i.e. midnight at the start
            # of the next one. _parse_isotime has already verified that the
            # minute, second and microsecond components are all zero.
            components[3] = 0
            return datetime(*components) + timedelta(days=1)

        return datetime(*components)

    @_takes_ascii
    def parse_isodate(self, datestr):
        """
        Parse the date portion of an ISO string.

        :param datestr:
            The string portion of an ISO string, without a separator

        :return:
            Returns a :class:`datetime.date` object

        :raises ValueError:
            If the string is not a supported date format or has trailing
            characters after the date.
        """
        components, pos = self._parse_isodate(datestr)
        if pos < len(datestr):
            # Decode before formatting so that the message shows the text
            # itself rather than a bytes repr on Python 3.
            raise ValueError('String contains unknown ISO ' +
                             'components: {!r}'.format(
                                 datestr.decode('ascii', 'replace')))
        return date(*components)

    @_takes_ascii
    def parse_isotime(self, timestr):
        """
        Parse the time portion of an ISO string.

        :param timestr:
            The time portion of an ISO string, without a separator

        :return:
            Returns a :class:`datetime.time` object. ``24:00`` is returned as
            ``00:00``, since a bare time has no date to roll over.

        :raises ValueError:
            If the string is not a supported time format.
        """
        components = self._parse_isotime(timestr)
        if components[0] == 24:
            components[0] = 0
        return time(*components)

    @_takes_ascii
    def parse_tzstr(self, tzstr, zero_as_utc=True):
        """
        Parse a valid ISO time zone string.

        See :func:`isoparser.isoparse` for details on supported formats.

        :param tzstr:
            A string representing an ISO time zone offset

        :param zero_as_utc:
            Whether to return :class:`dateutil.tz.tzutc` for zero-offset zones

        :return:
            Returns :class:`dateutil.tz.tzoffset` for offsets and
            :class:`dateutil.tz.tzutc` for ``Z`` and (if ``zero_as_utc`` is
            specified) offsets equivalent to UTC.

        :raises ValueError:
            If the string is not a supported offset format.
        """
        return self._parse_tzstr(tzstr, zero_as_utc=zero_as_utc)

    # Constants
    # Characters that terminate the fractional-second digits (offset start)
    _MICROSECOND_END_REGEX = re.compile(b'[-+Z]+')
    _DATE_SEP = b'-'
    _TIME_SEP = b':'
    _MICRO_SEP = b'.'

    def _parse_isodate(self, dt_str):
        """
        Parse the leading date of ``dt_str``.

        The calendar-date forms are tried first; the week-date and ordinal
        forms are only attempted if that raises :class:`ValueError`.

        :return:
            Returns a tuple ``(components, pos)`` where ``components`` is
            ``[year, month, day]`` and ``pos`` is the index just past the
            date.
        """
        try:
            return self._parse_isodate_common(dt_str)
        except ValueError:
            return self._parse_isodate_uncommon(dt_str)

    def _parse_isodate_common(self, dt_str):
        """
        Parse ``YYYY``, ``YYYY-MM``, ``YYYY-MM-DD`` or ``YYYYMMDD``.

        :return:
            Returns a tuple ``(components, pos)``; missing month and day
            default to 1.

        :raises ValueError:
            If the string is too short, a component is not an integer, or
            separators are used inconsistently.
        """
        len_str = len(dt_str)
        components = [1, 1, 1]

        if len_str < 4:
            raise ValueError('ISO string too short')

        # Year
        components[0] = int(dt_str[0:4])
        pos = 4
        if pos >= len_str:
            return components, pos

        has_sep = dt_str[pos:pos + 1] == self._DATE_SEP
        if has_sep:
            pos += 1

        # Month
        if len_str - pos < 2:
            raise ValueError('Invalid common month')

        components[1] = int(dt_str[pos:pos + 2])
        pos += 2

        if pos >= len_str:
            # A year and month with nothing after them are only accepted in
            # the delimited form; six bare digits are rejected.
            if has_sep:
                return components, pos
            else:
                raise ValueError('Invalid ISO format')

        if has_sep:
            if dt_str[pos:pos + 1] != self._DATE_SEP:
                raise ValueError('Invalid separator in ISO string')
            pos += 1

        # Day
        if len_str - pos < 2:
            raise ValueError('Invalid common day')
        components[2] = int(dt_str[pos:pos + 2])
        return components, pos + 2

    def _parse_isodate_uncommon(self, dt_str):
        """
        Parse an ISO week date (``YYYY-Www[-D]``) or ordinal date
        (``YYYY-DDD``), with or without the dash separators.

        :return:
            Returns a tuple ``(components, pos)`` where ``components`` is the
            equivalent ``[year, month, day]``.

        :raises ValueError:
            If the string is too short, a component is not an integer or is
            out of range, or dashes are used inconsistently.
        """
        if len(dt_str) < 4:
            raise ValueError('ISO string too short')

        # All ISO formats start with the year
        year = int(dt_str[0:4])

        has_sep = dt_str[4:5] == self._DATE_SEP

        pos = 4 + has_sep       # Skip '-' if it's there
        if dt_str[pos:pos + 1] == b'W':
            # YYYY-?Www-?D?
            pos += 1
            weekno = int(dt_str[pos:pos + 2])
            pos += 2

            # Without an explicit day the week date refers to its Monday
            dayno = 1
            if len(dt_str) > pos:
                if (dt_str[pos:pos + 1] == self._DATE_SEP) != has_sep:
                    raise ValueError('Inconsistent use of dash separator')

                pos += has_sep

                dayno = int(dt_str[pos:pos + 1])
                pos += 1

            base_date = self._calculate_weekdate(year, weekno, dayno)
        else:
            # YYYYDDD or YYYY-DDD
            if len(dt_str) - pos < 3:
                raise ValueError('Invalid ordinal day')

            ordinal_day = int(dt_str[pos:pos + 3])
            pos += 3

            # isleap() returns a bool, which adds 1 day in leap years
            if ordinal_day < 1 or ordinal_day > (365 + calendar.isleap(year)):
                raise ValueError('Invalid ordinal day' +
                                 ' {} for year {}'.format(ordinal_day, year))

            base_date = date(year, 1, 1) + timedelta(days=ordinal_day - 1)

        components = [base_date.year, base_date.month, base_date.day]
        return components, pos

    def _calculate_weekdate(self, year, week, day):
        """
        Calculate the day of corresponding to the ISO year-week-day calendar.

        This function is effectively the inverse of
        :func:`datetime.date.isocalendar`.

        :param year:
            The year in the ISO calendar

        :param week:
            The week in the ISO calendar - range is [1, 53]

        :param day:
            The day in the ISO calendar - range is [1 (MON), 7 (SUN)]

        :return:
            Returns a :class:`datetime.date`

        :raises ValueError:
            If ``week`` or ``day`` is outside the ranges above. No check is
            made that ``year`` actually contains a week 53.
        """
        if not 0 < week < 54:
            raise ValueError('Invalid week: {}'.format(week))

        if not 0 < day < 8:     # Range is 1-7
            raise ValueError('Invalid weekday: {}'.format(day))

        # Get week 1 for the specific year:
        jan_4 = date(year, 1, 4)   # Week 1 always has January 4th in it
        week_1 = jan_4 - timedelta(days=jan_4.isocalendar()[2] - 1)

        # Now add the specific number of weeks and days to get what we want
        week_offset = (week - 1) * 7 + (day - 1)
        return week_1 + timedelta(days=week_offset)

    def _parse_isotime(self, timestr):
        """
        Parse a time string, with an optional UTC offset, into components.

        :param timestr:
            The time portion of an ISO string, without the date/time
            separator

        :return:
            Returns a list ``[hour, minute, second, microsecond, tzinfo]``.
            ``tzinfo`` is ``None`` when no offset is present. An hour of 24
            is validated but left in place, so that the caller can choose
            between rolling over to the next day and mapping it to 0.

        :raises ValueError:
            If the string is too short, a component is not an integer,
            characters are left over, or the hour is 24 with any non-zero
            minute, second or microsecond.
        """
        len_str = len(timestr)
        components = [0, 0, 0, 0, None]
        pos = 0
        comp = -1       # Index of the component currently being parsed

        if len_str < 2:
            raise ValueError('ISO time too short')

        has_sep = len_str >= 3 and timestr[2:3] == self._TIME_SEP

        while pos < len_str and comp < 5:
            comp += 1

            if timestr[pos:pos + 1] in b'-+Z':
                # Detect time zone boundary; the offset consumes the rest
                components[-1] = self._parse_tzstr(timestr[pos:])
                pos = len_str
                break

            if comp < 3:
                # Hour, minute, second
                components[comp] = int(timestr[pos:pos + 2])
                pos += 2
                if (has_sep and pos < len_str and
                        timestr[pos:pos + 1] == self._TIME_SEP):
                    pos += 1

            if comp == 3:
                # Microsecond
                if timestr[pos:pos + 1] != self._MICRO_SEP:
                    continue

                pos += 1
                # At most 6 digits, cut short at the start of any offset
                us_str = self._MICROSECOND_END_REGEX.split(
                    timestr[pos:pos + 6], 1)[0]

                # Right-pad to microseconds, e.g. b'5' -> 500000
                components[comp] = int(us_str) * 10**(6 - len(us_str))
                pos += len(us_str)

        if pos < len_str:
            raise ValueError('Unused components in ISO string')

        if components[0] == 24:
            # Standard supports 00:00 and 24:00 as representations of midnight
            if any(component != 0 for component in components[1:4]):
                raise ValueError('Hour may only be 24 at 24:00:00.000')

        return components

    def _parse_tzstr(self, tzstr, zero_as_utc=True):
        """
        Parse ``Z``, ``±HH``, ``±HHMM`` or ``±HH:MM`` into a tzinfo object.

        :param tzstr:
            The offset portion of an ISO string

        :param zero_as_utc:
            Whether a zero offset is returned as :class:`dateutil.tz.tzutc`
            rather than a zero-valued :class:`dateutil.tz.tzoffset`

        :raises ValueError:
            If the length, sign, hours or minutes are invalid.
        """
        if tzstr == b'Z':
            return tz.tzutc()

        # The 1-character case is b'Z', which has already been handled
        if len(tzstr) not in {3, 5, 6}:
            raise ValueError('Time zone offset must be 1, 3, 5 or 6 characters')

        if tzstr[0:1] == b'-':
            mult = -1
        elif tzstr[0:1] == b'+':
            mult = 1
        else:
            raise ValueError('Time zone offset requires sign')

        hours = int(tzstr[1:3])
        if len(tzstr) == 3:
            minutes = 0
        else:
            # Minutes start after the optional colon
            minutes = int(tzstr[(4 if tzstr[3:4] == self._TIME_SEP else 3):])

        if zero_as_utc and hours == 0 and minutes == 0:
            return tz.tzutc()
        else:
            if minutes > 59:
                raise ValueError('Invalid minutes in time zone offset')

            if hours > 23:
                raise ValueError('Invalid hours in time zone offset')

            return tz.tzoffset(None, mult * (hours * 60 + minutes) * 60)

 

 

# Shared parser instance built with the default arguments (``sep=None``).
DEFAULT_ISOPARSER = isoparser()

# Module-level ``isoparse`` is the bound method of the shared instance.
isoparse = DEFAULT_ISOPARSER.isoparse