1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

from __future__ import division, absolute_import, print_function 

 

import numpy as np 

from .numeric import uint8, ndarray, dtype 

from numpy.compat import ( 

long, basestring, os_fspath, contextlib_nullcontext, is_pathlib_path 

) 

from numpy.core.overrides import set_module 

 

__all__ = ['memmap'] 

 

# ``memmap.__new__`` has a parameter that is itself named ``dtype``; inside
# that method the dtype constructor is reached through this alias instead.
dtypedescr = dtype

# Canonical (short) file-mode strings accepted by ``memmap``.
valid_filemodes = [
    "r",
    "c",
    "r+",
    "w+",
]

# The canonical modes for which the file is mapped with write access.
writeable_filemodes = [
    "r+",
    "w+",
]

# Long-form mode names, each translated to its canonical short form.
mode_equivalents = {
    "readonly": "r",
    "copyonwrite": "c",
    "readwrite": "r+",
    "write": "w+",
}

 

 

@set_module('numpy')
class memmap(ndarray):
    """Create a memory-map to an array stored in a *binary* file on disk.

    Memory-mapped files are used for accessing small segments of large files
    on disk, without reading the entire file into memory.  NumPy's
    memmap's are array-like objects.  This differs from Python's ``mmap``
    module, which uses file-like objects.

    This subclass of ndarray has some unpleasant interactions with
    some operations, because it doesn't quite fit properly as a subclass.
    An alternative to using this subclass is to create the ``mmap``
    object yourself, then create an ndarray with ndarray.__new__ directly,
    passing the object created in its 'buffer=' parameter.

    This class may at some point be turned into a factory function
    which returns a view into an mmap buffer.

    Delete the memmap instance to close the memmap file.


    Parameters
    ----------
    filename : str, file-like object, or pathlib.Path instance
        The file name or file object to be used as the array data buffer.
    dtype : data-type, optional
        The data-type used to interpret the file contents.
        Default is `uint8`.
    mode : {'r+', 'r', 'w+', 'c'}, optional
        The file is opened in this mode:

        +------+-------------------------------------------------------------+
        | 'r'  | Open existing file for reading only.                        |
        +------+-------------------------------------------------------------+
        | 'r+' | Open existing file for reading and writing.                 |
        +------+-------------------------------------------------------------+
        | 'w+' | Create or overwrite existing file for reading and writing.  |
        +------+-------------------------------------------------------------+
        | 'c'  | Copy-on-write: assignments affect data in memory, but       |
        |      | changes are not saved to disk.  The file on disk is         |
        |      | read-only.                                                  |
        +------+-------------------------------------------------------------+

        Default is 'r+'.
    offset : int, optional
        In the file, array data starts at this offset. Since `offset` is
        measured in bytes, it should normally be a multiple of the byte-size
        of `dtype`. When ``mode != 'r'``, even positive offsets beyond end of
        file are valid; The file will be extended to accommodate the
        additional data. By default, ``memmap`` will start at the beginning of
        the file, even if ``filename`` is a file pointer ``fp`` and
        ``fp.tell() != 0``.
    shape : tuple, optional
        The desired shape of the array. If ``mode == 'r'`` and the number
        of remaining bytes after `offset` is not a multiple of the byte-size
        of `dtype`, you must specify `shape`. By default, the returned array
        will be 1-D with the number of elements determined by file size
        and data-type.
    order : {'C', 'F'}, optional
        Specify the order of the ndarray memory layout:
        :term:`row-major`, C-style or :term:`column-major`,
        Fortran-style.  This only has an effect if the shape is
        greater than 1-D.  The default order is 'C'.

    Attributes
    ----------
    filename : str or pathlib.Path instance
        Path to the mapped file.
    offset : int
        Offset position in the file.
    mode : str
        File mode.

    Methods
    -------
    flush
        Flush any changes in memory to file on disk.
        When you delete a memmap object, flush is called first to write
        changes to disk before removing the object.


    See Also
    --------
    lib.format.open_memmap : Create or load a memory-mapped ``.npy`` file.

    Notes
    -----
    The memmap object can be used anywhere an ndarray is accepted.
    Given a memmap ``fp``, ``isinstance(fp, numpy.ndarray)`` returns
    ``True``.

    Memory-mapped files cannot be larger than 2GB on 32-bit systems.

    When a memmap causes a file to be created or extended beyond its
    current size in the filesystem, the contents of the new part are
    unspecified. On systems with POSIX filesystem semantics, the extended
    part will be filled with zero bytes.

    Examples
    --------
    >>> data = np.arange(12, dtype='float32')
    >>> data.resize((3,4))

    This example uses a temporary file so that doctest doesn't write
    files to your directory. You would use a 'normal' filename.

    >>> from tempfile import mkdtemp
    >>> import os.path as path
    >>> filename = path.join(mkdtemp(), 'newfile.dat')

    Create a memmap with dtype and shape that matches our data:

    >>> fp = np.memmap(filename, dtype='float32', mode='w+', shape=(3,4))
    >>> fp
    memmap([[ 0.,  0.,  0.,  0.],
            [ 0.,  0.,  0.,  0.],
            [ 0.,  0.,  0.,  0.]], dtype=float32)

    Write data to memmap array:

    >>> fp[:] = data[:]
    >>> fp
    memmap([[  0.,   1.,   2.,   3.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)

    >>> fp.filename == path.abspath(filename)
    True

    Deletion flushes memory changes to disk before removing the object:

    >>> del fp

    Load the memmap and verify data was stored:

    >>> newfp = np.memmap(filename, dtype='float32', mode='r', shape=(3,4))
    >>> newfp
    memmap([[  0.,   1.,   2.,   3.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)

    Read-only memmap:

    >>> fpr = np.memmap(filename, dtype='float32', mode='r', shape=(3,4))
    >>> fpr.flags.writeable
    False

    Copy-on-write memmap:

    >>> fpc = np.memmap(filename, dtype='float32', mode='c', shape=(3,4))
    >>> fpc.flags.writeable
    True

    It's possible to assign to copy-on-write array, but values are only
    written into the memory copy of the array, and not written to disk:

    >>> fpc
    memmap([[  0.,   1.,   2.,   3.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)
    >>> fpc[0,:] = 0
    >>> fpc
    memmap([[  0.,   0.,   0.,   0.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)

    File on disk is unchanged:

    >>> fpr
    memmap([[  0.,   1.,   2.,   3.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)

    Offset into a memmap:

    >>> fpo = np.memmap(filename, dtype='float32', mode='r', offset=16)
    >>> fpo
    memmap([  4.,   5.,   6.,   7.,   8.,   9.,  10.,  11.], dtype=float32)

    """

    __array_priority__ = -100.0

    def __new__(subtype, filename, dtype=uint8, mode='r+', offset=0,
                shape=None, order='C'):
        """Open (or create) the file, map it, and wrap the map in an array.

        See the class docstring for the meaning of each parameter.

        Raises
        ------
        ValueError
            If `mode` is neither a canonical mode string nor one of the
            long-form equivalents, if ``mode == 'w+'`` and `shape` is not
            given, or if `shape` is omitted and the number of bytes after
            `offset` is not a multiple of the item size of `dtype`.
        """
        # Import here to minimize 'import numpy' overhead
        import mmap
        import os.path

        # Translate long-form names ("readonly", ...) to canonical ones; a
        # name that is not a long form must already be canonical.
        try:
            mode = mode_equivalents[mode]
        except KeyError:
            if mode not in valid_filemodes:
                raise ValueError("mode must be one of %s" %
                                 (valid_filemodes + list(mode_equivalents)))

        if mode == 'w+' and shape is None:
            raise ValueError("shape must be given")

        if hasattr(filename, 'read'):
            # Already a file-like object: wrap it in a null context so the
            # same ``with`` statement below serves both cases.
            f_ctx = contextlib_nullcontext(filename)
        else:
            # Copy-on-write never writes to the file, so open it read-only.
            f_ctx = open(os_fspath(filename),
                         ('r' if mode == 'c' else mode) + 'b')

        with f_ctx as fid:
            # Seek to the end to learn the current file length.
            fid.seek(0, 2)
            flen = fid.tell()
            descr = dtypedescr(dtype)
            itemsize = descr.itemsize

            if shape is None:
                # Infer a 1-D shape from whatever follows `offset`.
                nbytes = flen - offset
                if nbytes % itemsize:
                    raise ValueError("Size of available data is not a "
                                     "multiple of the data-type size.")
                size = nbytes // itemsize
                shape = (size,)
            else:
                if not isinstance(shape, tuple):
                    shape = (shape,)
                # avoid default choice of np.int_, which might overflow
                size = np.intp(1)
                for k in shape:
                    size *= k

            # Total number of bytes the file must hold: offset + array data.
            nbytes = long(offset + size*itemsize)

            # Grow the file only when it is actually too short.  Writing
            # unconditionally in 'w+' mode would zero the byte at
            # ``nbytes - 1`` of a caller-supplied file object that is
            # already large enough, and would seek to -1 when nbytes is 0.
            if mode in ('w+', 'r+') and flen < nbytes:
                fid.seek(nbytes - 1, 0)
                fid.write(b'\0')
                fid.flush()

            if mode == 'c':
                acc = mmap.ACCESS_COPY
            elif mode == 'r':
                acc = mmap.ACCESS_READ
            else:
                acc = mmap.ACCESS_WRITE

            # The mmap offset must be a multiple of ALLOCATIONGRANULARITY:
            # round the start down and make up the difference with an
            # offset into the mapped buffer.
            start = offset - offset % mmap.ALLOCATIONGRANULARITY
            nbytes -= start
            array_offset = offset - start
            mm = mmap.mmap(fid.fileno(), nbytes, access=acc, offset=start)

            self = ndarray.__new__(subtype, shape, dtype=descr, buffer=mm,
                                   offset=array_offset, order=order)
            self._mmap = mm
            self.offset = offset
            self.mode = mode

            if is_pathlib_path(filename):
                # special case - if we were constructed with a pathlib.path,
                # then filename is a path object, not a string
                self.filename = filename.resolve()
            elif hasattr(fid, "name") and isinstance(fid.name, basestring):
                # py3 returns int for TemporaryFile().name
                self.filename = os.path.abspath(fid.name)
            else:
                # same as memmap copies (e.g. memmap + 1)
                self.filename = None

        return self

    def __array_finalize__(self, obj):
        """Inherit the mapping attributes from `obj` when memory is shared.

        If `obj` carries an ``_mmap`` attribute and may share memory with
        `self`, the ``_mmap``, ``filename``, ``offset`` and ``mode``
        attributes are copied over; otherwise all four are set to ``None``.
        """
        if hasattr(obj, '_mmap') and np.may_share_memory(self, obj):
            self._mmap = obj._mmap
            self.filename = obj.filename
            self.offset = obj.offset
            self.mode = obj.mode
        else:
            self._mmap = None
            self.filename = None
            self.offset = None
            self.mode = None

    def flush(self):
        """
        Write any changes in the array to the file on disk.

        For further information, see `memmap`.

        Parameters
        ----------
        None

        See Also
        --------
        memmap

        """
        # Delegate to the base object when it exposes a ``flush`` method.
        if self.base is not None and hasattr(self.base, 'flush'):
            self.base.flush()

    def __array_wrap__(self, arr, context=None):
        """Wrap a ufunc result, demoting it unless it is `self` itself."""
        arr = super(memmap, self).__array_wrap__(arr, context)

        # Return a memmap if a memmap was given as the output of the
        # ufunc. Leave the arr class unchanged if self is not a memmap
        # to keep original memmap subclasses behavior
        if self is arr or type(self) is not memmap:
            return arr
        # Return scalar instead of 0d memmap, e.g. for np.sum with
        # axis=None
        if arr.shape == ():
            return arr[()]
        # Return ndarray otherwise
        return arr.view(np.ndarray)

    def __getitem__(self, index):
        """Index the array, returning a plain ndarray for unmapped results.

        A result of exact type ``memmap`` whose ``_mmap`` is ``None`` is
        returned as an ``ndarray`` view instead.
        """
        res = super(memmap, self).__getitem__(index)
        if type(res) is memmap and res._mmap is None:
            return res.view(type=ndarray)
        return res