I'm new to Python and I'm trying to use the multiprocessing module.
I'm writing a program to convert the .tif files present in a directory to .dax files.
This is my original code:
import os
import datawriter
import datareader
from time import time
from tqdm import tqdm
dataroot = input("Enter the folder location:")
count_for_number_of_files = 0
count_for_frames_in_filelist = 0
for subdir, dir, files in os.walk(dataroot):
for file in files:
if file.endswith(".tif"):
print(f"file {count_for_number_of_files + 1} = {file}")
count_for_number_of_files += 1
print("Total number of files:", count_for_number_of_files)
frame_list = [None] * count_for_number_of_files
for i in range(0, len(frame_list)):
frame_list[i] = input(f"Enter number of frames for file {i + 1}: ")
print("Frames in each file:", frame_list)
start_time = time()
for subdir, dir, files in os.walk(dataroot):
for file in sorted(files):
if file.endswith(".tif"):
dax_file = datawriter.DaxWriter("{}.dax".format(file[0:-4]))
print(f"Processing {frame_list[count_for_frames_in_filelist]} frames for {file}")
for i in tqdm(range(int(frame_list[count_for_frames_in_filelist]))):
data = datareader.TifReader("{}".format(file)).loadAFrame(i)
dax_file.addFrame(data)
count_for_frames_in_filelist += 1
dax_file.close()
print(f"Conversion completed for {count_for_number_of_files} files", '\n',
"Total time taken:", time() - start_time, "seconds")
The new code using multiprocessing is:
import multiprocessing as mp
from multiprocessing import Process, Lock
import numpy as np
import pandas as pd
import os, logging
import originpro as op
import matplotlib.pyplot as plt
from matplotlib import colors
from matplotlib.ticker import PercentFormatter
import datawriter
import datareader
from time import time
from tqdm import tqdm
import tifffile
import hashlib
import re
import threading
dataroot = input("Enter the folder location:")
class Multi:
def f(x):
count_for_number_of_files = 0
count_for_frames_in_filelist = 0
for subdir, dir, files in os.walk(x):
for file in files:
if file.endswith(".tif"):
print(f"file {count_for_number_of_files + 1} = {file}")
count_for_number_of_files += 1
print("Total number of files:", count_for_number_of_files)
frame_list = [None] * count_for_number_of_files
for i in range(0, len(frame_list)):
frame_list[i] = input(f"Enter number of frames for file {i + 1}: ")
print("Frames in each file:", frame_list)
start_time = time()
for subdir, dir, files in os.walk(dataroot):
for file in sorted(files):
if file.endswith(".tif"):
dax_file = datawriter.DaxWriter("{}.dax".format(file[0:-4]))
print(f"Processing {frame_list[count_for_frames_in_filelist]} frames for {file}")
for i in tqdm(range(int(frame_list[count_for_frames_in_filelist]))):
data = datareader.TifReader("{}".format(file)).loadAFrame(i)
dax_file.addFrame(data)
count_for_frames_in_filelist += 1
dax_file.close()
print(f"Conversion completed for {count_for_number_of_files} files", '\n',
"Total time taken:", time() - start_time, "seconds")
my_object=Multi
if __name__ == '__main__':
ctx = mp.get_context('spawn')
q = ctx.Queue()
p = ctx.Process(Multi.f(dataroot))
p.start()
print(q.get())
p.join()
The thing is, the runtime remains exactly the same; I had hoped multiprocessing would make it faster.
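One thing worth checking here: ctx.Process(Multi.f(dataroot)) calls Multi.f(dataroot) immediately in the parent process and passes its return value (None) to Process, so the worker has nothing to do and all the work still happens serially; q.get() also blocks forever, since nothing ever puts anything on the queue. If I read the multiprocessing docs correctly, the callable and its arguments are passed separately, roughly like this (untested sketch):

if __name__ == '__main__':
    ctx = mp.get_context('spawn')
    # Pass the callable and its arguments; do not call it here.
    p = ctx.Process(target=Multi.f, args=(dataroot,))
    p.start()
    p.join()

Note that with the 'spawn' start method, module-level statements such as dataroot = input(...) run again in the child process, so they belong under if __name__ == '__main__' as well. And a single worker process only moves the same serial loop into another process; by itself it cannot make anything faster.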
datareader and datawriter are separate Python files that I import into this script:
datareader.py
#!/usr/bin/env python
"""
Classes that handles reading STORM movie files. Currently this
is limited to the dax, fits, spe and tif formats.
Hazen 06/13
"""
import hashlib
import numpy
import os
import re
import tifffile
# Avoid making astropy mandatory for everybody.
try:
from astropy.io import fits
except ImportError:
pass
def inferReader(filename, verbose=False):
"""
Given a file name this will try to return the appropriate
reader based on the file extension.
"""
ext = os.path.splitext(filename)[1]
if (ext == ".dax"):
return DaxReader(filename, verbose=verbose)
elif (ext == ".fits"):
return FITSReader(filename, verbose=verbose)
elif (ext == ".spe"):
return SpeReader(filename, verbose=verbose)
elif (ext == ".tif") or (ext == ".tiff"):
return TifReader(filename, verbose=verbose)
else:
print(ext, "is not a recognized file type")
raise IOError("only .dax, .spe and .tif are supported (case sensitive..)")
class Reader(object):
"""
The superclass containing those functions that
are common to reading a STORM movie file.
Subclasses should implement:
1. __init__(self, filename, verbose = False)
This function should open the file and extract the
various key bits of meta-data such as the size in XY
and the length of the movie.
2. loadAFrame(self, frame_number)
Load the requested frame and return it as numpy array.
"""
def __init__(self, filename, verbose=False):
super(Reader, self).__init__()
self.filename = filename
self.fileptr = None
self.verbose = verbose
def __del__(self):
self.close()
def __enter__(self):
return self
def __exit__(self, etype, value, traceback):
self.close()
def averageFrames(self, start=None, end=None):
"""
Average multiple frames in a movie.
"""
length = 0
average = numpy.zeros((self.image_height, self.image_width), float)  # numpy.float was removed in NumPy 1.24
for [i, frame] in self.frameIterator(start, end):
if self.verbose and ((i % 10) == 0):
print(" processing frame:", i, " of", self.number_frames)
length += 1
average += frame
if (length > 0):
average = average / float(length)
return average
def close(self):
if self.fileptr is not None:
self.fileptr.close()
self.fileptr = None
def filmFilename(self):
"""
Returns the film name.
"""
return self.filename
def filmSize(self):
"""
Returns the film size.
"""
return [self.image_width, self.image_height, self.number_frames]
def filmLocation(self):
"""
Returns the picture x,y location, if available.
"""
if hasattr(self, "stage_x"):
return [self.stage_x, self.stage_y]
else:
return [0.0, 0.0]
def filmScale(self):
"""
Returns the scale used to display the film when
the picture was taken.
"""
if hasattr(self, "scalemin") and hasattr(self, "scalemax"):
return [self.scalemin, self.scalemax]
else:
return [100, 2000]
def frameIterator(self, start=None, end=None):
"""
Iterator for going through the frames of a movie.
"""
if start is None:
start = 0
if end is None:
end = self.number_frames
for i in range(start, end):
yield [i, self.loadAFrame(i)]
def hashID(self):
"""
A (hopefully) unique string that identifies this movie.
"""
return hashlib.md5(self.loadAFrame(0).tobytes()).hexdigest()  # tostring() is a deprecated alias of tobytes()
def loadAFrame(self, frame_number):
assert frame_number >= 0, "Frame_number must be greater than or equal to 0, it is " + str(frame_number)
assert frame_number < self.number_frames, "Frame number must be less than " + str(self.number_frames)
def lockTarget(self):
"""
Returns the film focus lock target.
"""
if hasattr(self, "lock_target"):
return self.lock_target
else:
return 0.0
class DaxReader(Reader):
"""
Dax reader class. This is a Zhuang lab custom format.
"""
def __init__(self, filename, verbose=False):
super(DaxReader, self).__init__(filename, verbose=verbose)
# save the filenames
dirname = os.path.dirname(filename)
if (len(dirname) > 0):
dirname = dirname + "/"
self.inf_filename = dirname + os.path.splitext(os.path.basename(filename))[0] + ".inf"
# defaults
self.image_height = None
self.image_width = None
# extract the movie information from the associated inf file
size_re = re.compile(r'frame dimensions = ([\d]+) x ([\d]+)')
length_re = re.compile(r'number of frames = ([\d]+)')
endian_re = re.compile(r' (big|little) endian')
stagex_re = re.compile(r'Stage X = ([\d\.\-]+)')
stagey_re = re.compile(r'Stage Y = ([\d\.\-]+)')
lock_target_re = re.compile(r'Lock Target = ([\d\.\-]+)')
scalemax_re = re.compile(r'scalemax = ([\d\.\-]+)')
scalemin_re = re.compile(r'scalemin = ([\d\.\-]+)')
inf_file = open(self.inf_filename, "r")
while 1:
line = inf_file.readline()
if not line: break
m = size_re.match(line)
if m:
self.image_height = int(m.group(2))
self.image_width = int(m.group(1))
m = length_re.match(line)
if m:
self.number_frames = int(m.group(1))
m = endian_re.search(line)
if m:
if m.group(1) == "big":
self.bigendian = 1
else:
self.bigendian = 0
m = stagex_re.match(line)
if m:
self.stage_x = float(m.group(1))
m = stagey_re.match(line)
if m:
self.stage_y = float(m.group(1))
m = lock_target_re.match(line)
if m:
self.lock_target = float(m.group(1))
m = scalemax_re.match(line)
if m:
self.scalemax = int(m.group(1))
m = scalemin_re.match(line)
if m:
self.scalemin = int(m.group(1))
inf_file.close()
# set defaults, probably correct, but warn the user
# that they couldn't be determined from the inf file.
if not self.image_height:
print("Could not determine image size, assuming 256x256.")
self.image_height = 256
self.image_width = 256
# open the dax file
if os.path.exists(filename):
self.fileptr = open(filename, "rb")
else:
if self.verbose:
print("dax data not found", filename)
def loadAFrame(self, frame_number):
"""
Load a frame & return it as a numpy array.
"""
super(DaxReader, self).loadAFrame(frame_number)
self.fileptr.seek(frame_number * self.image_height * self.image_width * 2)
image_data = numpy.fromfile(self.fileptr, dtype='uint16', count=self.image_height * self.image_width)
image_data = numpy.reshape(image_data, [self.image_height, self.image_width])
if self.bigendian:
image_data.byteswap(True)
return image_data
class FITSReader(Reader):
"""
FITS file reader class.
FIXME: This depends on internals of astropy.io.fits that I'm sure
we are not supposed to be messing with. The problem is that
astropy.io.fits does not support memmap'd images when the
image is scaled (which is pretty much always the case?). To
get around this we set _ImageBaseHDU._do_not_scale_image_data
to True, then do the image scaling ourselves.
We want memmap = True as generally it won't make sense to
load the entire movie into memory.
Another consequence of this is that we only support
'pseudo unsigned' 16 bit FITS format files.
"""
def __init__(self, filename, verbose=False):
super(FITSReader, self).__init__(filename, verbose=verbose)
self.hdul = fits.open(filename, memmap=True)
hdr = self.hdul[0].header
# We only handle 16 bit FITS files.
assert ((hdr['BITPIX'] == 16) and (hdr['bscale'] == 1) and (hdr['bzero'] == 32768)), \
"Only 16 bit pseudo-unsigned FITS format is currently supported!"
# Get image size. We're assuming that the film is a data cube in
# the first / primary HDU.
#
self.image_height = hdr['naxis2']
self.image_width = hdr['naxis1']
if (hdr['naxis'] == 3):
self.number_frames = hdr['naxis3']
else:
self.number_frames = 1
self.hdu0 = self.hdul[0]
# Hack, override astropy.io.fits internal so that we can load
# data with memmap = True.
#
self.hdu0._do_not_scale_image_data = True
def close(self):
pass
def loadAFrame(self, frame_number):
super(FITSReader, self).loadAFrame(frame_number)
frame = self.hdu0.data[frame_number, :, :].astype(numpy.uint16)
frame -= 32768
return frame
class SpeReader(Reader):
"""
SPE (Roper Scientific) reader class.
"""
def __init__(self, filename, verbose=False):
super(SpeReader, self).__init__(filename, verbose=verbose)
# open the file & read the header
self.header_size = 4100
self.fileptr = open(filename, "rb")
self.fileptr.seek(42)
self.image_width = int(numpy.fromfile(self.fileptr, numpy.uint16, 1)[0])
self.fileptr.seek(656)
self.image_height = int(numpy.fromfile(self.fileptr, numpy.uint16, 1)[0])
self.fileptr.seek(1446)
self.number_frames = int(numpy.fromfile(self.fileptr, numpy.uint32, 1)[0])
self.fileptr.seek(108)
image_mode = int(numpy.fromfile(self.fileptr, numpy.uint16, 1)[0])
if (image_mode == 0):
self.image_size = 4 * self.image_width * self.image_height
self.image_mode = numpy.float32
elif (image_mode == 1):
self.image_size = 4 * self.image_width * self.image_height
self.image_mode = numpy.uint32
elif (image_mode == 2):
self.image_size = 2 * self.image_width * self.image_height
self.image_mode = numpy.int16
elif (image_mode == 3):
self.image_size = 2 * self.image_width * self.image_height
self.image_mode = numpy.uint16
else:
print("unrecognized spe image format: ", image_mode)
def loadAFrame(self, frame_number, cast_to_int16=True):
"""
Load a frame & return it as a numpy array.
"""
super(SpeReader, self).loadAFrame(frame_number)
self.fileptr.seek(self.header_size + frame_number * self.image_size)
image_data = numpy.fromfile(self.fileptr, dtype=self.image_mode, count=self.image_height * self.image_width)
if cast_to_int16:
image_data = image_data.astype(numpy.uint16)
image_data = numpy.reshape(image_data, [self.image_height, self.image_width])
return image_data
class TifReader(Reader):
"""
TIF reader class.
This is supposed to handle the following:
1. A normal Tiff file with one frame/image per page.
2. Tiff files with multiple frames on a single page.
3. Tiff files with multiple frames on multiple pages.
"""
def __init__(self, filename, verbose=False):
super(TifReader, self).__init__(filename, verbose)
self.page_data = None
self.page_number = -1
# Save the filename
self.fileptr = tifffile.TiffFile(filename)
number_pages = len(self.fileptr.pages)
# Single page Tiff file, which might be an "ImageJ Tiff"
# with many frames on a page.
#
if (number_pages == 1):
# Determines the size without loading the entire file.
isize = self.fileptr.series[0].shape
# Check if this is actually just a single frame tiff, if
# it is we'll just load it into memory.
#
if (len(isize) == 2):
self.frames_per_page = 1
self.number_frames = 1
self.image_height = isize[0]
self.image_width = isize[1]
self.page_data = self.fileptr.asarray()
# Otherwise we'll memmap it in case it is really large.
#
else:
self.frames_per_page = isize[0]
self.number_frames = isize[0]
self.image_height = isize[1]
self.image_width = isize[2]
self.page_data = self.fileptr.asarray(out='memmap')
# Multiple page Tiff file.
#
else:
isize = self.fileptr.asarray(key=0).shape
# Check for one frame per page.
if (len(isize) == 2):
self.frames_per_page = 1
self.number_frames = number_pages
self.image_height = isize[0]
self.image_width = isize[1]
# Multiple frames per page.
#
# FIXME: No unit test for this kind of file.
#
else:
self.frames_per_page = isize[0]
self.number_frames = number_pages * isize[0]
self.image_height = isize[1]
self.image_width = isize[2]
if self.verbose:
print("{0:0d} frames per page, {1:0d} pages".format(self.frames_per_page, number_pages))
def loadAFrame(self, frame_number, cast_to_int16=True):
super(TifReader, self).loadAFrame(frame_number)
# All the data is on a single page.
if self.number_frames == self.frames_per_page:
if (self.number_frames == 1):
image_data = self.page_data
else:
image_data = self.page_data[frame_number, :, :]
# Multiple frames of data on multiple pages.
elif (self.frames_per_page > 1):
page = int(frame_number / self.frames_per_page)
frame = frame_number % self.frames_per_page
# This is an optimization for files with a large number of frames
# per page. In this case tifffile will keep loading the entire
# page over and over again, which really slows everything down.
# Ideally tifffile would let us specify which frame on the page
# we wanted.
#
# Since it was going to load the whole thing anyway we'll have
# memory overflow either way, so not much we can do about that
# except hope for small file sizes.
#
if (page != self.page_number):
self.page_data = self.fileptr.asarray(key=page)
self.page_number = page
image_data = self.page_data[frame, :, :]
# One frame on each page.
else:
image_data = self.fileptr.asarray(key=frame_number)
assert (len(image_data.shape) == 2), "Not a monochrome tif image! " + str(image_data.shape)
if cast_to_int16:
image_data = image_data.astype(numpy.uint16)
return image_data
if (__name__ == "__main__"):
import sys
if (len(sys.argv) != 2):
print("usage: <movie>")
exit()
movie = inferReader(sys.argv[1], verbose=True)
print("Movie size is", movie.filmSize())
frame = movie.loadAFrame(0)
print(frame.shape, type(frame), frame.dtype)
#
# The MIT License
#
# Copyright (c) 2013 Zhuang Lab, Harvard University
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
datawriter.py
#!/usr/bin/env python
"""
Writes dax files or tiff files. This is mostly used
by the simulator.
We try to follow the convention where the first dimension (slow
axis) is the image height and the second dimension (fast axis)
is the image width, so image.shape = [height, width]
Hazen 1/18
"""
import numpy
import os
import tifffile
# Import here to avoid making astropy mandatory for everybody.
try:
from astropy.io import fits
except ImportError:
pass
def inferWriter(filename, width = None, height = None):
"""
Given a file name this will try to return the appropriate
writer based on the file extension.
"""
ext = os.path.splitext(filename)[1]
if (ext == ".dax"):
return DaxWriter(filename, width = width, height = height)
elif (ext == ".fits"):
return FITSWriter(filename, width = width, height = height)
elif (ext == ".tif") or (ext == ".tiff"):
return TiffWriter(filename, width = width, height = height)
else:
print(ext, "is not a recognized file type")
raise IOError("only .dax and .tif are supported (case sensitive..)")
def dummyDaxFile(name, x_size, y_size):
ddax = DaxWriter(name, width = x_size, height = y_size)
frame = numpy.ones((x_size, y_size))
ddax.addFrame(frame)
ddax.close()
def singleFrameDax(name, frame):
[fx, fy] = frame.shape
dax_file = DaxWriter(name, width = fy, height = fx)
dax_file.addFrame(frame)
dax_file.close()
class Writer(object):
def __init__(self, width = None, height = None, **kwds):
super(Writer, self).__init__(**kwds)
self.w = width
self.h = height
def frameToU16(self, frame):
frame = frame.copy()
frame[(frame < 0)] = 0
frame[(frame > 65535)] = 65535
return numpy.round(frame).astype(numpy.uint16)
class DaxWriter(Writer):
def __init__(self, name, **kwds):
super(DaxWriter, self).__init__(**kwds)
self.name = name
if len(os.path.dirname(name)) > 0:
self.root_name = os.path.dirname(name) + "/" + os.path.splitext(os.path.basename(name))[0]
else:
self.root_name = os.path.splitext(os.path.basename(name))[0]
self.fp = open(self.name, "wb")
self.l = 0
def addFrame(self, frame):
frame = self.frameToU16(frame)
if (self.w is None) or (self.h is None):
[self.h, self.w] = frame.shape
else:
assert(self.h == frame.shape[0])
assert(self.w == frame.shape[1])
frame.tofile(self.fp)
self.l += 1
def close(self):
self.fp.close()
self.w = int(self.w)
self.h = int(self.h)
inf_fp = open(self.root_name + ".inf", "w")
inf_fp.write("binning = 1 x 1\n")
inf_fp.write("data type = 16 bit integers (binary, little endian)\n")
inf_fp.write("frame dimensions = " + str(self.w) + " x " + str(self.h) + "\n")
inf_fp.write("number of frames = " + str(self.l) + "\n")
inf_fp.write("Lock Target = 0.0\n")
if True:
inf_fp.write("x_start = 1\n")
inf_fp.write("x_end = " + str(self.w) + "\n")
inf_fp.write("y_start = 1\n")
inf_fp.write("y_end = " + str(self.h) + "\n")
inf_fp.close()
class FITSWriter(Writer):
"""
This is mostly for testing. It will store all the movie data in
memory, then dump it when the file is closed.
"""
def __init__(self, filename, **kwds):
super(FITSWriter, self).__init__(**kwds)
self.filename = filename
self.frames = []
def addFrame(self, frame):
frame = self.frameToU16(frame)
if (self.w is None) or (self.h is None):
[self.h, self.w] = frame.shape
else:
assert(self.h == frame.shape[0])
assert(self.w == frame.shape[1])
self.frames.append(frame)
def close(self):
# Remove old file, if any.
if os.path.exists(self.filename):
os.remove(self.filename)
data = numpy.zeros((len(self.frames), self.h, self.w), dtype = numpy.uint16)
for i in range(len(self.frames)):
data[i,:,:] = self.frames[i]
hdu = fits.PrimaryHDU(data)
hdu.writeto(self.filename)
class TiffWriter(Writer):
def __init__(self, filename, **kwds):
super(TiffWriter, self).__init__(**kwds)
self.tif_fp = tifffile.TiffWriter(filename)
def addFrame(self, frame):
frame = self.frameToU16(frame)
# Enforce that all the frames are the same size.
if (self.h is None) or (self.w is None):
[self.h, self.w] = frame.shape
else:
assert(self.h == frame.shape[0])
assert(self.w == frame.shape[1])
self.tif_fp.save(frame)
def close(self):
self.tif_fp.close()
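For reference, the direction I am currently considering (untested sketch): hand each worker one whole file, and open the TifReader once per file instead of once per frame, reading the frame count from the file itself (TifReader already exposes number_frames) instead of asking for it interactively. convert_one is a hypothetical helper name.

import multiprocessing as mp
import os
import datareader
import datawriter

def convert_one(path):
    reader = datareader.TifReader(path)       # open once per file, not once per frame
    writer = datawriter.DaxWriter(path[:-4] + ".dax")
    for i in range(reader.number_frames):     # frame count comes from the file itself
        writer.addFrame(reader.loadAFrame(i))
    writer.close()
    reader.close()
    return path

if __name__ == '__main__':
    dataroot = input("Enter the folder location:")
    tifs = [os.path.join(subdir, name)
            for subdir, _, files in os.walk(dataroot)
            for name in sorted(files) if name.endswith(".tif")]
    with mp.Pool() as pool:                   # one worker per CPU core by default
        for done in pool.imap_unordered(convert_one, tifs):
            print("finished", done)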
Any suggestions for making my code faster are welcome.
I am trying to parse CSV data using pandas.read_csv(bytes, chunksize=n), where bytes is an ongoing stream of data that I want to receive from a database CLOB field, reading it in chunks.
reader = pandas.read_csv(io.BytesIO(b'1;qwer\n2;asdf\n3;zxcv'), sep=';', chunksize=2)
for row_chunk in reader:
print(row_chunk)
The code above works fine, but I want to use some updatable stream instead of the fixed io.BytesIO(b'...').
I tried to redefine the read method like this:
class BlobIO(io.BytesIO):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._chunk_size = 4
self._file_data_table = 'my_table'
self._job_id = 'job_id'
self._get_raw_sql = """
select dbms_lob.substr(body, {0}, {1})
from {2}
where job_id = '{3}'
"""
dsn_tns = cx_Oracle.makedsn('host', 'port', 'service_name')
self.ora_con = cx_Oracle.connect('ora_user', 'ora_pass', dsn_tns)
self.res = b''
self.ora_cur = self.ora_con.cursor()
self.chunker = self.get_chunk()
next(self.chunker)
def get_chunk(self):
returned = 0
sended = (yield)
self._chunk_size = sended or self._chunk_size
while True:
to_exec = self._get_raw_sql.format(
self._chunk_size,
returned + 1,
self._file_data_table,
self._job_id)
self.ora_cur.execute(to_exec)
self.res = self.ora_cur.fetchall()[0][0]
returned += self._chunk_size
yield self.res
sended = (yield self.res)
self._chunk_size = sended or self._chunk_size
if not self.res:
break
def read(self, nbytes=None):
if nbytes:
self.chunker.send(nbytes)
else:
self.chunker.send(self._chunk_size)
try:
to_return = next(self.chunker)
except StopIteration:
self.ora_con.close()
to_return = b''
return to_return
buffer = BlobIO()
reader = pandas.read_csv(buffer, encoding='cp1251', sep=';', chunksize=2)
but it looks like I'm doing something completely wrong, because pd.read_csv never finishes executing on the last line, and I don't understand what is happening there.
Maybe creating buffer = BytesIO(b'') and then writing new data to the buffer with buffer.write(new_chunk_from_db) would be a better approach, but I don't understand when exactly I should call such a write.
I believe I could create a temporary file with the contents of the CLOB and then pass it to read_csv, but I really want to skip that step and read the data directly from the database.
Please give me some directions.
cx_Oracle provides a native way to read LOBs. Overriding BytesIO.read with the cx_Oracle LOB read seems to do the job:
class BlobIO(BytesIO):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.res = b''
self.ora_con = db.get_conn()
self.ora_cur = self.ora_con.cursor()
self.ora_cur.execute("select lob from table")
self.res = self.ora_cur.fetchall()[0][0]
self.offset = 1
def read(self, size=None):
r = self.res.read(self.offset, size)
self.offset += size
# size + 1 should be here to perform non-overlapping reads,
# but it looks like the pandas C parser does some kind of overlapping,
# because while testing with size + 1 the parser occasionally missed some bytes
if not r:
self.ora_cur.close()
self.ora_con.close()
return r
blob_buffer = BlobIO()
reader = pandas.read_csv(
blob_buffer,
chunksize=JobContext.rchunk_size)
for row_chunk in reader:
print(row_chunk)
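A cleaner variation on the same idea (sketch, untested): instead of subclassing BytesIO, wrap the LOB in an io.RawIOBase and let io.BufferedReader handle the buffering. This assumes a BLOB column, since lob.read returns bytes for BLOBs but str for CLOBs, and cx_Oracle LOB offsets are 1-based. Here lob stands for the LOB object fetched from the cursor.

import io

class LobReader(io.RawIOBase):
    """Adapts a cx_Oracle LOB to a standard readable stream."""
    def __init__(self, lob):
        self.lob = lob
        self.offset = 1                      # LOB offsets start at 1
    def readable(self):
        return True
    def readinto(self, b):
        data = self.lob.read(self.offset, len(b))
        b[:len(data)] = data
        self.offset += len(data)
        return len(data)                     # 0 signals EOF

reader = pandas.read_csv(io.BufferedReader(LobReader(lob)),
                         encoding='cp1251', sep=';', chunksize=2)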
How can I create a "sub-stream" in Python? Let's say I have a file opened for reading. I want to return a file-like object that can be used to read only part of that file.
with open(filename, 'rb') as f:
start = 0x34
size = 0x20
return Substream(f, start, size) # <-- How do I do this?
Seeking to 0 on this object should go to start on the underlying f, and reading past size should trigger EOF behavior. I hope this makes sense. How do I accomplish this?
A quick subclass of io.RawIOBase seems to do the trick, at least for my use case. I understand this is not a full implementation of the io.RawIOBase interface, but it gets the job done.
class Substream(io.RawIOBase):
"""Represents a view of a subset of a file like object"""
def __init__(self, file: io.RawIOBase, start, size):
self.file = file
self.start = start
self.size = size
self.p = 0
def seek(self, offset, origin=0):
    if origin == 0:
        self.p = offset
    elif origin == 1:
        self.p += offset
    # TODO: origin == 2 (relative to end)
    else:
        raise ValueError("Unexpected origin: {}".format(origin))
    return self.p  # io convention: seek returns the new absolute position

def read(self, n):
    prev = self.file.tell()
    self.file.seek(self.start + self.p)
    # Clamp the request to the window so reading past size behaves as EOF
    # (the original expression could go negative once p exceeded size).
    n = max(0, min(n, self.size - self.p))
    data = self.file.read(n)
    self.p += len(data)
    self.file.seek(prev)
    return data
Use it like so:
with open(filename, 'rb') as f:
    print(Substream(f, 10, 100).read(10))
I wonder if this can be done at the file-descriptor level instead somehow?
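On POSIX systems os.pread reads at an absolute offset without moving the file position, so the same windowing can be done at descriptor level (sketch, Unix-only; sub_read is a hypothetical helper):

import os

with open(filename, 'rb') as f:
    start, size = 0x34, 0x20
    fd = f.fileno()

    def sub_read(pos, n):
        n = max(0, min(n, size - pos))   # clamp the request to the window
        return os.pread(fd, n, start + pos)

    print(sub_read(0, 10))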
I'm trying to create an instance of a class to test that the module I created works properly.
Here is the module (fileWriter.py); the error appears to be in the __init__ method:
class File(object):
'''process the data from a file'''
#fields
#fileName = name of file
#textData = data read from/written to file
#Constructor
def __init__(self, fileName = 'saved_data.txt', textData = ''):
#Attributes
self.fileName = fileName
self.textData = textData
#Properties
@property  # getter
def fileName(self):
return self.__fileName
@fileName.setter  # setter
def fileName(self, value):
self.__fileName = value
@property  # getter
def textData(self, value):
self.__textData = value
#Methods
def saveData(self):
'''appends data to file'''
try:
fileObj = open(self.fileName, 'a')
fileObj.write(self.textData)
fileObj.close()
except Exception as e:
print('You have the following error: ' + str(e))
return('Data successfully saved to file.')
def toString(self):
'''returns text data explicitly'''
return self.fileName + ':' + self.textData
def __str__(self):
'''returns text data implicitly'''
return self.toString()
To test the class, I wrote the following test harness:
import fileWriter
#test harness
processorObj = fileWriter.File()
processorObj.fileName = 'test.txt'
processorObj.textData = 'testing, 1, 2, 3...'
strMessage = processorObj.saveData()
print(strMessage)
if __name__ == '__main__':
raise Exception('Don\'t run a module by itself!')
When I run the test file, I get the error:
File "testFileWriter.py", line 4, in
processorObj = fileWriter.File()
File "/Users/Haruka/Documents/python_class/Employees/fileWriter.py", line 19, in init
self.textData = textData
AttributeError: can't set attribute
I can't figure out what's wrong with self.textData = textData. Can anybody help?
I'm not sure if you formatted your code after pasting, but there are a few typos:
def __init__(self, file_name = 'saved_data.txt', text_data = ''):
#Attributes
self.__file_name = file_name
self.__text_data = text_data
and
@property  # getter
def text_data(self):
return self.__text_data
Later, in the test, you're also trying to set the text_data property, which has no setter in your example. You can add one to your class:
@text_data.setter
def text_data(self, value):
self.__text_data = value
The more Pythonic way to do this kind of file I/O is with a context manager:
def save_data(self):
'''appends data to file'''
with open(self.file_name, 'a') as f:
f.write(self.text_data)
return('Data successfully saved to file.')
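Putting the pieces together, here is a minimal sketch of the class with matching getter/setter pairs (snake_case as above; untested against the rest of your harness):

class File(object):
    '''Process the data from a file.'''

    def __init__(self, file_name='saved_data.txt', text_data=''):
        # Assign through the properties so the setters run.
        self.file_name = file_name
        self.text_data = text_data

    @property
    def file_name(self):
        return self.__file_name

    @file_name.setter
    def file_name(self, value):
        self.__file_name = value

    @property
    def text_data(self):
        return self.__text_data

    @text_data.setter
    def text_data(self, value):
        self.__text_data = value

    def save_data(self):
        '''Appends data to the file.'''
        with open(self.file_name, 'a') as f:
            f.write(self.text_data)
        return 'Data successfully saved to file.'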
I was looking for a way to access configuration file entries through instance-bound and class-bound variables, respectively. For that I created the following module:
from ..lib.files import ConfigFile
from abc import abstractmethod
__all__ = ['ClassConfig',
'InstanceConfig',
'Configurable']
class ConfigEntry():
"""
A Config entry
"""
__value = None
def __init__(self, value=None):
"""
Initializes the entry with a value.
"""
self.__value = value
def __set__(self, __, value):
self.__value = value
@property
def value(self):
"""
Returns the value
"""
return self.__value
class ClassConfig(ConfigEntry):
"""
A class config entry
"""
def __get__(self, obj, cls):
"""
Returns its value, when called by a class, else itself
"""
if obj == None:
return self.value
else:
return self
class InstanceConfig(ConfigEntry):
"""
An instance config entry
"""
def __get__(self, obj, cls):
"""
Returns its value, when called by an instance, else itself
"""
if obj != None:
return self.value
else:
return self
class Configurable():
"""
Configuration file binding
"""
__SUFFIX = '.conf'
__TYPES = {int: 'int',
float: 'float',
str: 'str',
bool: 'bool'}
__file_ = None
__lbi = '['
__lei = ']'
__ls = ','
__ts = '←'
__loaded = False
def __init__(self, path, suffix=None):
"""
Initialize the config file
"""
# Initializes instance methods
self.__setinstattr()
suffix = suffix if suffix != None else self.__SUFFIX
self.__file_ = ConfigFile(path + suffix)
self.load()
def __setinstattr(self):
"""
Set instance attributes
"""
self.__fields = self.__inst___fields
self._file = self.__inst____file
self._force_load = self.__inst__force_load
self.load = self.__inst_load
self.store = self.__inst_store
@staticmethod
def __filter(attrs):
return [a for a in attrs
if a == a.upper()
and not a.startswith('_')]
@staticmethod
def __encode(val):
"""
Encode a value
"""
t = type(val)
if t == list:
return Configurable.__lbi + \
Configurable.__ls.join([Configurable.__encode(i)
for i in val]) \
+ Configurable.__lei
elif val == None:
return None
else:
return Configurable.__ts.join([str(val),
Configurable.__TYPES.get(t, '?')])
@staticmethod
def __decode(val):
"""
Decode a value
"""
def det_type(token):
"""
Determine the type of a token
"""
t = token.strip().split(Configurable.__ts)
if len(t) == 2:
raw_val = t[0]
tpe = t[1]
if tpe == Configurable.__TYPES[str]:
return str(raw_val)
elif tpe == Configurable.__TYPES[int]:
return int(raw_val)
elif tpe == Configurable.__TYPES[float]:
return float(raw_val)
elif tpe == Configurable.__TYPES[bool]:
return True if raw_val.lower() in ['1',
'true',
't'] else False
else:
try:
return int(raw_val)
except:
try:
return float(raw_val)
except:
return raw_val
return token
def str2list(s):
"""
Try to parse a list from a string
"""
def getlist(val):
"""
Get a list from a reversed character list of a string
"""
result = []
token = ''
while val:
c = val.pop()
if c == Configurable.__lei:
token = Configurable.__lei
result = [getlist(val)] + result
elif c == Configurable.__lbi:
if (not Configurable.__lbi in token) and (not Configurable.__lei in token):
result = [det_type(token)] + result
token = c
return result
elif c == Configurable.__ls:
if (not Configurable.__lbi in token) and (not Configurable.__lei in token):
result = [det_type(token)] + result
token = ''
else:
token = c + token
if token:
result = [det_type(token)] + result
return result
l = []
for char in s:
l.append(char)
l = getlist(l)
if len(l) == 0:
return l
return l.pop()
return str2list(val)
@classmethod
def __fields(cls):
"""
Get fields for an instance
"""
result = {}
class Subclass(cls):
def __init__(self):
pass
instance = Subclass()
attrs = Configurable.__filter(dir(instance))
for a in attrs:
aval = getattr(instance, a)
if isinstance(aval, ClassConfig):
value = getattr(cls, a)
result[a] = value
return result
def __inst___fields(self):
"""
Get fields of an instance
"""
result = {}
cls = self.__class__
attrs = Configurable.__filter(dir(cls))
for a in attrs:
val = getattr(cls, a)
if isinstance(val, InstanceConfig):
value = getattr(self, a)
result[a] = value
return result
@classmethod
@abstractmethod
def _file(cls):
"""
Returns the file
XXX: Implement by calling
super()._file(static_path)
"""
pass
@classmethod
def _file_path(cls, path, suffix=None):
"""
Returns the file relative to a path
"""
suffix = suffix if suffix != None else cls.__SUFFIX
f = ConfigFile(path + suffix)
f.create()
return f
def __inst____file(self):
"""
Returns the file
"""
return self.__file_
@classmethod
def load(cls):
"""
Loads the config file content, if not yet done into the class
"""
if not cls.__loaded:
return cls._force_load()
return True
def __inst_load(self):
"""
Loads the config file content, if not yet done into the instance
"""
if not self.__loaded:
return self._force_load()
return True
@classmethod
def _force_load(cls):
"""
Loads the config file's content to the class
"""
if cls._file().exists:
data = cls._file().dict()
for field in Configurable.__filter(data):
setattr(cls, field,
Configurable.__decode(data[field]))
cls.__loaded = True
return True
return False
def __inst__force_load(self):
"""
Loads the config file's content to the instance
"""
if self._file().exists:
data = self._file().dict()
for field in Configurable.__filter(data):
setattr(self, field,
Configurable.__decode(data[field]))
self.__loaded = True
return True
return False
@classmethod
def store(cls):
"""
Writes class config to file
"""
result = True
content = cls.__fields()
if not cls._file().exists:
cls._file().create()
for new_field in content:
set_result = cls._file().set(new_field,
Configurable.__encode(content[new_field]))
result = False if not set_result else result
return result
def __inst_store(self):
"""
Writes instance config to file
"""
result = True
content = self.__fields()
if not self._file().exists:
self._file().create()
for new_field in content:
set_result = self._file().set(new_field,
Configurable.__encode(content[new_field]))
result = False if not set_result else result
return result
The Configurable class is now inherited by several subclasses, which may have global configuration (the class-bound entries) and user-dependent configuration (the instance-bound entries), like this:
class Spam(Configurable):
EGGS = InstanceConfig('foo')
GLOBAL_EGGS = ClassConfig('bar')
Now I face the problem that when load() is performed on many instances in sequence, each InstanceConfig entry carries over the value from the previous instance:
class RETARD(Daemon):
"""
Real Estate Translation, Archiving and Redirection Daemon
"""
__source = None # The source interface instance
__targets = [] # The target interface instances
__locked = False # System locked state flag
__start_time = None # Start time of loop
__sleeping = 0 # Remaining time to sleep
#===========================================================================
# Default customer config
#===========================================================================
SOURCE = InstanceConfig('') # Name of the source interface
TARGETS = InstanceConfig([]) # Names of the target interfaces
INTERVAL = InstanceConfig(120.0) # Loop interval
DEBUG = InstanceConfig(False) # Print the import config?
def __init__(self, customer):
"""
Constructor
"""
print('SOURCE1: ' + str(self.SOURCE))
super().__init__(customer)
print('SOURCE2: ' + str(self.SOURCE))
self.__load()
print('SOURCE3: ' + str(self.SOURCE))
# Disable logger on high level to prevent PyXB
# from printing messages to the terminal
logging.disable(9999)
<SNIP>
When loaded like this (daemons contains four different instances):
daemons = []
for customer in customers:
daemons.append(RETARD(customer))
It will produce this output:
SOURCE1:
SOURCE2: IS24
SOURCE3: IS24
SOURCE1: IS24
SOURCE2: is24
SOURCE3: is24
SOURCE1: is24
SOURCE2: infobase
SOURCE3: infobase
SOURCE1: infobase
SOURCE2: infobase
SOURCE3: infobase
I do not understand this behaviour, since I never changed the class's attributes anywhere, only those of the instances.
How can I keep the instances from shipping their changed attributes to the next instance?
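A stripped-down version of what I am seeing (sketch): because the entry object lives on the class, every instance reads and writes the same ConfigEntry, so a value set through one instance shows up in the next:

class Entry:
    def __init__(self, default=None):
        self.value = default
    def __get__(self, obj, cls):
        return self if obj is None else self.value
    def __set__(self, obj, value):
        self.value = value          # mutates the one class-bound entry

class Daemon:
    SOURCE = Entry('')

a, b = Daemon(), Daemon()
a.SOURCE = 'IS24'
print(b.SOURCE)                     # prints 'IS24', not ''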
The problem here, which I had not realized, is that the InstanceConfig and ClassConfig entries are bound to the class at the time the module is loaded. When I assigned new content to the respective attributes at runtime from within an instance, it of course just changed the content of the still class-bound *Config instance, which every instance shares.
I worked around this issue by falling back to default values whenever they are not contained in the respective config file, like so:
from ..lib.files import ConfigFile
from abc import abstractmethod
__all__ = ['ClassConfig',
'InstanceConfig',
'Configurable']
class ConfigEntry():
"""
A Config entry
"""
__value = None
__default = None
def __init__(self, default=None):
"""
Initializes the entry with its default value.
"""
self.__default = default
self.__value = default
def __set__(self, __, value):
"""
Sets the value
"""
self.__value = value
@property
def value(self):
"""
Returns the value
"""
return self.__value
@property
def default(self):
"""
Access default value
"""
return self.__default
class ClassConfig(ConfigEntry):
"""
A class config entry
"""
def __get__(self, obj, cls):
"""
Returns its value, when called by a class, else itself
"""
if obj == None:
return self.value
else:
return self
class InstanceConfig(ConfigEntry):
"""
An instance config entry
"""
def __get__(self, obj, cls):
"""
Returns its value, when called by an instance, else itself
"""
if obj != None:
return self.value
else:
return self
class Configurable():
"""
Configuration file binding
"""
__SUFFIX = '.conf'
__TYPES = {int: 'int',
float: 'float',
str: 'str',
bool: 'bool'}
__file_ = None
__lbi = '[' # List begin identifier
__lei = ']' # List end identifier
__ls = ',' # List separator
__ts = '←' # Type separator
__loaded = False
def __init__(self, path, suffix=None):
"""
Initialize the config file
"""
# Initializes instance methods
self.__setinstattr()
suffix = suffix if suffix != None else self.__SUFFIX
self.__file_ = ConfigFile(path + suffix)
self.load()
def __setinstattr(self):
"""
Set instance attributes
"""
self.__fields = self.__inst___fields
self._file = self.__inst____file
self._force_load = self.__inst__force_load
self.load = self.__inst_load
self.store = self.__inst_store
@staticmethod
def __filter(attrs):
return [a for a in attrs
if a == a.upper()
and not a.startswith('_')]
@staticmethod
def __encode(val):
"""
Encode a value
"""
t = type(val)
if t == list:
return Configurable.__lbi + \
Configurable.__ls.join([Configurable.__encode(i)
for i in val]) \
+ Configurable.__lei
elif val == None:
return None
else:
return Configurable.__ts.join([str(val),
Configurable.__TYPES.get(t, '?')])
@staticmethod
def __decode(val):
"""
Decode a value
"""
def det_type(token):
"""
Determine the type of a token
"""
t = token.strip().split(Configurable.__ts)
if len(t) == 2:
raw_val = t[0]
tpe = t[1]
if tpe == Configurable.__TYPES[str]:
return str(raw_val)
elif tpe == Configurable.__TYPES[int]:
return int(raw_val)
elif tpe == Configurable.__TYPES[float]:
return float(raw_val)
elif tpe == Configurable.__TYPES[bool]:
return True if raw_val.lower() in ['1',
'true',
't'] else False
else:
try:
return int(raw_val)
except:
try:
return float(raw_val)
except:
return raw_val
return token
def str2list(s):
"""
Try to parse a list from a string
"""
def getlist(val):
"""
Get a list from a reversed character list of a string
"""
result = []
token = ''
while val:
c = val.pop()
if c == Configurable.__lei:
token = Configurable.__lei
result = [getlist(val)] + result
elif c == Configurable.__lbi:
if (not Configurable.__lbi in token) and (not Configurable.__lei in token):
result = [det_type(token)] + result
token = c
return result
elif c == Configurable.__ls:
if (not Configurable.__lbi in token) and (not Configurable.__lei in token):
result = [det_type(token)] + result
token = ''
else:
token = c + token
if token:
result = [det_type(token)] + result
return result
l = []
for char in s:
l.append(char)
l = getlist(l)
if len(l) == 0:
return l
return l.pop()
return str2list(val)
@classmethod
def __fields(cls):
"""
Get fields for an instance
"""
result = {}
class Subclass(cls):
def __init__(self):
pass
instance = Subclass()
attrs = Configurable.__filter(dir(instance))
for a in attrs:
aval = getattr(instance, a)
if isinstance(aval, ClassConfig):
result[a] = aval
return result
def __inst___fields(self):
"""
Get fields of an instance
"""
result = {}
cls = self.__class__
attrs = Configurable.__filter(dir(cls))
for a in attrs:
val = getattr(cls, a)
if isinstance(val, InstanceConfig):
result[a] = val
return result
@classmethod
@abstractmethod
def _file(cls):
"""
Returns the file
XXX: Implement by calling
super()._file(static_path)
"""
pass
@classmethod
def _file_path(cls, path, suffix=None):
"""
Returns the file relative to a path
"""
suffix = suffix if suffix != None else cls.__SUFFIX
f = ConfigFile(path + suffix)
f.create()
return f
def __inst____file(self):
"""
Returns the file
"""
return self.__file_
@classmethod
def load(cls):
"""
Loads the config file content, if not yet done into the class
"""
if not cls.__loaded:
return cls._force_load()
return True
def __inst_load(self):
"""
Loads the config file content, if not yet done into the instance
"""
if not self.__loaded:
return self._force_load()
return True
@classmethod
def _force_load(cls):
"""
Loads the config file's content to the class
"""
if cls._file().exists:
data = cls._file().dict()
else:
data = {}
fields = cls.__fields()
for field in fields:
val = data.get(field)
if val == None:
val = fields[field].default
else:
val = Configurable.__decode(val)
setattr(cls, field, val)
cls.__loaded = True
return True
def __inst__force_load(self):
"""
Loads the config file's content to the instance
"""
if self._file().exists:
data = self._file().dict()
else:
data = {}
fields = self.__fields()
for field in fields:
val = data.get(field)
if val == None:
val = fields[field].default
else:
val = Configurable.__decode(val)
setattr(self, field, val)
self.__loaded = True
return True
@classmethod
def store(cls):
"""
Writes class config to file
"""
result = True
fields = cls.__fields()
if not cls._file().exists:
cls._file().create()
for field in fields:
val = fields[field].value
set_result = cls._file().set(field,
Configurable.__encode(val))
result = False if not set_result else result
return result
def __inst_store(self):
"""
Writes instance config to file
"""
result = True
fields = self.__fields()
if not self._file().exists:
self._file().create()
for field in fields:
val = fields[field].value
set_result = self._file().set(field,
Configurable.__encode(val))
result = False if not set_result else result
return result
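An alternative to falling back to defaults on every load is to keep per-instance values in the instance's own __dict__, so the class-bound entry only ever stores the default. A sketch using __set_name__ (Python 3.6+):

class InstanceConfig:
    def __init__(self, default=None):
        self.default = default
    def __set_name__(self, owner, name):
        self.name = name
    def __get__(self, obj, cls):
        if obj is None:
            return self.default
        return obj.__dict__.get(self.name, self.default)
    def __set__(self, obj, value):
        obj.__dict__[self.name] = value   # state lives on the instance, not the entry

With this, each instance keeps its own SOURCE, TARGETS, and so on, and nothing leaks from one instance to the next.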