Preallocating 2D arrays in a Numba class

Preallocating 2D arrays in a Numba class - python-3.x

I am working on a class speedup with Numba. When you want to use Numba inside classes you have to define/preallocate your class variables. In this respect my issue is declaring a 2D array before the jitclass. The following MWE directly shows my issue:
import numpy as np
from numba import int32, float32
from numba.experimental import jitclass # import the decorator
spec = [
('value', int32), # a simple scalar field
('array', float32[:]), # an array field
('foo_matrix',int32[:,:]),
]
#jitclass(spec)
class Bag(object):
def __init__(self, value):
self.value = value
self.array = np.zeros(value)
self.foo_matrix = np.zeros((value, value))
#property
def size(self):
return self.array.size
def increment(self, val):
for i in range(self.size):
self.array[i] = val
return self.array
my_class = Bag(3)
When I execute this code I get the following error:
Traceback (most recent call last):
File "/home/acer/codici/tech/numba_prototype.py", line 38, in <module>
my_class = Bag(3)
File "/usr/lib/python3/dist-packages/numba/experimental/jitclass/base.py", line 122, in __call__
return cls._ctor(*bind.args[1:], **bind.kwargs)
File "/usr/lib/python3/dist-packages/numba/core/dispatcher.py", line 414, in _compile_for_args
error_rewrite(e, 'typing')
File "/usr/lib/python3/dist-packages/numba/core/dispatcher.py", line 357, in error_rewrite
raise e.with_traceback(None)
numba.core.errors.TypingError: Failed in nopython mode pipeline (step: nopython frontend)
Internal error at <numba.core.typeinfer.CallConstraint object at 0x7fc6d4945c40>.
Failed in nopython mode pipeline (step: nopython mode backend)
Can only insert float* at [4] in {i8*, i8*, i64, i64, float*, [1 x i64], [1 x i64]}: got double*
File "numba_prototype.py", line 19:
def __init__(self, value):
<source elided>
self.value = value
self.array = np.zeros(value)
^
During: lowering "(self).array = $14call_method.5" at /home/acer/codici/tech/numba_prototype.py (19)
During: resolving callee type: jitclass.Bag#7fc6d5a2afa0<value:int32,array:array(float32, 1d, A),foo_matrix:array(int32, 2d, A)>
During: typing of call at <string> (3)
Enable logging at debug level for details.
File "<string>", line 3:
<source missing, REPL/exec in use?>
which is related to the declaration of the matrix foo_matrix.
Concerning the types definition I followed this.
Of course if I comment out the lines about array declaration and population the code works fine.
How should I modify/do the declaration of the matrix with respect to a jitclass object?
EDIT: inside the class, I have changed the declaration of foo_matrix from np.zeros([value, value]) to np.zeros((value, value)) since defining a numpy array using a list instead of a tuple may be a source of error for numba functions. However the problem persists even with this modification.

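The error occurs because the np.zeros calls for the array and the matrix do not specify a dtype, so NumPy defaults to float64 (double), which does not match the float32 and int32 types declared in spec. Pass the dtype explicitly, e.g.: self.array = np.zeros(value, dtype=np.float32).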
The working code is the following:
import numpy as np
from numba import int32, float32
from numba.experimental import jitclass
spec = [
('value', int32),
('array', float32[:]),
('foo_matrix',int32[:,:]),
]
@jitclass(spec)
class Bag(object):
    """Minimal jitclass holding a scalar, a 1D array and a 2D matrix.

    Every array is created with an explicit dtype that matches ``spec``
    (``float32[:]`` for ``array`` and ``int32[:, :]`` for ``foo_matrix``).
    """

    def __init__(self, value):
        """Store ``value`` and allocate zero-filled arrays sized by it."""
        self.value = value
        # np.zeros defaults to float64; without an explicit dtype the
        # assignment fails to type-check against the fields declared in spec.
        self.array = np.zeros(value, dtype=np.float32)
        self.foo_matrix = np.zeros((value, value), dtype=np.int32)

    @property
    def size(self):
        """Number of elements in ``array``."""
        return self.array.size

    def increment(self, val):
        """Set every element of ``array`` to ``val`` and return the array."""
        for i in range(self.size):
            self.array[i] = val
        return self.array
my_class = Bag(3)

Related

How does a pytorch function (such as RoIPool) work?

For example, I'm trying to view the implementation of RoI Pooling in pytorch.
Here is a code fragment showing how to use RoIPool in pytorch
import torch
from torchvision.ops.roi_pool import RoIPool
device = torch.device('cuda')
# create feature layer, proposals and targets
num_proposals = 10
feature_map = torch.randn(1, 64, 32, 32)
proposals = torch.zeros((num_proposals, 4))
proposals[:, 0] = torch.randint(0, 16, (num_proposals,))
proposals[:, 1] = torch.randint(0, 16, (num_proposals,))
proposals[:, 2] = torch.randint(16, 32, (num_proposals,))
proposals[:, 3] = torch.randint(16, 32, (num_proposals,))
roi_pool_obj = RoIPool(3, 2**-1)
roi_pool = roi_pool_obj(feature_map, [proposals])
I'm using PyCharm, so when I follow RoIPool from the second line, it opens a file located at ~/anaconda3/envs/CV/lib/python3.8/site-packages/torchvision/ops/roi_pool.py, which is exactly the same as the code in the documentation.
I pasted the code below without documentations.
from typing import List, Union
import torch
from torch import nn, Tensor
from torch.jit.annotations import BroadcastingList2
from torch.nn.modules.utils import _pair
from torchvision.extension import _assert_has_ops
from ..utils import _log_api_usage_once
from ._utils import convert_boxes_to_roi_format, check_roi_boxes_shape
def roi_pool(
input: Tensor,
boxes: Union[Tensor, List[Tensor]],
output_size: BroadcastingList2[int],
spatial_scale: float = 1.0,
) -> Tensor:
if not torch.jit.is_scripting() and not torch.jit.is_tracing():
_log_api_usage_once(roi_pool)
_assert_has_ops()
check_roi_boxes_shape(boxes)
rois = boxes
output_size = _pair(output_size)
if not isinstance(rois, torch.Tensor):
rois = convert_boxes_to_roi_format(rois)
output, _ = torch.ops.torchvision.roi_pool(input, rois, spatial_scale, output_size[0], output_size[1])
return output
class RoIPool(nn.Module):
def __init__(self, output_size: BroadcastingList2[int], spatial_scale: float):
super().__init__()
_log_api_usage_once(self)
self.output_size = output_size
self.spatial_scale = spatial_scale
def forward(self, input: Tensor, rois: Tensor) -> Tensor:
return roi_pool(input, rois, self.output_size, self.spatial_scale)
def __repr__(self) -> str:
s = f"{self.__class__.__name__}(output_size={self.output_size}, spatial_scale={self.spatial_scale})"
return s
So, in the code example:
When running roi_pool_obj = RoIPool(3, 2**-1) it will create an instance of RoIPool by calling its __init__ method, which only initialized two instance variables;
When running roi_pool = roi_pool_obj(feature_map, [proposals]), it must have called the forward() method (but I don't know how) which then called the roi_pool() function above;
When running the roi_pool() function, it did some checking first and then computed output with the line output, _ = torch.ops.torchvision.roi_pool(input, rois, spatial_scale, output_size[0], output_size[1]).
But this doesn't show details of how roi_pool is implemented and pycharm showed Cannot find declaration to go to when I tried to follow torch.ops.torchvision.roi_pool.
To summarize, I have two questions:
How is forward() called when running roi_pool = roi_pool_obj(feature_map, [proposals])?
How can I view the source code of torch.ops.torchvision.roi_pool, or where is the file containing its implementation located?
Last but not least, I've just started reading source code which is pretty difficult for me. I'd appreciate it if you can also provide some advice or tutorials.

RoIPool is a subclass of torch.nn.Module. Source code:
https://github.com/pytorch/vision/blob/07ae61bf9c21ddd1d5f65d326aa9636849b383ca/torchvision/ops/roi_pool.py#L56
nn.Module defines __call__ method which in turn calls forward method. Source code:
https://github.com/pytorch/pytorch/blob/b2311192e6c4745aac3fdd774ac9d56a36b396d4/torch/nn/modules/module.py#L1234
When you execute the statement roi_pool = roi_pool_obj(feature_map, [proposals]), the __call__ method invokes the forward() method of RoIPool. Source code:
https://github.com/pytorch/vision/blob/07ae61bf9c21ddd1d5f65d326aa9636849b383ca/torchvision/ops/roi_pool.py#L67
RoIPool.forward calls the Python function roi_pool, which in turn calls torch.ops.torchvision.roi_pool.
https://github.com/pytorch/vision/blob/07ae61bf9c21ddd1d5f65d326aa9636849b383ca/torchvision/ops/roi_pool.py#L52
ops is an object that loads native libraries implemented in C++:
https://github.com/pytorch/pytorch/blob/b2311192e6c4745aac3fdd774ac9d56a36b396d4/torch/_ops.py#L537
so when you call torch.ops.torchvision it will use torchvision library.
Here the roi_pool function is registered:
https://github.com/pytorch/vision/blob/7947fc8fb38b1d3a2aca03f22a2e6a3caa63f2a0/torchvision/csrc/ops/roi_pool.cpp#L53
Here you can find the actual implementation of roi_pool:
CPU:
https://github.com/pytorch/vision/blob/7947fc8fb38b1d3a2aca03f22a2e6a3caa63f2a0/torchvision/csrc/ops/cpu/roi_pool_kernel.cpp
GPU:
https://github.com/pytorch/vision/blob/7947fc8fb38b1d3a2aca03f22a2e6a3caa63f2a0/torchvision/csrc/ops/cuda/roi_pool_kernel.cu

using ImageFolder with albumentations in pytorch

I have a situation where I need to use ImageFolder with the albumentations lib to make the augmentations in pytorch - custom dataloader is not an option.
I am stumped, as I am not able to get ImageFolder to work with albumentations. I have tried something along these lines:
class Transforms:
def __init__(self, transforms: A.Compose):
self.transforms = transforms
def __call__(self, img, *args, **kwargs):
return self.transforms(image=np.array(img))['image']
and then:
trainset = datasets.ImageFolder(traindir,transform=Transforms(transforms=A.Resize(32 , 32)))
where traindir is some dir with images. I however get thrown a weird error:
RuntimeError: Given groups=1, weight of size [16, 3, 3, 3], expected input[1024, 32, 32, 3] to have 3 channels, but got 32 channels instead
and I can't seem to find a reproducible example that makes a simple augmentation pipeline work with ImageFolder.
UPDATE
On the recommendation of @Shai, I have done this now:
class Transforms:
def __init__(self):
self.transforms = A.Compose([A.Resize(224,224),ToTensorV2()])
def __call__(self, img, *args, **kwargs):
return self.transforms(image=np.array(img))['image']
trainset = datasets.ImageFolder(traindir,transform=Transforms())
but I get thrown:
self.padding, self.dilation, self.groups)
RuntimeError: Input type (torch.cuda.ByteTensor) and weight type (torch.cuda.FloatTensor) should be the same

You need to use ToTensorV2 transformation as the final one:
# ToTensorV2 must be the last transform in the pipeline; it is applied
# after the resize, per the advice given with this snippet.
trainset = datasets.ImageFolder(
    traindir,
    transform=Transforms(transforms=A.Compose([A.Resize(32, 32), ToTensorV2()])),
)

After looking into the ImageFolder implementation in PyTorch [link] and some proposed work on Kaggle [link], I propose the following solution (which I have tested successfully):
import numpy as np
from typing import Any, Callable, Optional, Tuple
from torchvision.datasets.folder import DatasetFolder, default_loader, IMG_EXTENSIONS
class CustomImageFolder(DatasetFolder):
    """Image-folder dataset that accepts two transform calling conventions.

    The transform is first called positionally, ``transform(sample)``. If
    that raises, it is retried as ``transform(image=np.array(sample))["image"]``.
    """

    def __init__(
        self,
        root: str,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        loader: Callable[[str], Any] = default_loader,
        is_valid_file: Optional[Callable[[str], bool]] = None,
    ):
        """Forward the arguments to ``DatasetFolder``.

        ``IMG_EXTENSIONS`` is passed as the extension filter only when no
        ``is_valid_file`` callable is given; otherwise ``None`` is passed.
        """
        super().__init__(
            root,
            loader,
            IMG_EXTENSIONS if is_valid_file is None else None,
            transform=transform,
            target_transform=target_transform,
            is_valid_file=is_valid_file,
        )
        # Alias of ``samples`` under the name ``imgs``.
        self.imgs = self.samples

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        """
        Args:
            index (int): Index

        Returns:
            tuple: (sample, target) where target is class_index of the target class.
        """
        path, target = self.samples[index]
        sample = self.loader(path)
        if self.transform is not None:
            try:
                sample = self.transform(sample)
            except Exception:
                # Deliberate best-effort fallback: retry with the
                # keyword-argument convention on a NumPy array. Any error
                # from the first attempt is replaced by the outcome of this
                # second call.
                sample = self.transform(image=np.array(sample))["image"]
        if self.target_transform is not None:
            target = self.target_transform(target)
        return sample, target

    def __len__(self) -> int:
        """Return the number of samples."""
        return len(self.samples)
Now you can run the code as follows:
trainset = CustomImageFolder(traindir,transform=Transforms(transforms=A.Resize(32 , 32)))

Create wrapper to return particular values from an already existing function

Hi I have a function called
tfnet.return_predict()
which, when run on an image, outputs a set of values such as the class of the object, the confidence, and the coordinates of the bounding box. What I want to do is make a wrapper which returns only the confidence value.
So my code is as follows. I am using Darkflow to perform Prediction of classes on images.
#Initialise Libraries
# Load the YOLO Neural Network
tfnet = TFNet(options) #call the YOLO network
image = cv2.imread('C:/darkflow/Car.jpg', cv2.IMREAD_COLOR) #Load image
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
print(tfnet.return_predict(image)) #function to run predictions
The output of print is
[{'label': 'Car', 'confidence': 0.32647023, 'topleft': {'x': 98, 'y': 249}, 'bottomright': {'x': 311, 'y': 455}}]
So from this i want to create a wrapper which just returns the 'confidence' value.
I know how to create wrappers and define functions for them, but how do I do it for already defined functions?
Any suggestion would be of great help to me.
EDIT: I tried:
def log_calls(tfnet.return_predict):
def wrapper(*args, **kwargs):
#name = func.__name__
print('before {name} was called')
r = func(*args, **kwargs)
print('after {name} was called')
return r
return wrapper
But the 'tfnet.return_predict' is returning error
SyntaxError: invalid syntax

Do you need to redefine the tfnet.return_predict function to only return confidence? Or is having a separate function okay? If it's the latter, then it seems like you can just do this:
def conf_only(*args, **kwargs):
    """Return only the confidence of the first prediction.

    All arguments are forwarded unchanged to ``tfnet.return_predict``. The
    result is indexed with ``[0]["confidence"]``, so an empty prediction
    list raises ``IndexError``.
    """
    out = tfnet.return_predict(*args, **kwargs)
    return out[0]["confidence"]
and calling conf_only returns just that part of the dict.
If you need to have tfnet.return_predict redefined and want that to only return confidence, then you can make a decorator:
def conf_deco(func):
    """Decorate ``func`` so that it returns only the first confidence value.

    ``func`` must return a sequence of dicts; the wrapper returns
    ``func(*args, **kwargs)[0]["confidence"]``.
    """
    import functools

    # functools.wraps keeps the wrapped function's name and docstring.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)[0]["confidence"]

    return wrapper
For example, pretending dummy_function is already predefined
def dummy_function(*args, **kwargs):
    """Print the received arguments and return a fixed one-element prediction list."""
    print(args, kwargs)
    return [{"confidence": .32, "other": "asdf"}]
In [4]: dummy_function("something", kw='else')
('something',) {'kw': 'else'}
Out[4]: [{'confidence': 0.32, 'other': 'asdf'}]
Now redefine it with:
In [6]: dummy_function = conf_deco(dummy_function)
and it'll only return the confidence value
In [7]: dummy_function("something", kw='else')
('something',) {'kw': 'else'}
Out[7]: 0.32

Sklearn method in class

I would like to create a class that uses sklearn transformation methods. I found this article and I am using it as an example.
from sklearn import preprocessing
from sklearn.base import TransformerMixin
def minmax(dataframe):
minmax_transformer = preprocessing.MinMaxScaler()
return minmax_tranformer
class FunctionFeaturizer(TransformerMixin):
def __init__(self, scaler):
self.scaler = scaler
def fit(self, X, y=None):
return self
def transform(self, X):
fv = self.scaler(X)
return fv
if __name__=="__main__":
scaling = FunctionFeaturizer(minmax)
df = pd.DataFrame({'feature': np.arange(10)})
df_scaled = scaling.fit(df).transform(df)
print(df_scaled)
The output is StandardScaler(copy=True, with_mean=True, with_std=True) which is actually the result of the preprocessing.StandardScaler().fit(df) if I use it out of the class.
What I am expecting is:
array([[0. ],
[0.11111111],
[0.22222222],
[0.33333333],
[0.44444444],
[0.55555556],
[0.66666667],
[0.77777778],
[0.88888889],
[1. ]])
I am feeling that I am mixing few things here but I do not know what.
Update
I did some modifications:
def minmax():
return preprocessing.MinMaxScaler()
class FunctionFeaturizer(TransformerMixin):
def __init__(self, scaler):
self.scaler = scaler
def fit(self, X, y=None):
return self
def fit_transform(self, X):
self.scaler.fit(X)
return self.scaler.transform(X)
if __name__=="__main__":
scaling = FunctionFeaturizer(minmax)
df = pd.DataFrame({'feature': np.arange(10)})
df_scaled = scaling.fit_transform(df)
print(df_scaled)
But now I am receiving the following error:
Traceback (most recent call last):
File "C:/my_file.py", line 33, in <module>
test_scale = scaling.fit_transform(df)
File "C:/my_file.py", line 26, in fit_transform
self.scaler.fit(X)
AttributeError: 'function' object has no attribute 'fit'

Solving your error
in your code you have:
if __name__=="__main__":
scaling = FunctionFeaturizer(minmax)
df = pd.DataFrame({'feature': np.arange(10)})
df_scaled = scaling.fit_transform(df)
print(df_scaled)
change the line
scaling = FunctionFeaturizer(minmax)
to
scaling = FunctionFeaturizer(minmax())
you need to call the function to get the instantiation of MinMaxScaler returned to you.
Suggestion
Instead of implementing fit and fit_transform, implement fit and transform, unless you can optimize both processes into a single fit_transform. This way, it is clearer what you are doing.
If you implement only fit and transform, you can still call fit_transform because you extend the TransformerMixin class. It will just call both functions in a row.
Getting your expected results
Your transformer is looking at every column of your dataset and distributing the values linearly between 0 and 1.
So, to get your expected results, it will really depend on what your df looks like. However, you did not share that with us, so it is difficult to tell if you will get it.
However, if you have df = [[0],[1],[2],[3],[4],[5],[6],[7],[8],[9]], you will see your expected result.
if __name__=="__main__":
scaling = FunctionFeaturizer(minmax())
df = [[0], [1], [2], [3], [4], [5], [6], [7], [8], [9]]
df_scaled = scaling.fit_transform(df)
print(df_scaled)
> [[0. ]
> [0.11111111]
> [0.22222222]
> [0.33333333]
> [0.44444444]
> [0.55555556]
> [0.66666667]
> [0.77777778]
> [0.88888889]
> [1. ]]

RuntimeError: Unable to parse arguments while using scipy.optimize.bisect

I am having trouble with a code that uses the scipy.optimize.bisect root finder 1. While using that routine I get the following error:
Traceback (most recent call last):
File "project1.py", line 100, in <module>
zero1=bisect(rho_function1,x[0],crit,maxiter=1e6,full_output=True)
File "/home/irya/anaconda3/envs/hubble/lib/python3.6/site-packages/scipy /optimize/zeros.py", line 287, in bisect
r = _zeros._bisect(f,a,b,xtol,rtol,maxiter,args,full_output,disp)
RuntimeError: Unable to parse arguments
The code I am using is the following:
import numpy as np
from scipy.optimize import bisect
gamma_default=-0.8
gamma_used=-2.0
def rho_function(x, P_0=5.0e3, B_0=3.0e-6, gamma=gamma_default):
    """Function used to solve for rho.

    B_0 [G] (default 3.0 uG), P_0 [K/cm^3] and gamma are constants. The
    variable x is meant to be rho/rho_0.

    Returns the tuple ``(f, P_0, B_0, gamma)``: the function value followed
    by the constants that were used.
    """
    kb = 1.3806e-16  # Boltzmann constant in erg/K
    f = B_0**2/(8.0*np.pi*kb)*(x**(4./3)-1) + P_0*(x**gamma-1)
    return f, P_0, B_0, gamma
def rho_function1(x):
    """Function used to solve for rho, with the constants fixed in the body.

    B_0 [G] (3.0 uG), P_0 [K/cm^3] and gamma are constants; gamma is read
    from the module-level ``gamma_default``. The variable x is meant to be
    rho/rho_0. Returns only the function value.
    """
    P_0 = 5.0e3
    B_0 = 3.0e-6
    gamma = gamma_default
    kb = 1.3806e-16  # Boltzmann constant in erg/K
    f = B_0**2/(8.0*np.pi*kb)*(x**(4./3)-1) + P_0*(x**gamma-1)
    return f
def rho_prime_function(x, P_0=5.0e3, B_0=3.0e-6, gamma=gamma_default):
    """Derivative of the rho_function with respect to x."""
    kb = 1.3806e-16  # Boltzmann constant in erg/K
    f = B_0**2/(6.0*np.pi*kb)*x**(1./3) + P_0*gamma*x**(gamma-1)
    return f
def rho_2nd_prime_function(x, P_0=5.0e3, B_0=3.0e-6, gamma=gamma_default):
    """Second derivative of the rho_function with respect to x."""
    kb = 1.3806e-16  # Boltzmann constant in erg/K
    f = B_0**2/(18.0*np.pi*kb)*x**(-2./3) + P_0*gamma*(gamma-1)*x**(gamma-2)
    return f
def magnetic_term(x, B_0=3.0e-6):
    """Magnetic term of the rho function.

    Evaluates ``B_0**2 / (8*pi*kb) * (x**(4/3) - 1)``, which is zero at
    ``x == 1``.
    """
    kb = 1.3806e-16  # Boltzmann constant in erg/K
    f = B_0**2/(8.0*np.pi*kb)*(x**(4./3)-1)
    return f
def TI_term(x, P_0=5.0e3, gamma=gamma_default):
    """Return ``P_0 * (x**gamma - 1)``, the P_0 term of the rho function."""
    f = P_0*(x**gamma-1)
    return f
x=np.arange(0.8,2,0.01)
f,P_0_out,B_0_out,gamma_out=rho_function(x,gamma=gamma_used)
f_prime=rho_prime_function(x,gamma=gamma_used)
b_term=magnetic_term(x)
ti_term=TI_term(x,gamma=gamma_used)
kb=1.3806e-16
crit=(-B_0_out**2/(6.*np.pi*kb*P_0_out*gamma_out))**(3./(3.*gamma_out-4))
print("crit =",crit)
print("Using interval: a=",x[0],", b=",crit)
#using bisect
zero1=bisect(rho_function1,x[0],crit,maxiter=1e6,full_output=True)
I would like to know what is wrong with the way I am passing the arguments to the bisect routine. Could you please help me?
Cheers,

The parameter maxiter must be an integer. 1e6 is a float. Use
maxiter=int(1e6)
or
maxiter=1000000

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Preallocating 2D arrays in a Numba class - python-3.x

Related

How does a pytorch function (such as RoIPool) work?

using ImageFolder with albumentations in pytorch

Create wrapper to return particular values from an already existing function

Sklearn method in class

RuntimeError: Unable to parse arguments while using scipy.optimize.bisect

Categories

Resources