Loading image dataset in numpy arrays in python

Loading image dataset in numpy arrays in python - python-3.x

I have 6 different images. I want to store them together in a single numpy array. Is that possible? If yes, how can I do that?
from PIL import Image
from matplotlib import image
import matplotlib.pyplot as plt
from os import listdir
from numpy import asarray
import numpy as np
for i in range(1,6):
image=Image.open(str(i)+'.jpg')
image=image.resize((100,100))
temp=asarray(image)
print(np.append(X_train,temp,axis=0))
This raises the following Exception:
ValueError: all the input arrays must have same number of dimensions

You can create a list of arrays and then convert it back to a numpy array:
list_of_pics = list()
for i in range(1,6):
image=Image.open(str(i)+'.jpg')
image=image.resize((100,100))
list_of_pics.append(np.asarray(image))
new_array = np.array(list_of_pics)
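The final dimensions of new_array will be (5,100,100,3) for RGB images (or (5,100,100) for grayscale images), because range(1,6) loads images 1 through 5. Use range(1,7) to load all six images, which gives a leading dimension of 6.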

Related

`FileNotFoundError: No such file: '/content/Weeds_Detectiontrain/1.png'` How to write the correct directory

I'm new to python! I am trying to upgrade myself. However, I have a dataset namely tree_dataset which contains 3 folders i.e. test, train, and validation. Each folder contains 9 different folders (classes). Now, I want to show a sample of all first images (data) from the 9 different classes.
My code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set(style="whitegrid")
import os
import imageio
import skimage
import skimage.io
import skimage.transform
# Showing sample of all first images (data) from the 09 different classes
f, ax = plt.subplots(nrows=1,ncols=9, figsize=(20, 10))
i=0
for d in directory:
file='/content/tree_dataset'+d+'/1.png'
im=imageio.imread(file)
ax[i].imshow(im,resample=True)
ax[i].set_title(d, fontsize=8)
i+=1
Error: FileNotFoundError: No such file: '/content/Weeds_Detectiontrain/1.png'

Numpy and matplotlib: Can't edit an image array

import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
pic = Image.open('mountain1.jpg')
pic_array = np.asarray(pic)
# plt.imshow(pic_array[:,:,2], cmap='gray')
pic_array[:,:,2]=0
plt.imgshow(pic_array)
plt.show()
I get the following error at the line pic_array[:,:,2]=0:
ValueError: assignment destination is read-only
How do I edit the array?

The problem is that, in this code, the original image and the numpy array share the same memory, hence the read-only error when you try to update the array.
Create a copy of the array and modify the copy instead, and it should work fine.
Another small thing I noticed is that plt.imgshow(image) is used instead of plt.imshow(image).
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
pic = Image.open('mountain1.jpg')
# copy of the numpy array so that the original image is not changed.
pic_array = np.asarray(pic).copy()
pic_array[:,:,2]=0
plt.imshow(pic_array)
Cheers!

When I applied RandomForest in Python, ValueError: Found input variables with inconsistent numbers of samples: [2883, 1236]

File "D:\Users\Watson Rockstar\Anaconda3\lib\site-packages\sklearn\utils\validation.py", line 205, in check_consistent_length
" samples: %r" % [int(l) for l in lengths])
ValueError:
Found input variables with inconsistent numbers of samples: [2883, 1236]
This dataset has 4119 rows in total; the Xtrain shape is (2883,18) and the Xtest shape is (1236,18).
I have tried to use LabelEncoder and OneHotEncoder to solve the problem, but it did not help:
# Ignore the warnings
import warnings
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')
# data visualisation and manipulation
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import style
import seaborn as sns
import missingno as msno
#configure
# sets matplotlib to inline and displays graphs below the corressponding cell.
#import the necessary modelling algos.
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
#preprocessing
from sklearn.preprocessing import LabelEncoder
telebanking = pd.read_csv('bank-additional.csv')
telebank = telebanking.drop(['duration','default'],axis =1)
def transform(feature):
le = LabelEncoder()
telebank[feature] = le.fit_transform(telebank[feature])
print(le.classes_)
cat_telebank=telebank.select_dtypes(include='object')
cat_telebank.columns
for col in cat_telebank.columns:
transform(col)
scaler=StandardScaler()
scaled_telebank=scaler.fit_transform(telebank.drop('y',axis=1))
X=scaled_telebank
Y=telebank['y'].as_matrix()
Xtrain,Xtest,Ytrain,Ytest = train_test_split(X,Y,test_size=0.3)
def compare(model):
clf = model
clf.fit(Xtrain,Ytrain)
pred = clf.predict(Xtrain)
acc.append(accuracy_score(pred,Ytest))
prec.append(precision_score(pred,Ytest))
rec.append(recall_score(pred,Ytest))
auroc.append(roc_auc_score(pred,Ytest))
acc=[]
prec=[]
rec=[]
auroc=[]
models=[RandomForestClassifier(),DecisionTreeClassifier()]
model_names=['RandomForestClassifier','DecisionTreeClassifier']
for model in range(len(models)):
compare(models[model])
d={'Modelling Algo':model_names,'Accuracy':acc,'Precision':prec,'Recall':rec,'Area Under ROC Curve':auroc}
met_telebank=pd.DataFrame(d)
met_telebank
It is the first warning's detail.

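The split itself is correct: train_test_split returns the training and test parts of each input in turn, so
Xtrain,Xtest,Ytrain,Ytest = train_test_split(X,Y,test_size=0.3)
is the right order and should be kept. (Swapping it to Xtrain,Ytrain,Xtest,Ytest would assign the test features to Ytrain and make fit() fail with the same message.)
The error comes from compare(), which predicts on the training set but scores against the test labels:
pred = clf.predict(Xtrain)
should be
pred = clf.predict(Xtest)
As written, pred has 2883 samples while Ytest has 1236, which is exactly the [2883, 1236] mismatch in the error message.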

How to plot the distribution of each in feature in cancer dataset

I want to plot the distribution of each feature in the cancer dataset using ggplot, but it is giving me an error.
#pip install plotnine
from plotnine import ggplot
from plotnine import *
from sklearn.datasets import load_breast_cancer
for i in cancer.feature_names:
ggplot(cancer.data)+aes(x=i)+geom_bar(size=10)
This is the error message i got
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

I would recommend using seaborn for that. Here is an example of plotting the distribution of each feature in the cancer dataset, split by target:
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
# loading data
cancer = load_breast_cancer()
data = pd.DataFrame(np.c_[cancer['data'], cancer['target']],
columns= np.append(cancer['feature_names'], ['target']))
df = data.melt(['target'], var_name='cols', value_name='vals')
g = sns.FacetGrid(df, col='cols', hue="target", palette="Set1", col_wrap=4)
g = (g.map(sns.distplot, "vals", hist=True, ))

from plotnine import ggplot
from plotnine import *
from sklearn.datasets import load_breast_cancer
cancer=load_breast_cancer()
import pandas as pd
import matplotlib.pyplot as plt
data=pd.DataFrame(cancer.data,columns=cancer.feature_names)
for i in data.columns:
print(ggplot(data)+aes(x=i)+geom_density(size=1))
print(ggplot(data)+aes(x=i)+geom_bar(size=10))

How to scale a data using Python 3

I am trying to scale my data using Python 3
But I keep getting this error: I am out of ideas as to what could be the issue? Please can you assist me guys? I would deeply appreciate your help!
import pandas as pd
import numpy as np
from numpy.random import randn
from pandas import Series, DataFrame
from pandas.plotting import scatter_matrix
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import rcParams
from pylab import rcParams
import seaborn as sb
import scipy
from scipy import stats
from scipy.stats import pearsonr
from scipy.stats import spearmanr
from scipy.stats import chi2_contingency
import sklearn
from sklearn import preprocessing
from sklearn.preprocessing import scale
mtcars = pd.read_csv('mtcars.csv')
mtcars.columns = ['Car
names','mpg','cyl','disp','hp','drat','wt','qsec','vs','am','gear','carb']
mpg = mtcars['mpg']
#Scale your data
mpg_matrix = mpg.reshape(-1,1)
scaled = preprocessing.MinMaxScaler()
scaled_mpg = scaled.fit_transform(mpg_matrix)
plt.plot(scaled_mpg)
plt.show()
mpg_matrix = mpg.numpy.reshape(-1,1)
tr__
File "C:\Anaconda\lib\site-packages\pandas\core\generic.py", line 5067, in __getattr__
return object.__getattribute__(self, name)
AttributeError: 'Series' object has no attribute 'numpy'

pandas.core.series.Series doesn't have reshape.
Perhaps:
mpg_matrix = mpg.values.reshape(-1,1)
i.e. get the underlying numpy array and reshape that.

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Loading image dataset in numpy arrays in python - python-3.x

Related

`FileNotFoundError: No such file: '/content/Weeds_Detectiontrain/1.png'` How to write the correct directory

Numpy and matplotlib: Can't edit an image array

When I applied RandomForest in Python, ValueError: Found input variables with inconsistent numbers of samples: [2883, 1236]

How to plot the distribution of each in feature in cancer dataset

How to scale a data using Python 3

Categories

Resources