ValueError: could not convert string to float: 'weibullTAFC'

I am a beginner with coding. I am trying to call the function fitData from another script (the main code).
I called fitData with this syntax at the end of the script:
# If the experiments stops before a default response is inserted
stairs.addResponse(0)
stairs.saveAsExcel(outputfile)
df.to_excel(outputfile + '_trials_summary.xlsx')
win.close()
raise
#analyzeStaircases(stairs, stairInfo['AverageReversals'])
fitData(stairs, 75)
################################
# The experiment starts here #
###############################
if __name__ == "__main__":
    startExperiment()
The fitData code:
from psychopy import data
import pylab
from numpy import average, std

def fitData(stairs, percent):
    allIntensities, allResponses = [], []
    for s in stairs.staircases:
        allIntensities.append(s.intensities)
        allResponses.append(s.data)
    for s in stairs.staircases:
        print("Mean for condition", s.condition['label'], "=",
              average(s.reversalIntensities), "std=", std(s.reversalIntensities))
    # plot each condition
    pylab.subplot(221)
    #for stairNumber, thisStair in enumerate(allIntensities):
    pylab.plot(allIntensities[0], 'o-', label=stairs.staircases[0].condition['label'])
    pylab.xlabel('Trials')
    pylab.ylabel('Speed [cm/s]')
    pylab.legend()
    # Get combined data
    combinedInten, combinedResp, combinedN = data.functionFromStaircase(allIntensities, allResponses, 10)
    # Fit curve - in this case using a Weibull function
    fit = data.FitWeibull('weibullTAFC', combinedInten, combinedResp, guess=None)
    #fit = data.FitCumNormal(combinedInten, combinedResp)
    intensitiesDomainInterp = pylab.arange(min(allIntensities[0]), max(allIntensities[0]), 0.01)
    smoothResponses = fit.eval(intensitiesDomainInterp)
    thresh = fit.inverse(percent)
    # Plot fitted curve
    pylab.subplot(222)
    pylab.axis(xmin=min(allIntensities[0]), xmax=max(allIntensities[0]))
    pylab.xlabel('Speed [cm/s]')
    pylab.ylabel('Probability')
    pylab.plot(intensitiesDomainInterp, smoothResponses, '-')
    pylab.plot([thresh, thresh], [0, percent], '--')
    pylab.plot([0, thresh], [percent, percent], '--')
    pylab.title('Threshold at ' + str(percent) + '= %0.3f' % (thresh))
    # Plot points
    pylab.plot(combinedInten, combinedResp, 'o')
    pylab.ylim([0, 1])
    # SECOND CONDITION, the plots are in a second row, under
    pylab.subplot(223)
    #for stairNumber, thisStair in enumerate(allIntensities):
    pylab.plot(allIntensities[1], 'o-', label=stairs.staircases[1].condition['label'])
    pylab.xlabel('Trials')
    pylab.ylabel('Speed [cm/s]')
    pylab.legend()
    # Get combined data
    combinedInten, combinedResp, combinedN = data.functionFromStaircase(allIntensities[1], allResponses[1], 10)
    # fit curve - in this case using a Weibull function
    #fit = data.FitFunction('weibullTAFC', combinedInten, combinedResp, guess=None)
    fit = data.FitCumNormal(combinedInten, combinedResp)
    intensitiesDomainInterp = pylab.arange(min(allIntensities[1]), max(allIntensities[1]), 0.01)
    smoothResponses = fit.eval(intensitiesDomainInterp)
    thresh = fit.inverse(percent)
    #print("Threshold at " + str(percent) + "% with Cumulative Normal= ", thresh)
    # Plot fitted curve
    pylab.subplot(224)
    pylab.axis(xmin=min(allIntensities[1]), xmax=max(allIntensities[1]))
    pylab.xlabel('Speed [cm/s]')
    pylab.ylabel('Probability')
    pylab.plot(intensitiesDomainInterp, smoothResponses, '-')
    pylab.plot([thresh, thresh], [0, percent], '--')
    pylab.plot([0, thresh], [percent, percent], '--')
    pylab.title('Threshold at ' + str(percent) + '= %0.3f' % (thresh))
    # Plot points
    pylab.plot(combinedInten, combinedResp, 'o')
    pylab.ylim([0, 1])
    pylab.show()
When I call fitData from the main script I get this error output:
Traceback (most recent call last):
  File "C:\Users\OneDrive\Desktop\py2convers\psychoflicker-master\src\trackingExperiment2Staircase - py3 - FITDATA.py", line 627, in <module>
    startExperiment()
  File "C:\Users\OneDrive\Desktop\py2convers\psychoflicker-master\src\trackingExperiment2Staircase - py3 - FITDATA.py", line 621, in startExperiment
    fitData(stairs, 75)
  File "C:\Users\OneDrive\Desktop\py2convers\psychoflicker-master\src\common\fitData.py", line 39, in fitData
    fit = data.FitWeibull('weibullTAFC',combinedInten, combinedResp, guess=None, display=1, expectedMin=0.5, optimize_kws=None)
  File "C:\Users\OneDrive\Desktop\py2convers\psychoflicker-master\src\venv\lib\site-packages\psychopy\data\fit.py", line 36, in __init__
    self._doFit()
  File "C:\Users\OneDrive\Desktop\py2convers\psychoflicker-master\src\venv\lib\site-packages\psychopy\data\fit.py", line 55, in _doFit
    self.params, self.covar = optimize.curve_fit(
  File "C:\Users\OneDrive\Desktop\py2convers\psychoflicker-master\src\venv\lib\site-packages\scipy\optimize\_minpack_py.py", line 743, in curve_fit
    xdata = np.asarray_chkfinite(xdata, float)
  File "C:\Users\OneDrive\Desktop\py2convers\psychoflicker-master\src\venv\lib\site-packages\numpy\lib\function_base.py", line 601, in asarray_chkfinite
    a = asarray(a, dtype=dtype, order=order)
ValueError: could not convert string to float: 'weibullTAFC'
Could you give me some suggestions on how I can fix this error?
Also, I am not sure whether the arguments I pass to fitData from the main script are correct.
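For reference, the traceback shows scipy treating 'weibullTAFC' as the x-data it must convert to floats (asarray_chkfinite(xdata)), which suggests the model-name string is sitting where the intensity array belongs; the commented-out data.FitFunction('weibullTAFC', ...) line hints that the string argument belonged to an older API. A minimal, hedged sketch of the call without the string, assuming FitWeibull takes the data arrays directly (as the FitCumNormal call for the second condition already does), and reusing combinedInten/combinedResp from fitData:

from psychopy import data

# Sketch only: pass the combined intensities/responses as the first two
# positional arguments; the model is implied by the FitWeibull class itself.
fit = data.FitWeibull(combinedInten, combinedResp, guess=None)
# If the responses are proportions in [0, 1], the threshold query should
# arguably also be a proportion (e.g. fit.inverse(0.75) rather than 75),
# but that is separate from the ValueError above.
thresh = fit.inverse(0.75)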

Related

Using matplotlib.pyplot to draw a histogram and a smooth curve over the histogram

I have tried to draw a histogram using matplotlib and pandas, but while drawing the smooth curve I got an error. Can you please help me resolve this, and maybe suggest a method to draw a smooth curve over the histogram using matplotlib? I am trying not to use any other library (such as seaborn). Here is the code:
import numpy as np
import matplotlib.pyplot as plt

mu, sigma = 100, 15
plt.style.use('dark_background')
x = mu + sigma * np.random.randn(10000)
n, bins, patches = plt.hist(x, bins=50, density=1, facecolor='g', alpha=0.5)
zee = bins[:-1]
plt.plot(np.round(zee), patches, 'ro')
plt.xlabel('Smarts')
plt.ylabel('Probablity')
plt.title('Histogram of the Iq')
plt.axis([40, 160, 0, 0.03])
plt.grid(1)
plt.show()
The error shown is:
python3 -u "/home/somesh/Downloads/vscode_code/python ml course /firstml.py"
Traceback (most recent call last):
  File "/home/somesh/Downloads/vscode_code/python ml course /firstml.py", line 149, in <module>
    plt.plot(np.round(zee),patches,'ro')
  File "/home/somesh/.local/lib/python3.8/site-packages/matplotlib/pyplot.py", line 2840, in plot
    return gca().plot(
  File "/home/somesh/.local/lib/python3.8/site-packages/matplotlib/axes/_axes.py", line 1745, in plot
    self.add_line(line)
  File "/home/somesh/.local/lib/python3.8/site-packages/matplotlib/axes/_base.py", line 1964, in add_line
    self._update_line_limits(line)
  File "/home/somesh/.local/lib/python3.8/site-packages/matplotlib/axes/_base.py", line 1986, in _update_line_limits
    path = line.get_path()
  File "/home/somesh/.local/lib/python3.8/site-packages/matplotlib/lines.py", line 1011, in get_path
    self.recache()
  File "/home/somesh/.local/lib/python3.8/site-packages/matplotlib/lines.py", line 658, in recache
    y = _to_unmasked_float_array(yconv).ravel()
  File "/home/somesh/.local/lib/python3.8/site-packages/matplotlib/cbook/__init__.py", line 1289, in _to_unmasked_float_array
    return np.asarray(x, float)
  File "/home/somesh/.local/lib/python3.8/site-packages/numpy/core/_asarray.py", line 85, in asarray
    return array(a, dtype, copy=False, order=order)
TypeError: float() argument must be a string or a number, not 'Rectangle'
Also, is it possible to draw the smooth curve using only the matplotlib library?
Edit 1: thanks for the answer, I was finally able to spot the error.
In your code, patches is a collection of matplotlib Rectangle objects, but the plot function needs floats as input.
What you are plotting is a normal distribution, and you would like the curve to be smooth, so why not generate a normal distribution and plot it onto the same figure? Here is a modified version of your code.
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
mu,sigma = 100,15
plt.style.use('dark_background')
x = mu + sigma * np.random.randn(10000)
n,bins,patches = plt.hist(x,bins=50,density=1,facecolor='g',alpha = 0.5)
# zee=bins[:-1]
# plt.plot(np.round(zee),patches,'ro')
x_overlay = np.linspace(mu - 3*sigma, mu + 3*sigma, 100)
plt.plot(x_overlay, stats.norm.pdf(x_overlay, mu, sigma),"ro")
plt.xlabel('Smarts')
plt.ylabel('Probablity')
plt.title('Histogram of the Iq')
plt.axis([40,160,0,0.03])
plt.grid(1)
plt.show()
Output of the plot:
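As an aside: since the question asks to stay within matplotlib, here is a numpy-only variant of the overlay (mu and sigma as in the question; the pdf is evaluated directly instead of via scipy.stats):

import numpy as np
import matplotlib.pyplot as plt

mu, sigma = 100, 15
# Evaluate the normal pdf with numpy alone and draw it as a solid line,
# which gives the smooth curve over the histogram.
x_overlay = np.linspace(mu - 3*sigma, mu + 3*sigma, 200)
pdf = np.exp(-0.5 * ((x_overlay - mu) / sigma)**2) / (sigma * np.sqrt(2 * np.pi))
plt.plot(x_overlay, pdf, 'r-')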
n has the same size as zee, which is len(bins) - 1:
mu,sigma = 100,15
plt.style.use('dark_background')
x = mu + sigma * np.random.randn(10000)
n,bins,patches = plt.hist(x,bins=50,density=1,facecolor='g',alpha = 0.5)
zee=bins[:-1]
# plot n (the bin heights) instead of patches
plt.plot(np.round(zee), n, 'ro')
Output:

Tensorflow get_single_element not working with tf.data.TFRecordDataset.batch()

I am trying to perform ZCA whitening on a TensorFlow Dataset. In order to do this, I am trying to extract my data from the Dataset as a Tensor, perform the whitening, and then create another Dataset afterwards.
I followed the example here: Get data set as numpy array from TFRecordDataset, excluding the point at which the Tensors were evaluated.
get_single_element is throwing this error:
Traceback (most recent call last):
  File "/Users/takeoffs/Code/takeoffs_ai/test_pipeline_local.py", line 239, in <module>
    validation_steps=val_steps, callbacks=callbacks)
  File "/Users/takeoffs/Code/takeoffs_ai/venv/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py", line 780, in fit
    steps_name='steps_per_epoch')
  File "/Users/takeoffs/Code/takeoffs_ai/venv/lib/python3.7/site-packages/tensorflow/python/keras/engine/training_arrays.py", line 198, in model_iteration
    val_iterator = _get_iterator(val_inputs, model._distribution_strategy)
  File "/Users/takeoffs/Code/takeoffs_ai/venv/lib/python3.7/site-packages/tensorflow/python/keras/engine/training_arrays.py", line 517, in _get_iterator
    return training_utils.get_iterator(inputs)
  File "/Users/takeoffs/Code/takeoffs_ai/venv/lib/python3.7/site-packages/tensorflow/python/keras/engine/training_utils.py", line 1315, in get_iterator
    initialize_iterator(iterator)
  File "/Users/takeoffs/Code/takeoffs_ai/venv/lib/python3.7/site-packages/tensorflow/python/keras/engine/training_utils.py", line 1322, in initialize_iterator
    K.get_session((init_op,)).run(init_op)
  File "/Users/takeoffs/Code/takeoffs_ai/venv/lib/python3.7/site-packages/tensorflow/python/client/session.py", line 950, in run
    run_metadata_ptr)
  File "/Users/takeoffs/Code/takeoffs_ai/venv/lib/python3.7/site-packages/tensorflow/python/client/session.py", line 1173, in _run
    feed_dict_tensor, options, run_metadata)
  File "/Users/takeoffs/Code/takeoffs_ai/venv/lib/python3.7/site-packages/tensorflow/python/client/session.py", line 1350, in _do_run
    run_metadata)
  File "/Users/takeoffs/Code/takeoffs_ai/venv/lib/python3.7/site-packages/tensorflow/python/client/session.py", line 1370, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Dataset had more than one element.
    [[node DatasetToSingleElement_1 (defined at /test_pipeline_local.py:88) ]]
What's strange is that, according to the post linked above, batch() is supposed to return a Dataset with a single element.
Here is the code I'm running. I hard-coded my batch size to 20 for local testing purposes.
def _tfrec_ds(tfrec_path, restore_shape, dtype):
    """Reads in a tf record dataset

    Args:
        tfrec_path (str): Str for path to a tfrecord file
        restore_shape (tuple(int)): shape to transform data to
        dtype (TF type): datatype to cast to
    Returns:
        ds: a dataset
    """
    ds = tf.data.TFRecordDataset(tfrec_path)

    def parse(x):
        result = tf.parse_tensor(x, out_type=dtype)
        result = tf.reshape(result, restore_shape)
        result = tf.cast(result, tf.float32)
        return result

    ds = ds.map(parse, num_parallel_calls=tf.contrib.data.AUTOTUNE)
    return ds

def get_data_zip(in_dir,
                 num_samples_fname,
                 x_shape,
                 y_shape,
                 batch_size=5,
                 dtype=tf.float32,
                 X_fname="X.tfrec",
                 y_fname="y.tfrec",
                 augment=True):
    # Get number of samples
    with FileIO(in_dir + num_samples_fname, "r") as f:
        N = int(f.readlines()[0])

    # Load in TFRecordDatasets
    if in_dir[len(in_dir)-1] != "/":
        in_dir += "/"
    N = 20

    def zca(x):
        '''Returns tf Dataset X with ZCA whitened pixels.'''
        flat_x = tf.reshape(x, (N, (x_shape[0] * x_shape[1] * x_shape[2])))
        sigma = tf.tensordot(tf.transpose(flat_x), flat_x, axes=1) / 20
        u, s, _ = tf.linalg.svd(sigma)
        s_inv = 1. / tf.math.sqrt(s + 1e-6)
        a = tf.tensordot(u, s_inv, axes=1)
        principal_components = tf.tensordot(a, tf.transpose(u), axes=1)
        whitex = flat_x * principal_components
        batch_shape = [N] + list(x_shape)
        x = tf.reshape(whitex, batch_shape)
        return x

    X_path = in_dir + X_fname
    y_path = in_dir + y_fname
    X = _tfrec_ds(X_path, x_shape, dtype)
    y = _tfrec_ds(y_path, y_shape, dtype)
    buffer_size = 500
    shuffle_seed = 8

    # Perform ZCA whitening
    dataset = X.batch(N)
    whole_dataset_tensors = tf.data.experimental.get_single_element(dataset)
    whole_dataset_tensors = zca(whole_dataset_tensors)
    X = tf.data.Dataset.from_tensor_slices(whole_dataset_tensors)

    # Shuffle, repeat and batch
    Xy = tf.data.Dataset.zip((X, y))
    Xy = Xy.apply(tf.data.experimental.shuffle_and_repeat(buffer_size=buffer_size, seed=shuffle_seed))\
        .batch(batch_size).prefetch(tf.contrib.data.AUTOTUNE)
    return Xy, N
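As a side note on the error itself: get_single_element requires the incoming dataset to hold exactly one element, so X.batch(N) only satisfies it when N covers every example in X. A tiny standalone illustration with hypothetical sizes, using the same experimental API as the snippet above:

import tensorflow as tf

ds = tf.data.Dataset.range(50)  # hypothetical dataset with 50 examples

# One batch covering everything -> the batched dataset has exactly one
# element and get_single_element succeeds.
whole = tf.data.experimental.get_single_element(ds.batch(50))

# Batches of 20 leave three elements, so this would raise
# InvalidArgumentError: "Dataset had more than one element."
# bad = tf.data.experimental.get_single_element(ds.batch(20))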

A simple way of freeing Memory in python [duplicate]

I am trying to read large oscilloscope .trc files and plot them. Plotting one file works, but as soon as I put the script into a loop, trying to plot all files (one file per iteration), I get a MemoryError.
Code:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import readTrc  # external file, same location as script

foldername = 'trc_folder'
folder = os.listdir(foldername)
path = os.path.dirname(os.path.realpath(__file__))

for filenumber, i in enumerate(folder):
    trc = path + '/' + foldername + '/' + i
    print('reading trc file ' + str(filenumber))
    datX, datY, m = readTrc.readTrc(trc)
    srx, sry = pd.Series(datX * 1000), pd.Series(datY * 1000)
    df_oszi = pd.concat([srx, sry], axis=1)
    df_oszi.set_index(0, inplace=True)
    # ERROR APPEARS with xticks argument
    # removing xticks does not help, because then the error path changes to
    # /usr/local/lib/python3.6/dist-packages/pandas/plotting/_core.py
    df_oszi.plot(grid=1,
                 color='blue',
                 linewidth=0.5,
                 figsize=(9, 5),
                 legend=False,
                 xticks=np.arange(df_oszi.index[0], df_oszi.index[-1], 1))
    print('plotting file ' + str(filenumber))
    plt.savefig('Plot_' + str(filenumber) + '.png', dpi=300)
The problem seems to be with the external module readTrc. It took me quite a while to figure this out, because Python was throwing errors around Matplotlib and Pandas rather than readTrc, which seems to be an unofficial script for reading .trc files. I found it on the net while looking for a way to read .trc files in Python. If you know a better way of reading oscilloscope files, please let me know.
I zipped everything you need to execute the script into this folder: folder
(It is quite large, 582 MB, because every .trc file is about 200 MB in size.) Inside you will find the script, a folder with .trc files and the external Python file (module) readTrc, which is required for reading .trc files. Executing the script should plot the first file but throw a MemoryError when plotting/constructing the second, at least on my Ubuntu machine. What confuses me is that I only get this MemoryError on Ubuntu (18.04), not on Windows 10.
I would appreciate help so that I can continue with my project. Please let me know if you need additional information.
Edit:
Single Download for readTrc.py
Single Download for Script.py
print(type(datX)) returns:
<class 'numpy.ndarray'>
printing datX returns an object with 50 million values:
[-0.005 -0.005 -0.005 ... 0.005 0.005 0.005]
these are rounded by the print() function; the actual values are:
-0.004999999906663635
-0.004999999806663634
-0.004999999706663633
-0.004999999606663631
-0.00499999950666363
Edit 2:
To run the code with the new version of readTrc make these changes:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import readTrc

foldername = 'trc_folder'
folder = os.listdir(foldername)
path = os.path.dirname(os.path.realpath(__file__))

for filenumber, i in enumerate(folder):
    trc = path + '/' + foldername + '/' + i
    print('reading trc file ' + str(filenumber))
    datX, datY, d = readTrc.Trc().open(trc)
    srx, sry = pd.Series(datX * 1000), pd.Series(datY * 1000)
    df_oszi = pd.concat([srx, sry], axis=1)
    df_oszi.set_index(0, inplace=True)
    df_oszi.plot(grid=1,
                 color='blue',
                 linewidth=0.5,
                 figsize=(9, 5),
                 legend=False,
                 xticks=np.arange(df_oszi.index[0], df_oszi.index[-1], 1))
    print('plotting file ' + str(filenumber))
    plt.savefig('Plot_' + str(filenumber) + '.png', dpi=300)
MemoryError:
Traceback (most recent call last):
  File "/home/artur/Desktop/zip_original/Script.py", line 27, in <module>
    xticks = np.arange(df_oszi.index[0], df_oszi.index[-1], 1))
  File "/usr/local/lib/python3.6/dist-packages/pandas/plotting/_core.py", line 2941, in __call__
    sort_columns=sort_columns, **kwds)
  File "/usr/local/lib/python3.6/dist-packages/pandas/plotting/_core.py", line 1977, in plot_frame
    **kwds)
  File "/usr/local/lib/python3.6/dist-packages/pandas/plotting/_core.py", line 1804, in _plot
    plot_obj.generate()
  File "/usr/local/lib/python3.6/dist-packages/pandas/plotting/_core.py", line 260, in generate
    self._make_plot()
  File "/usr/local/lib/python3.6/dist-packages/pandas/plotting/_core.py", line 985, in _make_plot
    **kwds)
  File "/usr/local/lib/python3.6/dist-packages/pandas/plotting/_core.py", line 1001, in _plot
    lines = MPLPlot._plot(ax, x, y_values, style=style, **kwds)
  File "/usr/local/lib/python3.6/dist-packages/pandas/plotting/_core.py", line 615, in _plot
    return ax.plot(*args, **kwds)
  File "/usr/local/lib/python3.6/dist-packages/matplotlib/__init__.py", line 1805, in inner
    return func(ax, *args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/matplotlib/axes/_axes.py", line 1604, in plot
    self.add_line(line)
  File "/usr/local/lib/python3.6/dist-packages/matplotlib/axes/_base.py", line 1891, in add_line
    self._update_line_limits(line)
  File "/usr/local/lib/python3.6/dist-packages/matplotlib/axes/_base.py", line 1913, in _update_line_limits
    path = line.get_path()
  File "/usr/local/lib/python3.6/dist-packages/matplotlib/lines.py", line 945, in get_path
    self.recache()
  File "/usr/local/lib/python3.6/dist-packages/matplotlib/lines.py", line 649, in recache
    self._xy = np.column_stack(np.broadcast_arrays(x, y)).astype(float)
MemoryError
Edit 3:
Sampling the dataset reduces the amount of information retained. These are examples of the same dataset with sampling = 1, sampling = 10 and sampling = 100:
srx, sry = pd.Series(datX[::sampling] * 1000), pd.Series(datY[::sampling] * 1000)
The reason for this is the extremely short impulse period of ultra-high-frequency (UHF) waves. Each impulse can consist of only a few data values, so if you reduce the number of values taken into account, a lot of data is lost. Although this solution makes the code work, it also discards a significant part of the signal.
Oh wow, I couldn't see the wood for the trees, as they say.
You're attempting to plot way too many data points (100,000,002 of them; I think that's about 4 km of paper printed at 600 dpi), which can be resolved either by sampling:
sampling=100
srx, sry = pd.Series(datX[::sampling] * 1000), pd.Series(datY[::sampling] * 1000)
or by selectively studying specific ranges:
srx, sry = pd.Series(datX[0:50000] * 1000), pd.Series(datY[0:50000] * 1000)
or a combination of both.
It took quite some time, but I managed to get the MemoryError under control. Not only did I have to put gc.collect() at the end of each loop, but also plt.close(). Only then did the errors stop. Sorry for the confusion. I learned a lot from this.
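A minimal sketch of that loop ending (same variable names as the script above, with the reading and plotting details elided):

import gc
import os
import matplotlib.pyplot as plt

folder = os.listdir('trc_folder')
for filenumber, i in enumerate(folder):
    # ... read the .trc file, build df_oszi and call df_oszi.plot() as above ...
    plt.savefig('Plot_' + str(filenumber) + '.png', dpi=300)
    plt.close('all')  # release the figure before the next file
    gc.collect()      # force a garbage-collection pass between iterations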

ValueError: Number of features of the model must match the input. Model n_features is 45 and input n_features is 2

I'm trying to plot a Random Forest visualization for classification purposes with Python 3.
First, I read a CSV file where all the necessary data is located. Here, Read_CSV() is a method that runs correctly, returning three variables: features (a vector with all feature names, specifically 45 of them), data (only the data, without the label column; 148000 rows and 45 columns) and labels (the column of labels in integer format; there are 3 classes to classify, encoded as the integers 0, 1 or 2, and this vector also has 148000 rows).
features, data, labels = Read_CSV()
X_train, X_test, Y_train, Y_test = train_test_split(data, labels, test_size=0.35, random_state=0)

X = np.array(X).astype(np.float)
y = np.array(y).astype(np.float)
ax = ax or plt.gca()
ax.scatter(X[:, 0], X[:, 1], c=y, s=30, cmap=cmap,
           clim=(y.min(), y.max()), zorder=3)
ax.axis('tight')
ax.axis('off')
xlim = ax.get_xlim()
ylim = ax.get_ylim()
# fit the estimator
model.fit(X, y)
xx, yy = np.meshgrid(np.linspace(*xlim, num=200),
                     np.linspace(*ylim, num=200))
Z = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
# Create a color plot with the results
n_classes = len(np.unique(y))
contours = ax.contourf(xx, yy, Z, alpha=0.3,
                       levels=np.arange(n_classes + 1) - 0.5,
                       cmap=cmap, clim=(y.min(), y.max()),
                       zorder=1)
ax.set(xlim=xlim, ylim=ylim)
This part of the code is entirely dedicated to obtaining a decision-region plot of the classifier.
When I run this code I obtain the following:
Traceback (most recent call last):
  File "C:/Users/Carles/PycharmProjects/Article/main.py", line 441, in <module>
    main()
  File "C:/Users/Carles/PycharmProjects/Article/main.py", line 388, in main
    visualize_classifier(RandomForestClassifier(),X_train, Y_train)
  File "C:/Users/Carles/PycharmProjects/Article/main.py", line 353, in visualize_classifier
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
  File "C:\Users\Carles\PycharmProjects\Article\venv\lib\site-packages\sklearn\ensemble\forest.py", line 538, in predict
    proba = self.predict_proba(X)
  File "C:\Users\Carles\PycharmProjects\Article\venv\lib\site-packages\sklearn\ensemble\forest.py", line 578, in predict_proba
    X = self._validate_X_predict(X)
  File "C:\Users\Carles\PycharmProjects\Article\venv\lib\site-packages\sklearn\ensemble\forest.py", line 357, in _validate_X_predict
    return self.estimators_[0]._validate_X_predict(X, check_input=True)
  File "C:\Users\Carles\PycharmProjects\Article\venv\lib\site-packages\sklearn\tree\tree.py", line 384, in _validate_X_predict
    % (self.n_features_, n_features))
ValueError: Number of features of the model must match the input. Model n_features is 45 and input n_features is 2
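The traceback is saying that the forest was fitted on all 45 columns while the meshgrid passed to predict() only supplies two, so the decision surface can only be drawn against a model that itself sees two features. A hedged sketch of that workaround, reusing the variable names from the snippet above (whether dropping the other 43 features is acceptable depends on what the plot is meant to show):

# Fit the surface model on just the two columns that are plotted, so the
# 2-column meshgrid matches the model's n_features.
X2 = X[:, :2]
model.fit(X2, y)
Z = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)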

TypeError: Image data can not convert to float. How to mitigate this error?

from numpy import mean, cov, cumsum, dot, linalg, size, flipud
import numpy as np
from pylab import imread, subplot, imshow, title, gray, figure, show, NullLocator

def princomp(A, numpc=0):
    # computing eigenvalues and eigenvectors of covariance matrix
    M = (A - mean(A.T, axis=1)).T  # subtract the mean (along columns)
    [latent, coeff] = linalg.eig(cov(M))
    p = size(coeff, axis=1)
    idx = np.argsort(latent)  # sorting the eigenvalues
    idx = idx[::-1]           # in descending order
    # sorting eigenvectors according to the sorted eigenvalues
    coeff = coeff[:, idx]
    latent = latent[idx]      # sorting eigenvalues
    if numpc < p and numpc >= 0:
        coeff = coeff[:, range(numpc)]  # cutting some PCs if needed
    score = dot(coeff.T, M)  # projection of the data in the new space
    return coeff, score, latent

A = imread('beatles.jpg')  # load an image
A = mean(A, 2)  # to get a 2-D array
full_pc = size(A, axis=1)  # number of all the principal components
i = 1
dist = []
for numpc in range(0, full_pc + 10, 10):  # 0 10 20 ... full_pc
    coeff, score, latent = princomp(A, numpc)
    Ar = dot(coeff, score).T + mean(A, axis=0)  # image reconstruction
    # difference in Frobenius norm
    dist.append(linalg.norm(A - Ar, 'fro'))
    # showing the pics reconstructed with less than 50 PCs
    if numpc <= 50:
        ax = subplot(2, 3, i, frame_on=False)
        ax.xaxis.set_major_locator(NullLocator())  # remove ticks
        ax.yaxis.set_major_locator(NullLocator())
        i += 1
        imshow(flipud(Ar))
        title('PCs # ' + str(numpc))
        gray()
figure()
imshow(flipud(A))
title('numpc FULL')
gray()
show()
The error appears as shown above. This is sample code for PCA image compression that I found on the net. Suggestions would be helpful. The entire error is:
Traceback (most recent call last):
  File "try.py", line 36, in <module>
    imshow(flipud(Ar))
  File "/home/user/.virtualenvs/cv/local/lib/python3.5/site-packages/matplotlib/pyplot.py", line 3157, in imshow
    **kwargs)
  File "/home/user/.virtualenvs/cv/local/lib/python3.5/site-packages/matplotlib/__init__.py", line 1898, in inner
    return func(ax, *args, **kwargs)
  File "/home/user/.virtualenvs/cv/local/lib/python3.5/site-packages/matplotlib/axes/_axes.py", line 5124, in imshow
    im.set_data(X)
  File "/home/user/.virtualenvs/cv/local/lib/python3.5/site-packages/matplotlib/image.py", line 596, in set_data
    raise TypeError("Image data can not convert to float")
TypeError: Image data can not convert to float
You cannot plot a complex array with matplotlib's imshow, so you need to take the real (or imaginary) part alone, e.g.:
Ar = dot(coeff,score).T+mean(A,axis=0) # image reconstruction
Ar = Ar.real
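As an aside (not part of the original answer): the complex values come from linalg.eig applied to the covariance matrix, which is symmetric up to rounding, so using the symmetric eigensolver numpy.linalg.eigh keeps the eigenvectors real and makes the .real step unnecessary. A small sketch, reusing M (the centered data) from princomp:

import numpy as np

# eigh is the eigensolver for symmetric/Hermitian matrices and returns
# real eigenvalues (in ascending order) and real eigenvectors.
latent, coeff = np.linalg.eigh(np.cov(M))
idx = np.argsort(latent)[::-1]            # reorder to descending
latent, coeff = latent[idx], coeff[:, idx]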
