LaTeX is not able to process the string in python - python-3.x

I am trying to graph a Weibull distribution in Python 3 using the code available at http://www.astroml.org/book_figures/chapter3/fig_weibull_distribution.html:
# Author: Jake VanderPlas
# License: BSD
# The figure produced by this code is published in the textbook
# "Statistics, Data Mining, and Machine Learning in Astronomy" (2013)
# For more information, see http://astroML.github.com
# To report a bug or issue, use the following forum:
# https://groups.google.com/forum/#!forum/astroml-general
import numpy as np
from scipy.stats import dweibull
from matplotlib import pyplot as plt
#----------------------------------------------------------------------
# This function adjusts matplotlib settings for a uniform feel in the textbook.
# Note that with usetex=True, fonts are rendered with LaTeX. This may
# result in an error if LaTeX is not installed on your system. In that case,
# you can set usetex to False.
from astroML.plotting import setup_text_plots
setup_text_plots(fontsize=8, usetex=True)
#------------------------------------------------------------
# Define the distribution parameters to be plotted
k_values = [0.5, 1, 2, 2]
lam_values = [1, 1, 1, 2]
linestyles = ['-', '--', ':', '-.', '--']
mu = 0
x = np.linspace(-10, 10, 1000)
#------------------------------------------------------------
# plot the distributions
fig, ax = plt.subplots(figsize=(5, 3.75))
for (k, lam, ls) in zip(k_values, lam_values, linestyles):
    dist = dweibull(k, mu, lam)
    plt.plot(x, dist.pdf(x), ls=ls, c='black',
             label=r'$k=%.1f,\ \lambda=%i$' % (k, lam))
plt.xlim(0, 5)
plt.ylim(0, 0.6)
plt.xlabel('$x$')
plt.ylabel(r'$p(x|k,\lambda)$')
plt.title('Weibull Distribution')
plt.legend()
plt.show()
I keep getting the following error:
RuntimeError: LaTeX was not able to process the following string:
b'lp'
Here is the full report generated by LaTeX:
<matplotlib.figure.Figure at 0x22c11c7a2e8>
I can't figure out where "b'lp'" comes from.
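The comment in the script already hints at the usual cause: with usetex=True, matplotlib calls an external LaTeX toolchain (latex, dvipng, ghostscript), and this RuntimeError usually means that call failed rather than anything being wrong with the label strings. A minimal sketch of the workaround suggested in the script's own comment (assuming the rest of the code is unchanged) is to turn TeX rendering off:

# Render labels with matplotlib's built-in mathtext instead of external LaTeX.
from astroML.plotting import setup_text_plots
setup_text_plots(fontsize=8, usetex=False)

# Equivalently, disable it globally via rcParams:
import matplotlib
matplotlib.rcParams['text.usetex'] = False

If you do want TeX-rendered labels, the usetex=True path requires a working latex, dvipng and ghostscript on your PATH.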

Related

InvalidArgumentError: Graph execution error

I am trying to apply an LCM (linear coregionalization model) Gaussian process to a CSV dataset.
This dataset includes two inputs (FracYear, Auxiliar) and two outputs (VV, VH).
import gpflow as gpflow
import pandas as pd
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
plt.style.use('ggplot')
# %matplotlib inline
import seaborn as sns
np.random.seed(1)
def plot_gp(x, mu, var, color='k'):
    plt.plot(x, mu, color=color, lw=2)
    plt.plot(x, mu + 2*np.sqrt(var), '--', color=color)
    plt.plot(x, mu - 2*np.sqrt(var), '--', color=color)

def plot(m):
    xtest = np.linspace(0, 1, 300)[:, None]
    line, = plt.plot(X1, Y1, 'x', mew=2)
    mu, var = m.predict_f(np.hstack((xtest, np.zeros_like(xtest))))
    plot_gp(xtest, mu, var, line.get_color())
    line, = plt.plot(X2, Y2, 'x', mew=2)
    mu, var = m.predict_f(np.hstack((xtest, np.ones_like(xtest))))
    plot_gp(xtest, mu, var, line.get_color())
import pandas as pd
d = pd.read_csv('C://Users//Rick//Documents//UNI//PROJ//invento.csv',delimiter=',', header=None, skiprows=1, names=['FracYear', 'VH', 'VV'])
# Replace missing values with NaN
d.replace(-200.0, np.nan, inplace=True)
# Data preparation
# We start by generating some training data to fit the model with. For this example, we choose the following two correlated functions for our outputs:
# make a dataset with two outputs, correlated, heavy-tail noise. One has more noise than the other.
df = pd.DataFrame(data=d)
X1 = df['FracYear'] = pd.to_numeric(df['FracYear'])
X2 = df['Auxiliar'] = pd.to_numeric(df['Auxiliar'])
Y1 = df['VH'] = pd.to_numeric(df['VH'])
Y2 = df['VV'] = pd.to_numeric(df['VV'])
plt.plot(X1, Y1, 'x', mew=2)
plt.plot(X2, Y2, 'x', mew=2)
plt.show()
# Base Matern kernel
k1 = gpflow.kernels.Matern32(active_dims=[0])
# Build the coreg kernel
coreg = gpflow.kernels.Coregion(output_dim=2, rank=1, active_dims=[1])
kern = k1 * coreg
# Build Likelihood
lik = gpflow.likelihoods.SwitchedLikelihood([
    gpflow.likelihoods.StudentT(), gpflow.likelihoods.StudentT()
])
# Augment the input with ones or zeros to indicate the required output dimension
X_augmented = np.vstack((np.hstack((X1, np.zeros_like(X1))), np.hstack((X2, np.ones_like(X2)))))
# Augment the Y data with ones or zeros that specify a likelihood from the list of likelihoods
Y_augmented = np.vstack((np.hstack((Y1, np.zeros_like(Y1))), np.hstack((Y2, np.ones_like(Y2)))))
# now build the GP model as normal
m = gpflow.models.VGP((X_augmented, Y_augmented), kernel=kern, likelihood=lik)
# fit the covariance function parameters
#gpflow.train.ScipyOptimizer().minimize(m, maxiter=1000)
from gpflow.ci_utils import ci_niter
maxiter = ci_niter(10000)
gpflow.optimizers.Scipy().minimize(
    m.training_loss, m.trainable_variables, options=dict(maxiter=maxiter), method="L-BFGS-B",
)
## Fit and plot
xtest = np.hstack([np.linspace(0, 1, 100)]*3)[:,None]
mu1, var1 = m.predict_f(np.hstack((xtest, np.zeros_like(xtest))))
mu2, var2 = m.predict_f(np.hstack((xtest, np.ones_like(xtest))))
plt.plot(X1, Y1, 'x', mew=2, color='r')
plt.plot(X2, Y2, 'x', mew=2, color='b')
plt.plot(np.linspace(0, 1, 100), np.reshape(mu2, [100,3]))
plt.plot(np.linspace(0, 1, 100), mu1, 'r')
plt.plot(np.linspace(0, 1, 100), mu2, 'b')
plt.show()
Please note that I am using Colab. I am also having issues installing TensorFlow in Spyder, so I don't know which environment would be better: Colab, Spyder, or Jupyter.
My question is about the error raised by the call to gpflow.optimizers.Scipy().minimize.
The error is very long. It starts with the title of this thread, and ends like this: "Node: 'GatherV2_2'
indices[0] = 2019 is not in [0, 2)
[[{{node GatherV2_2}}]] [Op:__inference__tf_eval_9447]"
If anyone has any idea about this, please let me know. Also, if any of you has LCM code that works with CSV files, I would be interested in taking a look at it.
Thanks!!
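A note on the traceback (this is an assumption based on the error message, not something stated in the question): the Coregion kernel with active_dims=[1] reads the second column of X_augmented as an integer output index in [0, output_dim), and "indices[0] = 2019 is not in [0, 2)" looks like a raw FracYear value ending up in that column. That is what happens when np.hstack is applied to 1-D pandas Series, because it concatenates them end to end instead of building a two-column array. A minimal sketch of the augmentation with explicit column vectors:

# Sketch: reshape to column vectors first, so that column 0 holds the input
# and column 1 holds the 0/1 output index the Coregion kernel expects.
X1c = np.asarray(X1, dtype=float).reshape(-1, 1)
X2c = np.asarray(X2, dtype=float).reshape(-1, 1)
Y1c = np.asarray(Y1, dtype=float).reshape(-1, 1)
Y2c = np.asarray(Y2, dtype=float).reshape(-1, 1)

X_augmented = np.vstack((
    np.hstack((X1c, np.zeros_like(X1c))),  # rows for the first output (paired with Y1)
    np.hstack((X2c, np.ones_like(X2c))),   # rows for the second output (paired with Y2)
))
Y_augmented = np.vstack((
    np.hstack((Y1c, np.zeros_like(Y1c))),  # likelihood index 0
    np.hstack((Y2c, np.ones_like(Y2c))),   # likelihood index 1
))

With that shape, X_augmented is (N1 + N2, 2) and its second column contains only 0s and 1s, which is what both the Coregion kernel and the SwitchedLikelihood index on.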

Unable to read data from kdeplot

I have a pandas DataFrame with two columns, A and B, named df in the following bits of code.
I try to plot a KDE for each value of B like so:
import seaborn as sbn, numpy as np, pandas as pd
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(15, 7.5))
sbn.kdeplot(data=df, x="A", hue="B", fill=True)
fig.savefig("test.png")
I read the following suggestions, but only those where I compute the KDE from scratch using statsmodels or some other module get me anywhere:
Seaborn/Matplotlib: how to access line values in FacetGrid?
Get data points from Seaborn distplot
For curiosity's sake, I would like to know why I am unable to get something from the following code:
kde = sbn.kdeplot(data=df, x="A", hue="B", fill=True)
line = kde.lines[0]
x, y = line.get_data()
print(x, y)
The error I get is IndexError: list index out of range. kde.lines has a length of 0.
Accessing the lines through fig.axes[0].lines[0] also raises an IndexError.
All in all, I think I tried everything proposed in the previous threads. I also tried switching to displot instead of kdeplot (displot, not distplot, since the latter is deprecated), but it is the same story, only that I have to access the axes differently. Every time I get to .get_lines(), ax.lines, and so on, what is returned is an empty list, so I can't get any values out of it.
EDIT: Reproducible example
import pandas as pd, numpy as np, matplotlib.pyplot as plt, seaborn as sbn
# 1. Generate random data
df = pd.DataFrame(columns=["A", "B"])
for i in [1, 2, 3, 5, 7, 8, 10, 12, 15, 17, 20, 40, 50]:
    for _ in range(10):
        df = df.append({"A": np.random.random() * i, "B": i}, ignore_index=True)
# 2. Plot data
fig = plt.figure(figsize=(15, 7.5))
sbn.kdeplot(data=df, x="A", hue="B", fill=True)
# 3. Read data (error)
ax = fig.axes[0]
x, y = ax.lines[0].get_data()
print(x, y)
This happens because using fill=True changes the object that matplotlib draws.
When no fill is used, lines are plotted:
fig = plt.figure(figsize=(15, 7.5))
ax = sbn.kdeplot(data=df, x="A", hue="B")
print(ax.lines)
# [<matplotlib.lines.Line2D object at 0x000001F365EF7848>, etc.]
When you use fill=True, they become PolyCollection objects:
fig = plt.figure(figsize=(15, 7.5))
ax = sbn.kdeplot(data=df, x="A", hue="B", fill=True)
print(ax.collections)
# [<matplotlib.collections.PolyCollection object at 0x0000016EE13F39C8>, etc.]
You could draw the kdeplot a second time with fill=False so that you have access to the line objects.
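A short sketch of that idea, building on the reproducible example above (the extra figure is only a helper and is closed afterwards):

# Redraw the same KDE without fill to get Line2D objects, then read the data.
fig2, ax2 = plt.subplots()
sbn.kdeplot(data=df, x="A", hue="B", fill=False, ax=ax2)
for line in ax2.lines:
    x, y = line.get_data()  # x grid and estimated density of one hue level
    print(x[:5], y[:5])
plt.close(fig2)  # the helper figure is no longer needed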

Matplotlib glitch [python3]

Hi everyone,
I'm trying to make a simple matplotlib animation to represent a capacitor's charging and discharging curves. Here is the program:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import math
x_data = [0] # storing the variable values
y_data = [0] # storing the dependent variable value
fig, ax = plt.subplots() # creating a subplot and its axes
ax.set_xlim(0, 20) # setting the x axe limits
ax.set_ylim(0, 12) # setting the y axe limits
line, = ax.plot(0, 0) # create a line object
# create a function to animate
def animation_frame(t):
    x_data.append(t)  # append the variable to the x_data array
    # print a rough differential of the function
    print(y_data[len(y_data) - 1] - y_data[len(y_data) - 2])
    # check if the differential is so small that it can be considered 0 (local maximum point)
    if y_data[len(y_data) - 1] - y_data[len(y_data) - 2] >= 0.0012 or y_data[len(y_data) - 1] - y_data[len(y_data) - 2] == 0.0:
        # if it isn't, continue plotting the charging function
        y_data.append(9 * (1 - (np.e)**(-(t/(180000*0.00001)))))
    else:
        # plot the discharging function
        y_data.append(9 * (np.e)**(-(t/(180000*0.00001))))
    # set the line point coordinates
    line.set_xdata(x_data)
    line.set_ydata(y_data)

# run the animation function
animation = FuncAnimation(fig, func=animation_frame, frames=np.arange(0, 10, 0.05), interval=10)
# show the plot
plt.show()
The exact type of function isn't important: even if I put a simple identity (f(t) = t) in the else branch of the if-else clause in the animation function, it still won't work and glitches out. Here are some images:
I suspect that the issue is in the differential check inside the animation function, but I don't know how to resolve it.
Do you have any ideas?
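Not a confirmed diagnosis, but the finite-difference check can flip between the two formulas from one frame to the next, which makes the curve jump between the charging and discharging expressions. A minimal sketch that switches exactly once by tracking a state flag and the switch time (the 9 V source and the RC product 180000 * 0.00001 are taken from the question; the 99% threshold is an arbitrary choice):

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation

tau = 180000 * 0.00001          # RC time constant from the question (1.8 s)
V = 9                           # source voltage
x_data, y_data = [], []
state = {"discharging": False, "t_switch": None, "v_switch": None}

fig, ax = plt.subplots()
ax.set_xlim(0, 20)
ax.set_ylim(0, 12)
line, = ax.plot([], [])

def animation_frame(t):
    # switch to discharging once the capacitor is ~fully charged
    if not state["discharging"] and V * (1 - np.exp(-t / tau)) > 0.99 * V:
        state.update(discharging=True, t_switch=t,
                     v_switch=V * (1 - np.exp(-t / tau)))
    if state["discharging"]:
        y = state["v_switch"] * np.exp(-(t - state["t_switch"]) / tau)
    else:
        y = V * (1 - np.exp(-t / tau))
    x_data.append(t)
    y_data.append(y)
    line.set_data(x_data, y_data)
    return line,

anim = FuncAnimation(fig, animation_frame, frames=np.arange(0, 20, 0.05), interval=10)
plt.show()

Because the discharge curve restarts from the voltage and time recorded at the moment of switching, there is no discontinuity when the state changes.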

Import error: No module named 'mstamp_stomp'

I am trying to run demo.py from the git repository https://github.com/mcyeh/mstamp/tree/master/Python.
This is the source code of the paper Matrix Profile VI: Meaningful Multidimensional Motif Discovery. I have attached the code below.
# -*- coding: utf-8 -*-
"""
#author: Michael Yeh
C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful
Multidimensional Motif Discovery," IEEE ICDM 2017.
https://sites.google.com/view/mstamp/
http://www.cs.ucr.edu/~eamonn/MatrixProfile.html
"""
import scipy.io as sio
import matplotlib.pyplot as plt
from mstamp_stomp import mstamp as mstamp_stomp
from mstamp_stamp import mstamp as mstamp_stamp
def plot_motifs(matrix_profile, dimensionality=1):
    motif_at = matrix_profile[dimensionality - 1, :].argsort()[:2]

    plt.figure(figsize=(14, 7))
    for i in range(3):
        plt.subplot(4, 1, i + 1)
        plt.plot(data.T[i, :])
        plt.title('$T_{}$'.format(i + 1))
        for m in motif_at:
            plt.plot(range(m, m + sub_len), data.T[i, :][m:m + sub_len], c='r')
        plt.xlim((0, matrix_profile.shape[1]))

    plt.subplot(414)
    plt.title('{}-dimensional Matrix Profile'.format(dimensionality))
    plt.plot(matrix_profile[dimensionality - 1, :])
    for m in motif_at:
        plt.axvline(m, c='r')
    plt.xlim((0, matrix_profile.shape[1]))
    plt.tight_layout()


if __name__ == '__main__':
    mat = sio.loadmat('toy_data.mat')
    data = mat['data']
    sub_len = mat['sub_len'][0][0]

    # using the stomp based method to compute the multidimensional matrix
    # profile
    mat_pro_1, pro_idx_1 = mstamp_stomp(data.T, sub_len,
                                        return_dimension=False)

    # plot the matrix profile as image
    plt.figure()
    plt.title('Matrix Profile (STOMP)')
    plt.imshow(mat_pro_1, extent=[0, 1, 0, 1])

    # using the stamp based method to compute the multidimensional matrix
    # profile
    mat_pro_2, pro_idx_2 = mstamp_stamp(data.T, sub_len,
                                        return_dimension=False)

    # plot the matrix profile as image
    plt.figure()
    plt.title('Matrix Profile (STAMP)')
    plt.imshow(mat_pro_2, extent=[0, 1, 0, 1])
    plot_motifs(mat_pro_2)

    # the function can also be used to compute the 1D matrix profile
    mat_pro_3, _ = mstamp_stomp(data[:, 1].T, sub_len,
                                return_dimension=False)
    plt.figure()
    plt.plot(mat_pro_3[0, :])

    mat_pro_4, _ = mstamp_stamp(data[:, 1].T, sub_len,
                                return_dimension=False)
    plt.figure()
    plt.plot(mat_pro_4[0, :])
    plt.show()
Import error: No module named 'mstamp_stomp'
This reflects a search path problem. You will want to chdir into the directory containing the sources, and you will also want the current directory (.) on your PYTHONPATH before you execute the code:
$ cd mstamp/Python
$ export PYTHONPATH=.
$ python demo.py
You can use this code fragment to debug such issues:
import pprint
import sys
pprint.pprint(sys.path)
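As an alternative to exporting PYTHONPATH (a sketch, not part of the original answer), you can prepend the directory that contains mstamp_stomp.py to sys.path at the top of demo.py; the path below is a placeholder for wherever you cloned the repository:

import os
import sys

# Placeholder path: point this at your clone of mstamp/Python.
sys.path.insert(0, os.path.abspath('/path/to/mstamp/Python'))

from mstamp_stomp import mstamp as mstamp_stomp
from mstamp_stamp import mstamp as mstamp_stamp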

Smooth curves in Python Plots [duplicate]

I've got the following simple script that plots a graph:
import matplotlib.pyplot as plt
import numpy as np
T = np.array([6, 7, 8, 9, 10, 11, 12])
power = np.array([1.53E+03, 5.92E+02, 2.04E+02, 7.24E+01, 2.72E+01, 1.10E+01, 4.70E+00])
plt.plot(T,power)
plt.show()
As it is now, the line goes straight from point to point which looks ok, but could be better in my opinion. What I want is to smooth the line between the points. In Gnuplot I would have plotted with smooth cplines.
Is there an easy way to do this in PyPlot? I've found some tutorials, but they all seem rather complex.
You could use scipy.interpolate.spline to smooth out your data yourself:
from scipy.interpolate import spline
# 300 represents number of points to make between T.min and T.max
xnew = np.linspace(T.min(), T.max(), 300)
power_smooth = spline(T, power, xnew)
plt.plot(xnew,power_smooth)
plt.show()
spline is deprecated in scipy 0.19.0, use BSpline class instead.
Switching from spline to BSpline isn't a straightforward copy/paste and requires a little tweaking:
from scipy.interpolate import make_interp_spline, BSpline
# 300 represents number of points to make between T.min and T.max
xnew = np.linspace(T.min(), T.max(), 300)
spl = make_interp_spline(T, power, k=3) # type: BSpline
power_smooth = spl(xnew)
plt.plot(xnew, power_smooth)
plt.show()
Before and after plots comparing the raw line and the spline-smoothed line (images omitted here).
For this example spline works well, but if the function is not inherently smooth and you want a smoothed version, you can also try a Gaussian filter (here applied to the power array from the question):
from scipy.ndimage import gaussian_filter1d
power_smoothed = gaussian_filter1d(power, sigma=2)
plt.plot(T, power_smoothed)
plt.show()
If you increase sigma, you get a smoother function.
Proceed with caution with this one. It modifies the original values and may not be what you want.
See the scipy.interpolate documentation for some examples.
The following example demonstrates its use, for linear and cubic spline interpolation:
import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import interp1d
# Define x, y, and xnew to resample at.
x = np.linspace(0, 10, num=11, endpoint=True)
y = np.cos(-x**2/9.0)
xnew = np.linspace(0, 10, num=41, endpoint=True)
# Define interpolators.
f_linear = interp1d(x, y)
f_cubic = interp1d(x, y, kind='cubic')
# Plot.
plt.plot(x, y, 'o', label='data')
plt.plot(xnew, f_linear(xnew), '-', label='linear')
plt.plot(xnew, f_cubic(xnew), '--', label='cubic')
plt.legend(loc='best')
plt.show()
Slightly modified for increased readability.
One of the easiest implementations I found was to use the exponential moving average that TensorBoard uses:
from typing import List

def smooth(scalars: List[float], weight: float) -> List[float]:  # weight between 0 and 1
    last = scalars[0]  # first value in the plot (first timestep)
    smoothed = list()
    for point in scalars:
        smoothed_val = last * weight + (1 - weight) * point  # calculate the smoothed value
        smoothed.append(smoothed_val)  # save it
        last = smoothed_val  # anchor the last smoothed value
    return smoothed

# x_labels, train_data and ax are your own x values, raw data and axes
ax.plot(x_labels, smooth(train_data, .9), x_labels, train_data)
I presume you mean curve-fitting and not anti-aliasing from the context of your question. PyPlot doesn't have any built-in support for this, but you can easily implement some basic curve-fitting yourself, like the code seen here, or if you're using GuiQwt it has a curve fitting module. (You could probably also steal the code from SciPy to do this as well).
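As an illustration of that "basic curve-fitting yourself" remark (a sketch, not the code the answer links to), you can fit a low-order polynomial with numpy.polyfit to the T and power arrays from the question. Fitting the logarithm of power is my own assumption, chosen because the data decays roughly exponentially:

import numpy as np
import matplotlib.pyplot as plt

T = np.array([6, 7, 8, 9, 10, 11, 12])
power = np.array([1.53E+03, 5.92E+02, 2.04E+02, 7.24E+01, 2.72E+01, 1.10E+01, 4.70E+00])

# Fit a straight line to log(power), i.e. an exponential model for power.
coeffs = np.polyfit(T, np.log(power), deg=1)
T_fine = np.linspace(T.min(), T.max(), 300)
power_fit = np.exp(np.polyval(coeffs, T_fine))

plt.plot(T, power, 'o', label='data')
plt.plot(T_fine, power_fit, '-', label='exponential fit')
plt.legend()
plt.show()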
Here is a simple solution for dates:
from scipy.interpolate import make_interp_spline
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as dates
from datetime import datetime
data = {
    datetime(2016, 9, 26, 0, 0): 26060, datetime(2016, 9, 27, 0, 0): 23243,
    datetime(2016, 9, 28, 0, 0): 22534, datetime(2016, 9, 29, 0, 0): 22841,
    datetime(2016, 9, 30, 0, 0): 22441, datetime(2016, 10, 1, 0, 0): 23248,
}
#create data
date_np = np.array(list(data.keys()))
value_np = np.array(list(data.values()))
date_num = dates.date2num(date_np)
# smooth
date_num_smooth = np.linspace(date_num.min(), date_num.max(), 100)
spl = make_interp_spline(date_num, value_np, k=3)
value_np_smooth = spl(date_num_smooth)
# print
plt.plot(date_np, value_np)
plt.plot(dates.num2date(date_num_smooth), value_np_smooth)
plt.show()
It's worth your time looking at seaborn for plotting smoothed lines.
The seaborn lmplot function will plot data and regression model fits.
The following illustrates both polynomial and lowess fits:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
T = np.array([6, 7, 8, 9, 10, 11, 12])
power = np.array([1.53E+03, 5.92E+02, 2.04E+02, 7.24E+01, 2.72E+01, 1.10E+01, 4.70E+00])
df = pd.DataFrame(data = {'T': T, 'power': power})
sns.lmplot(x='T', y='power', data=df, ci=None, order=4, truncate=False)
sns.lmplot(x='T', y='power', data=df, ci=None, lowess=True, truncate=False)
The order = 4 polynomial fit is overfitting this toy dataset. I don't show it here but order = 2 and order = 3 gave worse results.
The lowess = True fit is underfitting this tiny dataset but may give better results on larger datasets.
Check the seaborn regression tutorial for more examples.
Another way to go, which slightly modifies the function depending on the parameters you use:
from statsmodels.nonparametric.smoothers_lowess import lowess

def smoothing(x, y):
    lowess_frac = 0.15  # fraction of the data used for each local estimate =~ smoothing window
    lowess_it = 0
    x_smooth = x
    y_smooth = lowess(y, x, is_sorted=False, frac=lowess_frac, it=lowess_it, return_sorted=False)
    return x_smooth, y_smooth
This was better suited to my specific application than the other answers.
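A possible call site for the smoothing function above, using the T and power arrays from the question (the original answer does not show one):

import numpy as np
import matplotlib.pyplot as plt

T = np.array([6, 7, 8, 9, 10, 11, 12])
power = np.array([1.53E+03, 5.92E+02, 2.04E+02, 7.24E+01, 2.72E+01, 1.10E+01, 4.70E+00])

# With only 7 points you may want to raise lowess_frac inside smoothing().
x_smooth, y_smooth = smoothing(T, power)
plt.plot(T, power, 'o', label='data')
plt.plot(x_smooth, y_smooth, '-', label='lowess')
plt.legend()
plt.show()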
