How do I correctly code linear regression with gradient descent in Python?

import pandas as pd
import matplotlib.pyplot as plt
# I'm trying to code the most basic functionality of LinearRegression
# from sklearn.linear_model import LinearRegression
dataframe = pd.read_fwf('brain_body.txt')  # link given below
x_values = dataframe[['Brain']]
y_values = dataframe[['Body']]
lr = LinearRegression(0.0001, 10)  # passing learning_rate and iterations
lr.fit(x_values, y_values)
# commenting out because the values are insane
# plt.scatter(x_values, y_values)
# plt.plot(x_values, lr.predict(x_values))
# plt.show()
Link to brain_body.txt: https://raw.githubusercontent.com/llSourcell/linear_regression_demo/master/brain_body.txt
Here's the class I've written:
class LinearRegression:
    def __init__(self, learning_rate, iterations):
        self.b = 0  # b as in y = mx + b
        self.m = 0  # m as in y = mx + b
        self.learning_rate = learning_rate
        self.iterations = iterations

    def get_y(self, x):
        return self.m * float(x) + self.b

    def step_gradient(self, x_values, y_values):
        print()
        print("Values before: m =", self.m, " b =", self.b)
        m_gradient = 0
        b_gradient = 0
        N = float(len(x_values.iloc[:, 0]))
        print('%11s' % "d(m)", '%11s' % "m_gradient", '%11s' % "d(b)", '%11s' % "b_gradient")
        for i in range(int(N)):
            x = x_values.iloc[i][0]
            y = y_values.iloc[i][0]
            # EDIT: I missed a * -1 here
            # (but that alone wouldn't fix everything; adjusting the learning rate does)
            pm = (y - self.get_y(x)) * x   # partial derivative of m
            pb = (y - self.get_y(x)) * -1  # partial derivative of b
            m_gradient += pm * 2 / N
            b_gradient += pb * 2 / N
            print('%11s' % pm, '%11s' % m_gradient, '%11s' % pb, '%11s' % b_gradient)
        self.m -= self.learning_rate * m_gradient  # adjust current m
        self.b -= self.learning_rate * b_gradient  # adjust current b
        print("Values after: m =", self.m, " b =", self.b)
        print()

    def fit(self, x_values, y_values):  # equivalent to train_model
        for i in range(self.iterations):
            self.step_gradient(x_values, y_values)

    def predict(self, x_values):  # equivalent to get_output
        predictions = []
        for x in x_values.iloc[:, 0]:
            predictions.append(self.get_y(x))
        return predictions
I watched Siraj Raval's How to do Linear Regression the right way and followed almost the same approach he did. I did learn what partial derivatives and gradient descent are, but I do not know what values the partial derivatives should take (or how to sanity-check them). And the numbers go crazy in the very first iteration:
Values before: m = 0 b = 0
d(m) m_gradient d(b) b_gradient
150.6325 4.85911290323 -44.5 -1.43548387097
7.44 5.09911290323 -15.5 -1.93548387097
10.935 5.45185483871 -8.1 -2.19677419355
196695.0 6350.45185484 -423.0 -15.8419354839
4341.435 6490.49814516 -119.5 -19.6967741935
3180.9 6593.10782258 -115.0 -23.4064516129
1456.306 6640.08543548 -98.2 -26.5741935484
5.72 6640.26995161 -5.5 -26.7516129032
243.02 6648.10930645 -58.0 -28.6225806452
2.72 6648.19704839 -6.4 -28.8290322581
0.404 6648.21008065 -4.0 -28.9580645161
5.244 6648.37924194 -5.7 -29.1419354839
6.6 6648.59214516 -6.6 -29.3548387097
0.0007 6648.59216774 -0.14 -29.3593548387
0.06 6648.59410323 -1.0 -29.3916129032
37.8 6649.81345806 -10.8 -29.74
24.6 6650.60700645 -12.3 -30.1367741935
10.71 6650.95249032 -6.3 -30.34
11723841.0 384839.371845 -4603.0 -178.823870968
0.0069 384839.372068 -0.3 -178.833548387
78394.9 387368.23981 -419.0 -192.349677419
341255.0 398376.465616 -655.0 -213.478709677
2.7475 398376.554245 -3.5 -213.591612903
1150.0 398413.651019 -115.0 -217.301290323
84.48 398416.376181 -25.6 -218.127096774
1.0 398416.408439 -5.0 -218.288387097
24.675 398417.204406 -17.5 -218.852903226
359720.0 410021.075374 -680.0 -240.788387097
84042.0 412732.107632 -406.0 -253.88516129
27625.0 413623.236665 -325.0 -264.369032258
9.225 413623.534245 -12.3 -264.765806452
81840.0 416263.534245 -1320.0 -307.346451613
38007648.0 1642316.69554 -5712.0 -491.604516129
13.65 1642317.13586 -3.9 -491.730322581
1217.2 1642356.40037 -179.0 -497.504516129
1960.0 1642419.62618 -56.0 -499.310967742
68.85 1642421.84715 -17.0 -499.859354839
0.12 1642421.85102 -1.0 -499.891612903
0.0092 1642421.85132 -0.4 -499.904516129
0.0025 1642421.8514 -0.25 -499.912580645
17.5 1642422.41591 -12.5 -500.315806452
122500.0 1646374.02882 -490.0 -516.122258065
30.25 1646375.00462 -12.1 -516.512580645
9712.5 1646688.31107 -175.0 -522.157741935
15700.0 1647194.76269 -157.0 -527.222258065
22950.4 1647935.09817 -440.0 -541.415806452
1893.725 1647996.18607 -179.5 -547.206129032
1.32 1647996.22865 -2.4 -547.283548387
4860.0 1648153.00285 -81.0 -549.896451613
75.6 1648155.44156 -21.0 -550.573870968
168.0896 1648160.8638 -39.2 -551.838387097
0.532 1648160.88096 -1.9 -551.899677419
0.09 1648160.88387 -1.2 -551.938387097
0.366 1648160.89567 -3.0 -552.03516129
0.01584 1648160.89619 -0.33 -552.045806452
34560.0 1649275.73489 -180.0 -557.852258065
75.0 1649278.15425 -25.0 -558.658709677
27040.0 1650150.41231 -169.0 -564.110322581
2.34 1650150.4878 -2.6 -564.194193548
18.468 1650151.08354 -11.4 -564.561935484
0.26 1650151.09193 -2.5 -564.642580645
213.444 1650157.97722 -50.4 -566.268387097
Values after: m = -165.015797722 b = 0.0566268387097
Values after 10 iterations: m = -1.76899770934e+22 b = 4.21166966984e+18
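For reference, the cost function being minimized here is the mean squared error, and its partial derivatives (what pm and pb are meant to compute; the minus sign on the m term is the * -1 I missed) are:

$$E(m,b) = \frac{1}{N}\sum_{i=1}^{N}\bigl(y_i - (m x_i + b)\bigr)^2$$

$$\frac{\partial E}{\partial m} = -\frac{2}{N}\sum_{i=1}^{N} x_i\bigl(y_i - (m x_i + b)\bigr), \qquad \frac{\partial E}{\partial b} = -\frac{2}{N}\sum_{i=1}^{N}\bigl(y_i - (m x_i + b)\bigr)$$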
How do I correctly implement linear regression from scratch?

This might not be a true answer, as it uses R (I could probably figure this out in Python, but it would take me longer). I think your issue is the size of your learning_rate. I'm taking a machine learning class at the moment, so I'm familiar with what you're doing, and I attempted to implement it myself. Here was my code:
library(ggplot2)

## create test data
data <- data.frame(x = 1:10, y = 1:10)
n <- nrow(data)

## initialize values
m <- 0
b <- 0
alpha <- 0.01
iters <- 100
results <- data.frame(i = 1:iters,
                      pm = 1:iters,
                      pb = 1:iters,
                      m = 1:iters,
                      b = 1:iters)

for (i in 1:iters) {
  y_hat <- (m * data$x) + b
  pm <- (1/n) * sum((y_hat - data$y) * data$x)
  pb <- (1/n) * sum(y_hat - data$y)
  m <- m - (alpha * pm)
  b <- b - (alpha * pb)
  ## uncomment if you want; shows "animated" change
  ## p <- ggplot(data, aes(x = x, y = y)) + geom_point()
  ## p <- p + geom_abline(intercept = b, slope = m)
  ## print(p)
  ## this turned out to be key for looking at output
  results[i, 2:5] <- c(pm, pb, m, b)
}
Now, note the end of results with a big alpha, 0.1:
> tail(results)
i pm pb m b
95 95 -2.864612e+45 -4.114745e+44 2.135518e+44 3.067470e+43
96 96 8.390457e+45 1.205210e+45 -6.254938e+44 -8.984628e+43
97 97 -2.457567e+46 -3.530062e+45 1.832073e+45 2.631600e+44
98 98 7.198218e+46 1.033956e+46 -5.366146e+45 -7.707961e+44
99 99 -2.108360e+47 -3.028460e+46 1.571745e+46 2.257664e+45
100 100 6.175391e+47 8.870365e+46 -4.603646e+46 -6.612702e+45
See how m and b are flip-flopping? The learning rate alpha is so high that alpha * derivative keeps jumping over the minimum! The linked class shows this in the gradient descent videos; the idea is that each step is so large it overshoots the minimum and lands even farther away on the other side.
Look at results using alpha = 0.01:
> tail(results)
i pm pb m b
95 95 -0.003483741 0.02425319 0.9834438 0.1152615
96 96 -0.003476426 0.02420226 0.9834785 0.1150195
97 97 -0.003469127 0.02415144 0.9835132 0.1147780
98 98 -0.003461842 0.02410073 0.9835478 0.1145370
99 99 -0.003454573 0.02405012 0.9835824 0.1142965
100 100 -0.003447319 0.02399962 0.9836169 0.1140565
It's slow, but we're homing in on m = 1 and b = 0 as expected. With your real data, I had a similar issue. The main code body is the same, with this replacing the data <- data.frame() line at the beginning:
data <- read.table(file = "https://raw.githubusercontent.com/llSourcell/linear_regression_demo/master/brain_body.txt",
                   header = T, sep = "", stringsAsFactors = F)
names(data) <- c("y", "x")
Everything else is the same, except that I played with alpha and iters. Here's what I found!
## your learning rate; diverging/flip-flopping
## alpha <- 0.0001
> tail(results)
i pm pb m b
95 95 -3.842565e+190 -1.167811e+187 3.801319e+186 1.155276e+183
96 96 3.541406e+192 1.076285e+189 -3.503393e+188 -1.064732e+185
97 97 -3.263851e+194 -9.919315e+190 3.228817e+190 9.812842e+186
98 98 3.008048e+196 9.141894e+192 -2.975760e+192 -9.043766e+188
99 99 -2.772294e+198 -8.425404e+194 2.742537e+194 8.334966e+190
100 100 2.555018e+200 7.765068e+196 -2.527592e+196 -7.681718e+192
## 1/10 as big; still diverging!
## alpha <- 0.00001
> tail(results)
i pm pb m b
95 95 -2.453089e+92 -7.455293e+88 2.189776e+87 6.655047e+83
96 96 2.040052e+93 6.200012e+89 -1.821074e+88 -5.534508e+84
97 97 -1.696559e+94 -5.156089e+90 1.514452e+89 4.602638e+85
98 98 1.410902e+95 4.287936e+91 -1.259457e+90 -3.827672e+86
99 99 -1.173342e+96 -3.565957e+92 1.047397e+91 3.183190e+87
100 100 9.757815e+96 2.965541e+93 -8.710418e+91 -2.647222e+88
## even smaller; that's better!
## alpha <- 0.000001
> tail(results)
i pm pb m b
95 95 -0.01579109 51.95899 0.8856351 -0.004667159
96 96 -0.01579107 51.95894 0.8856352 -0.004719118
97 97 -0.01579106 51.95889 0.8856352 -0.004771077
98 98 -0.01579104 51.95885 0.8856352 -0.004823036
99 99 -0.01579103 51.95880 0.8856352 -0.004874995
100 100 -0.01579102 51.95875 0.8856352 -0.004926953
With this final result, I plotted the fit, and it looks reasonable:
p <- ggplot(data, aes(x = x, y = y)) + geom_point()
p <- p + geom_abline(intercept = b, slope = m)
print(p)
So, to wrap up:
I didn't verify/check your Python code.
I did implement my understanding of gradient descent in R and tried it on test data to verify the behavior.
I re-tried this with your actual data and found that it appears to work.
Thus, my recommendation would be to re-try your method with simplified data (it sounds like you already have) and then look at the initial steps with a very small learning rate to see if that fixes it. If not, there may still be something wrong with your math.
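If it helps, here is a quick Python port of my R loop above (a sketch I have not run against your class; same toy data and alpha):

import numpy as np

# Toy data: y = x, so gradient descent should approach m = 1, b = 0.
x = np.arange(1, 11, dtype=float)
y = np.arange(1, 11, dtype=float)
n = len(x)

m, b = 0.0, 0.0
alpha = 0.01  # small enough not to overshoot on this data
iters = 100

for i in range(iters):
    y_hat = m * x + b
    pm = (1.0 / n) * np.sum((y_hat - y) * x)  # partial derivative wrt m
    pb = (1.0 / n) * np.sum(y_hat - y)        # partial derivative wrt b
    m -= alpha * pm
    b -= alpha * pb

print(m, b)  # should approach m = 1, b = 0, matching the R run above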
Hope that helps!

Related

Cannot optimize the bias parameter in linear regression

I am trying to train a very basic linear regression model to predict the linear equation Y = m*X + c. The weight parameter is optimized to 5, but the bias parameter is stuck at 0. Am I doing something wrong?
import numpy as np

X = np.array(range(1,1000))
Y = 5 * X + 7

def forward(W, X, b):
    return W * X + b

def getcost(Y, y):
    return np.sum((Y-y)**2) / 1000

def backward(W, b, X, Y, y, lr):
    dW = -2 * np.dot((Y-y).T, X) / 1000
    db = -2 * np.sum(Y-y) / 1000
    W -= lr * dW
    b -= lr * db
    return W, b

W = 0.0
b = 0.0
for i in range(80):
    y = forward(W, X, b)
    cost = getcost(Y, y)
    W, b = backward(W, b, X, Y, y, lr=0.000001)
    print(int(cost), W, b)
The range of X is too wide; since X and Y have a linear relationship, the model can be trained on a small range of values. The learning rate is also very small, so convergence takes much longer when the input set is big. If you really want to use the same data, you can normalize X.
import numpy as np
import matplotlib.pyplot as plt

X = np.array(range(1,30))
Y = 5 * X + 7

# Normalize the X values
#X = (X - np.mean(X)) / np.std(X)

N = len(Y)
learning_rate = 0.001

# Initialize the model parameters m and b
m, b = 0.0, 0.0
errors = []
for p in range(8000):
    hyp = m * X + b
    error = Y - hyp
    m_gradient = -(2/N) * np.sum(X * error)
    b_gradient = -(2/N) * np.sum(error)
    m = m - learning_rate * m_gradient
    b = b - learning_rate * b_gradient
    errors.append(np.mean(error ** 2))
    if p % 400 == 0:
        print(f'm={m} b={b}')

# prediction for x = 200; y should be 5*200+7 = 1007
print(m*200 + b)

plt.plot(errors)
plt.xlabel('Iteration')
plt.ylabel('Error')
plt.show()
I agree with @Ahsan Nawaz.
The only changes I made to your code are:
Scaled your features (otherwise, increasing the learning_rate gave NaNs)
Increased the learning rate
Increased the number of epochs
Here is your code, modified:
import numpy as np
from sklearn.preprocessing import StandardScaler

X = np.array(range(1,1000))
scaler = StandardScaler()
scaler.fit(X.reshape(-1,1))
X = scaler.transform(X.reshape(-1,1)).reshape(-1)
Y = 5 * X + 7

def forward(W, X, b):
    return W * X + b

def getcost(Y, y):
    return np.sum((Y-y)**2) / 1000

def backward(W, b, X, Y, y, lr):
    dW = -2 * np.dot((Y-y).T, X) / 1000
    db = -2 * np.sum(Y-y) / 1000
    W -= lr * dW
    b -= lr * db
    return W, b

W = 0.0
b = 0.0
for i in range(8000):
    y = forward(W, X, b)
    cost = getcost(Y, y)
    W, b = backward(W, b, X, Y, y, lr=0.001)
    print(int(cost), W, b)
Here is the final output -
0 4.999999437318114 6.999999212245364
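One caveat (my addition, not part of the answer above): here Y is generated from the already-scaled X, so W and b converge to 5 and 7 directly. With real data you would scale only the inputs, so the same scaler must be applied at prediction time. A sketch using the names defined above (x_new is a made-up input):

# Hypothetical new input, scaled with the same StandardScaler fitted above
x_new = np.array([[500.0]])
x_new_scaled = scaler.transform(x_new).reshape(-1)
print(forward(W, x_new_scaled, b))  # predict Y for the raw input 500.0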

Where is my code hanging (in an infinite loop)?

I am new to Python and trying to get this script to run, but it seems to be hanging in an infinite loop. When I use Ctrl+C to stop it, it is always on line 103:
vs = 20.05 * np.sqrt(Tb + Lb * (y - y0)) # m/s speed of sound as a function of temperature
I am used to MATLAB (from school) and the editor it has. I ran into encoding issues with this code earlier. Any suggestions for a (free) editor? I am currently using jEdit and/or Notepad.
Here is the full script:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
from math import *
from astropy.table import Table
import matplotlib.pyplot as plt
from hanging_threads import start_monitoring  # test for code hanging
start_monitoring(seconds_frozen=10, test_interval=100)
"""Initial Conditions and Inputs"""
d = 154.71/1000 # diameter of bullet (in meters)
m = 46.7 # mass of bullet ( in kg)
K3 = 0.87*0.3735 # drag coefficient at supersonic speed
Cd1 = 0.87*0.108 #drag coefficient at subsonic speed
v0 = 802 # muzzle velocity in m/sec
dt = 0.01 # timestep in seconds
"""coriolis inputs"""
L = 90*np.pi/180 # radians - latitude of firing site
AZ = 90*np.pi/180 # radians - azimuth angle of fire measured clockwise from North
omega = 0.0000727 #rad/s rotation of the earth
"""wind inputs"""
wx = 0 # m/s
wz = 0 # m/s
"""initializing variables"""
vx = 0 #initial x velocity
vy = 0 #initial y velocity
vy0 = 0
y_max = 0 #apogee
v = 0
t = 0
x = 0
"""Variable Atmospheric Pressure"""
rho0 = 1.2041 # density of air at sea-level (kg/m^3)
T = 20 # temperature at sea level in Celsius
Tb = T + 273.15 # temperature at sea level in Kelvin
Lb = -2/304.8 # temperature lapse rate in K/m (-2degrees/1000ft)- not valid above 36000ft
y = 0 # current altitude
y0 = 0 # initial altitude
g = 9.81 # acceleration due to gravity in m/s/s
M = 0.0289644 #kg/mol # molar mass of air
R = 8.3144598 # J/molK - universal gas constant
# air density as a function of altitude and temperature
rho = rho0 * ((Tb/(Tb+Lb*(y-y0)))**(1+(g*M/(R*Lb))))
"""Variable Speed of Sound"""
vs = 20.05*np.sqrt(Tb +Lb*(y-y0)) # m/s speed of sound as a function of temperature
Area = pi*(d/2)**2 # computing the reference area
phi_incr = 5 #phi0 increment (degrees)
N = 12 # length of table
"""Range table"""
dtype = [('phi0', 'f8'), ('phi_impact', 'f8'), ('x', 'f8'), ('z', 'f8'),('y', 'f8'), ('vx', 'f8'), ('vz', 'f8'), ('vy', 'f8'), ('v', 'f8'),('M', 'f8'), ('t', 'f8')]
table = Table(data=np.zeros(N, dtype=dtype))
"""Calculates entire trajectory for each specified angle"""
for i in range(N):
    phi0 = (i + 1) * phi_incr
    """list of initial variables used in while loop"""
    t = 0
    y = 0
    y_max = y
    x = 0
    z = 0
    vx = v0*np.cos(radians(phi0))
    vy = v0*np.sin(radians(phi0))
    vx_w = 0
    vz_w = 0
    vz = 0
    v = v0
    ay = 0
    ax = 0
    wx = wx
    wz = wz
    rho = rho0 * ((Tb / (Tb + Lb * (y - y0))) ** (1 + (g * M / (R * Lb))))
    vs = 20.05 * np.sqrt(Tb + Lb * (y - y0))  # m/s speed of sound as a function of temperature
    ax_c = -2 * omega * ((vz * sin(L)) + vy * cos(L) * sin(AZ))
    ay_c = 2 * omega * ((vz * cos(L) * cos(AZ)) + vx_w * cos(L) * sin(AZ))
    az_c = -2 * omega * ((vy * cos(L) * cos(AZ)) - vx_w * sin(L))
    Mach = v/vs
    """ initializing variables for plots"""
    t_list = [t]
    x_list = [x]
    y_list = [y]
    vy_list = [vy]
    v_list = [v]
    phi0_list = [phi0]
    Mach_list = [Mach]
    while y >= 0:
        phi0 = phi0
        """drag calculation with variable density, Temp and sound speed"""
        rho = rho0 * ((Tb / (Tb + Lb * (y - y0))) ** (1 + (g * M / (R * Lb))))
        vs = 20.05 * np.sqrt(Tb + Lb * (y - y0))  # m/s speed of sound as a function of temperature
        Cd3 = K3 / sqrt(v / vs)
        Mach = v/vs
        """Determining drag regime"""
        if v > 1.2 * vs:  # supersonic
            Cd = Cd3
        elif v < 0.8 * vs:  # subsonic
            Cd = Cd1
        else:  # transonic
            Cd = ((Cd3 - Cd1)*(v/vs - 0.8)/(0.4)) + Cd1
        """Acceleration due to Coriolis"""
        ax_c = -2*omega*((vz_w*sin(L)) + vy*cos(L)*sin(AZ))
        ay_c = 2*omega*((vz_w*cos(L)*cos(AZ)) + vx_w*cos(L)*sin(AZ))
        az_c = -2*omega*((vy*cos(L)*cos(AZ)) - vx_w*sin(L))
        """Total acceleration calcs"""
        if vx > 0:
            ax = -0.5*rho*((vx-wx)**2)*Cd*Area/m + ax_c
        else:
            ax = 0
        """ Vy before and after peak"""
        if vy > 0:
            ay = (-0.5 * rho * (vy ** 2) * Cd * Area / m) - g + ay_c
        else:
            ay = (0.5 * rho * (vy ** 2) * Cd * Area / m) - g + ay_c
        az = az_c
        vx = vx + ax*dt  # vx without wind
        # vx_w = vx with drag and no wind + wind
        vx_w = vx + 2*wx*(1-(vx/v0*np.cos(radians(phi0))))
        vy = vy + ay*dt
        vz = vz + az*dt
        vz_w = vz + wz*(1-(vx/v0*np.cos(radians(phi0))))
        """projectile velocity"""
        v = sqrt(vx_w**2 + vy**2 + vz**2)
        """new x, y, z positions"""
        x = x + vx_w*dt
        y = y + vy*dt
        z = z + vz_w*dt
        if y_max <= y:
            y_max = y
            phi_impact = degrees(atan(vy/vx))  # impact angle in degrees
            """ appends selected data for ability to plot"""
            t_list.append(t)
            x_list.append(x)
            y_list.append(y)
            vy_list.append(vy)
            v_list.append(v)
            phi0_list.append(phi0)
            Mach_list.append(Mach)
            if y < 0:
                break
            t += dt
    """Range table output"""
    table[i] = ('%.f' % phi0, '%.3f' % phi_impact, '%.1f' % x, '%.2f' % z, '%.1f' % y_max, '%.1f' % vx_w, '%.1f' % vz, '%.1f' % vy, '%.1f' % v, '%.2f' % Mach, '%.1f' % t)
    """ Plot"""
    plt.plot(x_list, y_list, label='%d°' % phi0)

plt.title('Altitude versus Range')
plt.ylabel('Altitude (m)')
plt.xlabel('Range (m)')
plt.axis([0, 30000, 0, 15000])
plt.grid(True)
print(table)
legend = plt.legend(title="Firing Angle", loc=0, fontsize='small', fancybox=True)
plt.show()
Thank you in advance
Which Editor Should I Use?
Personally, I prefer VSCode, but Sublime is also pretty popular. If you really want to go barebones, try Vim. All three are completely free.
Code Errors
After scanning your code snippet, it appears that you are caught in an infinite loop, which you enter with the statement while y >= 0. The reason you always land on line 103 when you hit Ctrl+C is likely that that line takes the longest to execute, making it the most likely place for the interrupt to arrive.
Note that currently, you can only escape your while loop through this branch:
if y_max <= y:
    y_max = y
    phi_impact = degrees(atan(vy/vx))  # impact angle in degrees
    """ appends selected data for ability to plot"""
    t_list.append(t)
    x_list.append(x)
    y_list.append(y)
    vy_list.append(vy)
    v_list.append(v)
    phi0_list.append(phi0)
    Mach_list.append(Mach)
    if y < 0:
        break
    t += dt
This means that if y_max never drops below y, or y never drops below zero, you will loop forever. Granted, I haven't looked at your code in any great depth, but from the surface it appears that y_max is never decremented (meaning it will always be at least equal to y). Furthermore, y is only updated by y = y + vy*dt, which will only ever increase y while vy >= 0 (I assume dt is always positive).
Debugging
As @Giacomo Catenazzi suggested, try printing out y and y_max at the top of the while loop and see how they change as your code runs. I suspect they are not changing the way you expected.
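For example, here is a minimal self-contained sketch of that print-debugging approach (toy numbers, not the actual physics from the question):

# Toy vertical-motion loop mirroring the structure above: print the loop
# variables each iteration and watch whether y and y_max behave as expected.
y, y_max, vy = 0.0, 0.0, 50.0
dt, g = 0.01, 9.81
while y >= 0:
    print(f"y={y:9.3f}  y_max={y_max:9.3f}  vy={vy:7.3f}")
    vy -= g * dt
    y += vy * dt
    if y_max <= y:
        y_max = y
# Here y eventually goes negative and the loop ends. If your own prints show
# y plateauing, or y_max pinned to y forever, the update logic is suspect.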

Sympy TypeError - cannot interpret float object as Integer

I am using the SymPy package and trying to integrate. For this I have written a small function:
from __future__ import division
from sympy import *
init_printing()

x, Rn = symbols('x Rn')

def minimum(k):
    expr = (x**(k/2-1)*exp(-x/2))/(2**(k/2)*gamma(k/2))
    linkes_int = integrate(x*expr, (x, 0, Rn))
    rechtes_int = integrate(Rn*expr, (x, Rn, oo))
    minimum1 = linkes_int + rechtes_int
    return (minimum1, linkes_int, rechtes_int)

ergebnis = minimum(6)
Now, when calling the function with an even parameter of 6 or higher, I get a TypeError stating that a float object cannot be interpreted as an integer. I am not sure why this is the case, especially since it only happens for k >= 6.
Another question I have: is there a way to put constraints on symbols in SymPy, e.g. that a symbol has to be greater than zero, or has to be an integer?
My error is:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-4-1a7d2848e620> in <module>()
7 return(minimum1,linkes_int,rechtes_int)
8 k = Symbol("k", properties = [lambda s: (s/2).is_Integer])
----> 9 ergebnis = minimum(6)
10 #ergebnis[0]
11 asd = simplify(ergebnis[0])
<ipython-input-4-1a7d2848e620> in minimum(k)
2 def minimum(k):
3 expr = (x**(k/2-1)*exp(-x/2))/(2**(k/2)*gamma(k/2))
----> 4 linkes_int = integrate(x*expr,(x,1,Rn))
5 rechtes_int = integrate(Rn*expr,(x,Rn,oo))
6 minimum1 = linkes_int + rechtes_int
C:\ProgramData\Anaconda3\lib\site-packages\sympy\integrals\integrals.py in integrate(*args, **kwargs)
1293 if isinstance(integral, Integral):
1294 return integral.doit(deep=False, meijerg=meijerg, conds=conds,
-> 1295 risch=risch, manual=manual)
1296 else:
1297 return integral
C:\ProgramData\Anaconda3\lib\site-packages\sympy\integrals\integrals.py in doit(self, **hints)
484 function, xab[0],
485 meijerg=meijerg1, risch=risch, manual=manual,
--> 486 conds=conds)
487 if antideriv is None and meijerg1 is True:
488 ret = try_meijerg(function, xab)
C:\ProgramData\Anaconda3\lib\site-packages\sympy\integrals\integrals.py in _eval_integral(self, f, x, meijerg, risch, manual, conds)
906 # rewrite using G functions
907 try:
--> 908 h = meijerint_indefinite(g, x)
909 except NotImplementedError:
910 from sympy.integrals.meijerint import _debug
C:\ProgramData\Anaconda3\lib\site-packages\sympy\integrals\meijerint.py in meijerint_indefinite(f, x)
1610 results = []
1611 for a in sorted(_find_splitting_points(f, x) | {S(0)}, key=default_sort_key):
-> 1612 res = _meijerint_indefinite_1(f.subs(x, x + a), x)
1613 if not res:
1614 continue
C:\ProgramData\Anaconda3\lib\site-packages\sympy\integrals\meijerint.py in _meijerint_indefinite_1(f, x)
1675 if b < 0 or f.subs(x, 0).has(nan, zoo):
1676 place = None
-> 1677 r = hyperexpand(r.subs(t, a*x**b), place=place)
1678
1679 # now substitute back
C:\ProgramData\Anaconda3\lib\site-packages\sympy\simplify\hyperexpand.py in hyperexpand(f, allow_hyper, rewrite, place)
2471 if not r.has(nan, zoo, oo, -oo):
2472 return r
-> 2473 return f.replace(hyper, do_replace).replace(meijerg, do_meijer)
C:\ProgramData\Anaconda3\lib\site-packages\sympy\core\basic.py in replace(self, query, value, map, simultaneous, exact)
1406 return expr
1407
-> 1408 rv = bottom_up(self, rec_replace, atoms=True)
1409
1410 # restore original expressions for Dummy symbols
C:\ProgramData\Anaconda3\lib\site-packages\sympy\simplify\simplify.py in bottom_up(rv, F, atoms, nonbasic)
997 if args != rv.args:
998 rv = rv.func(*args)
--> 999 rv = F(rv)
1000 elif atoms:
1001 rv = F(rv)
C:\ProgramData\Anaconda3\lib\site-packages\sympy\core\basic.py in rec_replace(expr)
1391 result = _query(expr)
1392 if result or result == {}:
-> 1393 new = _value(expr, result)
1394 if new is not None and new != expr:
1395 mapping[expr] = new
C:\ProgramData\Anaconda3\lib\site-packages\sympy\core\basic.py in <lambda>(expr, result)
1334 _value = lambda expr, result: value(*expr.args)
1335 elif callable(value):
-> 1336 _value = lambda expr, result: value(*expr.args)
1337 else:
1338 raise TypeError(
C:\ProgramData\Anaconda3\lib\site-packages\sympy\simplify\hyperexpand.py in do_meijer(ap, bq, z)
2468 def do_meijer(ap, bq, z):
2469 r = _meijergexpand(G_Function(ap[0], ap[1], bq[0], bq[1]), z,
-> 2470 allow_hyper, rewrite=rewrite, place=place)
2471 if not r.has(nan, zoo, oo, -oo):
2472 return r
C:\ProgramData\Anaconda3\lib\site-packages\sympy\simplify\hyperexpand.py in _meijergexpand(func, z0, allow_hyper, rewrite, place)
2345
2346 t = Dummy('t')
-> 2347 slater1, cond1 = do_slater(func.an, func.bm, func.ap, func.bq, z, z0)
2348
2349 def tr(l):
C:\ProgramData\Anaconda3\lib\site-packages\sympy\simplify\hyperexpand.py in do_slater(an, bm, ap, bq, z, zfinal)
2279 premult = (t/k)**bh
2280 hyp = _hyperexpand(Hyper_Function(nap, nbq), harg, ops,
-> 2281 t, premult, bh, rewrite=None)
2282 res += fac * hyp
2283 else:
C:\ProgramData\Anaconda3\lib\site-packages\sympy\simplify\hyperexpand.py in _hyperexpand(func, z, ops0, z0, premult, prem, rewrite)
2002 # Try to recognise a shifted sum.
2003 p = S(0)
-> 2004 res = try_shifted_sum(func, z0)
2005 if res is not None:
2006 func, nops, p = res
C:\ProgramData\Anaconda3\lib\site-packages\sympy\simplify\hyperexpand.py in try_shifted_sum(func, z)
1656
1657 ops = []
-> 1658 for n in range(r - 1):
1659 ops.append(ShiftA(n + 1))
1660 ops.reverse()
TypeError: 'Float' object cannot be interpreted as an integer
The problem is this integral:
In [59]: Integral(0.0625*x**3.0*exp(-x/2), (x, 0, Rn))
Out[59]:
Rn
⌠
⎮ -x
⎮ ───
⎮ 3.0 2
⎮ 0.0625⋅x ⋅ℯ dx
⌡
0
It should be fine but sympy gives a TypeError. That's just a bug in sympy:
In [60]: Integral(0.0625*x**3.0*exp(-x/2), (x, 0, Rn)).doit()
---------------------------------------------------------------------------
TypeError
However the bug is only seen when there are floats in the input and there's no need for these floats:
In [61]: nsimplify(Integral(0.0625*x**3.0*exp(-x/2), (x, 0, Rn)))
Out[61]:
Rn
⌠
⎮ -x
⎮ ───
⎮ 3 2
⎮ x ⋅ℯ
⎮ ─────── dx
⎮ 16
⌡
0
In [62]: nsimplify(Integral(0.0625*x**3.0*exp(-x/2), (x, 0, Rn))).doit()
Out[62]:
-Rn
────
⎛ 3 2 ⎞ 2
⎝- Rn - 6⋅Rn - 24⋅Rn - 48⎠⋅ℯ
────────────────────────────────── + 6
8
The simplest solution is to sympify the integer 6: ergebnis = minimum(S(6))
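Putting it together, here is a sketch of the fixed function: k is passed as an exact SymPy Integer so that k/2 stays a Rational instead of becoming a Python float, and the positive=True assumption on Rn also answers the side question about constraining symbols.

from sympy import Symbol, symbols, exp, gamma, integrate, oo, S

x = symbols('x')
Rn = Symbol('Rn', positive=True)  # assumption on the symbol: Rn > 0

def minimum(k):
    # with k = S(6), k/2 == 3 exactly, so no floats reach the integrator
    expr = (x**(k/2 - 1) * exp(-x/2)) / (2**(k/2) * gamma(k/2))
    linkes_int = integrate(x*expr, (x, 0, Rn))
    rechtes_int = integrate(Rn*expr, (x, Rn, oo))
    return linkes_int + rechtes_int, linkes_int, rechtes_int

ergebnis = minimum(S(6))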

Manually implementing approximation functions

I have a dataset from Kaggle with 45,253 rows and a single column of temperatures in Kelvin for the city of Detroit. Its mean = 282.97, std = 11, min = 243.48, max = 308.05.
This is the result when plotted as a histogram of 100 bins with density=True:
I am expected to write the following two approximation functions and see which one comes closest to the histogram:
Like this one here using scipy.stats.norm.pdf:
I generated the above image using:
x = np.linspace(dataset.Detroit.min(), dataset.Detroit.max(), 1001)
P_norm = norm.pdf(x, dataset.Detroit.mean(), dataset.Detroit.std())
plot_pdf_single(x, P_norm)
However, whenever I try to implement either of the two approximation functions, all of my values for P_norm come out as 0s or infs.
This is what I tried:
P_norm = [(1.0/(np.sqrt(2.0*pi*(std*std))))*np.exp(((-x_i-mu)*(-x_i-mu))/(2.0*(std*std))) for x_i in x]
I also broke it down into parts for a single x_i:
part1 = ((-x[0] - mu)*(-x[0] - mu)) / (2.0*(std * std))
part2 = np.exp(part1)
part3 = 1.0 / (np.sqrt(2.0 * pi * (std*std)))
total = part3*part2
I got the following values:
1145.3913234604413
inf
0.036267480036493875
inf
Both of the approximations use the same formula:
import numpy as np
from math import pi

def pdf_approximation(x_i, mu, std):
    return (1.0 / (np.sqrt(2.0 * pi * (std*std)))) * np.exp((-(x_i-mu)*(x_i-mu)) / (2.0 * (std*std)))
The code for the first approximation is:
mu = 283
std = 11
P_norm = np.array([pdf_approximation(x_i, mu, std) for x_i in x])
plot_pdf_single(x, P_norm)
The code for the second approximation is:
mu1 = 276
std1 = 6
mu2 = 293
std2 = 6.5
P_norm = np.array([(pdf_approximation(x_i, mu1, std1) * 0.5) + (pdf_approximation(x_i, mu2, std2) * 0.5) for x_i in x])
plot_pdf_single(x, P_norm)
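As an aside, the per-element list comprehensions above can be replaced by NumPy broadcasting; here is a minimal sketch with the same assumed parameters, using the dataset min/max from the question:

import numpy as np

def pdf_vec(x, mu, std):
    # same Gaussian pdf formula, applied to a whole array at once
    return np.exp(-(x - mu)**2 / (2.0 * std**2)) / np.sqrt(2.0 * np.pi * std**2)

x = np.linspace(243.48, 308.05, 1001)  # dataset min/max from the question
P_single = pdf_vec(x, 283, 11)         # first approximation
P_mix = 0.5 * pdf_vec(x, 276, 6) + 0.5 * pdf_vec(x, 293, 6.5)  # second (mixture)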

How to use visibility_graph to generate a network?

I am learning to convert my time series data into a network using visibility_graph, but when I feed in my data I don't get a meaningful network; most of the results just form a ring.
https://github.com/rgarcia-herrera/visibility_graph
Below is an example of my data:
[ 8.34 3.24 9.82 2.09 6.43 2.88 6.51 6.47 12.41 6.52 5.65 6.13
5.28 6.87 13.22 7.05 13.65 5.7 16.88 3.43 15.81 4.87 9.74 4.43
18.77 8.24 16.2 10.58 18.31 10.4 12.33 8.21 22.74 5.67 19.18 8.55
16.9 10.22 21.68 8.61 17.81 11.4 27.51 11.19 25.78 8.31 29.87 6.35
24.14 10.36 20.13 12.01 25.47 6.66 14.09 10.72 23.52 7.11 24.88 9.75
22.6 7.24]
Below is the code I tried (including smoothing):
from visibility_graph import visibility_graph
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy as sp
import scipy.ndimage

for user, group in data.groupby('cons_no'):
    for i in range(12):  # every month
        u = pd.DataFrame(group)
        u.sort_index(inplace=True)  # time sorting
        values = []
        for row in u.itertuples():
            if row.Index.year == 2017 and row.Index.month == i + 1:
                values.append((round(row.pap_r1, 1), round(row.pap_r2, 1)))
        temp = []
        for v in values:
            temp.append(v[0])
            temp.append(v[1])
        temp = np.array(temp)
        # Min = np.min(temp)
        # Max = np.max(temp)
        # temp = [max_min(x, Max, Min) for x in temp]
        temp = sp.ndimage.gaussian_filter(temp, sigma=1, mode='constant')
        print(temp)
        temp = [round(x, 1) for x in temp]
        temp = np.log10(temp)
        values = temp
        # print(values)
        G = visibility_graph(values)
        plt.subplot(121)
        nx.draw_networkx(G, with_labels=False, node_size=50)
        plt.title(str(user))
        plt.savefig('./user_' + str(user) + '_com.png')
        print('./user_' + str(user) + '_com.png')
        # plt.show()
I hope someone can help me understand how to properly prepare my data so that visibility_graph produces a real network.
Answering my own question:
I think there is a bug in this package, so I modified the source code to make it work properly.
from itertools import combinations
import networkx as nx

def visibility_graph(series):
    g = nx.Graph()
    # convert list of magnitudes into list of tuples that hold the index
    tseries = []
    n = 0
    for magnitude in series:
        tseries.append((n, magnitude))
        n += 1
    # contiguous time points always have visibility
    for n in range(0, len(tseries) - 1):
        (ta, ya) = tseries[n]
        (tb, yb) = tseries[n + 1]
        g.add_node(ta, mag=ya)
        g.add_node(tb, mag=yb)
        g.add_edge(ta, tb)
    for a, b in combinations(tseries, 2):
        # two points, maybe connect
        (ta, ya) = a
        (tb, yb) = b
        connect = True
        # let's see all other points in the series
        for tc, yc in tseries:
            # other points, not a or b
            if tc != ta and tc != tb and ta < tc < tb:  # The condition here is the key.
                # does c obstruct?
                if yc > yb + (ya - yb) * ((tb - tc) / (tb - ta)):
                    connect = False
        if connect:
            g.add_edge(ta, tb)
    return g
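For reference, here is a minimal usage sketch of the modified function, run on the first few values from the data example above:

import matplotlib.pyplot as plt

series = [8.34, 3.24, 9.82, 2.09, 6.43, 2.88, 6.51, 6.47, 12.41, 6.52]
G = visibility_graph(series)
nx.draw_networkx(G, with_labels=False, node_size=50)
plt.show()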
Modified renderings.
