I am doing a molecular dynamics simulation. It consists of numerical integration, many for loops, and manipulation of large NumPy arrays. I have tried to use NumPy functions and arrays wherever possible, but the code is still too slow. I thought of using Numba's jit to speed it up, but it always throws an error message.
Here is the code.
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 28 12:10:42 2020
@author: Sandipan
"""
import numpy as np
import matplotlib.pyplot as plt
from numba import jit
import os
import sys
# Setting up the simulation
NSteps =100 # Number of steps
deltat = 0.005 # Time step in reduced time units
temp = 0.851 # Reduced temperature
DumpFreq = 100 # Save the position to file every DumpFreq steps
epsilon = 1.0 # LJ parameter for the energy between particles
DIM =3
N =500
density =0.776
Rcutoff =3
#----------------------Function Definitions---------------------
#------------------Initialise Configuration--------
@jit(nopython=True)
def initialise_config(N,DIM,density):
velocity = (np.random.randn(N,DIM)-0.5)
# Set initial momentum to zero
COM_V = np.sum(velocity)/N #Center of mass velocity
velocity = velocity - COM_V # Fix any center-of-mass drift
# Calculate initial kinetic energy
k_energy=0
for i in range (N):
k_energy+=np.dot(velocity[i],velocity[i])
vscale=np.sqrt(DIM*temp/k_energy)
velocity*=vscale
#Initialize with zeroes
coords = np.zeros([N,DIM]);
# Get the cooresponding box size
L = (N/density)**(1.0/DIM)
""" Find the lowest perfect cube greater than or equal to the number of
particles"""
nCube = 2
while (nCube**3 < N):
nCube = nCube + 1
# Assign particle positions
ip=-1
x=0
y=0
z=0
for i in range(0,nCube):
for j in range(0,nCube):
for k in range(0,nCube):
if(ip<N):
x=(i+0.5)*(L/nCube)
y=(j+0.5)*(L/nCube)
z=(k+0.5)*(L/nCube)
coords[ip]=np.array([x,y,z])
ip=ip+1
else:
break
return coords,velocity,L
@jit(nopython=True)
def wrap(pos,L):
'''Apply perodic boundary conditions.'''
for i in range (len(pos)):
for k in range(DIM):
if (pos[i][k]>0.5):
pos[i][k]=pos[i][k]-1
if (pos[i][k]<-0.5):
pos[i][k]=pos[i][k]+1
return (pos)
@jit(nopython=True)
def LJ_Forces(pos,acc,epsilon,L,DIM,N):
# Compute forces on positions using the Lennard-Jones potential
# Uses double nested loop which is slow O(N^2) time unsuitable for large systems
Sij = np.zeros(DIM) # Box scaled units
Rij = np.zeros(DIM) # Real space units
#Set all variables to zero
ene_pot = np.zeros(N)
acc = acc*0
virial=0.0
# Loop over all pairs of particles
for i in range(N-1):
for j in range(i+1,N): #i+1 to N ensures we do not double count
Sij = pos[i]-pos[j] # Distance in box scaled units
for l in range(DIM): # Periodic interactions
if (np.abs(Sij[l])>0.5):
Sij[l] = Sij[l] - np.copysign(1.0,Sij[l]) # If the scaled distance is greater than 0.5, subtract one box length to get the nearest periodic image.
Rij = L*Sij # Scale the box to the real units in this case reduced LJ units
Rsqij = np.dot(Rij,Rij) # Calculate the square of the distance
if(Rsqij < Rcutoff**2):
# Calculate LJ potential inside cutoff
# We calculate parts of the LJ potential at a time to improve the efficieny of the computation (most important for compiled code)
rm2 = 1.0/Rsqij # 1/r^2
rm6 = rm2**3
forcefact=(rm2**4)*(rm6-0.5) # 1/r^6
phi =4*(rm6**2-rm6)
ene_pot[i] = ene_pot[i]+0.5*phi # Accumulate energy
ene_pot[j] = ene_pot[j]+0.5*phi # Accumulate energy
virial = virial-forcefact*Rsqij # Virial is needed to calculate the pressure
acc[i] = acc[i]+forcefact*Sij # Accumulate forces
acc[j] = acc[j]-forcefact*Sij # (Fji=-Fij)
return 48*acc, np.sum(ene_pot)/N, -virial/DIM # return the acceleration vector, potential energy and virial coefficient
@jit(nopython=True)
def Calculate_Temperature(vel,L,DIM,N):
ene_kin = 0.0
for i in range(N):
real_vel = L*vel[i]
ene_kin = ene_kin + 0.5*np.dot(real_vel,real_vel)
ene_kin_aver = 1.0*ene_kin/N
temperature = 2.0*ene_kin_aver/DIM
return ene_kin_aver,temperature
# Main MD loop
@jit(nopython=True)
def main():
# Vectors to store parameter values at each step
ene_kin_aver = np.ones(NSteps)
ene_pot_aver = np.ones(NSteps)
temperature = np.ones(NSteps)
virial = np.ones(NSteps)
pressure = np.ones(NSteps)
pos,vel,L = initialise_config(N,DIM,density)
acc = (np.random.randn(N,DIM)-0.5)
volume=L**3
# Open file which we will save the outputs to
if os.path.exists('energy2'):
os.remove('energy2')
f = open('traj.xyz', 'w')
for k in range(NSteps):
# Refold positions according to periodic boundary conditions
pos=wrap(pos,L)
# r(t+dt) modify positions according to velocity and acceleration
pos = pos + deltat*vel + 0.5*(deltat**2.0)*acc # Step 1
# Calculate temperature
ene_kin_aver[k],temperature[k] = Calculate_Temperature(vel,L,DIM,N)
# Rescale velocities and take half step
chi = np.sqrt(temp/temperature[k])
vel = chi*vel + 0.5*deltat*acc # v(t+dt/2) Step 2
# Compute forces a(t+dt),ene_pot,virial
acc, ene_pot_aver[k], virial[k] = LJ_Forces(pos,acc,epsilon,L,DIM,N) # Step 3
# Complete the velocity step
vel = vel + 0.5*deltat*acc # v(t+dt/2) Step 4
# Calculate temperature
ene_kin_aver[k],temperature[k] = Calculate_Temperature(vel,L,DIM,N)
# Calculate pressure
pressure[k]= density*temperature[k] + virial[k]/volume
# Print output to file every DumpFreq number of steps
if(k%DumpFreq==0): # The % symbol is the modulus so if the Step is a whole multiple of DumpFreq then print the values
f.write("%s\n" %(N)) # Write the number of particles to file
# Write all of the quantities at this step to the file
f.write("Energy %s, Temperature %.5f\n" %(ene_kin_aver[k]+ene_pot_aver[k],temperature[k]))
for n in range(N): # Write the positions to file
f.write("X"+" ")
for l in range(DIM):
f.write(str(pos[n][l]*L)+" ")
f.write("\n")
if (k%5==0):
# print("\rStep: {0} KE: {1} PE: {2} Energy: {3}".format(k, ene_kin_aver[k], ene_pot_aver[k],ene_kin_aver[k]+ene_pot_aver[k]))
sys.stdout.write("\rStep: {0} KE: {1} PE: {2} Energy: {3}".format(k, ene_kin_aver[k], ene_pot_aver[k],ene_kin_aver[k]+ene_pot_aver[k]))
sys.stdout.flush()
return ene_kin_aver, ene_pot_aver, temperature, pressure, pos
#------------------------------------------------------
ene_kin_aver, ene_pot_aver, temperature, pressure, pos = main()
# Plot all of the quantities
def plot():
plt.figure(figsize=[7,12])
plt.rc('xtick', labelsize=15)
plt.rc('ytick', labelsize=15)
plt.subplot(4, 1, 1)
plt.plot(ene_kin_aver,'k-')
plt.ylabel(r"$E_{K}", fontsize=20)
plt.subplot(4, 1, 2)
plt.plot(ene_pot_aver,'k-')
plt.ylabel(r"$E_{P}$", fontsize=20)
plt.subplot(4, 1, 3)
plt.plot(temperature,'k-')
plt.ylabel(r"$T$", fontsize=20)
plt.subplot(4, 1, 4)
plt.plot(pressure,'k-')
plt.ylabel(r"$P$", fontsize=20)
plt.show()
plot()
The error I am getting is:
runfile('E:/Project/LJMD4.py', wdir='E:/Project')
Traceback (most recent call last):
File "<ipython-input-8-aeebce887079>", line 1, in <module>
runfile('E:/Project/LJMD4.py', wdir='E:/Project')
File "C:\Users\Sandipan\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
execfile(filename, namespace)
File "C:\Users\Sandipan\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "E:/Project/LJMD4.py", line 226, in <module>
ene_kin_aver, ene_pot_aver, temperature, pressure, pos = main()
File "C:\Users\Sandipan\Anaconda3\lib\site-packages\numba\dispatcher.py", line 351, in _compile_for_args
error_rewrite(e, 'typing')
File "C:\Users\Sandipan\Anaconda3\lib\site-packages\numba\dispatcher.py", line 318, in error_rewrite
reraise(type(e), e, None)
File "C:\Users\Sandipan\Anaconda3\lib\site-packages\numba\six.py", line 658, in reraise
raise value.with_traceback(tb)
TypingError: cannot determine Numba type of <class 'builtin_function_or_method'>
When I searched on the internet, I found that Numba may not support some functions I am using. But I am not using Pandas or any other data frame, just plain Python loops and NumPy, which, as far as the Numba documentation suggests, are well supported. I have tried removing Numba from some functions and setting nopython=0, but it still gives different error messages. I can't figure out what is wrong with it. Without Numba the code will not be feasible for actual use. Any further tips on speeding it up would be a great help.
Thank you in advance.
A few common errors
Use of unsupported functions
File operations and many string operations. These can be wrapped in an objmode block (a short sketch follows these points).
In this example I commented these things out.
Wrong way of initializing arrays
Only tuples are supported for the shape argument, not lists (NumPy accepts both, but its documentation mentions only tuples).
Checking for division by zero and throwing an exception
This is the standard behaviour of Python, but not of NumPy. If you want to avoid this overhead (an if/else branch on every division), turn on the NumPy behaviour with error_model="numpy".
Use of globals
Globals are hard-coded into the compiled code (as if you had written them directly into the source). They cannot be changed without recompilation.
Wrong indexing of Numpy arrays
pos[i][k] instead of pos[i,k]. Numba may optimize this away, but it has a quite noticeable negative impact in pure Python code.
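To make the first two points concrete, here is a minimal sketch (my own illustration, not part of your script; the function name demo and the output file name are made up):
import numpy as np
from numba import jit, objmode

@jit(nopython=True)
def demo(n):
    a = np.zeros((n, 3))       # shape given as a tuple compiles in nopython mode; [n, 3] does not
    a[:, 0] = np.arange(n)
    with objmode():            # temporarily fall back to object mode for unsupported file I/O
        np.savetxt('demo_out.txt', a)
    return a

demo(5)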
Working version
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 28 12:10:42 2020
@author: Sandipan
"""
import numpy as np
import matplotlib.pyplot as plt
from numba import jit
import os
import sys
# All globals are compile time constants
# recompilation needed if you change this values
# Better way: hand a tuple of all needed vars to the functions
# params=(NSteps,deltat,temp,DumpFreq,epsilon,DIM,N,density,Rcutoff)
# Setting up the simulation
NSteps =100 # Number of steps
deltat = 0.005 # Time step in reduced time units
temp = 0.851 # Reduced temperature
DumpFreq = 100 # Save the position to file every DumpFreq steps
epsilon = 1.0 # LJ parameter for the energy between particles
DIM =3
N =500
density =0.776
Rcutoff =3
params=(NSteps,deltat,temp,DumpFreq,epsilon,DIM,N,density,Rcutoff)
#----------------------Function Definitions---------------------
#------------------Initialise Configuration--------
#error_model=True
#Do you really want to search for division by zeros (additional cost)?
@jit(nopython=True,error_model="numpy")
def initialise_config(N,DIM,density):
velocity = (np.random.randn(N,DIM)-0.5)
# Set initial momentum to zero
COM_V = np.sum(velocity)/N #Center of mass velocity
velocity = velocity - COM_V # Fix any center-of-mass drift
# Calculate initial kinetic energy
k_energy=0
for i in range (N):
k_energy+=np.dot(velocity[i],velocity[i])
vscale=np.sqrt(DIM*temp/k_energy)
velocity*=vscale
#wrong array initialization (use tuple)
#Initialize with zeroes
coords = np.zeros((N,DIM))
# Get the cooresponding box size
L = (N/density)**(1.0/DIM)
""" Find the lowest perfect cube greater than or equal to the number of
particles"""
nCube = 2
while (nCube**3 < N):
nCube = nCube + 1
# Assign particle positions
ip=-1
x=0
y=0
z=0
for i in range(0,nCube):
for j in range(0,nCube):
for k in range(0,nCube):
if(ip<N):
x=(i+0.5)*(L/nCube)
y=(j+0.5)*(L/nCube)
z=(k+0.5)*(L/nCube)
coords[ip]=np.array([x,y,z])
ip=ip+1
else:
break
return coords,velocity,L
@jit(nopython=True)
def wrap(pos,L):
'''Apply perodic boundary conditions.'''
#correct array indexing
for i in range (len(pos)):
for k in range(DIM):
if (pos[i,k]>0.5):
pos[i,k]=pos[i,k]-1
if (pos[i,k]<-0.5):
pos[i,k]=pos[i,k]+1
return (pos)
@jit(nopython=True,error_model="numpy")
def LJ_Forces(pos,acc,epsilon,L,DIM,N):
# Compute forces on positions using the Lennard-Jones potential
# Uses double nested loop which is slow O(N^2) time unsuitable for large systems
Sij = np.zeros(DIM) # Box scaled units
Rij = np.zeros(DIM) # Real space units
#Set all variables to zero
ene_pot = np.zeros(N)
acc = acc*0
virial=0.0
# Loop over all pairs of particles
for i in range(N-1):
for j in range(i+1,N): #i+1 to N ensures we do not double count
Sij = pos[i]-pos[j] # Distance in box scaled units
for l in range(DIM): # Periodic interactions
if (np.abs(Sij[l])>0.5):
Sij[l] = Sij[l] - np.copysign(1.0,Sij[l]) # If the scaled distance is greater than 0.5, subtract one box length to get the nearest periodic image.
Rij = L*Sij # Scale the box to the real units in this case reduced LJ units
Rsqij = np.dot(Rij,Rij) # Calculate the square of the distance
if(Rsqij < Rcutoff**2):
# Calculate LJ potential inside cutoff
# We calculate parts of the LJ potential at a time to improve the efficieny of the computation (most important for compiled code)
rm2 = 1.0/Rsqij # 1/r^2
rm6 = rm2**3
forcefact=(rm2**4)*(rm6-0.5) # 1/r^6
phi =4*(rm6**2-rm6)
ene_pot[i] = ene_pot[i]+0.5*phi # Accumulate energy
ene_pot[j] = ene_pot[j]+0.5*phi # Accumulate energy
virial = virial-forcefact*Rsqij # Virial is needed to calculate the pressure
acc[i] = acc[i]+forcefact*Sij # Accumulate forces
acc[j] = acc[j]-forcefact*Sij # (Fji=-Fij)
#If you want to get the best performance, sum directly in the loop instead of
#summing at the end with np.sum(ene_pot)
return 48*acc, np.sum(ene_pot)/N, -virial/DIM # return the acceleration vector, potential energy and virial coefficient
@jit(nopython=True,error_model="numpy")
def Calculate_Temperature(vel,L,DIM,N):
ene_kin = 0.0
for i in range(N):
real_vel = L*vel[i]
ene_kin = ene_kin + 0.5*np.dot(real_vel,real_vel)
ene_kin_aver = 1.0*ene_kin/N
temperature = 2.0*ene_kin_aver/DIM
return ene_kin_aver,temperature
# Main MD loop
@jit(nopython=True,error_model="numpy")
def main(params):
NSteps,deltat,temp,DumpFreq,epsilon,DIM,N,density,Rcutoff=params
# Vectors to store parameter values at each step
ene_kin_aver = np.ones(NSteps)
ene_pot_aver = np.ones(NSteps)
temperature = np.ones(NSteps)
virial = np.ones(NSteps)
pressure = np.ones(NSteps)
pos,vel,L = initialise_config(N,DIM,density)
acc = (np.random.randn(N,DIM)-0.5)
volume=L**3
# Open file which we will save the outputs to
# Unsupported operations have to be in an objectmode block
# or simply write the outputs at the end in a pure Python function
"""
if os.path.exists('energy2'):
os.remove('energy2')
f = open('traj.xyz', 'w')
"""
for k in range(NSteps):
# Refold positions according to periodic boundary conditions
pos=wrap(pos,L)
# r(t+dt) modify positions according to velocity and acceleration
pos = pos + deltat*vel + 0.5*(deltat**2.0)*acc # Step 1
# Calculate temperature
ene_kin_aver[k],temperature[k] = Calculate_Temperature(vel,L,DIM,N)
# Rescale velocities and take half step
chi = np.sqrt(temp/temperature[k])
vel = chi*vel + 0.5*deltat*acc # v(t+dt/2) Step 2
# Compute forces a(t+dt),ene_pot,virial
acc, ene_pot_aver[k], virial[k] = LJ_Forces(pos,acc,epsilon,L,DIM,N) # Step 3
# Complete the velocity step
vel = vel + 0.5*deltat*acc # v(t+dt/2) Step 4
# Calculate temperature
ene_kin_aver[k],temperature[k] = Calculate_Temperature(vel,L,DIM,N)
# Calculate pressure
pressure[k]= density*temperature[k] + virial[k]/volume
# Print output to file every DumpFreq number of steps
"""
if(k%DumpFreq==0): # The % symbol is the modulus so if the Step is a whole multiple of DumpFreq then print the values
f.write("%s\n" %(N)) # Write the number of particles to file
# Write all of the quantities at this step to the file
f.write("Energy %s, Temperature %.5f\n" %(ene_kin_aver[k]+ene_pot_aver[k],temperature[k]))
for n in range(N): # Write the positions to file
f.write("X"+" ")
for l in range(DIM):
f.write(str(pos[n][l]*L)+" ")
f.write("\n")
#Simple prints without formating are supported
if (k%5==0):
#print("\rStep: {0} KE: {1} PE: {2} Energy: {3}".format(k, ene_kin_aver[k], ene_pot_aver[k],ene_kin_aver[k]+ene_pot_aver[k]))
#sys.stdout.write("\rStep: {0} KE: {1} PE: {2} Energy: {3}".format(k, ene_kin_aver[k], ene_pot_aver[k],ene_kin_aver[k]+ene_pot_aver[k]))
#sys.stdout.flush()
"""
return ene_kin_aver, ene_pot_aver, temperature, pressure, pos
#------------------------------------------------------
ene_kin_aver, ene_pot_aver, temperature, pressure, pos = main(params)
# Plot all of the quantities
def plot():
plt.figure(figsize=[7,12])
plt.rc('xtick', labelsize=15)
plt.rc('ytick', labelsize=15)
plt.subplot(4, 1, 1)
plt.plot(ene_kin_aver,'k-')
plt.ylabel(r"$E_{K}", fontsize=20)
plt.subplot(4, 1, 2)
plt.plot(ene_pot_aver,'k-')
plt.ylabel(r"$E_{P}$", fontsize=20)
plt.subplot(4, 1, 3)
plt.plot(temperature,'k-')
plt.ylabel(r"$T$", fontsize=20)
plt.subplot(4, 1, 4)
plt.plot(pressure,'k-')
plt.ylabel(r"$P$", fontsize=20)
plt.show()
plot()
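If you benchmark this, keep in mind that the first call to a jitted function includes compilation time. A quick check could look like this (my own addition, not part of the script above):
import time

t0 = time.time()
main(params)                   # first call: includes Numba compilation
print('first call:', time.time() - t0)

t0 = time.time()
main(params)                   # second call: runs the already compiled code
print('second call:', time.time() - t0)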
This is my first time posting a question here. Please correct me if I am not putting the right information.
I am trying to implement DDPG for the cartpole problem from here: https://spinningup.openai.com/en/latest/user/algorithms.html
It's giving the error:
act_limit = env.action_space.high[0] #AD
AttributeError: 'Discrete' object has no attribute 'high'
Can you suggest how to fix this? I think I am getting this error because CartPole's action space is not continuous, so the action space returns discrete values.
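For reference, here is a quick diagnostic sketch of the two kinds of action spaces (my own illustration; Pendulum-v0 is just an example of an environment whose action space really is continuous):
import gym

# CartPole's action space is Discrete, which has no .high attribute;
# DDPG expects a Box (continuous) action space.
env = gym.make('CartPole-v0')
print(env.action_space)               # Discrete(2)

env_cont = gym.make('Pendulum-v0')    # an environment with a Box action space
print(env_cont.action_space)
print(env_cont.action_space.high[0])  # .high exists here

The full script I am running is below.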
from copy import deepcopy
import numpy as np
import torch
from torch.optim import Adam
import gym
import time
import spinningup.spinup.algos.pytorch.ddpg.core as core
from spinningup.spinup.utils.logx import EpochLogger
class ReplayBuffer:
"""
A simple FIFO experience replay buffer for DDPG agents.
"""
def __init__(self, obs_dim, act_dim, size):
self.obs_buf = np.zeros(core.combined_shape(size, obs_dim), dtype=np.float32)
self.obs2_buf = np.zeros(core.combined_shape(size, obs_dim), dtype=np.float32)
self.act_buf = np.zeros(core.combined_shape(size, act_dim), dtype=np.float32) #AD action_memory
self.rew_buf = np.zeros(size, dtype=np.float32) #AD reward mem
self.done_buf = np.zeros(size, dtype=np.float32) #AD Terminal memory
self.ptr, self.size, self.max_size = 0, 0, size
def store(self, obs, act, rew, next_obs, done): #AD Store tranisiton
self.obs_buf[self.ptr] = obs
self.obs2_buf[self.ptr] = next_obs
self.act_buf[self.ptr] = act
self.rew_buf[self.ptr] = rew
self.done_buf[self.ptr] = done
self.ptr = (self.ptr+1) % self.max_size
self.size = min(self.size+1, self.max_size)
def sample_batch(self, batch_size=32):
idxs = np.random.randint(0, self.size, size=batch_size)
batch = dict(obs=self.obs_buf[idxs],
obs2=self.obs2_buf[idxs],
act=self.act_buf[idxs],
rew=self.rew_buf[idxs],
done=self.done_buf[idxs])
return {k: torch.as_tensor(v, dtype=torch.float32) for k,v in batch.items()}
def ddpg(env_fn, actor_critic=core.MLPActorCritic, ac_kwargs=dict(), seed=0,
steps_per_epoch=4000, epochs=100, replay_size=int(1e6), gamma=0.99,
polyak=0.995, pi_lr=1e-3, q_lr=1e-3, batch_size=100, start_steps=10000,
update_after=1000, update_every=50, act_noise=0.1, num_test_episodes=10,
max_ep_len=1000, logger_kwargs=dict(), save_freq=1):
"""
Deep Deterministic Policy Gradient (DDPG)
Args:
env_fn : A function which creates a copy of the environment.
The environment must satisfy the OpenAI Gym API.
actor_critic: The constructor method for a PyTorch Module with an ``act``
method, a ``pi`` module, and a ``q`` module. The ``act`` method and
``pi`` module should accept batches of observations as inputs,
and ``q`` should accept a batch of observations and a batch of
actions as inputs. When called, these should return:
=========== ================ ======================================
Call Output Shape Description
=========== ================ ======================================
``act`` (batch, act_dim) | Numpy array of actions for each
| observation.
``pi`` (batch, act_dim) | Tensor containing actions from policy
| given observations.
``q`` (batch,) | Tensor containing the current estimate
| of Q* for the provided observations
| and actions. (Critical: make sure to
| flatten this!)
=========== ================ ======================================
ac_kwargs (dict): Any kwargs appropriate for the ActorCritic object
you provided to DDPG.
seed (int): Seed for random number generators.
steps_per_epoch (int): Number of steps of interaction (state-action pairs)
for the agent and the environment in each epoch.
epochs (int): Number of epochs to run and train agent.
replay_size (int): Maximum length of replay buffer.
gamma (float): Discount factor. (Always between 0 and 1.)
polyak (float): Interpolation factor in polyak averaging for target
networks. Target networks are updated towards main networks
according to:
.. math:: \\theta_{\\text{targ}} \\leftarrow
\\rho \\theta_{\\text{targ}} + (1-\\rho) \\theta
where :math:`\\rho` is polyak. (Always between 0 and 1, usually
close to 1.)
pi_lr (float): Learning rate for policy.
q_lr (float): Learning rate for Q-networks.
batch_size (int): Minibatch size for SGD.
start_steps (int): Number of steps for uniform-random action selection,
before running real policy. Helps exploration.
update_after (int): Number of env interactions to collect before
starting to do gradient descent updates. Ensures replay buffer
is full enough for useful updates.
update_every (int): Number of env interactions that should elapse
between gradient descent updates. Note: Regardless of how long
you wait between updates, the ratio of env steps to gradient steps
is locked to 1.
act_noise (float): Stddev for Gaussian exploration noise added to
policy at training time. (At test time, no noise is added.)
num_test_episodes (int): Number of episodes to test the deterministic
policy at the end of each epoch.
max_ep_len (int): Maximum length of trajectory / episode / rollout.
logger_kwargs (dict): Keyword args for EpochLogger.
save_freq (int): How often (in terms of gap between epochs) to save
the current policy and value function.
"""
logger = EpochLogger(**logger_kwargs)
logger.save_config(locals())
torch.manual_seed(seed)
np.random.seed(seed)
env, test_env = env_fn(), env_fn()
obs_dim = env.observation_space.shape
# act_dim = env.action_space.shape[0] #AD
if len(env.action_space.shape) > 1:
action_dim = env.action_space.shape[0]
else:
action_dim = env.action_space.n
# Action limit for clamping: critically, assumes all dimensions share the same bound!
act_limit = env.action_space.high[0] #AD
# act_limit = env.action_space.high
# Create actor-critic module and target networks
ac = actor_critic(env.observation_space, env.action_space, **ac_kwargs)
ac_targ = deepcopy(ac)
# Freeze target networks with respect to optimizers (only update via polyak averaging)
for p in ac_targ.parameters():
p.requires_grad = False
# Experience buffer
replay_buffer = ReplayBuffer(obs_dim=obs_dim, act_dim=act_dim, size=replay_size)
# Count variables (protip: try to get a feel for how different size networks behave!)
var_counts = tuple(core.count_vars(module) for module in [ac.pi, ac.q])
logger.log('\nNumber of parameters: \t pi: %d, \t q: %d\n'%var_counts)
# Set up function for computing DDPG Q-loss
def compute_loss_q(data):
o, a, r, o2, d = data['obs'], data['act'], data['rew'], data['obs2'], data['done']
q = ac.q(o,a)
# Bellman backup for Q function
with torch.no_grad():
q_pi_targ = ac_targ.q(o2, ac_targ.pi(o2))
backup = r + gamma * (1 - d) * q_pi_targ
# MSE loss against Bellman backup
loss_q = ((q - backup)**2).mean()
# Useful info for logging
loss_info = dict(QVals=q.detach().numpy())
return loss_q, loss_info
# Set up function for computing DDPG pi loss
def compute_loss_pi(data):
o = data['obs']
q_pi = ac.q(o, ac.pi(o))
return -q_pi.mean()
# Set up optimizers for policy and q-function
pi_optimizer = Adam(ac.pi.parameters(), lr=pi_lr)
q_optimizer = Adam(ac.q.parameters(), lr=q_lr)
# Set up model saving
logger.setup_pytorch_saver(ac)
def update(data):
# First run one gradient descent step for Q.
q_optimizer.zero_grad()
loss_q, loss_info = compute_loss_q(data)
loss_q.backward()
q_optimizer.step()
# Freeze Q-network so you don't waste computational effort
# computing gradients for it during the policy learning step.
for p in ac.q.parameters():
p.requires_grad = False
# Next run one gradient descent step for pi.
pi_optimizer.zero_grad()
loss_pi = compute_loss_pi(data)
loss_pi.backward()
pi_optimizer.step()
# Unfreeze Q-network so you can optimize it at next DDPG step.
for p in ac.q.parameters():
p.requires_grad = True
# Record things
logger.store(LossQ=loss_q.item(), LossPi=loss_pi.item(), **loss_info)
# Finally, update target networks by polyak averaging.
with torch.no_grad():
for p, p_targ in zip(ac.parameters(), ac_targ.parameters()):
# NB: We use an in-place operations "mul_", "add_" to update target
# params, as opposed to "mul" and "add", which would make new tensors.
p_targ.data.mul_(polyak)
p_targ.data.add_((1 - polyak) * p.data)
def get_action(o, noise_scale):
a = ac.act(torch.as_tensor(o, dtype=torch.float32))
a += noise_scale * np.random.randn(act_dim)
return np.clip(a, -act_limit, act_limit)
def test_agent():
for j in range(num_test_episodes):
o, d, ep_ret, ep_len = test_env.reset(), False, 0, 0
while not(d or (ep_len == max_ep_len)):
# Take deterministic actions at test time (noise_scale=0)
o, r, d, _ = test_env.step(get_action(o, 0))
ep_ret += r
ep_len += 1
logger.store(TestEpRet=ep_ret, TestEpLen=ep_len)
# Prepare for interaction with environment
total_steps = steps_per_epoch * epochs
start_time = time.time()
o, ep_ret, ep_len = env.reset(), 0, 0
# Main loop: collect experience in env and update/log each epoch
for t in range(total_steps):
# Until start_steps have elapsed, randomly sample actions
# from a uniform distribution for better exploration. Afterwards,
# use the learned policy (with some noise, via act_noise).
if t > start_steps:
a = get_action(o, act_noise)
else:
a = env.action_space.sample()
# Step the env
o2, r, d, _ = env.step(a)
ep_ret += r
ep_len += 1
# Ignore the "done" signal if it comes from hitting the time
# horizon (that is, when it's an artificial terminal signal
# that isn't based on the agent's state)
d = False if ep_len==max_ep_len else d
# Store experience to replay buffer
replay_buffer.store(o, a, r, o2, d)
# Super critical, easy to overlook step: make sure to update
# most recent observation!
o = o2
# End of trajectory handling
if d or (ep_len == max_ep_len):
logger.store(EpRet=ep_ret, EpLen=ep_len)
o, ep_ret, ep_len = env.reset(), 0, 0
# Update handling
if t >= update_after and t % update_every == 0:
for _ in range(update_every):
batch = replay_buffer.sample_batch(batch_size)
update(data=batch)
# End of epoch handling
if (t+1) % steps_per_epoch == 0:
epoch = (t+1) // steps_per_epoch
# Save model
if (epoch % save_freq == 0) or (epoch == epochs):
logger.save_state({'env': env}, None)
# Test the performance of the deterministic version of the agent.
test_agent()
# Log info about epoch
logger.log_tabular('Epoch', epoch)
logger.log_tabular('EpRet', with_min_and_max=True)
logger.log_tabular('TestEpRet', with_min_and_max=True)
logger.log_tabular('EpLen', average_only=True)
logger.log_tabular('TestEpLen', average_only=True)
logger.log_tabular('TotalEnvInteracts', t)
logger.log_tabular('QVals', with_min_and_max=True)
logger.log_tabular('LossPi', average_only=True)
logger.log_tabular('LossQ', average_only=True)
logger.log_tabular('Time', time.time()-start_time)
logger.dump_tabular()
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--env', type=str, default='CartPole-v0')
parser.add_argument('--hid', type=int, default=256)
parser.add_argument('--l', type=int, default=2)
parser.add_argument('--gamma', type=float, default=0.99) #change this
parser.add_argument('--seed', '-s', type=int, default=0) #change this
parser.add_argument('--epochs', type=int, default=50)
parser.add_argument('--exp_name', type=str, default='ddpg')
args = parser.parse_args()
from spinningup.spinup.utils.run_utils import setup_logger_kwargs
logger_kwargs = setup_logger_kwargs(args.exp_name, args.seed)
ddpg(lambda : gym.make(args.env), actor_critic=core.MLPActorCritic,
ac_kwargs=dict(hidden_sizes=[args.hid]*args.l),
gamma=args.gamma, seed=args.seed, epochs=args.epochs,
logger_kwargs=logger_kwargs)
I am learning particle-in-cell (PIC) Python code. PIC is currently one of the most important plasma simulation tools, and it is particularly suited to the study of kinetic or non-Maxwellian effects.
Given the following dispersion relation
I found the range of wave numbers k for which the oscillation frequency is imaginary to be $-\left|\frac{w}{v_0}\right| < k < \left|\frac{w}{v_0}\right|$.
What I am trying to understand is how to find the minimum grid length $L_{min}$ as a function of $\frac{v_0}{w}$; $L_{min}$ is the minimum grid length needed to support such unstable modes.
I think we should be able to study the plasma behaviour for both $L < L_{min}$ and $L > L_{min}$. I was told I should adjust the ratio of simulation particles to grid points to improve the statistics. Also, the number of particles per cell (i.e. npart/ngrid) should be fixed and much greater than 1 in order to reduce numerical noise. The runtime needed (here in units of $\omega_p^{-1}$) to observe the instability can be estimated from the maximum growth rate.
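My tentative reasoning for $L_{min}$ (please correct me if this is wrong): with periodic boundaries, the smallest nonzero wavenumber the box can hold is $k_{min} = \frac{2\pi}{L}$, so requiring that at least one unstable mode fits inside the box, $k_{min} < \left|\frac{w}{v_0}\right|$, would give
$$L_{min} = 2\pi\left|\frac{v_0}{w}\right|.$$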
Here's the full Python 3 code I am working with. Please note that I have little experience with coding, so I might ask lots of follow-up questions. Thank you.
#! /usr/bin/python
#
# Python script for computing and plotting single charged particle
# trajectories in prescribed electric and magnetic fields.
# Roughly equivalent to boris.m matlab program
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.widgets import Slider, Button, RadioButtons
from mpl_toolkits.mplot3d import Axes3D
import os
import os.path
import sys
from sys import exit
from time import sleep
# ===================================
#
# Function to integrate particle trajectory
# in given E, B fields
#
# ===================================
def integrate(E0, B0, vz0):
global dt, v0, x0, xp, yp, zp, qom, larmor, nsteps
wc=qom*B0 # cyclotron frequency
larmor=vperp/wc
print ("Cyclotron frequency =",wc)
print ("Perpendicular velocity v_p=",vperp)
print ("Larmor radius=",larmor)
norm = 1. # choose whether to normalise plot axes dimensions to Larmor radius
trun=5*2*np.pi/wc # total runtime
dt=.1/wc # timestep - adjust to current B-field
nsteps=int(trun/dt) # timesteps
E=np.array([0.,E0,0.]) # initial E-field
B=np.array([0.,0.,B0]) # initial B-field
u=np.array([0.,0.,0.]) # intermediate velocity
h=np.array([0.,0.,0.]) # normalized B-field
xp[0]=x0[0]
yp[0]=x0[1]
zp[0]=x0[2]
v0[2]=vz0 # z-component
v=v0+.5*dt*qom*(E+np.cross(v0,B)) # shift initial velocity back 1/2 step
x=x0
for itime in range(1,nsteps):
x=x+dt*v
xp[itime]=x[0] /norm
yp[itime]=x[1] /norm
zp[itime]=x[2] /norm
tp[itime]=itime*dt
#
# Boris mover: solves dv/dt = q/m*(E + vxB) to 2nd order accuracy in dt
#
qomdt2 = dt*qom/2
h = qomdt2*B
s=2*h/(1+np.dot(h,h))
u = v + qomdt2*E
up=u+np.cross(u+np.cross(u,h),s)
v=up+qomdt2*E
# vxp[itime] = v[0]
# ===================================
# Make 2D plots of particle orbit
#
# ===================================
def plot_track2D():
global xp,yp,nsteps,ax1
fig = plt.figure(figsize=(8,8)) # initialize plot
xmin=np.min(xp)
xmax=np.max(xp)
ymin=np.min(yp)
ymax=np.max(yp)
fig.add_subplot(221) # 1st subplot in 2x2 arrangement
plt.cla()
plt.grid(True, which='both')
plt.xlim( (xmin, xmax) )
plt.ylim( (ymin, ymax) )
plt.xlabel('$x$')
plt.ylabel('$y$')
plt.plot(xp[0:nsteps],yp[0:nsteps],c='b')
fig.add_subplot(222) # 2nd subplot
# fig.add_subplot(223) # 2nd subplot
# fig.add_subplot(224) # 2nd subplot
plt.draw()
plt.savefig('./particle_orbit.png') # Save plot to file
# ===================================
#
# Make 3D plot of particle orbit
#
# ===================================
def plot_track3D():
global xp,yp,zp,nsteps,ax1
xmin=np.min(xp)
xmax=np.max(xp)
ymin=np.min(yp)
ymax=np.max(yp)
zmin=np.min(zp)
zmax=np.max(zp)
ax1.cla()
plt.ion()
plt.grid(True, which='both')
ax1.set_xlim( (xmin, xmax) )
ax1.set_ylim( (ymin, ymax) )
ax1.set_zlim( (zmin, zmax) )
ax1.set_xlabel('$x $ [m]')
ax1.set_ylabel('$y $ [m]')
ax1.set_zlabel('$z $ [m]')
#ax1.set_aspect(1.)
ax1.scatter(xp,yp,zp,c=tp,marker='o') # tracks coloured by elapsed time since start
plt.draw()
# =============================================
#
# Main program
#
# =============================================
print ("Charged particle orbit solver")
plotboxsize = 8.
animated = True
x0=np.array([0.,0.,0.]) # initial coords
vz0=0.
v0=np.array([-1e2,0.,vz0]) # initial velocity
vperp = np.sqrt(v0[0]**2+v0[2]**2)
E0=0.
B0=.1
e=1.602176e-19 # electron charge
m=9.109e-31 # electron mass
qom=e/m # charge/mass ratio
wc=qom*B0 # cyclotron frequency
larmor=vperp/wc
print (wc,vperp,larmor)
trun=5*2*np.pi/wc # total runtime
dt=.1/wc # timestep - adjust to current B-field
nsteps=int(trun/dt) # timesteps
B1=np.array([0.,0.,0.1]) # gradient B perturbation
#wc=qom*np.linalg.norm(B) # cyclotron frequency
#nsteps=2
tp = np.zeros(nsteps) # variables to store particle tracks
xp = np.zeros(nsteps)
yp = np.zeros(nsteps)
zp = np.zeros(nsteps)
vxp = np.zeros(nsteps)
vyp = np.zeros(nsteps)
vzp = np.zeros(nsteps)
# Compute orbit
integrate(E0, B0, vz0)
# 2D orbit plotter
plot_track2D()
exit(0) # Quit script before 3D plot - comment out to continue!
# Start 3D interactive mode with sliders for B, E and v0
plt.ion() # Turn on interactive plot display
fig = plt.figure(figsize=(8,8))
# Get instance of Axis3D
ax1 = fig.add_subplot(111, projection='3d')
# Get current rotation angle
print (ax1.azim)
# Set initial view to x-y plane
ax1.view_init(elev=90,azim=0)
ax1.set_xlabel('$x $[microns]')
ax1.set_ylabel('$y $[microns]')
ax1.set_zlabel('$z $[microns]')
plot_track3D()
#filename = 'a0_45/parts_p0000.%0*d'%(6, ts)
#plot_from_file(filename):
axcolor = 'lightgoldenrodyellow'
axe0 = fig.add_axes([0.1, 0.95, 0.3, 0.03])#, facecolor=axcolor) # box position, color & size
axb0 = fig.add_axes([0.5, 0.95, 0.3, 0.03])#, facecolor=axcolor)
axv0 = fig.add_axes([0.1, 0.9, 0.3, 0.03])#, facecolor=axcolor)
sefield = Slider(axe0, 'Ey [V/m]', -5.0,5.0, valinit=E0)
sbfield = Slider(axb0, 'Bz [T]', -1.0, 1.0, valinit=B0)
svz = Slider(axv0, 'vz [m/s]', 0.0, 1.0, valinit=0.)
def update(val):
E0 = sefield.val
B0 = sbfield.val
vz0 = svz.val
integrate(E0,B0,vz0)
plot_track3D()
plt.draw()
sefield.on_changed(update)
sbfield.on_changed(update)
svz.on_changed(update)
resetax = fig.add_axes([0.8, 0.025, 0.1, 0.04])
button = Button(resetax, 'Reset', color=axcolor, hovercolor='0.975')
def reset(event):
global ax1
sefield.reset()
sbfield.reset()
svz.reset()
ax1.cla()
ax1.set_xlabel('$x $[microns]')
ax1.set_ylabel('$y $[microns]')
ax1.set_xlim( (0., 10.) )
# ax1.set_ylim( (-sigma, sigma) )
ax1.grid(True, which='both')
plt.draw()
button.on_clicked(reset)
#plt.show()
plt.show(block=False)
From algorithmic stability considerations there are only upper bounds on the two quantities grid spacing and time step, dx and dt, taken together. Put very informally, in one time step a particle must not travel farther than from a grid cell to one of its direct neighbours. There is no lower limit on the grid spacing until you run into the resolution of your numerical data type, run out of memory, or the time to execute one simulation run becomes far too long.
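Put as numbers, a toy sketch of that rule (the values are made up):
# Informal stability rule: in one time step a particle should not move
# farther than one grid cell, i.e. v_max * dt <= dx.
dx = 0.05           # grid spacing (arbitrary units)
v_max = 2.0         # fastest particle speed expected in the run
dt_max = dx / v_max
dt = 0.5 * dt_max   # stay safely below the bound
print(dt_max, dt)   # 0.025 0.0125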
I am practising to improve my skills at developing a game, following a reinforcement learning tutorial, but I ran into trouble getting np.split to perform correctly (it does not result in an equal division). The code below shows the reward-definition part, while the parameters of the environment are defined in another .py file. The following line causes the problem shown in the traceback below:
rewards_per_thousand_episodes = np.split(np.array(rewards_all_episodes), num_episodes/1000)
I tried to look for a solution to the issue, yet in vain. Any suggestions will be highly appreciated... ;o)
Env definition:
import numpy as np
import gym
import random
import time
from IPython.display import clear_output
env = gym.make("FrozenLake-v1")
env.reset()
# Construct Q-table, and initialize all the Q-values to zero for each state-action pair
action_space_size = env.action_space.n
state_space_size = env.observation_space.n
q_table = np.zeros((state_space_size, action_space_size))
print("\nq_table")
print(q_table)
# Initializing Q-Learning Parameters
num_episodes = 10000 # total number of episodes the agent is to play during training
max_steps_per_episode = 100 # maximum number of steps that agent is allowed to take within a single episode
learning_rate = 0.1
discount_rate = 0.99
exploration_rate = 1
max_exploration_rate = 1 # bounds to how small exploration rate can be
min_exploration_rate = 0.01 # bounds to how large exploration rate can be
# rate at which the exploration_rate will decay
# LR changed to 0.001 due to inconsistencies in results with larger rate - https://youtu.be/HGeI30uATws?t=57
exploration_decay_rate = 0.001
Reward definition:
import numpy as np
import random
from .... import \
num_episodes, env, max_steps_per_episode, q_table, learning_rate, exploration_rate, discount_rate, \
min_exploration_rate, max_exploration_rate, exploration_decay_rate
rewards_all_episodes = []
# Q-learning algorithm
for episode in range(num_episodes):
state = env.reset()
# Exploration/exploitation trade-off
done = False
rewards_current_episode = 0
for step in range(max_steps_per_episode):
exploration_rate_threshold = random.uniform(0,1)
if exploration_rate_threshold > exploration_rate:
action = np.argmax(q_table[state,:])
else:
action = env.action_space.sample()
new_state, reward, done, info = env.step(action)
#Update Q-table for Q(s,a)
q_table[state, action] = \
q_table[state, action] * (1 - learning_rate) + learning_rate * (reward + discount_rate *
np.max(q_table[new_state, :]))
state = new_state
rewards_current_episode += reward
if done == True:
break
# Exploration rate decay - exploration rate update - https://youtu.be/HGeI30uATws?t=298
exploration_rate = min_exploration_rate + (max_exploration_rate - min_exploration_rate) * np.exp(
-exploration_decay_rate * episode)
rewards_all_episodes.append(rewards_current_episode)
rewards_per_thousand_episodes = np.split(np.array(rewards_all_episodes), num_episodes/1000)
count = 1000
print("\n********Average reward per thousand episodes********")
for r in rewards_per_thousand_episodes:
print(count, ": ", str(sum(r/1000)))
count += 1000
print("\n********Updated Q-table********")
print(q_table)
Traceback info:
Traceback (most recent call last):
File "C:\Users\jcst\PycharmProjects\...\_test_Q_learning_and_Gym_run.py", line 48, in <module>
rewards_per_thousand_episodes = np.split(np.array(rewards_all_episodes), num_episodes/1000)
File "<__array_function__ internals>", line 5, in split
File "C:\Users\jcst\PycharmProjects\Python_3_9_test\venv\lib\site-packages\numpy\lib\shape_base.py", line 872, in split
raise ValueError(
ValueError: array split does not result in an equal division
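For reference, np.split insists that the array length be an exact multiple of the number of sections, while np.array_split tolerates a remainder (a small standalone illustration, unrelated to the training loop above):
import numpy as np

a = np.arange(10)
print(np.array_split(a, 3))   # OK: pieces of length 4, 3, 3
try:
    np.split(a, 3)            # 10 is not divisible by 3
except ValueError as err:
    print(err)                # array split does not result in an equal division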
I'm processing wav files for amplitude and frequency analysis with FFT, but I am having trouble getting the data out to csv in a time series format.
Using @Beginner's answer heavily from this post: How to convert a .wav file to a spectrogram in python3, I'm able to get the spectrogram output as an image. I'm trying to simplify that somewhat to get to a text output in CSV format, but I'm not seeing how to do so. The outcome I'm hoping to achieve would look something like the following:
time_in_ms, amplitude_in_dB, freq_in_kHz
.001, -115, 1
.002, -110, 2
.003, 20, 200
...
19000, 20, 200
For my testing, I have been using http://soundbible.com/2123-40-Smith-Wesson-8x.html (note: I simplified the wav down to a single channel and removed metadata with Audacity to get it to work).
Heavy props to @Beginner for 99.9% of the following; anything nonsensical is surely mine.
import numpy as np
from matplotlib import pyplot as plt
import scipy.io.wavfile as wav
from numpy.lib import stride_tricks
filepath = "40sw3.wav"
""" short time fourier transform of audio signal """
def stft(sig, frameSize, overlapFac=0.5, window=np.hanning):
win = window(frameSize)
hopSize = int(frameSize - np.floor(overlapFac * frameSize))
# zeros at beginning (thus center of 1st window should be for sample nr. 0)
samples = np.append(np.zeros(int(np.floor(frameSize/2.0))), sig)
# cols for windowing
cols = np.ceil( (len(samples) - frameSize) / float(hopSize)) + 1
# zeros at end (thus samples can be fully covered by frames)
samples = np.append(samples, np.zeros(frameSize))
frames = stride_tricks.as_strided(samples, shape=(int(cols), frameSize), strides=(samples.strides[0]*hopSize, samples.strides[0])).copy()
frames *= win
return np.fft.rfft(frames)
""" scale frequency axis logarithmically """
def logscale_spec(spec, sr=44100, factor=20.):
timebins, freqbins = np.shape(spec)
scale = np.linspace(0, 1, freqbins) ** factor
scale *= (freqbins-1)/max(scale)
scale = np.unique(np.round(scale))
# create spectrogram with new freq bins
newspec = np.complex128(np.zeros([timebins, len(scale)]))
for i in range(0, len(scale)):
if i == len(scale)-1:
newspec[:,i] = np.sum(spec[:,int(scale[i]):], axis=1)
else:
newspec[:,i] = np.sum(spec[:,int(scale[i]):int(scale[i+1])], axis=1)
# list center freq of bins
allfreqs = np.abs(np.fft.fftfreq(freqbins*2, 1./sr)[:freqbins+1])
freqs = []
for i in range(0, len(scale)):
if i == len(scale)-1:
freqs += [np.mean(allfreqs[int(scale[i]):])]
else:
freqs += [np.mean(allfreqs[int(scale[i]):int(scale[i+1])])]
return newspec, freqs
""" compute spectrogram """
def compute_stft(audiopath, binsize=2**10):
samplerate, samples = wav.read(audiopath)
s = stft(samples, binsize)
sshow, freq = logscale_spec(s, factor=1.0, sr=samplerate)
ims = 20.*np.log10(np.abs(sshow)/10e-6) # amplitude to decibel
return ims, samples, samplerate, freq
""" plot spectrogram """
def plot_stft(ims, samples, samplerate, freq, binsize=2**10, plotpath=None, colormap="jet"):
timebins, freqbins = np.shape(ims)
plt.figure(figsize=(15, 7.5))
plt.imshow(np.transpose(ims), origin="lower", aspect="auto", cmap=colormap, interpolation="none")
plt.colorbar()
plt.xlabel("time (s)")
plt.ylabel("frequency (hz)")
plt.xlim([0, timebins-1])
plt.ylim([0, freqbins])
xlocs = np.float32(np.linspace(0, timebins-1, 5))
plt.xticks(xlocs, ["%.02f" % l for l in ((xlocs*len(samples)/timebins)+(0.5*binsize))/samplerate])
ylocs = np.int16(np.round(np.linspace(0, freqbins-1, 10)))
plt.yticks(ylocs, ["%.02f" % freq[i] for i in ylocs])
if plotpath:
plt.savefig(plotpath, bbox_inches="tight")
else:
plt.show()
plt.clf()
"" HERE IS WHERE I'm ATTEMPTING TO GET IT OUT TO TXT """
ims, samples, samplerate, freq = compute_stft(filepath)
""" Print lengths """
print('ims len:', len(ims))
print('samples len:', len(samples))
print('samplerate:', samplerate)
print('freq len:', len(freq))
""" Write values to files """
np.savetxt(filepath + '-ims.txt', ims, delimiter=', ', newline='\n', header='ims')
np.savetxt(filepath + '-samples.txt', samples, delimiter=', ', newline='\n', header='samples')
np.savetxt(filepath + '-frequencies.txt', freq, delimiter=', ', newline='\n', header='frequencies')
In terms of values out, the file I'm analyzing is approx 19.1 seconds long and the sample rate is 44100, so I’d expect to have about 842k values for any given variable. But I'm not seeing what I expected. Instead here is what I see:
freqs comes out with just a handful of values (512), and while they appear to be in the correct range for the expected frequencies, they are ordered least to greatest, not in time series like I expected. The 512 values are, I assume, the "fast" in FFT, basically down-sampled...
ims appears to be amplitude, but the values seem too high, although the sample size is correct. I should be seeing roughly -50 up to ~240 dB.
samples . . . not sure.
In short, can someone advise on how I'd get the FFT out to a text file with time, amp, and freq values for the entire sample set? Is savetxt the correct route, or is there a better way? This code can certainly be used to make a great spectrogram, but how can I just get out the data?
Your output format is too limiting, as the audio spectrum at any interval in time usually contains a range of frequencies. For example, the FFT of 1024 samples will contain 512 frequency bins for one window of time or time step, each with an amplitude. If you want a time step of one millisecond, then you will have to offset the window of samples you feed each STFT to center the window at that point in your sample vector, although with an FFT about 23 milliseconds long that will involve a high overlap of windows. You could use shorter windows, but the time-frequency trade-off will result in proportionately less frequency resolution.
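As a sketch of what that could look like in code (my own example built on the compute_stft output above; the function name and CSV path are made up, and it assumes the 50% window overlap used in stft):
import csv

def spectrogram_to_csv(ims, freq, samplerate, binsize=2**10, path='spectrogram.csv'):
    # ims has one row per time bin and one column per frequency bin;
    # consecutive time bins are binsize/2 samples apart (50% overlap).
    hop_seconds = (binsize // 2) / samplerate
    with open(path, 'w', newline='') as fh:
        writer = csv.writer(fh)
        writer.writerow(['time_in_s', 'amplitude_in_dB', 'freq_in_Hz'])
        for t_idx, frame in enumerate(ims):
            for f_idx, amp in enumerate(frame):
                writer.writerow(['%.4f' % (t_idx * hop_seconds), '%.2f' % amp, '%.1f' % freq[f_idx]])

# spectrogram_to_csv(ims, freq, samplerate)   # writes roughly timebins * 512 rows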
from numpy import mean,cov,cumsum,dot,linalg,size,flipud
import numpy as np
from pylab import imread,subplot,imshow,title,gray,figure,show,NullLocator
def princomp(A,numpc=0):
#computing eigenvalues and eigenvectors of covariance matrix
M = (A-mean(A.T,axis=1)).T # subtract the mean (along columns)
[latent,coeff] = linalg.eig(cov(M))
p = size(coeff,axis=1)
idx = np.argsort(latent) # sorting the eigenvalues
idx = idx[::-1] # in ascending order
#sorting eigenvectors according to the sorted eigenvalues
coeff = coeff[:,idx]
latent = latent[idx] # sorting eigenvalues
if numpc < p and numpc >= 0:
coeff = coeff[:,range(numpc)] # cutting some PCs if needed
score = dot(coeff.T,M) # projection of the data in the new space
return coeff,score,latent
A = imread('beatles.jpg') # load an image
A = mean(A,2) # to get a 2-D array
full_pc = size(A,axis=1) # numbers of all the principal components
i = 1
dist = []
for numpc in range(0,full_pc+10,10): # 0 10 20 ... full_pc
coeff, score, latent = princomp(A,numpc)
Ar = dot(coeff,score).T+mean(A,axis=0) # image reconstruction
# difference in Frobenius norm
dist.append(linalg.norm(A-Ar,'fro'))
# showing the pics reconstructed with less than 50 PCs
if numpc <= 50:
ax = subplot(2,3,i,frame_on=False)
ax.xaxis.set_major_locator(NullLocator()) # remove ticks
ax.yaxis.set_major_locator(NullLocator())
i += 1
imshow(flipud(Ar))
title('PCs # '+str(numpc))
gray()
figure()
imshow(flipud(A))
title('numpc FULL')
gray()
show()
This is a sample code on PCA image compression that I found on the net, and running it produces the error below. Suggestions would be helpful. The entire error is:
Traceback (most recent call last):
File "try.py", line 36, in <module>
imshow(flipud(Ar))
File "/home/user/.virtualenvs/cv/local/lib/python3.5/site-packages/matplotlib/pyplot.py", line 3157, in imshow
**kwargs)
File "/home/user/.virtualenvs/cv/local/lib/python3.5/site-packages/matplotlib/__init__.py", line 1898, in inner
return func(ax, *args, **kwargs)
File "/home/user/.virtualenvs/cv/local/lib/python3.5/site-packages/matplotlib/axes/_axes.py", line 5124, in imshow
im.set_data(X)
File "/home/user/.virtualenvs/cv/local/lib/python3.5/site-packages/matplotlib/image.py", line 596, in set_data
raise TypeError("Image data can not convert to float")
TypeError: Image data can not convert to float
You cannot plot a complex array with matplotlib imshow. So you need to take the real or imaginary part alone, e.g.
Ar = dot(coeff,score).T+mean(A,axis=0) # image reconstruction
Ar = Ar.real
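A quick standalone check (made-up array, unrelated to the image data) shows the behaviour:
import numpy as np
import matplotlib.pyplot as plt

Z = np.random.rand(8, 8).astype(np.complex128)   # complex dtype, zero imaginary part
print(np.iscomplexobj(Z))                        # True
# plt.imshow(Z)                                  # would raise "Image data can not convert to float"
plt.imshow(Z.real, cmap='gray')                  # plotting the real part works
plt.show()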