Why is this while loop only doing the first loop? - python-3.x

I am trying to produce some mechanical stress info from windspeed data for a crane boom when it is between 0 and 90 degrees, with data from each angle saved into its own file. I have the script working fine when doing just one file/angle; however, when I try to use any sort of loop to do it for all angles, it creates the files, but only the first has any data in it. I am a beginner and am not very savvy with Python, so I was hoping someone could spot something simple I have missed. I have included a short example file of the source data: Windspeed source file - cut down
import math

# Calibration constants for the bolt-stress calculation.
vaneSpeed = 120            # wind speed at which the max-stress figures below were computed
maxShear = 75.97043478     # max shear stress at vaneSpeed (not used in this script)
maxVonMises = 500.0216811  # max von Mises stress at vaneSpeed

# One output CSV per boom heading, 0..90 degrees inclusive.
boomDirection = 0
while boomDirection < 91:
    # BUG FIX: the wind-speed source file must be reopened (or rewound) for
    # every boom angle.  The original opened it once before this loop, so
    # after the first pass the read position sat at end-of-file, every later
    # readline() returned '', and all remaining output files came out empty.
    file = open("C:/Users/Jacob/Desktop/BOM Data/HD01D_Data_074272_999999999523840.txt", 'r')
    data_file = open("Bolt Stress - " + str(boomDirection) + " Degrees.csv", 'w')
    line = file.readline()  # discard the header row
    line = file.readline()  # first data row
    while line != '':
        try:
            if len(line.split(',')) > 1:
                # Columns 19/22: high/low wind speeds; column 14: wind direction.
                windSpeedHigh = int(line.split(',')[19])
                windSpeedLow = int(line.split(',')[22])
                windDirection = int(line.split(',')[14])
                # Component of the wind acting across the boom for this heading.
                relSpeedHigh = math.sin(math.radians(90 - (boomDirection - windDirection))) * windSpeedHigh
                relSpeedLow = math.sin(math.radians(90 - (boomDirection - windDirection))) * windSpeedLow
                # Stress scales linearly with relative wind speed.
                VonMisesHigh = (maxVonMises / vaneSpeed) * relSpeedHigh
                VonMisesLow = (maxVonMises / vaneSpeed) * relSpeedLow
                data_file.write(str(round(VonMisesHigh, 1)) + '\n')
                data_file.write(str(round(VonMisesLow, 1)) + '\n')
        except ValueError:
            pass  # rows with missing/non-numeric fields are skipped
        line = file.readline()
    file.close()       # close the reopened source file each pass (avoids a handle leak)
    data_file.close()
    boomDirection = boomDirection + 1

Related

Issue in modifying a for loop using joblib

I have a sequential set of code which generates a tuple of values for different stocks, which is passed to a multiprocessing pool to apply technical indicators. Below is the sequential piece of code, which is working as expected.
# NOTE(review): this paste lost its indentation and backslash line
# continuations, so the snippet below is not runnable as-is; it is kept
# byte-identical and only annotated.
# Sequential version: build one argument tuple per stock, then fan the
# tuples out to a multiprocessing pool.
child_fn_arg_tuple_list = []
for stock in m1_ts_consistent_stock_list: # prev_day_stock_list:
# Row for this stock at the previous timestamp (presumably one DataFrame
# row -- TODO confirm m1_df_in_mdf's schema).
f_prev_ts_stock_merged_mdf_row =
m1_df_in_mdf[(m1_df_in_mdf['stock_id']==stock) &
(m1_df_in_mdf['datetimestamp'] == prev_ts)] # previous timestamp
if f_prev_ts_stock_merged_mdf_row.empty:
# No previous-timestamp data: drop the stock from this run.
f_filtered_stock_list.remove(stock)
else:
f_stock_prev_ts_merged_ohlcv_df_list_of_dict =
f_prev_ts_stock_merged_mdf_row['merged_ohlcv_df'].iloc[0]
f_current_ts_stock_ohlcv_row_df =
period_ts_ohlcv_df[(period_ts_ohlcv_df['stock_id'] == stock)].copy()
if f_current_ts_stock_ohlcv_row_df.shape[0] == 1:
pass
else:
# Unexpected row count: record an error string, then retry the lookup
# restricted to the previous minute's timestamp.
error_string = f_current_fn + 'Expected
f_current_ts_stock_ohlcv_row_df shape for stock ' + stock \
+ 'at ts ' + str(m1_time) + ' is not 1 - ' +
str(f_current_ts_stock_ohlcv_row_df.shape[0])
f_current_ts_stock_ohlcv_row_df =
period_ts_ohlcv_df[(period_ts_ohlcv_df['stock_id'] == stock) &
(period_ts_ohlcv_df['datetimestamp'] == (m1_time -
timedelta(minutes=1)))].copy()
# Package everything the worker function needs into one tuple per stock.
fn_arg_tuple = (f_from_date_list,f_run_folder_name,stock,
f_period,m1_time, f_stock_prev_ts_merged_ohlcv_df_list_of_dict,
f_current_ts_stock_ohlcv_row_df,f_grouped_column_list_dict)
child_fn_arg_tuple_list.append(fn_arg_tuple)
result_list = []
# Apply the indicator function to all stocks across 7 worker processes.
pool = multiprocessing.Pool(7)
for result in pool.starmap(single_stock_apply_indicator_df_in_df_v3, child_fn_arg_tuple_list):
result_list.append(result)
pool.close()
Since the for loop runs for around 400 stocks every minute, I am trying to speed up the for loop over stocks, before passing them for applying multiprocessing using python inner function and joblib - parallel , delayed.
# NOTE(review): indentation and line continuations were lost in this paste,
# so the snippet is kept byte-identical and only annotated.
# Joblib/multiprocessing variant of the per-stock tuple-building loop.
#
# Why the AttributeError occurs: per the reported error text, this function
# is defined *inside* 'multiple_stock_apply_indicator_df_in_df_v6', and the
# backend='multiprocessing' Parallel must pickle the callable to ship it to
# worker processes -- local (nested) functions cannot be pickled.  Moving
# the function to module level, or choosing a backend that does not pickle
# the callable (e.g. backend='threading'), avoids the error.
#
# Also note (visible in the code below): f_filtered_stock_list.remove(...)
# would mutate a per-worker copy, so removals would not be visible in the
# parent process; and because each call appends to and returns
# child_fn_arg_tuple_list, the Parallel result would be a list of lists
# that needs flattening before being handed to pool.starmap.
def create_child_fn_arg_tuple_list(cp_stock): # cp = child parameter
f_prev_ts_stock_merged_mdf_row = m1_df_in_mdf[
(m1_df_in_mdf['stock_id'] == cp_stock) &
(m1_df_in_mdf['datetimestamp'] == prev_ts)].copy()
if f_prev_ts_stock_merged_mdf_row.empty:
# No previous-timestamp data: drop this stock.
f_filtered_stock_list.remove(cp_stock)
else:
f_stock_prev_ts_merged_ohlcv_df_list_of_dict = \
f_prev_ts_stock_merged_mdf_row['merged_ohlcv_df'].iloc[0]
f_current_ts_stock_ohlcv_row_df = period_ts_ohlcv_df[
(period_ts_ohlcv_df['stock_id'] == cp_stock)].copy()
if f_current_ts_stock_ohlcv_row_df.shape[0] == 1:
pass
else:
# Unexpected row count: record an error string, then retry restricted
# to the previous minute's timestamp.
error_string = f_current_fn + 'Expected f_current_ts_stock_ohlcv_row_df
shape for stock ' + \
cp_stock + 'at ts ' + str(m1_time) + ' is not 1 - ' + \
str(f_current_ts_stock_ohlcv_row_df.shape[0])
f_current_ts_stock_ohlcv_row_df =
period_ts_ohlcv_df[(period_ts_ohlcv_df['stock_id'] == cp_stock)
& (period_ts_ohlcv_df['datetimestamp'] ==
(m1_time - timedelta(minutes=1)))].copy()
fn_arg_tuple = (f_from_date_list, f_run_folder_name, cp_stock, f_period,
m1_time,f_stock_prev_ts_merged_ohlcv_df_list_of_dict,
f_current_ts_stock_ohlcv_row_df,f_grouped_column_list_dict)
child_fn_arg_tuple_list.append(fn_arg_tuple)
return child_fn_arg_tuple_list
# The failing line: Parallel pickles create_child_fn_arg_tuple_list here.
child_fn_arg_tuple_list = Parallel(n_jobs=7, backend='multiprocessing')\
(delayed(create_child_fn_arg_tuple_list)(in_stock) for in_stock in
m1_ts_consistent_stock_list)
result_list = []
pool = multiprocessing.Pool(7)
for result in pool.starmap(single_stock_apply_indicator_df_in_df_v3, child_fn_arg_tuple_list):
result_list.append(result)
pool.close()
I am getting an error -
AttributeError: Can't pickle local object 'multiple_stock_apply_indicator_df_in_df_v6..create_child_fn_arg_tuple_list', which occurs in the line where I am trying to apply the joblib parallel and delayed.
Please note that there are some common variables between the main function and inner function - m1_df_in_mdf, f_filtered_stock_list
1] m1_df_in_mdf is not affected as it is used only in read only mode
2] f_filtered_stock_list is affected as some stocks are removed
My objective is to get the for loop of stocks run faster, any other approaches are also welcome.

How to create multiple Python scripts and run them at the same time?

I have a Python script that is used to find some stresses on a structure (a crane boom) when exposed to the wind from all directions. This means it creates 360 text files, 1 for each degree the structure is facing. Instead of doing 360 consecutive loops running on a single core, I want to break the task up into maybe 10 or 20 processes. Is there a way I could modify the following code so it created and ran multiple scripts with different degree ranges i.e. one script would do 0 to 20 degrees, the next 20 to 40 etc.?
# NOTE(review): indentation was lost when this snippet was pasted, so the
# exact nesting of the if/else around the three write() calls below is
# ambiguous; the code is kept byte-identical and only annotated.
import math
import csv
boomDirection = 0
time = 0          # running clock, stepped by 0.5 per sample written
maxStress = 650   # value written at the periodic marker rows
# Lookup table: data[windSpeed][windDirection] -> stress (from a CSV matrix).
with open("SomeWindAndHeadingStressMatrix.csv") as f:
data = [row for row in csv.reader(f)]
# One output file per boom heading, 0..360 degrees inclusive.
while boomDirection < 361:
# NOTE(review): the source file is reopened on every heading but never
# closed -- consider a 'with' block to avoid leaking 361 file handles.
file = open("SomeWindSpeedSourceData.txt", 'r')
data_file = open("Bolt Stress - " + str(boomDirection) + " Degrees.csv", 'w')
line = file.readline()
while line != '':
try:
if len(line.split(','))>1:
# Columns 19/22: high/low wind speeds; 1.32 is presumably a unit
# conversion factor -- TODO confirm.
windSpeedHigh = int(int(line.split(',')[19])*1.32)
windSpeedLow = int(int(line.split(',')[22])*1.32)
# Wind direction relative to the boom heading, wrapped into 0..359.
windDirection = int(line.split(',')[14]) - boomDirection
if windDirection < 0:
windDirection += 360
stressHigh = float(data[windSpeedHigh][windDirection])
stressLow = float(data[windSpeedLow][windDirection])
# 10080 = minutes per week (if time is in minutes -- TODO confirm):
# write a maxStress marker row instead of the measured value.
if time % 10080 == 0:
data_file.write(str(time) + ', ' + str(maxStress) + ('\n'))
time += 0.5
else:
data_file.write(str(time) + ', ' + str(round(stressHigh,1)) + ('\n'))
time += 0.5
data_file.write(str(time) + ', ' + str(round(stressLow,1)) + ('\n'))
time += 0.5
except ValueError:
pass  # rows with missing/non-numeric fields are skipped
line = file.readline()
data_file.close()
time = 0
boomDirection = boomDirection + 1

Trouble downloading data from a website and reading the data with Dataset from netCDF4

The goal of my code is to download GFS data from the date specified (whether user inputted or just grabbing today's data) and have it downloaded and read using netCDF4. I need to download the data package so that when my code runs, it isn't taking more than 15 minutes to run and then being shut down by the DOS server since it is accessing so much data. This is what I have so far:
def accessGFS():
    """Locate the most recent published GFS 0.25-degree run on NOMADS,
    download it, save it locally, and open it with netCDF4.

    Returns:
        (GFSDateTime, gfs): the datetime of the chosen model run and the
        opened netCDF4 Dataset.
    """
    baseURL = 'http://nomads.ncep.noaa.gov:9090/dods/gfs_0p25/'
    GFSDate = int(time.strftime("%Y%m%d"))
    currentHour = time.gmtime()[3]  # current UTC hour
    gfsTimeHeader = 'gfs_0p25_'
    # Pick the latest cycle that should be fully published (cycles appear a
    # few hours after their nominal 00/06/12/18Z time -- TODO confirm lag).
    if currentHour > 22:
        timeURL = gfsTimeHeader + '18z'
        GFSTime = 18
    elif currentHour > 16:
        timeURL = gfsTimeHeader + '12z'
        GFSTime = 12
    elif currentHour > 10:
        timeURL = gfsTimeHeader + '06z'
        GFSTime = 6
    elif currentHour > 4:
        timeURL = gfsTimeHeader + '00z'
        GFSTime = 0
    else:
        # Before ~05Z the newest complete run is yesterday's 18Z cycle.
        timeURL = gfsTimeHeader + '18z'
        GFSTime = 18
        # BUG FIX: subtracting 1 from the yyyymmdd integer produces an
        # invalid date on the first day of a month; step back one real
        # calendar day instead.
        s = str(GFSDate)
        prevDay = datetime.date(int(s[:4]), int(s[4:6]), int(s[6:])) - datetime.timedelta(days=1)
        GFSDate = int(prevDay.strftime("%Y%m%d"))
    GFSDate = str(GFSDate)
    GFSDateTime = datetime.datetime(int(GFSDate[:4]), int(GFSDate[4:6]), int(GFSDate[6:]), GFSTime, 0, 0)
    dateURL = 'gfs' + GFSDate + '/'
    url = baseURL + dateURL + timeURL
    # BUG FIX: the original urlencoded an empty dict and passed it as the
    # request body, turning the download into a POST; a plain GET is correct.
    req = urllib.request.Request(url)
    with urllib.request.urlopen(req) as gfs_download:
        gfsData = gfs_download.read()
    # BUG FIX: the original wrote str(bytes) into a text-mode file and then
    # passed the raw response bytes to Dataset(), which expects a file path /
    # URL -- hence the FileNotFoundError whose "filename" was the page
    # content.  Save the payload in binary mode and open the saved file.
    fileName = 'GFS%sdata.nc' % GFSDate
    with open(fileName, 'wb') as saveFile:
        saveFile.write(gfsData)
    # NOTE(review): this URL is a NOMADS/OPeNDAP endpoint; if the server
    # answers with an HTML error page the saved file will not be valid
    # netCDF.  netCDF4 can open OPeNDAP URLs directly (Dataset(url)) --
    # consider that instead of downloading.  TODO confirm.
    gfs = Dataset(fileName)
    return GFSDateTime, gfs
Which is then called upon the line of code:
gfs, gfsDate = GFSReader.accessGFS()
When I run the code it does access the GFS server and downloads the file into the right folder, but it throws me the error:
FileNotFoundError: [Errno 2] No such file or directory: b'b\'<html>\\n<head>\\n
There is way more to that error though. It basically copies and pastes the entire '.nc' file I created in accessGFS() and throws that in the error code. These are the trackbacks:
File "C:/Users/Desktop/Predictions/GFSDriver.py", line 65 in <module>
gfs, gfsDate = GFSReader.accessGFS()
File "C:\Users\Desktop\Predictions\GFSReader.py", line 53. in accessGFS
gfs = Dataset(gfsData)
File "netCDF4\_netCDF4.pyx", line 2111, in netCDF4._netCDF4.Dataset.__init__
File "netCDF4\_netCDF4.pyx", line 1731, in netCDF4._ensure_nc_success
So I know it has something to do with the way I downloaded the file or the way it is being read through netCDF4, but I'm not sure what it is. The code has worked without downloading the data at all, and just getting the Dataset every time it was called on. So that's what makes me think that for some reason the function within netCDF4, Dataset, isn't reading the file I am downloading properly.
Any suggestions?

How can I read a file and calculate values with data in strings?

Let me explain what I mean: I have gotten data from nine pictures, including r,g,b values and brightness, calculated the averages for each picture, and then written those values for each onto a .txt file (in Notepad) as one string per picture. I am now trying to take any two strings from that file and calculate differences in the values through a difference formula (difference = (val1 - val2) / val2). What I cannot figure out is how to read the .txt file and use two different strings for calculations. I would greatly appreciate any tips or help.
Here's what I have so far:
from graphics import *
def loadImage(image):
    """Display *image* in a window, wait for a mouse click, then total the
    brightness and R/G/B values over every pixel.

    Returns a tuple: (image name, total brightness, total R, total G, total B).
    """
    win = GraphWin("Picture", 400, 500)
    pic = Image(Point(200, 250), image)
    pic.draw(win)
    win.getMouse()  # pause until the user clicks the window
    sum_bright = 0
    sum_r = 0
    sum_g = 0
    sum_b = 0
    for col in range(pic.getWidth()):
        for row in range(pic.getHeight()):
            r, g, b = pic.getPixel(col, row)
            # ITU-R BT.601 luma weights, rounded to the nearest integer.
            sum_bright += int(round(0.299 * r + 0.587 * g + 0.114 * b))
            sum_r += r
            sum_g += g
            sum_b += b
    return (image, sum_bright, sum_r, sum_g, sum_b)
def ImageAnalysis():
    """Run loadImage on the three mystery pictures and append each result
    tuple as one line of Picture_Features.txt."""
    # BUG FIX: the original opened the file and never closed it, so buffered
    # output could be lost; 'with' guarantees flush/close.  'a+' (append,
    # also readable) is kept to match the original open mode.
    with open("Picture_Features.txt", "a+") as infile:
        print(loadImage("mystery1.GIF"), file=infile)
        print(loadImage("mystery2.GIF"), file=infile)
        print(loadImage("mystery3.GIF"), file=infile)
From what I understand, the method is fairly straightforward:
Read file by line and store as list of lines (string)
Convert all strings in list to a list/tuple
Take 2 entries in the list and use difference formula
Should look something like this:
# Read the saved feature tuples back in and compare the first two pictures.
with open("Picture_Features.txt", 'r') as f:
    l = f.readlines()
# Each line looks like "(name, brightness, R, G, B)\n".
# BUG FIX: strip the trailing newline *before* slicing off the parentheses;
# the original's l[i][1:-1] removed '(' and '\n' but left ')' glued to the
# last field, so int() raised ValueError on it.
for i in range(len(l)):
    l[i] = l[i].strip()[1:-1].split(', ')
# First two entries, numeric fields only (index 0 is the image name).
e1 = [int(v) for v in l[0][1:]]
e2 = [int(v) for v in l[1][1:]]
# Relative difference of each value pair; list() materializes the lazy map.
diff_formula = lambda x1, x2: (x1 - x2) / x2
result = list(map(diff_formula, e1, e2))

str does not support the buffer interface using .find

trying to search within a .thumbdata3 file for thumbnail images. This was someone else's sample code, but I am getting an error
"str does not support the buffer interface using .find"
"""extract files from Android thumbdata3 file"""
f=open('thumbdata3.dat','rb')
tdata = f.read()
f.close()
ss = '\xff\xd8'
se = '\xff\xd9'
count = 0
start = 0
while True:
x1 = tdata.find(ss,start)
if x1 < 0:
break
x2 = tdata.find(se,x1)
jpg = tdata[x1:x2+1]
count += 1
fname = 'extracted%d03.jpg' % (count)
fw = open(fname,'wb')
fw.write(jpg)
fw.close()
start = x2+2
ok, turned out to be very simple.
just add b in front of the data I am trying to match
so
ss = '\xff\xd8'
se = '\xff\xd9'
becomes
ss = b'\xff\xd8'
se = b'\xff\xd9'
It's all right.
With Python 3.x like python-3.6.2
Rename .thumbdata3-1763508120 file to thumbdata3.dat
Rename .thumbdata3--1967290299 file to thumbdata4.dat
enter code here
"""extract files from Android thumbdata3 file"""
f=open('thumbdata3.dat','rb')
tdata = f.read()
f.close()
ss = b'\xff\xd8'
se = b'\xff\xd9'
count = 0
start = 0
while True:
x1 = tdata.find(ss,start)
if x1 < 0:
break
x2 = tdata.find(se,x1)
jpg = tdata[x1:x2+1]
count += 1
fname = 'extracted%d03.jpg' % (count)
fw = open(fname,'wb')
fw.write(jpg)
fw.close()
start = x2+2
enter code here
"""extract files from Android thumbdata4 file"""
f=open('thumbdata4.dat','rb')
tdata = f.read()
f.close()
ss = b'\xff\xd8'
se = b'\xff\xd9'
count = 0
start = 0
while True:
x1 = tdata.find(ss,start)
if x1 < 0:
break
x2 = tdata.find(se,x1)
jpg = tdata[x1:x2+1]
count += 1
fname = 'extracted%d04.jpg' % (count)
fw = open(fname,'wb')
fw.write(jpg)
fw.close()
start = x2+2

Resources