how to repeat a script in another code with another input data - multithreading

I am new to Python. I wrote a script that loads a txt file and writes its result to another txt file. I want to repeat this for other txt files, all of which are in the same folder. There are almost 300 of them, but I don't know how to do that. Thanks.
# Read the data table; column 0 is used as x and column 2 as y.
dat = np.loadtxt('test1.txt')
x, y = dat[:, 0], dat[:, 2]

# Composite model: a Lorentzian peak plus a constant.
peak = LorentzianModel()
constant = ConstantModel()
mod = peak + constant

# Start from the guessed peak parameters, add the constant's parameters,
# and set the parameter named 'c' to 1.04066.
pars = peak.guess(y, x=x)
pars.update(constant.make_params())
pars['c'].set(value=1.04066)

out = mod.fit(y, pars, x=x)
comps = out.eval_components(x=x)

# Write the textual fit report to the output file.
writer = out.fit_report(min_correl=0.25)
path = '/Users/dellcity/Desktop/'
filename = 'output.txt'
with open(path + filename, 'wt') as f:
    f.write(writer)

You need to define a function that takes the filename as a parameter. Then, in the main part of your program, loop over all the files you want to load and call the function for each one, e.g.:
import os
# All fit reports are appended to this one file.
OUTPUT_FILE = os.path.join('/Users/dellcity/Desktop/', 'output.txt')


def myFunction(filename):
    """Fit one data file and append its fit report to OUTPUT_FILE.

    filename: path of a text file readable by np.loadtxt; column 0 is
    used as x and column 2 as y.
    """
    dat = np.loadtxt(filename)
    x = dat[:, 0]
    y = dat[:, 2]
    peak = LorentzianModel()
    constant = ConstantModel()
    pars = peak.guess(y, x=x)
    pars.update(constant.make_params())
    pars['c'].set(1.04066)
    mod = peak + constant
    out = mod.fit(y, pars, x=x)
    report = out.fit_report(min_correl=0.25)
    # Mode 'a' appends, so earlier reports are kept. Each report is
    # preceded by the name of its input file so they can be told apart.
    with open(OUTPUT_FILE, 'at') as f:
        f.write(f'===== {filename} =====\n')
        f.write(report)
        f.write('\n\n')


# Folder that holds the data files; change it to the path of your data.
data_dir = '.'
for filename in sorted(os.listdir(data_dir)):
    if not filename.endswith(".txt"):
        continue
    full_path = os.path.join(data_dir, filename)
    # Never treat the report file itself as input data.
    if os.path.abspath(full_path) == os.path.abspath(OUTPUT_FILE):
        continue
    myFunction(full_path)

Related

Rename multiple files with python

I'm trying to create a program to rename multiple files at once, using Python. I realize I'm reinventing the wheel, but I'm trying to understand what I'm doing wrong. Any help would be greatly appreciated. Program.......
import os
# Folder whose entries are listed below.
path = "LOCATION"
dir_list = os.listdir(path)
myList = []
for x in dir_list:
if x.endswith(".mp3"):
# Split the file name on "-"; the four pieces are labelled below.
f1 = x.split("-")
ln1 = f1[0] # Band Name
ln2 = f1[1] # Album Title
ln3 = f1[2] # Track number
ln4 = f1[3] # Song Name
# NOTE(review): str.join puts x between every character of ln2 + ln3,
# so this is not a plain concatenation of the two parts.
newname = x.join(ln2 + ln3)
# NOTE(review): x and newname are bare names without `path`, so they are
# resolved relative to the current working directory.
os.rename(x, newname)
print(newname)
Your error:
line 14, in <module> os.rename(x, newname) -> FileNotFoundError: [WinError 2] The system cannot find the file specified:
...Is likely due to the path not being included in your os.rename() call, I suggest changing os.rename(x, newname) to os.rename(path + x, path + newname) which will solve that issue.
I also noticed some funky behavior with the way you were grabbing the song information, so if you have any further issues, here's the code I used to debug your original issue which seems to have the result you're going for:
import os
# Folder that holds the .mp3 files. Paths below are built with
# os.path.join, so the trailing separator is optional.
path = "C:\\Users\\Pepe\\Documents\\StackOverflow\\73430533\\"


def build_new_name(file_name):
    """Return the new name for a "Band-Album-Track-Song.mp3" file.

    The result is "<album title> - <track number>.mp3", i.e. the two
    parts the question's code tries to combine. Returns None when
    file_name does not end in ".mp3" or has fewer than four
    "-"-separated parts, so the caller can skip it.
    """
    stem, extension = os.path.splitext(file_name)
    if extension != ".mp3":
        return None
    # Strip blanks so "Band - Album - 01 - Song" works like "Band-Album-01-Song".
    name_details = [part.strip() for part in stem.split("-")]
    if len(name_details) < 4:
        return None
    _band_name, album_title, track_number, _song_name = name_details[:4]
    # " - " instead of " | ": "|" is not allowed in Windows file names.
    return f"{album_title} - {track_number}.mp3"


if __name__ == "__main__":
    for x in os.listdir(path):
        new_name = build_new_name(x)
        if new_name is None:
            continue
        print(f"Renaming \"{x}\" to \"{new_name}\"")
        os.rename(os.path.join(path, x), os.path.join(path, new_name))

NETCDF4 file doesn't grow beyond 2GB

I have a NETCDF4 file which doesn't grow beyond 2GB.
I am using the following sample data - I am converting over 200 txt files to netcdf4 file
STATIONS_ID;MESS_DATUM; QN;FF_10;DD_10;eor
3660;201912150000; 3; 4.6; 170;eor
3660;201912150010; 3; 4.2; 180;eor
3660;201912150020; 3; 4.3; 190;eor
3660;201912150030; 3; 5.2; 190;eor
3660;201912150040; 3; 5.1; 190;eor
3660;201912150050; 3; 4.8; 190;eor
The code looks like:
# Every regular file in the current directory is treated as an input file.
files = [f for f in os.listdir('.') if os.path.isfile(f)]
count = 0
for f in files:
filecp = open(f, "r", encoding="ISO-8859-1")
# NC file setup
# NOTE(review): 'v5.nc' is opened in 'w' mode here; if this line sits inside
# the loop (indentation was lost in this listing), it runs once per input file.
mydata = netCDF4.Dataset('v5.nc', 'w', format='NETCDF4')
mydata.description = 'Measurement Data'
# One dimension per column, each created with size None.
mydata.createDimension('STATION_ID',None)
mydata.createDimension('MESS_DATUM',None)
mydata.createDimension('QN',None)
mydata.createDimension('FF_10',None)
mydata.createDimension('DD_10',None)
# One variable per column, each defined on the dimension of the same name.
# NOTE(review): np.long is not available in every NumPy release - confirm
# against the installed version.
STATION_ID = mydata.createVariable('STATION_ID',np.short,('STATION_ID'))
MESS_DATUM = mydata.createVariable('MESS_DATUM',np.long,('MESS_DATUM'))
QN = mydata.createVariable('QN',np.byte,('QN'))
FF_10 = mydata.createVariable('FF_10',np.float64,('FF_10'))
DD_10 = mydata.createVariable('DD_10',np.short,('DD_10'))
STATION_ID.units = ''
MESS_DATUM.units = 'Central European Time yyyymmddhhmi'
QN.units = ''
FF_10.units = 'meters per second'
DD_10.units = "degree"
# Parse the ';'-separated text file and keep only the raw value array.
txtdata = pd.read_csv(filecp, delimiter=';').values
#txtdata = np.genfromtxt(filecp, dtype=None, delimiter=';', names=True, encoding=None)
if len(txtdata) > 0:
# The DataFrame is rebuilt from the bare array, so its columns are
# addressed by position (0..4) below rather than by header name.
df = pd.DataFrame(txtdata)
sh = txtdata.shape
print("txtdata shape is ", sh)
# Each write uses [:], i.e. it is not offset by the rows of earlier files.
mydata['STATION_ID'][:] = df[0]
mydata['MESS_DATUM'][:] = df[1]
mydata['QN'][:] = df[2]
mydata['FF_10'][:] = df[3]
mydata['DD_10'][:] = df[4]
mydata.close()
filecp.close()
count +=1
Your problem is that you create the same file in the loop. So your file size is limited to the biggest initial data file.
Open the file once, and add each new data to the end of netcdf data arrays.
If you get 124 values in the first file, you put:
mydata['STATION_ID'][0:124] = df[0]
and you get 224 from the second file, you put
mydata['STATION_ID'][124:124+224] = df[0]
So, in case data files are downloaded from https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/10_minutes/wind/recent/ to <text file path>
import netCDF4
import codecs
import pandas as pd
import os
import numpy as np
# Replace with the folder that holds the downloaded text files.
fpath = "<text file path>"

# netCDF variable name -> column header in the text files.
columns = {
    'STATION_ID': 'STATIONS_ID',
    'MESS_DATUM': 'MESS_DATUM',
    'QN': ' QN',
    'FF_10': 'FF_10',
    'DD_10': 'DD_10',
}

# Open the output file exactly once. The with-block closes it at the end,
# which is what flushes the written data to disk.
with netCDF4.Dataset('v5.nc', 'w', format='NETCDF4') as mydata:
    mydata.description = 'Wind Measurement Data'
    # Size None makes each dimension unlimited so it can grow per input file.
    for dim_name in columns:
        mydata.createDimension(dim_name, None)
    STATION_ID = mydata.createVariable('STATION_ID', np.short, ('STATION_ID',))
    # np.int64 instead of np.long: np.long is missing from some NumPy
    # releases, and values like 201912150000 do not fit into 32 bits.
    MESS_DATUM = mydata.createVariable('MESS_DATUM', np.int64, ('MESS_DATUM',))
    QN = mydata.createVariable('QN', np.byte, ('QN',))
    FF_10 = mydata.createVariable('FF_10', np.float64, ('FF_10',))
    DD_10 = mydata.createVariable('DD_10', np.short, ('DD_10',))
    STATION_ID.units = ''
    MESS_DATUM.units = 'Central European Time yyyymmddhhmi'
    QN.units = ''
    FF_10.units = 'meters per second'
    DD_10.units = "degree"

    # Index of the first free row in the netCDF variables.
    mydata_startindex = 0
    for f in sorted(os.listdir(fpath)):
        full_path = os.path.join(fpath, f)
        if not os.path.isfile(full_path):
            continue
        # Passing the path lets pandas open and close the file itself.
        txtdata = pd.read_csv(full_path, delimiter=';', encoding="ISO-8859-1")
        chunksize = len(txtdata)
        if chunksize == 0:
            continue
        stop = mydata_startindex + chunksize
        # Append this file's rows after the rows of all previous files.
        for var_name, column in columns.items():
            mydata[var_name][mydata_startindex:stop] = txtdata[column].values
        mydata_startindex = stop

".DS" meaning in python

I know it's probably a very silly question, but could someone please tell me what ".DS" means in the following function? Does it have a special meaning in Python, or is it only used in this project and I simply didn't get it?
def load_paired_img_wrd(folder, word_vectors, use_word_vectors=True):
    """Load the images below folder/<class name>/ together with class labels.

    Entries whose name contains ".DS" are ignored, both among the class
    folders and among the files inside them. When use_word_vectors is true,
    each class label is the mean of the vectors of the "_"-separated parts
    of the class name; a part missing from word_vectors counts as a
    300-element zero vector.

    Returns (image array, array of labels, list of image paths).
    """
    def _without_ds(names):
        # Drop every entry whose name contains ".DS".
        return [name for name in names if ".DS" not in name]

    image_list, labels_list, paths_list = [], [], []
    for cl in _without_ds(os.listdir(folder)):
        if use_word_vectors:
            vectors = []
            for token in cl.split("_"):
                if token in word_vectors:
                    vectors.append(word_vectors[token])
                else:
                    vectors.append(np.zeros(shape=300))
            class_vector = np.mean(np.array(vectors), axis=0)
        for subf in _without_ds(os.listdir(folder + "/" + cl)):
            full_path = os.path.join(folder, cl, subf)
            img = image.load_img(full_path, target_size=(224, 224))
            # Add a leading axis of length 1 before preprocessing.
            batch = np.expand_dims(image.img_to_array(img), axis=0)
            image_list.append(preprocess_input(batch))
            if use_word_vectors:
                labels_list.append(class_vector)
            paths_list.append(full_path)
    # Move the per-image length-1 axis to the front, then index it away.
    stacked = np.rollaxis(np.array(image_list), 1, 0)
    return stacked[0], np.array(labels_list), paths_list
this is probably trying to filter out the junk .DS_Store files that appear on macos
The file .DS_Store is created in any directory (folder) accessed by the Finder application
That's a text value. It could as easily have been "BS" or "Foo!" and the code would operate in the same way.
In this case, the program is looking to see if there are files with the string ".DS" in them, and removing them from a list.

Python ingestion of csv files

I am trying to ingest daily CSV data into Python. I have a different set of files for each day, as shown below. I need help appending two columns whose values come from the file name: e.g. the first column should take the value before the '_', and the second column should take the date part of the file name.
board_2019-08-08.csv
sign_2019-08-08.csv
Summary_2019-08-08.csv
Code :
# Raw string: in a normal string literal "\x" starts a hex escape, so
# "C:\xyz..." is a syntax error.
path = r"C:\xyz\Files\ETL\Dashboard"
all_files = glob.glob(os.path.join(path, "*.csv"))
for file in all_files:
    # Base name without the directory and without the extension.
    file_name = os.path.splitext(os.path.basename(file))[0]
    dfn = pd.read_csv(file, skiprows=17)
    dfn['Page'] = 'Dashboard'
    del dfn['Dimension']
    dfn = dfn.iloc[1:]
    dfn.columns = ['LoanId', 'Impressions', 'Page']
`
Try this
import glob
import os

import pandas as pd

# Raw string so the backslashes are kept literally; in a normal string
# literal "\x" starts a hex escape and "C:\xyz..." is a syntax error.
path = r"C:\xyz\Files\ETL\Dashboard"


def name_parts(file_name):
    """Split "<prefix>_<date>.csv" into (prefix, date).

    The split happens at the last "_", so a prefix may itself contain
    underscores. A name without "_" yields (name without extension, "").
    """
    stem = os.path.splitext(os.path.basename(file_name))[0]
    prefix, separator, date_part = stem.rpartition("_")
    if not separator:
        return stem, ""
    return prefix, date_part


# Match on the "*.csv" pattern inside `path` (the variable, not the
# literal folder name 'path').
for file in glob.glob(os.path.join(path, "*.csv")):
    pre, post = name_parts(file)
    dfn = pd.read_csv(file, skiprows=17)
    # Insert the two file-name derived columns as the first two columns.
    dfn.insert(0, "column1", value=pre)
    dfn.insert(1, "column2", value=post)
    # rest of your code

How to convert a two dimensional data into three dimensional with third dimension as time with single value?

I have daily wind data from quickscat ftp://ftp.ifremer.fr/ifremer/cersat/products/gridded/mwf-quikscat/data/daily
The problem is that the zonal and meridional winds are two-dimensional, i.e. they have only (lon, lat) as dimensions, not (time, lon, lat). The file does contain the time information, both as a variable and as a dimension. I tried to copy all the dimensions and variable data from the input file to an output file, but something goes wrong: latitude, longitude and time are copied successfully, but the wind values are not. In the source file the wind is 2-dimensional, but in the output file I want it to be 3-dimensional, with time as the third dimension. In any case, the time dimension has length 1.
import netCDF4 as nc
import numpy as np
import os
# Input folder and output folder; each input file gets an output file of
# the same name.
in_path = '2000'
out_path = '2000_new'
files = os.listdir(in_path)
# fd is incremented once per file; it is not read anywhere in this snippet.
fd=0
for names in files:
# print(names)
x_file = os.path.join(in_path,names)
y_file = os.path.join(out_path,names)
fd +=1
# Read the two 2-D wind fields and the coordinate/time values.
i_file = nc.Dataset(x_file, 'r')
z_w = i_file.variables['zonal_wind_speed'][:,:]
m_w = i_file.variables['meridional_wind_speed'][:,:]
y = i_file.variables['latitude'][:]
x = i_file.variables['longitude'][:]
t = i_file.variables['time'][:]
# NOTE(review): the command is the literal text 'rm y_file with an unbalanced
# quote; the value of the variable y_file is not substituted into it.
os.system("'rm y_file")
o_file = nc.Dataset(y_file, 'w', format='NETCDF4')
# latitude/longitude take their sizes from the input; time is created with None.
latitude = o_file.createDimension('latitude', y.size)
longitude = o_file.createDimension('longitude', x.size)
time = o_file.createDimension('time',None)
# NOTE(review): latitude is tagged axis 'X' and longitude axis 'Y' below -
# confirm these are not swapped.
var = o_file.createVariable('latitude','f4',('latitude'), zlib=True)
o_file.variables['latitude'].units = 'degree_north'
o_file.variables['latitude'].long_name ='latitude'
o_file.variables['latitude'].axis = 'X'
var = o_file.createVariable('longitude','f4',('longitude'), zlib=True)
o_file.variables['longitude'].units = 'degree_east'
o_file.variables['longitude'].long_name = 'longitude'
o_file.variables['longitude'].axis = 'Y'
var = o_file.createVariable('time','d',('time'), zlib=True)
o_file.variables['time'].long_name = 'time'
o_file.variables['time'].units = "hours since 1900-1-1 0:0:0"
o_file.variables['time'].calendar = 'standard'
o_file.variables['time'].axis = 'T'
# The wind variables are declared 3-D: (time, latitude, longitude).
var = o_file.createVariable('u','f4',('time','latitude','longitude'),fill_value=-1.e+23, zlib=True)
o_file.variables['u'].long_name='zonal wind speed component'
o_file.variables['u'].units = 'meter second-1'
o_file.variables['u'].coordinates = 'longitude latitude'
o_file.variables['u'].time = 'time'
var = o_file.createVariable('v','f4',('time','latitude','longitude'),fill_value=-1.e+23, zlib = True)
o_file.variables['v'].long_name = 'meridional wind speed component'
o_file.variables['v'].units = 'meter second-1'
o_file.variables['v'].coordinates = 'longitude latitude'
o_file.variables['v'].time = 'time'
# Coordinates and time are written through a [:] slice on the variable.
o_file.variables['latitude'][:] = y
o_file.variables['longitude'][:] =x
o_file.variables['time'][:] = t
# NOTE(review): unlike the three writes above, these two lines assign to the
# 'u'/'v' entries of o_file.variables themselves (no slice on the variable);
# presumably that replaces the entry instead of storing the 2-D data - verify.
o_file.variables['u'] = z_w
o_file.variables['v'] = m_w
i_file.close()
o_file.close()
Actually, your time dimension does not have length 1, it has unlimited length. If you actually want it to have length 1, you need to use
#time = o_file.createDimension('time',None)
time = o_file.createDimension('time',1)
instead.
Then, to set all data of the first (and only) time index to your values, use
o_file.variables['u'][0] = z_w
o_file.variables['v'][0] = m_w
If you do end up saving multiple times in the file, replace the 0 with the appropriate index for the data you are copying in.
Alternatively, because time dimension now has length 1, you could also copy it using numpy.expand_dims
o_file.variables['u'][:] = np.expand_dims(z_w, 0)
o_file.variables['v'][:] = np.expand_dims(m_w, 0)

Resources