How do I change a text dictionary from file into usable dictionary - python-3.x

Right so, I need to make this function that basically saves a player's username in a dictionary which is next saved in a text file to be reused again.
The problem is on reusing it I can't manage to get the str that I get from the file into a dictionary.
Here is my code:
from ast import eval
def verification(j, d):
if j in d.keys():
return d
else:
d[j] = [0,0]
return d
savefile = open("save.txt", "r")
'''d = dict()
for line in savefile:
(key, val) = line.split(".")
d[key] = val
print(d)'''
d = savefile.read()
python_dict = literal_eval(d)
savefile.close()
j = input("name? ")
result = verification(j, python_dict)
savefile = open("save.txt", "w")
'''for i in result:
text = i + "." + str(result[i]) + " \n"
savefile.write(text)'''
savefile.write(str(result))
savefile.close()
As you can see I tried with the literal_eval from ast. I also tried to do a .split() but that wouldn't work. So I'm stuck. Any ideas? It would be of great help.
Thanks

There is no need to do your own encoding/decoding from scratch when you have existing libraries to do it for you.
One good example is JSON which is also not Python exclusive so the database you create can be used by other applications.
This can be done easily by:
import json
def verification(j, d):
if j not in d:
d[j] = [0,0]
return d
with open("save.txt", "r") as savefile:
python_dict = json.load(savefile)
j = input("name? ")
result = verification(j, python_dict)
with open("save.txt", "w") as savefile:
json.dump(result, savefile)

Related

Read data from txt file, store it, use it for analyzing, write it to the txt file

The task is to read from given txt file the data add the numbers in there to the list[], so that every number in a row will be a element/object in this list. After reading the file created list will be sent to the main().
this list with the objects will be parameters for the def Analyze part in where at the same time
will be found min, max, average and sum.
def lueTiedosto(data):
Tiedosto = open("L07T4D1.txt", 'r', encoding="UTF-8")
Rivi = Tiedosto.readline()
while (len(Rivi) > 0):
data.append(int(Rivi))
Rivi = Tiedosto.readline()
for element in data:
print(element)
print(f"Tiedosto L07T4D1.txt luettu.")
Tiedosto.close()
return element
The fixed code which works:
def lueTiedosto(data):
Lue = input("Luettavan tiedoston nimi on ''.\n")
print(f"Anna uusi nimi, enter säilyttää nykyisen: ", end='')
Tiedosto = open(Lue, 'r', encoding="UTF-8")
Rivi = Tiedosto.readline()
while (len(Rivi) > 0):
data.append(int(Rivi))
Rivi = Tiedosto.readline()
print(f"Tiedosto '{Lue}' luettu.")
Tiedosto.close()
return data
Making an assumption that your input file is similar to the following:
10000
12345
10008
12000
I would do the following:
filepath = r".......\L07T4D1.txt" # Path to file being loaded
def readData(filepath: str) -> list[int]:
# Returns a list of integers from file
rslt = []
with open (filepath, 'r') as f:
data = f.readline().strip()
while data:
data = data.split(' ')
rslt.append(int(data[0]))
data = f.readline().strip()
return rslt
def analyze(data: list[int]) -> None:
# prints results of data analysis
print(f'Max Value = {max(data)}')
print(f'Min Value = {min(data)}')
print(f'Sum Value = {sum(data)}')
print(f'Avg Value = {sum(data)/len(data)}')
Running analyze(readData(filepath)) Yields:
Max Value = 12345
Min Value = 10000
Sum Value = 44353
Avg Value = 11088.25

Plotting multiple lines with a Nested Dictionary, and unknown variables to Line Graph

I was able to find somewhat of an answer to my question, but it was not as nested as my dictionary and so I am really unsure how to proceed as I am still very new to python. I currently have a nested dictionary like
{'140.10': {'46': {'1': '-49.50918', '2': '-50.223637', '3': '49.824406'}, '28': {'1': '-49.50918', '2': '-50.223637', '3': '49.824406'}}}:
I am wanting to plot it so that '140.10' becomes the title of the graph and '46' and '28' become the individual lines and key '1' for example is on the y axis and the x axis is the final number (in this case '-49.50918). Essentially a graph like this:
I generated this graph with a csv file that is written at another part of the code just with excel:
[![enter image description here][2]][2]
The problem I am running into is that these keys are autogenerated from a larger csv file and I will not know their exact value until the code has been run. As each of the keys are autogenerated in an earlier part of the script. As I will be running it over various files called the Graph name, and each file will have a different values for:
{key1:{key2_1: {key3_1: value1, key3_2: value2, key3_3: value3}, key_2_2 ...}}}
I have tried to do something like this:
for filename in os.listdir(Directory):
if filename.endswith('.csv'):
q = filename.split('.csv')[0]
s = q.split('_')[0]
if s in time_an_dict:
atom = list(time_an_dict[s])
ion = time_an_dict[s]
for f in time_an_dict[s]:
x_val = []
y_val = []
fz = ion[f]
for i in time_an_dict[s][f]:
pos = (fz[i])
frame = i
y_val.append(frame)
x_val.append(pos)
'''ions = atom
frame = frames
position = pos
plt.plot(frame, position, label = frames)
plt.xlabel("Frame")
plt.ylabel("Position")
plt.show()
#plt.savefig('{}_Pos.png'.format(s))'''
But it has not run as intended.
I have also tried:
for filename in os.listdir(Directory):
if filename.endswith('_Atom.csv'):
q = filename.split('.csv')[0]
s = q.split('_')[0]
if s in window_dict:
name = s + '_Atom.csv'
time_an_dict[s] = analyze_time(name,window_dict[s])
new = '{}_A_pos.csv'.format(s)
ions = list(time_an_dict.values())[0].keys()
for i in ions:
x_axis_values = []
y_axis_values = []
frame = list(time_an_dict[s][i])
x_axis_values.append(frame)
empty = []
print(x_axis_values)
for x in frame:
values = time_an_dict[s][i][x]
empty.append(values)
y_axis_values.append(empty)
plt.plot(x_axis_values, y_axis_values, label = x )
plt.show()
But keep getting the error:
Traceback (most recent call last): File "Atoms_pos.py", line 175, in
plt.plot(x_axis_values, y_axis_values, label = x ) File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/pyplot.py",
line 2840, in plot
return gca().plot( File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/axes/_axes.py",
line 1743, in plot
lines = [*self._get_lines(*args, data=data, **kwargs)] File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/axes/_base.py",
line 273, in call
yield from self._plot_args(this, kwargs) File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/axes/_base.py",
line 394, in _plot_args
self.axes.xaxis.update_units(x) File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/axis.py",
line 1466, in update_units
default = self.converter.default_units(data, self) File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/category.py",
line 107, in default_units
axis.set_units(UnitData(data)) File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/category.py",
line 176, in init
self.update(data) File "/Users/hxb51/opt/anaconda3/lib/python3.8/site-packages/matplotlib/category.py",
line 209, in update
for val in OrderedDict.fromkeys(data): TypeError: unhashable type: 'numpy.ndarray'
Here is the remainder of the other parts of the code that generate the files and dictionaries I am using. I was told in another question I asked that this could be helpful.
# importing dependencies
import math
import sys
import pandas as pd
import MDAnalysis as mda
import os
import numpy as np
import csv
import matplotlib.pyplot as plt
################################################################################
###############################################################################
Directory = '/Users/hxb51/Desktop/Q_prof/Displacement_Charge/Blah'
os.chdir(Directory)
################################################################################
''' We are only looking at the positions of the CLAs and SODs and not the DRUDE counterparts. We are assuming the DRUDE
are very close and it is not something that needs to be concerned with'''
def Positions(dcd, topo):
fields = ['Window', 'ION', 'ResID', 'Location', 'Position', 'Frame', 'Final']
with open('{}_Atoms.csv'.format(s), 'a') as d:
writer = csv.writer(d)
writer.writerow(fields)
d.close()
CLAs = u.select_atoms('segid IONS and name CLA')
SODs = u.select_atoms('segid IONS and name SOD')
CLA_res = len(CLAs)
SOD_res = len(SODs)
frame = 0
for ts in u.trajectory[-10:]:
frame +=1
CLA_pos = CLAs.positions[:,2]
SOD_pos = SODs.positions[:,2]
for i in range(CLA_res):
ids = i + 46
if CLA_pos[i] < 0:
with open('{}_Atoms.csv'.format(s), 'a') as q:
new_line = [s,'CLA', ids, 'Bottom', CLA_pos[i], frame,10]
writes = csv.writer(q)
writes.writerow(new_line)
q.close()
else:
with open('{}_Atoms.csv'.format(s), 'a') as q:
new_line = [s,'CLA', ids, 'Top', CLA_pos[i], frame, 10]
writes = csv.writer(q)
writes.writerow(new_line)
q.close()
for i in range(SOD_res):
ids = i
if SOD_pos[i] < 0:
with open('{}_Atoms.csv'.format(s), 'a') as q:
new_line = [s,'SOD', ids, 'Bottom', SOD_pos[i], frame,10]
writes = csv.writer(q)
writes.writerow(new_line)
q.close()
else:
with open('{}_Atoms.csv'.format(s), 'a') as q:
new_line = [s,'SOD', ids, 'Top', SOD_pos[i], frame, 10]
writes = csv.writer(q)
writes.writerow(new_line)
q.close()
csv_Data = pd.read_csv('{}_Atoms.csv'.format(s))
filename = s + '_Atom.csv'
sorted_df = csv_Data.sort_values(["ION", "ResID", "Frame"],
ascending=[True, True, True])
sorted_df.to_csv(filename, index = False)
os.remove('{}_Atoms.csv'.format(s))
''' this function underneath looks at the ResIds, compares them to make sure they are the same and then counts how many
times the ion flip flops around the boundaries'''
def turn_dict(f):
read = open(f)
reader = csv.reader(read, delimiter=",", quotechar = '"')
my_dict = {}
new_list = []
for row in reader:
new_list.append(row)
for i in range(len(new_list[:])):
prev = i - 1
if new_list[i][2] == new_list[prev][2]:
if new_list[i][3] != new_list[prev][3]:
if new_list[i][2] in my_dict:
my_dict[new_list[i][2]] += 1
else:
my_dict[new_list[i][2]] = 1
return my_dict
def plot_flips(f):
dict = turn_dict(f)
ions = list(dict.keys())
occ = list(dict.values())
plt.bar(range(len(dict)), occ, tick_label = ions)
plt.title("{}".format(s))
plt.xlabel("Residue ID")
plt.ylabel("Boundary Crosses")
plt.savefig('{}_Flip.png'.format(s))
def analyze_time(f, dicts):
read = open(f)
reader = csv.reader(read, delimiter=",", quotechar='"')
new_list = []
keys = list(dicts.keys())
time_dict = {}
pos_matrix = {}
for row in reader:
new_list.append(row)
fields = ['ResID', 'Position', 'Frame']
with open('{}_A_pos.csv'.format(s), 'a') as k:
writer = csv.writer(k)
writer.writerow(fields)
k.close()
for i in range(len(new_list[:])):
if new_list[i][2] in keys:
with open('{}_A_pos.csv'.format(s), 'a') as k:
new_line = [new_list[i][2], new_list[i][4], new_list[i][5]]
writes = csv.writer(k)
writes.writerow(new_line)
k.close()
read = open('{}_A_pos.csv'.format(s))
reader = csv.reader(read, delimiter=",", quotechar='"')
time_list = []
for row in reader:
time_list.append(row)
for j in range(len(keys)):
for i in range(len(time_list[1:])):
if time_list[i][0] == keys[j]:
pos_matrix[time_list[i][2]] = time_list[i][1]
time_dict[keys[j]] = pos_matrix
return time_dict
window_dict = {}
for filename in os.listdir(Directory):
s = filename.split('.dcd')[0]
fors = s + '.txt'
topos = '/Users/hxb51/Desktop/Q_prof/Displacement_Charge/topo.psf'
if filename.endswith('.dcd'):
print('We are starting with {} \n '.format(s))
u = mda.Universe(topos, filename)
Positions(filename, topos)
name = s + '_Atom.csv'
plot_flips(name)
window_dict[s] = turn_dict(name)
continue
time_an_dict = {}
for filename in os.listdir(Directory):
if filename.endswith('.csv'):
q = filename.split('.csv')[0]
s = q.split('_')[0]
if s in window_dict:
name = s + '_Atom.csv'
time_an_dict[s] = analyze_time(name,window_dict[s])
for filename in os.listdir(Directory):
if filename.endswith('.csv'):
q = filename.split('.csv')[0]
s = q.split('_')[0]
if s in time_an_dict:
atom = list(time_an_dict[s])
ion = time_an_dict[s]
for f in time_an_dict[s]:
x_val = []
y_val = []
fz = ion[f]
for i in time_an_dict[s][f]:
pos = (fz[i])
frame = i
y_val.append(frame)
x_val.append(pos)
'''ions = atom
frame = frames
position = pos
plt.plot(frame, position, label = frames)
plt.xlabel("Frame")
plt.ylabel("Position")
plt.show()
#plt.savefig('{}_Pos.png'.format(s))'''
Everything here runs well except this last bottom block of code. That deals with trying to make a graph from a nested dictionary. Any help would be appreciated!
Thanks!
I figured out the answer:
for filename in os.listdir(Directory):
if filename.endswith('_Atom.csv'):
q = filename.split('.csv')[0]
s = q.split('_')[0]
if s in window_dict:
name = s + '_Atom.csv'
time_an_dict[s] = analyze_time(name,window_dict[s])
new = '{}_A_pos.csv'.format(s)
ions = list(time_an_dict[s])
plt.yticks(np.arange(-50, 50, 5))
plt.xlabel('Frame')
plt.ylabel('Z axis position(Ang)')
plt.title([s])
for i in ions:
x_value = []
y_value = []
time_frame =len(time_an_dict[s][i]) +1
for frame in range(1,time_frame):
frame = str(frame)
x_value.append(int(frame))
y_value.append(float(time_an_dict[s][i][frame]))
plt.plot(x_value, y_value, label=[i])
plt.xticks(np.arange(1, 11, 1))
plt.legend()
plt.savefig('{}_Positions.png'.format(s))
plt.clf()
os.remove("{}_A_pos.csv".format(s))
From there, with the combo of the other parts of the code, it produces these graphs:
For more than 1 file as long as there is more '.dcd' files.

Adding new strings line by line from a file to a new one

I have a data output file in the format below from the script I run.
1. xxx %percentage1
2. yyy %percentage1
.
.
.
I am trying to take the percentages only, and append them to the same formatted file line by line (writing a new file once in the process).
1. xxx %percentage1 %percentage2
2. yyy %percentage1 %percentage2
The main idea is every time I run the code with a source data file I want it to add those percentages to the new file line by line.
1. xxx %percentage1 %percentage2 %percentage3 ...
2. yyy %percentage1 %percentage2 %percentage3 ...
This is what I could come up with:
import os
os.chdir("directory")
f = open("data1", "r")
n=3
a = f.readlines()
b = []
for i in range(n):
b.append(a[i].split(" ")[2])
file_lines = []
with open("data1", 'r') as f:
for t in range(n):
for x in f.readlines():
file_lines.append(''.join([x.strip(), b[t], '\n']))
print(b[t])
with open("data2", 'w') as f:
f.writelines(file_lines)
With this code I get the new file but the appending percentages are all from the first line, not different for each line. And I can only get one set of percentages added only and it is overwriting it rather than adding more down the lines.
I hope I explained it properly, if you can give some help I would be glad.
You can use a dict as a structure to load and write your data. This dict can then be pickled to store the data.
EDIT: added missing return statement
EDIT2: Fix return list of get_data
import pickle
import os
output = 'output'
dump = 'dump'
output_dict = {}
if os.path.exists(dump):
with open(dump, 'rb') as f:
output_dict = pickle.load(f)
def read_data(lines):
""" Builds a dict from a list of lines where the keys are
a tuple(w1, w2) and the values are w3 where w1, w2 and w3
are the 3 words composing each line.
"""
d = {}
for line in lines:
elts = line.split()
assert(len(elts)==3)
d[tuple(elts[:2])] = elts[2]
return d
def get_data(data):
""" Recover data from a dict as a list of strings.
The formatting for each element of the list is the following:
k[0] k[1] v
where k and v are the key/values of the data dict.
"""
lines = []
for k, v in data.items():
line = list(k)
line += [v, '\n']
lines.append(' '.join(line))
return lines
def update_data(output_d, new_d):
""" Update a data dict with new data
The values are appended if the key already exists.
Otherwise a new key/value pair is created.
"""
for k, v in new_d.items():
if k in output_d:
output_d[k] = ' '.join([output_d[k], v])
else:
output_d[k] = v
for data_file in ('data1', 'data2', 'data3'):
with open(data_file) as f:
d1 = read_data(f.readlines())
update_data(output_dict, d1)
print("Dumping data", output_dict)
with open(dump, 'wb') as f:
pickle.dump(output_dict, f)
print("Writing data")
with open(output, 'w') as f:
f.write('\n'.join(get_data(output_dict)))

Is there a better way to compare strings in python then what I have written?

So I work for a company and they asked me to write some code to compare some CSV files to see if companies exist in both of them.
The issue they were having with the tools they were using is if a company for example was Apple in one CSV and Apple, Inc in another, they would be considered not a match. So I wrote a script that does the job after a lot of trial and error and a huge cut down from the original code I wrote. But I feel like there should be a package that does this for you already written.
import csv
import copy
newData = open("test.csv", "r")
dataBase = open("CMPList.csv", "r")
testList = list(newData)
test1List = list(dataBase)
preclin = []
cmp = []
newData.close()
dataBase.close()
notInBoth = []
for i in testList:
new = i.strip()
preclin.append(new)
for j in test1List:
new = j.strip()
cmp.append(new)
notInBoth = copy.deepcopy(preclin)
for a in preclin:
for b in cmp:
print(a, b)
if a[0] == b[0]:
if a[:4] in b[:4]:
userinput = input("Are these the same company: [" + a + "] and [" + b + "] [y for yes, n for no]\n")
if userinput == "y":
print("--------------------------------------------------------------\n")
print("["a + "] has been confirmed as the same company as [" + b + "]\n")
print("--------------------------------------------------------------\n")
notInBoth.remove(a)
if userinput == "n":
print("----------------------------------------------------------\n")
print("These companies do not match. Continuing matching process.\n")
print("----------------------------------------------------------\n")
print("All comparisons complete, creating new CSV of companies not in our database.\n")
csvFile = open('NotInDatabase.csv', 'w')
writer = csv.writer(csvFile)
for item in notInBoth:
writer.writerow([item])
csvFile.close()
print("CSV creation complete. Exiting...")

How to avoid repetion with my code

I've written a code that extracts all the words from two files, and only returns the words that are in both of the file.
However, i have done some repetition and that is not considered a good style, so i wondering if it would be possible to avoid this with my code?
import re
def print_common_words(filename_1, filename_2):
try:
input_file = open(filename_1, 'r')
source_string = input_file.read().lower()
input_file.close()
all_words1 = set(re.findall('[a-zA-Z]+', source_string))
input_file = open(filename_2, 'r') #Repetition
source_string = input_file.read().lower() #Repetition
input_file.close() #Repetition
all_words2 = set(re.findall('[a-zA-Z]+', source_string)) #Repetition
intersection_list = all_words1.intersection(all_words2)
union_list = []
for word in intersection_list:
union_list += [word]
union_list.sort()
for i in union_list:
print(i)
except FileNotFoundError:
print("A file could not be found.")
Use a method to factor out the duplicated code.
def get_file(file):
input_file = open(file, 'r')
source_string = input_file.read().lower()
input_file.close()
return set(re.findall('[a-zA-Z]+', source_string))
Call it like:
all_words1 = get_file(filename_1)
all_words2 = get_file(filename_2)
Eg:
all_words1 = get_file(filename_1)
all_words2 = get_file(filename_2)
intersection_list = all_words1.intersection(all_words2)
union_list = []
for word in intersection_list:
union_list += [word]
union_list.sort()
for i in union_list:
print(i)

Resources