I have a text file that I am trying to write to a JSON file. Some of the values are returned as None, True, or False. I need to replace None with the string "None", True with "True", and False with "False".
I tried adding the line:
data=data.replace(None,"None")
However, I get an error:
Traceback (most recent call last):
File "parse_get_drivers.py", line 17, in <module>
data=data.replace(None,"None")
TypeError: replace() argument 1 must be str, not None
Here is my script:
import json
import re
from pprint import pprint
import pandas as pd
inHandler = open('get_drivers.txt', 'r')
outHandler = open('drivers.json', 'w')
data = ''
for line in inHandler.readlines():
    print('src:' + line)
    line = line.replace("}]},","}]},\r")
    data += line
    print('replace:' + line)
data=data.replace("'", '"')
data=data.replace(None,"None")
outHandler.write(data)
inHandler.close()
outHandler.close()
The required result is to replace None, True and False values with "None", "True" and "False".
You should parse the input as JSON instead of parsing it line by line as separate strings, so that you can recursively traverse the data structure to replace None (or in JSON's terms, null) with "None":
def replace(data, search, replacement, parent=None, index=None):
    if data == search:
        parent[index] = replacement
    elif isinstance(data, (list, dict)):
        for index, item in enumerate(data) if isinstance(data, list) else data.items():
            replace(item, search, replacement, parent=data, index=index)
so that:
import json
d = json.loads('{"a": 1, "b": [1, null], "c": {"d": null}}')
print(d)
replace(d, None, 'None')
print(d)
print(json.dumps(d))
outputs:
{'a': 1, 'b': [1, None], 'c': {'d': None}}
{'a': 1, 'b': [1, 'None'], 'c': {'d': 'None'}}
{"a": 1, "b": [1, "None"], "c": {"d": "None"}}
As in the title, I am using an A* search method for this graph. Most of the code was provided; I modified it to go from A to M with the appropriate values. However, I keep getting this error:
Traceback (most recent call last):
File "main.py", line 94, in <module>
graph1.a_star_algorithm('A','M')
File "main.py", line 57, in a_star_algorithm
for (m,weight) in self.get_neighbors(n):
ValueError: not enough values to unpack (expected 2, got 1)
I have tried making M null (M: []), but that has not worked. I am just not sure where to go from here. Any advice would be great. Here is the code:
from collections import deque
class Graph:
    def __init__(self,adjac_lis):
        self.adjac_lis=adjac_lis

    def get_neighbors(self,v):
        return self.adjac_lis[v]

    def h(self, n):
        H={
            'A': 1,
            'B': 1,
            'C': 1,
            'D': 1,
            'E': 1,
            'F': 1,
            'G': 1,
            'H': 1,
            'I': 1,
            'J': 1,
            'K': 1,
            'L': 1,
            'N': 1,
            'M': 1
        }
        return H[n]

    def a_star_algorithm(self,start, stop):
        open_lst=set([start])
        closed_lst=set([])
        dist={}
        dist[start] = 0
        par={}
        par[start]=start
        while len(open_lst) > 0:
            n = None
            for v in open_lst:
                if n==None or dist[v]+self.h(v)<dist[n]+self.h(n):
                    n=v
            if n==None:
                print('Path does not exist!')
                return None
            if n==stop:
                reconst_path=[]
                while par[n]!=n:
                    reconst_path.append(n)
                    n=par[n]
                reconst_path.append(start)
                reconst_path.reverse()
                print('Path found: {}'.format(reconst_path))
                return reconst_path
            for (m,weight) in self.get_neighbors(n):
                if m not in open_lst and m not in closed_lst:
                    open_lst.add(m)
                    par[m]=n
                    dist[m]=dist[n]+weight
                else:
                    if dist[m]>dist[n]+weight:
                        dist[m]=dist[n]+weight
                        par[m]=n
                        if m in closed_lst:
                            closed_lst.remove(m)
                            open_lst.add(m)
            open_lst.remove(n)
            closed_lst.add(n)
        print('Path does not exist!')
        return None

adjac_lis={
    'A':[('B',10),('C',4)],
    'B':[('G',2),('E'),9],
    'C':[('D',10),('F',7),('E',8)],
    'D':[('C',10)],
    'E':[('J',7),('H',3)],
    'F':[('E',4)],
    'G':[('I',5)],
    'H':[('I',2),('L',6)],
    'I':[('L',3)],
    'J':[('E',7),('L',2),('K',3),('M',10)],
    'K':[('M',6)],
    'L':[('N',3)],
    'N':[('M',5)],
    'M':[('N',5)],
}
graph1=Graph(adjac_lis)
graph1.a_star_algorithm('A','M')
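Judging from the traceback, the unpacking fails because one neighbour entry is not a (node, weight) pair: the adjacency list for 'B' contains ('E'),9, and ('E') is just the string 'E' (the parentheses are only grouping), so unpacking it yields a single character. A small illustration (the suggested fix at the end is an assumption about the intended graph):
pair = ('G',2)
not_a_pair = ('E')
print(type(pair), type(not_a_pair))  # <class 'tuple'> <class 'str'>
try:
    m, weight = not_a_pair           # a 1-character string unpacks to a single value
except ValueError as exc:
    print(exc)                       # not enough values to unpack (expected 2, got 1)
# likely intended entry: 'B':[('G',2),('E',9)]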
I have a list of dictionaries in the form:
my_list = [{'a': 'Jane', 'b': 32}, {'a': 'Jack', 'b': 54}]
I want to re-order this to the form:
new_dt = [{'b': 32, 'a': 'Jane'}, {'b': 54, 'a': 'Jack'}]
I have used the following code:
order_dict = ['b', 'a']
for dt in my_list:
    for k in order_dict:
        new_dt = my_list[k]
Traceback
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-87-0d451ee34800> in <module>
3 for dt in my_list:
4 for k in order_dict:
----> 5 new_dt = my_list[k]
TypeError: list indices must be integers or slices, not str
You can simply do:
my_list = [{'a': 'Jane', 'b': 32}, {'a': 'Jack', 'b': 54}]
print([dict(sorted(x.items(),key = lambda x:x[0],reverse=True)) for x in my_list])
The OUTPUT:
[{'b': 32, 'a': 'Jane'}, {'b': 54, 'a': 'Jack'}]
Explanation
sorted(x.items(),key = lambda x:x[0],reverse=True)
Here we are just sorting the items (keys and values) in the dictionary to get the order as required.
key = lambda x:x[0]
This component ensures that we are sorting based on the first element of each item (essentially the key of the dictionary).
We set reverse=True to get the desired sequence.
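Note that sorting in reverse only happens to produce the ['b', 'a'] order wanted here. If the target order is arbitrary rather than reverse-alphabetical, a small sketch that rebuilds each dict from the explicit order_dict list (reusing the names from the question) may be closer to the original intent:
my_list = [{'a': 'Jane', 'b': 32}, {'a': 'Jack', 'b': 54}]
order_dict = ['b', 'a']

# Rebuild each dict, inserting keys in the requested order (Python 3.7+ keeps insertion order).
new_dt = [{k: d[k] for k in order_dict} for d in my_list]
print(new_dt)  # [{'b': 32, 'a': 'Jane'}, {'b': 54, 'a': 'Jack'}]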
I have a text file below:
A test B echo C delete
A test B echo C delete D modify
A test B echo C delete
I want to parse the text file above, translate it into a list of lists, and then into dictionaries.
The expected list of lists is:
[['A', 'test', 'B', 'echo', 'C', 'delete'], ['A', 'test', 'B', 'echo', 'C', 'delete', 'D', 'modify'], ['A', 'test', 'B', 'echo', 'C', 'delete']]
The final result as a list of dictionaries is:
[{'A':'test','B':'echo','C':'delete'},{'A':'test','B':'echo','C':'delete','D': 'modify'},{'A':'test', 'B':'echo', 'C':'delete'}]
This is my script:
#!/usr/bin/python3
def listToDict(list):
    listDict = {list[i]: list[i + 1] for i in range (0, len(list), 2)}
    return listDict

def parse_file(filepath):
    string_to_listoflist = []
    with open(filepath, 'r') as file_object:
        lines = file_object.readlines()
        for line in lines:
            string_to_listoflist.append(line.rstrip().split())
    dictionary = listToDict(string_to_listoflist)
    print(dictionary)

if __name__ == '__main__':
    filepath = 'log.txt'
    parse_file(filepath)
The above script produces the error below:
Traceback (most recent call last):
File "parse.py", line 19, in <module>
parse_file(filepath)
File "parse.py", line 14, in parse_file
dictionary = listToDict(string_to_listoflist)
File "parse.py", line 4, in listToDict
listDict = {list[i]: list[i + 1] for i in range (0, len(list), 2)}
File "parse.py", line 4, in <dictcomp>
listDict = {list[i]: list[i + 1] for i in range (0, len(list), 2)}
TypeError: unhashable type: 'list'
Now I add another loop over the list of lists, as below:
#!/usr/bin/python3
def listToDict(list):
    listDict = {list[i]: list[i + 1] for i in range (0, len(list), 2)}
    return listDict

def parse_file(filepath):
    string_to_listoflist = []
    dictionary = {}
    with open(filepath, 'r') as file_object:
        lines = file_object.readlines()
        for line in lines:
            string_to_listoflist.append(line.rstrip().split())
    for e in string_to_listoflist:
        dictionary = listToDict(e)
    print(dictionary)

if __name__ == '__main__':
    filepath = 'log.txt'
    parse_file(filepath)
The script above produces an unexpected result even though I define the dictionary variable before the loop:
{'A': 'test', 'B': 'echo', 'C': 'delete'}
Then I change the position of the print command as below:
#!/usr/bin/python3
def listToDict(list):
    listDict = {list[i]: list[i + 1] for i in range (0, len(list), 2)}
    return listDict

def parse_file(filepath):
    string_to_listoflist = []
    dictionary = {}
    with open(filepath, 'r') as file_object:
        lines = file_object.readlines()
        for line in lines:
            string_to_listoflist.append(line.rstrip().split())
    for e in string_to_listoflist:
        dictionary = listToDict(e)
        print(dictionary)

if __name__ == '__main__':
    filepath = 'log.txt'
    parse_file(filepath)
The result for the script above is still not what I expect:
{'A': 'test', 'B': 'echo', 'C': 'delete'}
{'A': 'test', 'B': 'echo', 'C': 'delete', 'D': 'modify'}
{'A': 'test', 'B': 'echo', 'C': 'delete'}
Can anyone help me resolve my issue?
Thanks
In your first attempt, your variable string_to_listoflist is a list of lists.
When you pass it to your function listToDict, the function iterates on the parent level of the list instead of iterating over each list within the parent list. Thus, the first entry attempted in the dictionary is
['A', 'test', 'B', 'echo', 'C', 'delete']:['A', 'test', 'B', 'echo', 'C', 'delete', 'D', 'modify']
rather than your intended
'A':'test'
This causes the error you observe, TypeError: unhashable type: 'list', since a list (which is mutable) is being used as a dictionary key, and dictionary keys must be immutable (hashable).
Adding the extra loop over each element of the parent list is the correct way to resolve this. However, if you want your final result to be inside a list, you simply need to append each result to a list.
In other words, perhaps the following:
dictionaries=[]
for e in string_to_listoflist:
    dictionary = listToDict(e)
    dictionaries.append(dictionary)
print(dictionaries)
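Equivalently, the loop can be collapsed into a list comprehension (a small sketch using the same names):
dictionaries = [listToDict(e) for e in string_to_listoflist]
print(dictionaries)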
You can use the re module to obtain your desired dicts.
For example:
import re
with open('file.txt', 'r') as f_in:
out = [dict(re.findall(r'([A-Z]+) ([^\s]+)', line)) for line in f_in]
print(out)
Prints:
[{'A': 'test', 'B': 'echo', 'C': 'delete'}, {'A': 'test', 'B': 'echo', 'C': 'delete', 'D': 'modify'}, {'A': 'test', 'B': 'echo', 'C': 'delete'}]
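If you would rather avoid re and stay close to the split-into-pairs idea from the question, a sketch using slicing and zip should produce the same output (assuming the same file.txt):
with open('file.txt', 'r') as f_in:
    out = []
    for line in f_in:
        tokens = line.split()
        # pair each even-indexed token (key) with the token that follows it (value)
        out.append(dict(zip(tokens[0::2], tokens[1::2])))
print(out)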
I have a library function that returns a compound object containing generators, which can't be pickled (trying to pickle generates the error TypeError: can't pickle dict_keys objects).
When I try to parallelize via Spark, it fails on the collect step due to the pickle failure (NB: running via Databricks with the default sc).
Here is a minimal repro:
test_list = [{"a": 1, "b": 2, "c": 3},
{"a": 7, "b": 3, "c": 5},
{"a": 2, "b": 3, "c": 4},
{"a": 9, "b": 8, "c": 7}]
parallel_test_list = sc.parallelize(test_list)
parallel_results = parallel_test_list.map(lambda x: x.keys())
local_results = parallel_results.collect()
The stack trace I receive is long; I think the relevant part is:
Traceback (most recent call last):
File "/databricks/spark/python/pyspark/worker.py", line 403, in main
process()
File "/databricks/spark/python/pyspark/worker.py", line 398, in process
serializer.dump_stream(func(split_index, iterator), outfile)
File "/databricks/spark/python/pyspark/serializers.py", line 418, in dump_stream
bytes = self.serializer.dumps(vs)
File "/databricks/spark/python/pyspark/serializers.py", line 597, in dumps
return pickle.dumps(obj, protocol)
TypeError: can't pickle dict_keys objects
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:490)
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:626)
at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:609)
You can write a recursive helper function to "consume" all the nested generator objects, and map all the rows in your RDD with this function.
For example, here's a function that will turn nested generators into lists:
from inspect import isgenerator, isgeneratorfunction

def consume_all_generators(row):
    if isinstance(row, str):
        return row
    elif isinstance(row, dict):
        return {k: consume_all_generators(v) for k, v in row.items()}
    output = []
    try:
        for val in row:
            if isgenerator(val) or isgeneratorfunction(val):
                output.append(list(consume_all_generators(val)))
            else:
                output.append(consume_all_generators(val))
        return output
    except TypeError:
        return row
Now call map(consume_all_generators) before collect:
local_results = parallel_results.map(consume_all_generators).collect()
print(local_results)
#[['a', 'c', 'b'], ['a', 'c', 'b'], ['a', 'c', 'b'], ['a', 'c', 'b']]
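For the minimal repro itself, it may be simpler to avoid creating dict_keys objects in the first place by converting to a list inside the map (a sketch, assuming the same sc and test_list as above):
# dict.keys() returns a dict_keys view, which pickle rejects; a plain list pickles fine.
parallel_results = sc.parallelize(test_list).map(lambda x: list(x.keys()))
local_results = parallel_results.collect()
print(local_results)  # four lists of keys, e.g. ['a', 'b', 'c'] for each row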
I have a text file from which I'm trying to get the most commonly used words. I'm using Counter, but it just seems to return 1 for each word.
I'm learning, so I'm using the Simple Sabotage Field Manual for my text file.
import re
from collections import Counter

my_file = "fieldManual.txt"

#### GLOBAL VARIABLES
lst = [] # used in unique_words
cnt = Counter()
#########

def clean_word(the_word):
    #new_word = re.sub('[^a-zA-Z]', '',the_word)
    new_word = re.sub('^[^a-zA-z]*|[^a-zA-Z]*$', '', the_word)
    return new_word

def unique_words():
    with open(my_file, encoding="utf8") as infile:
        for line in infile:
            words = line.split()
            for word in words:
                edited_word = clean_word(word)
                if edited_word not in lst:
                    lst.append(edited_word)
                    cnt[edited_word] += 1
    lst.sort()
    word_count = Counter(lst)
    return(lst)
    return (cnt)

unique_words()
test = ['apple','egg','apple','banana','egg','apple']
print(Counter(lst)) # returns '1' for everything
print(cnt) # same here
So, print(Counter(test)) returns, correctly,
Counter({'apple': 3, 'egg': 2, 'banana': 1})
But my attempt to print the most frequent words in my lst returns
Counter({'': 1, 'A': 1, 'ACTUAL': 1, 'AGREE': 1, 'AGREEMENT': 1, 'AK': 1, 'AND': 1, 'ANY': 1, 'ANYTHING': 1, 'AR': 1, 'AS-IS': 1, 'ASCII': 1, 'About': 1, 'Abstract': 1, 'Accidentally': 1, 'Act': 1, 'Acts': 1, 'Add': 1, 'Additional': 1, 'Adjust': 1, 'Advocate': 1, 'After': 1, 'Agriculture': 1, ...
Following the answer from here, I tried adding cnt.update(edited_word) inside the if edited_word not in lst: block, but then, printing cnt, I just get single characters:
Counter({'e': 2401, 'i': 1634, 't': 1470, 's': 1467, 'n': 1455, 'r': 1442, 'a': 1407, 'o': 1244, 'l': 948, 'c': 862, 'd': 752, 'u': 651, 'p': 590, 'g': 564, 'm': 436, ...
How do I return the frequency of each unique word from my .txt file?
You only append the word to the list if it isn't already found. As such, every word will only show up once.
There are a few things wrong here. You should either increment the counter regardless of whether the word is in the list, or just call the counter on the list built from the split string. You have back-to-back return statements (the second won't be executed). You are computing the count of the list with word_count and then ignoring that output (which would also be 1 for every word, since lst only holds unique words). Just cleaning up this code probably would have helped solve the problem.
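For instance, a minimal sketch of the first suggestion (reusing my_file, clean_word, cnt, and lst from the question, and counting every occurrence rather than only the first):
def unique_words():
    with open(my_file, encoding="utf8") as infile:
        for line in infile:
            for word in line.split():
                edited_word = clean_word(word)
                cnt[edited_word] += 1        # count every occurrence
                if edited_word not in lst:   # lst still tracks unique words only
                    lst.append(edited_word)
    return cnt

unique_words()
print(cnt.most_common(10))  # the ten most frequent words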