Using Map in Open Binary - python-3.x

I'm trying to use map() in my script, which reads files and converts them into binary form.
I can't get the code below to work. Any help?
def binary_file_reader(file_data):
    """Return the contents of the file at *file_data* as a hexadecimal string.

    The file is opened in binary mode, its raw bytes are echoed to stdout,
    and the hex representation is returned as ``str``.
    """
    with open(file_data, 'rb') as handle:
        raw_bytes = handle.read()
    print(raw_bytes)
    hex_bytes = binascii.hexlify(raw_bytes)
    return hex_bytes.decode("utf-8")
Then the main which calls the above
# Script entry point: collect the path of every file below the current working
# directory, then try to convert each file with binary_file_reader.
if __name__ == "__main__":
device_directory = os.getcwd()
# NOTE(review): all_file_names is appended to below but is not initialised in
# this snippet -- presumably defined elsewhere; confirm, otherwise NameError.
for r, d, f in os.walk(device_directory):
for file in f:
file_data = os.path.join(r, file)
all_file_names.append(file_data)
try:
# NOTE(review): in Python 3 map() returns a lazy iterator, so no file is read
# on this line and the print below shows the map object, not the hex strings.
# Consuming it (e.g. list(map(...))) is what actually calls binary_file_reader.
binary_data = map(binary_file_reader, all_file_names)
print(binary_data)
except IOError:
print("cannot read")

Because map applies binary_file_reader to every element inside file_data, it doesn't do what you think it does.
In your case, file_data is your actual file path as a str, e.g., /tmp/a.txt. If you use map on a str, it is applied on every letter, so what you do will be expanded to
# Illustration: map() over the str '/tmp/a.txt' calls the function once per
# character, i.e. it is equivalent to the following sequence of calls.
binary_file_reader('/')
binary_file_reader('t')
binary_file_reader('m')
binary_file_reader('p')
binary_file_reader('/')
binary_file_reader('a')
binary_file_reader('.')
binary_file_reader('t')
binary_file_reader('x')
binary_file_reader('t')
binary_file_reader(file_data) should produce the desired result.

Related

List is empty when appending when using recursion

I have two functions. The first one is used to get a list of paths to text files, and the second one is used to iterate over this list of paths and then check if they include the word password. But because of the Try Except statement in the second function, I had to use recursion to make it continue running unless there's another way if possible to provide below. My problem is that the list returned in the second function is empty why and how to fix it?
def search_txt(root="C:\\", extension=".txt"):
    """Collect the full paths of all files under *root* ending in *extension*.

    Args:
        root: Directory that is walked recursively. Defaults to the root of
            the C: drive, which is what the original hard-coded.
        extension: File-name suffix to match. Defaults to ``".txt"``.

    Returns:
        A list of full paths (directory joined with file name) in the order
        ``os.walk`` yields them. The list is empty when *root* does not
        exist, because ``os.walk`` ignores listing errors by default.
    """
    list_of_txt = []
    for dir_path, _sub_dirs, files in os.walk(root):
        # Match on the end of the file name, so any suffix can be searched.
        list_of_txt.extend(
            os.path.join(dir_path, file_name)
            for file_name in files
            if file_name.endswith(extension)
        )
    return list_of_txt
def search_pass_file(list_of_files: list):
"""Function to iterate over each text file, searching if the word "password" is included -> Returns the text
file's path """
list_of_pass = []
if len(list_of_files) != 0:
# Paths are taken from the end of the list with pop(), so the caller's list
# is consumed while it is being processed.
for i in range(len(list_of_files)):
file = list_of_files.pop()
try:
with open(file, encoding="utf8") as f:
# The path is appended once per matching line, so a file with several
# matching lines appears several times in list_of_pass.
for line in f.readlines():
if "password" in line:
list_of_pass.append(file)
# NOTE(review): on a decode or permission error the function returns the
# result of a fresh recursive call on the remaining paths; the matches already
# collected in this call's list_of_pass are not part of that result.
except UnicodeDecodeError:
return search_pass_file(list_of_files)
except PermissionError:
return search_pass_file(list_of_files)
# NOTE(review): indentation is lost in this listing. If this else pairs with
# the length check above, list_of_pass is returned only for an empty input and
# a non-empty input that raises no error falls off the end, returning None.
else:
return list_of_pass
if __name__ == '__main__':
    # Gather every .txt path first, then print the ones that mention "password".
    print(search_pass_file(search_txt()))
You're returning list_of_pass only if len(list_of_files) == 0 (it's in the else block). Your return statement should occur after the loop (which should be a while one btw)
You can catch several errors in one line by putting them in parentheses: except (UnicodeDecodeError, PermissionError), or catch all exceptions with except Exception (for instance, you're not handling FileNotFoundError).
I'd reduce your function to:
def search_pass_file(list_of_files: list):
    """Return the paths of the files whose text contains the word "password".

    Paths are taken from the end of *list_of_files* with ``pop()``, so the
    list is empty when the function returns and the result follows that
    (last-to-first) order. Each matching file is reported once.

    Files that cannot be opened or decoded as UTF-8 are skipped. Skipping is
    done iteratively rather than by recursing on the remaining paths, so the
    number of unreadable files is not limited by the recursion depth.

    Args:
        list_of_files: Paths of the text files to inspect; consumed in place.

    Returns:
        A list with the path of every readable file containing "password".
    """
    list_of_pass = []
    while list_of_files:
        file = list_of_files.pop()
        try:
            with open(file, encoding="utf8") as f:
                # Stop reading at the first matching line: one hit is enough.
                if any("password" in line for line in f):
                    list_of_pass.append(file)
        except Exception:
            # Deliberate best effort: a missing, unreadable or non-UTF-8 file
            # must not abort the scan of the remaining paths.
            continue
    return list_of_pass
Edit: also in your except block, you should append the returned value of the recursive function to list_of_pass otherwise you'll lose the files found after the error occurs.

Why does configparser overwrite my existing files when reading from within an open() loop?

I'm trying to manipulate a config file that will look like this:
[DEFAULT]
first_api_key = foobar
second_api_key = helloworld
Now say I'd like to replace first_api_key with "guccimane" instead of "foobar". Here's the function I'm using to do that:
# Sets first_api_key to "guccimane" in /example/path/settings.ini.
def my_func():
k, v = "first_api_key", "guccimane"
file = os.path.join("/example/path", "settings.ini")
config = configparser.ConfigParser()
# NOTE: mode "w" truncates settings.ini as soon as it is opened, so the
# config.read(file) below parses an already-empty file and the keys that were
# on disk are never loaded into config before it is written back.
with open(file, "w") as f:
config.read(file)
config["DEFAULT"][k] = v
config.write(f)
However when I run that, second_api_key is removed and I'm left with:
[DEFAULT]
first_api_key = foobar
However when I change my function slightly to be:
def my_func():
    """Set ``first_api_key`` to "guccimane" in /example/path/settings.ini.

    The existing file is parsed before it is reopened for writing, so every
    other key it contains is written back unchanged.
    """
    option, new_value = "first_api_key", "guccimane"
    ini_path = os.path.join("/example/path", "settings.ini")
    parser = configparser.ConfigParser()
    # Load the current contents first; opening with "w" below empties the file.
    parser.read(ini_path)
    with open(ini_path, "w") as ini_file:
        parser["DEFAULT"][option] = new_value
        parser.write(ini_file)
It works exactly the way I want it to, and it only alters the first_api_key value and the second_api_key remains. Does anyone know why this is getting overwritten?

Type hinting a method that returns a string or error in Python 3.7+

I have a Python script that starts with a method searching for a CSV file in the current directory or in the Downloads directory, in order to do some processing. If no CSV file is found, the program should not run and should exit with an error message.
I type annotated a tentative method as follows:
import glob
import os
def get_csv_filename() -> str:
""" Returns first csv filename in current folder or Downloads folder """
# First choice: any *.csv in the current working directory.
csv_filenames = glob.glob('*.csv')
if csv_filenames:
return csv_filenames[0]
# Fallback: any *.csv in the user's Downloads folder.
home = os.path.expanduser("~")
csv_filenames = glob.glob(home + "/Downloads/*.csv")
if csv_filenames:
return csv_filenames[0]
# If I don't use return, I also get problems with pylint
# NOTE(review): exit() raises SystemExit, so nothing is ever returned from this
# line; a type checker that treats the call as returning None reports a
# mismatch with the declared `str` return type.
return exit("Error: no file found, check the documentation for more info.")
def main() -> None:
    """Look up the CSV file to work on; the processing steps are not shown here."""
    csv_path = get_csv_filename()
If I type check with eg. pytype I get the error:
get_csv_filename: bad option in return type [bad-return-type]
Expected: str
Actually returned: None
What would you recommend doing to make this code compliant?
This section of PEP 484 may be helpful. I don't have mypy or pytype installed to try it, but maybe this would work:
from typing import NoReturn
def get_csv_filename() -> str:
""" Returns first csv filename in current folder or Downloads folder """
csv_filenames = glob.glob('*.csv')
if csv_filenames:
return csv_filenames[0]
# Placeholder for code left out of this sketch -- presumably the
# Downloads-folder lookup; confirm before use.
...
# stop() is annotated NoReturn, i.e. it is not expected to come back.
stop("Error: no file found, check the documentation for more info.")
# Keeps an explicit `str` return on every path; not reached if stop() exits.
return ""
def stop(msg) -> NoReturn:
    """Terminate the program, reporting *msg* as the exit status.

    Args:
        msg: Exit status passed on to ``sys.exit``; a string is printed to
            stderr and results in exit code 1.

    Raises:
        SystemExit: always, carrying *msg* as its ``code``.
    """
    # The bare exit() helper is injected by the site module for interactive
    # use and may be absent (e.g. under ``python -S``); sys.exit always exists.
    import sys

    sys.exit(msg)
Another option would be:
from typing import Union
# Signature sketch only (body elided with `...`): Union[None, str] declares
# that the function may return None instead of a filename.
def get_csv_filename() -> Union[None, str]:
...
From the ideas of Steve Bremer's response, the problem can be solved with a simpler approach:
from typing import Optional
# Signature sketch only (body elided with `...`): Optional[str] declares that
# the function returns either a str or None.
def get_csv_filename() -> Optional[str]:
...
In fact, Optional[something] is equivalent to Union[None, something].

Store scrape results and search in results with Python and Pandas?

as part of my Ph.D. research, I am scraping numerous webpages and search for keywords within the scrape results.
This is how I do it thus far:
# load the data as a pandas data frame with a column df.url
df = pd.read_excel('sample.xls', header=0)


# define keyword search function
def contains_keywords(link, keywords):
    """Report whether the page at *link* contains any of *keywords*.

    Args:
        link: URL of the page to download.
        keywords: Iterable of strings to look for in the page text.

    Returns:
        1 if at least one keyword occurs in the page text, 0 if none does,
        or the string "Wrong/Missing URL" when the request fails.
    """
    try:
        output = requests.get(link).text
    except requests.RequestException:
        # Only request failures are reported as a bad URL. A bare `except:`
        # would also swallow KeyboardInterrupt, making a long scraping run
        # impossible to cancel, and would hide genuine programming errors.
        return "Wrong/Missing URL"
    return int(any(x in output for x in keywords))


# define the relevant keywords
mykeywords = ('for', 'bar')
# store search results in new column 'results'
df['results'] = df.url.apply(lambda url: contains_keywords(url, mykeywords))
This works just fine. I only have one problem: the list of relevant keywords mykeywords changes frequently, whilst the webpages stay the same. Running the code takes a long time, since I send the same requests over and over.
I have two questions:
(1) Is there a way to store the results of requests.get(link).text?
(2) And if so, how do I search within the saved file(s) to produce the same result as with the current script?
As always, thank you for your time and help! /R
You can download the content of the urls and save them in separate files in a directory (eg: 'links')
def get_link(url):
    """Download *url* and save the response text in the /path/to/links directory.

    The file name is the URL with every '/' and ':' replaced by '_'. When the
    request fails, a message is printed and nothing is written.
    """
    safe_name = url.replace('/', '_').replace(':', '_')
    file_name = os.path.join('/path/to/links', safe_name)
    try:
        response = requests.get(url)
    except Exception:
        print("Failded to get " + url)
        return
    with open(file_name, 'w') as out_file:
        out_file.write(response.text)
Then modify the contains_keywords function to read local files, so you won't have to use requests every time you run the script.
def contains_keywords(link, keywords):
    """Search the locally saved copy of *link* for any of *keywords*.

    The saved file is looked up in /path/to/links under the URL with every
    '/' and ':' replaced by '_'. Returns 1 when a keyword is found, 0 when
    none is, and "Wrong/Missing URL" (after printing the error) when the file
    cannot be read or searched.
    """
    cached_name = link.replace('/', '_').replace(':', '_')
    file_name = os.path.join('/path/to/links', cached_name)
    try:
        with open(file_name) as cached_page:
            output = cached_page.read()
        found = any(keyword in output for keyword in keywords)
        return int(found)
    except Exception as e:
        print(f"Can't access file: {file_name}\n{e}")
        return "Wrong/Missing URL"
Edit: I just added a try-except block in get_link and used an absolute path for file_name.

File won't open when being passed from a function

How come it's not opening the file I put into the function? It opens when I plug the file name directly into the main program, but not when I try to pass it through the function. It gives me a FileNotFoundError.
def get_valid_filename(prompt):
'''Use prompt (a string) to ask the user to type the name of a file. If
the file does not exist, keep asking until they give a valid filename.
Return the name of that file.'''
filename = input(prompt)
# NOTE(review): this `if` re-prompts at most once; the second answer is
# returned without being checked, although the docstring promises to keep
# asking until the file exists.
if os.path.isfile(filename) == False:
print ("That file does not exist.")
filename = input(prompt)
return filename
if __name__ == '__main__':
    prompt = 'enter the name of the file with unknown author:'
    mystery_filename = get_valid_filename(prompt)
    # read() returns the whole file as one string; the with-block closes the
    # file handle as soon as the text has been read instead of leaking it.
    with open(mystery_filename, 'r') as mystery_file:
        text = mystery_file.read()
    print(text)
get_valid_filename should look like this:
def get_valid_filename(prompt):
    """Ask for a filename with *prompt* until the user names an existing file.

    Args:
        prompt: Text shown to the user each time a filename is requested.

    Returns:
        The first answer that refers to an existing regular file.
    """
    while True:
        filename = input(prompt)
        # isfile (not exists) so that a directory name is rejected here
        # instead of failing later when the caller tries to open it.
        if os.path.isfile(filename):
            return filename
        print('that file does not exist')

Resources