Gooey from argument to read file - python-3.x

So the first argument is the file to open and the second argument is the pattern (or text) to search for.
The program is made to scan a document and find items equal to "Pattern" and print the detector address in "DetectorPattern". I got this program working without Gooey but i thought about adding it for ease of use. My problem lies when the argument get passed to the "with open(filename)" line.
This is the error i get:
Traceback (most recent call last):
File "C:/Users/haral/Google Drive (synkroniseres ikke)/Programmering/Programmer/LogSearch/LogSearchGooey.py", line 42, in <module>
main()
File "C:\Users\haral\PycharmProjects\AutomateBoringStuff\venv\lib\site-packages\gooey\python_bindings\gooey_decorator.py", line 134, in <lambda>
return lambda *args, **kwargs: func(*args, **kwargs)
File "C:/Users/haral/Google Drive (synkroniseres ikke)/Programmering/Programmer/LogSearch/LogSearchGooey.py", line 27, in main
with open(filename, 'r') as reader:
TypeError: expected str, bytes or os.PathLike object, not Namespace
import os
import re
from gooey import Gooey, GooeyParser
pattern = ""
# Chosen search pattern
detectorPattern = re.compile(r'\d\d\.\d\d\d')
# Fire alarm detector pattern, etc. 03.040
filename = ""
foundDetector = []
#Gooey
def main():
parser = GooeyParser(description="Testing")
parser.add_argument(
"Filename",
help="Choose a file",
widget="FileChooser"
)
parser.add_argument(
"store",
help="Choose a pattern to search for"
)
filename = parser.parse_args()
with open(filename, 'r') as reader:
# Read and print the entire file line by line
for line in reader:
findLine = re.search(pattern, line)
if findLine is not None:
mo = detectorPattern.search(findLine.string)
mog = mo.group()
if mog not in foundDetector:
foundDetector.append(mog)
for x in foundDetector:
print(x)
if __name__ == '__main__':
main()

Related

Pygtk Liststore most recent call last after Liststore update

I created simple code in PyGtk that displays a list of names. After clicking the Update button, the list of names will be updated. Unfortunately, I can't get rid of the "most recent call last" error that appears after clicking the Update button. Can anyone advise me on the problem? This is probably a problem with a line of code under "# select and row".
EDIT:
Script returns error message:
Traceback (most recent call last):
File "/home/radek/Desktop/stackowr1.py", line 18, in choiceRow
line = model[treeiter][0]
File "/usr/lib/python3/dist-packages/gi/overrides/Gtk.py", line 849, in __getitem__
aiter = self._getiter(key)
File "/usr/lib/python3/dist-packages/gi/overrides/Gtk.py", line 837, in _getiter
aiter = self.get_iter(key)
File "/usr/lib/python3/dist-packages/gi/overrides/Gtk.py", line 871, in get_iter
path = self._coerce_path(path)
File "/usr/lib/python3/dist-packages/gi/overrides/Gtk.py", line 846, in _coerce_path
return TreePath(path)
File "/usr/lib/python3/dist-packages/gi/overrides/Gtk.py", line 1210, in __new__
path = ":".join(str(val) for val in path)
TypeError: 'NoneType' object is not iterable
My code:
```python
# -*- coding: utf-8 -*-
import gi
gi.require_version("Gtk", "3.0")
from gi.repository import Gtk
class MyWindow(Gtk.Window):
def __init__(self):
super(MyWindow, self).__init__()
self.set_border_width(3)
self.set_default_size(800, 600)
self.name1 = "John"
self.name2 = "George"
def choiceRow(selection):
model, treeiter = selection.get_selected()
line = model[treeiter][0]
print(line)
def update_name(self):
self.name1 = "Jeane"
self.name2 = "Margot"
print(self.name1, self.name2)
win.liststore.clear()
win.liststore.append([self.name1])
win.liststore.append([self.name2])
button = Gtk.Button(label = "Update")
button.connect("clicked", update_name)
self.layout = Gtk.Layout()
self.tree = Gtk.TreeView()
self.liststore = Gtk.ListStore(str)
self.tree.set_model(self.liststore)
self.liststore.append([self.name1])
self.liststore.append([self.name2])
render = Gtk.CellRendererText()
self.column = Gtk.TreeViewColumn("ID", render, text=0)
self.tree.append_column(self.column)
# select a row
selectetRow = self.tree.get_selection()
selectetRow.connect("changed", choiceRow)
self.layout.put(self.tree, 0,0)
self.layout.put(button, 0,100)
self.add(self.layout)
win = MyWindow()
win.connect("destroy", Gtk.main_quit)
win.show_all()
Gtk.main()
Gtk.TreeSelection.get_selected returns None when nothing is selected, after the update call there's nothing selected so you get None for treeiter and you try to access model[None][0] which obviously must fail. You need to check whether the returned iter is valid before trying to use it so just change your choiceRow function to
def choiceRow(selection):
model, treeiter = selection.get_selected()
if treeiter:
line = model[treeiter][0]
print(line)

Pathlib is taking an os.PathLike object, and interpereting it as bool

The Problem
The project I'm currently working on has an inexplicable error, which either I'm too dumb to figure out, or is just that obscure and technical.
I'm trying to first locate a directory, returning the path to it, and then checking if a sub-directory exists within it.
from pathlib import Path
from os.path import isdir
from os import getenv
from subprocess import Popen
def find_dlDir():
example_dlHomeDirs = [
Path(getenv("HOME"), 'downloads'),
Path(getenv("HOME"), 'Downloads'),
]
if dlHome := getenv("DOWNLOADS_HOME") != None:
return dlHome
else:
for path in example_dlHomeDirs:
if isdir(path):
return path
return None
def dirExists(dirName: str):
if dlHome := find_dlHome() != None:
if isdir(Path(dlHome, dirName)):
return True
else:
return False
else:
print("No Downloads Folder found.\nTo resolve, create a new folder in \
your home folder with one of the following names:")
[print(name) for name in ['downloads', 'Downloads']]
exit(1)
def mkdir(path: Path, dirToMake: str):
"""
Make a directory with the name <dirToMake> at <path>
kwargs["path"]? Parent directory of <dirToMake>
kwargs["dirToMake"]? Name of the to-be-made directory
"""
Popen("mkdir", f"{str(path)}/{dirToMake}")
if __name__ == "__main__":
dir = "example"
if not dirExists(dirName=dir):
mkdir(path=getenv("DOWNLOADS_HOME"), dirToMake=dir)
The following code should-- with the filesystem below-- run: mkdir $HOME/Downloads/example.
/Users/dickssau000 ¬
----Downloads ¬
--------github.com
--------youtube.com
--------mega.nz
Instead, I get a traceback:
Traceback (most recent call last):
File "/Users/dickssau000/.local/src/github.com/Saul-Dickson/dl/test.py", line 48, in <module>
if not dirExists(dirName=dir):
File "/Users/dickssau000/.local/src/github.com/Saul-Dickson/dl/test.py", line 24, in dirExists
if isdir(Path(dlHome, dirName)):
File "/usr/local/Cellar/python#3.9/3.9.1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/pathlib.py", line 1071, in __new__
self = cls._from_parts(args, init=False)
File "/usr/local/Cellar/python#3.9/3.9.1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/pathlib.py", line 696, in _from_parts
drv, root, parts = self._parse_args(args)
File "/usr/local/Cellar/python#3.9/3.9.1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/pathlib.py", line 680, in _parse_args
a = os.fspath(a)
TypeError: expected str, bytes or os.PathLike object, not bool
I'm absolutely sure that on line 24, the variables dlHome and dirName are os.PathLike and str respectively. Neither of those variables should have the type of bool, and I'm completely stumped on how to fix it. Does anyone have a clue as to what's going on here?
python: 3.9
if dlHome := find_dlHome() != None:
...
This is equivalent to
if dlHome := (find_dlHome() != None):
...
meaning dlHome is of type bool, not the result of find_dlHome()! You want this instead:
if (dlHome := find_dlHome) is not None:
...
(you should also do None checks using is/is not instead of ==/!=)

Dataclass that can 'yeld' a new line from a file when requested

With something like the following code I've used until now to create a generator that would read a very big file line by line and allow me to work on each line as I wish.
def readfile(self):
with open(self.filename) as infile:
for line in infile:
yield line
What would be a good way to edit this so as to get the new line every time I am calling a function, e.g.:
#dataclass
class Reader:
filename: str
line: int = field(default=None)
def __post_init__(self):
self.file = open(self.filename)
self.line = 1
def __del__(self):
self.file.close()
def next_line(self):
...
So ideally I would call next_line and get back the next line of file filename.
I don't quite understand why you want to create a class just to call a method that returns the next line of a file so I created just a function. If you need to, you can use it as a method in some class as well.
A file object IS already a generator. Therefore you can call the __next()__ method directly on the file object. __next__() returns the next value of a generator.
def next_line():
return f.__next__()
with open('file.txt') as f:
print(next_line()) # a line
second = next_line() # next line
print(next_line()) # next line
Or you can even omit the function completely:
with open('file.txt') as f:
print(f.__next__()) # a line
second = f.__next__() # next line
print(f.__next__()) # next line

Variables that should be defined are giving me NameErrors, how do I fix it?

I'm trying to append values to data to dump into a json file, but I keep getting this error:
Traceback (most recent call last):
File "C:\Users\techn\Anaconda3\lib\site-packages\scrapy\utils\defer.py", line 102, in iter_errback
yield next(it)
File "C:\Users\techn\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\offsite.py", line 30, in process_spider_output
for x in result:
File "C:\Users\techn\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\referer.py", line 339, in <genexpr>
return (_set_referer(r) for r in result or ())
File "C:\Users\techn\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\urllength.py", line 37, in <genexpr>
return (r for r in result or () if _filter(r))
File "C:\Users\techn\Anaconda3\lib\site-packages\scrapy\spidermiddlewares\depth.py", line 58, in <genexpr>
return (r for r in result or () if _filter(r))
File "C:\Users\techn\scrapy\KYCSpider\KYCSpider\spiders\kycspider.py", line 92, in parse
data['Government Members'].append({
NameError: name 'data' is not defined
This problem occurred earlier with another variable, where it would not say it's defined, despite being defined outside of the function. I'm at a loss as to what I'm doing wrong here.
class KYCSpider(scrapy.Spider):
name = 'kycspider'
start_urls = [
'http://www.vlada.si/en/about_the_government/members_of_government/'
]
allowed_domains = ['www.vlada.si']
maxdepth = 1
isNewDoc = False
oldData = ''
newFile = ''
data = {}
data['Government Members'] = []
def spider_opened(self):
print("OPENED SPIDER")
global newFile, oldData, isNewDoc
#If data.json exists, copy its data into a string and trunctuate it
try:
oldFile = open('data.json', 'r')
oldData = oldFile.read()
isNewDoc = False
#If data.json file doesn't exist, tell spider that this is a new doc
except FileNotFoundError:
isNewDoc = True
newFile = open('data.json', 'w')
newFile.write("[")
def parse(self, response):
global data, isNewDoc
#code that assigns values to from_name, from_designation, etc.
data['Government Members'].append({
'name': from_name,
'designation': from_designation,
'dob': dob,
'address': address,
'email': email,
'phone': phone,
'website': website,
'sourceURL': sourceURL,
'operation': operation
})
I expect data to have any scraped information appended to it, so I can dump it into a JSON file when it's done crawling.
Its not global, its just class wide so its in self. I.e. use self.data wherever you use data in that method.
Example:
# remove the global statment
self.data['Government Members'].append(...)
Though you should use a constructor instead of just defining variables out in the open such as:
def __init__(self):
self.data = {'Government Members': []}
def parse(self):
print(self.data)

Python3 script stops working after scrapy update

I am on macOS 10.14.2 using "homebrewed" python3.7.2, scrapy 1.5.1 and twisted 18.9.0 as a python novice with the following script to download old newspaper archived on a website:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# A scrapy script to download issues of the Gaceta (1843-1961)
import errno
import json
import os
from datetime import datetime
import scrapy
from scrapy import FormRequest, Request
os.chdir("/Volumes/backup/Archives/Gaceta_Nicaragua_1843-1961") # directory path
print((os.getcwd()))
# date range, format DD/MM/YYYY
start = '01/01/1843' # 01/01/1843
end = '31/12/1860' # 31/12/1961
date_format = '%d/%m/%Y'
start = datetime.strptime(start, date_format)
end = datetime.strptime(end, date_format)
class AsambleaSpider(scrapy.Spider):
name = 'asamblea'
allowed_domains = ['asamblea.gob.ni']
start_urls = ['http://digesto.asamblea.gob.ni/consultas/coleccion/']
papers = {
"Diario Oficial": "28",
}
def parse(self, response):
for key, value in list(self.papers.items()):
yield FormRequest(url='http://digesto.asamblea.gob.ni/consultas/util/ws/proxy.php',
headers= {
'X-Requested-With': 'XMLHttpRequest'
}, formdata= {
'hddQueryType': 'initgetRdds',
'cole': value
}
, meta={'paper': key},
callback=self.parse_rdds
)
pass
def parse_rdds(self, response):
data = json.loads(response.body_as_unicode())
for r in data["rdds"]:
if not r['fecPublica']:
continue
r_date = datetime.strptime(r['fecPublica'], date_format)
if start <= r_date <= end:
r['paper'] = response.meta['paper']
rddid = r['rddid']
yield Request("http://digesto.asamblea.gob.ni/consultas/util/pdf.php?type=rdd&rdd=" + rddid,
callback=self.download_pdf, meta=r)
def download_pdf(self, response):
filename = "{paper}/{anio}/".format(**response.meta) + "{titulo}-{fecPublica}.pdf".format(**response.meta).replace("/", "_")
if not os.path.exists(os.path.dirname(filename)):
try:
os.makedirs(os.path.dirname(filename))
except OSError as exc: # guard against race condition
if exc.errno != errno.EEXIST:
raise
with open(filename, 'wb') as f:
f.write(response.body)
It worked perfectly fine (although slow), however, I have two persisting issues with the script.
Firstly, I get the following error since the update:
2019-01-07 11:53:34 [scrapy.core.scraper] ERROR: Spider error processing <POST http://digesto.asamblea.gob.ni/consultas/util/ws/proxy.php> (referer: http://digesto.asamblea.gob.ni/consultas/coleccion/)
Traceback (most recent call last):
File "/usr/local/lib/python3.7/site-packages/scrapy/utils/defer.py", line 102, in iter_errback
yield next(it)
File "/usr/local/lib/python3.7/site-packages/scrapy/spidermiddlewares/offsite.py", line 30, in process_spider_output
for x in result:
File "/usr/local/lib/python3.7/site-packages/scrapy/spidermiddlewares/referer.py", line 339, in <genexpr>
return (_set_referer(r) for r in result or ())
File "/usr/local/lib/python3.7/site-packages/scrapy/spidermiddlewares/urllength.py", line 37, in <genexpr>
return (r for r in result or () if _filter(r))
File "/usr/local/lib/python3.7/site-packages/scrapy/spidermiddlewares/depth.py", line 58, in <genexpr>
return (r for r in result or () if _filter(r))
File "gaceta_downloader.py", line 58, in parse_rdds
if not r['fecPublica']:
KeyError: 'fecPublica'
Secondly, once the script runs again (as it did some days ago before updating python and packages) I ran into an issue where the script would sometimes complain that UnicodeEncodeError: ‘ascii’ codec can’t encode character u’\xb0’ in position 27: ordinal not in range(128) which I guess let sometimes to zero byte files. Do you see the encoding error in the source code? Is this related to the above problem?

Resources