My python program does not write the accents in the Spanish language - python-3.x

this is my code
import pyautogui
import time
f = open('spambot\spam.txt', 'r')
time.sleep(5)
for words in f:
print(words)
pyautogui.typewrite(words)
pyautogui.press("enter")
time.sleep(3)
the content of spam.txt is this
"Yo sé que no me te importó"
but this is what it prints
"Yo s que no te import"

You can edit this line adding an encoding that is appropriate for Spanish: 'utf', 'utf-8' or 'utf-8-sig' work fine:
f = open('spam.txt', 'r', encoding='utf-8-sig')

Related

Python: binary mode vs encoding and its sha256

I am again in the mysteries of python:
Can you tell me why created sha256 are not the same (release.md stays the same):
import re, os, os.path, hashlib, time, sys
with open("release.md", "rb") as f:
h = f.read()
#print(h)
print(hashlib.sha256(h).hexdigest())
with open("release.md", "r", encoding=sys.getdefaultencoding()) as f:
h = f.read()
#print(h)
print(hashlib.sha256(h.encode(sys.getdefaultencoding())).hexdigest())
Output:
8c1938c9b495afe666d41a23cb6d108b3c351d6c8b5aca7019e214df1c47e240
32b8f1a46cea09e6c358390c8a81b80e233bd6c991c010cad6ad5489362e20d3
Its python 3.9.1 on windows10
I found out, that its up to windows specific linefeed, the open function will use transitions to "\r\n"
see Parameter newline: https://docs.python.org/3/library/functions.html?highlight=open#open
import re, os, os.path, hashlib, time, sys
with open("release.md", "rb") as f:
h = f.read()
#print(h)
print(hashlib.sha256(h).hexdigest())
e=sys.getdefaultencoding()
with open("release.md", "r", encoding=e, newline="\n") as f:
h = f.read()
#print(h)
print(hashlib.sha256(h.encode(e)).hexdigest())
makes it equal.

Python SyntaxError: EOL while scanning string literal: Open and close command

I tried to program and it always gave error. Where's the problem?
I didn't understand how it works.
I already put the command open and close read .txt
import time
print ("Oi Y")
time.sleep (2)
import time
print ("Sou Crash e fui criado pelo pai CrashX")
time.sleep (2)
import time
print ("O pai me pediu que eu leio para você")
time.sleep (2)
import time
print ("Aguarde")
time.sleep (5)
import time
f = open("C:\P.txt", 'r')
file_data = f.read()
print (file_data)
time.sleep (10)
file_data2 = f.close()
print (file_data2)
import time
print ("Encerrando...")
time.sleep (5)
import time
print ("Obrigado!)
time.sleep (5)

outputting python script results into text

I have a want to save my python script's result into a txt file.
My python code
from selenium import webdriver
bro = r"D:\Developer\Software\Python\chromedriver.exe"
driver=webdriver.Chrome(bro)
duo=driver.get('http://www.lolduo.com')
body=driver.find_elements_by_tag_name('tr')
for post in body:
print(post.text)
driver.close()
Some codes that I've tried
import subprocess
with open("output.txt", "w") as output:
subprocess.call(["python", "./file.py"], stdout=output);
I tried this code and it only makes a output.txt file and has nothing inside it
D:\PythonFiles> file.py > result.txt
Exception:
UnicodeEncodeError: 'charmap' codec can't encode character '\u02c9' in
position 0: character maps to
and only prints out 1/3 of the results of the script into a text file.
You can try below code to write data to text file:
from selenium import webdriver
bro = r"D:\Developer\Software\Python\chromedriver.exe"
driver = webdriver.Chrome(bro)
driver.get('http://www.lolduo.com')
body = driver.find_elements_by_tag_name('tr')
with open("output.txt", "w", encoding="utf8") as output:
output.write("\n".join([post.text for post in body]))
driver.close()
You can try this. This Is my Python Code:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from time import sleep
import time
bro = r"D:\Developer\Software\Python\chromedriver.exe"
driver = webdriver.Chrome(bro)
driver.get('http://www.lolduo.com')
body = driver.find_elements_by_tag_name('tr') .text
with open('output15.txt', mode='w') as f:
for post in body:
print(post)
f.write(post)
time.sleep(2)
driver.close()

Python 3 - TypeError: a bytes-like object is required, not 'str'

I'm working on a lesson from Udacity and am having some issue trying to find out if the result from this site returns true or false. I get the TypeError with the code below.
from urllib.request import urlopen
#check text for curse words
def check_profanity():
f = urlopen("http://www.wdylike.appspot.com/?q=shit")
output = f.read()
f.close()
print(output)
if "b'true'" in output:
print("There is a profane word in the document")
check_profanity()
The output prints b'true' and I'm not really sure where that 'b' is coming from.
In python 3 strings are by default unicode. The b in b'true' means that the string is a byte string and not unicode. If you don't want that you can do:
from urllib.request import urlopen
#check text for curse words
def check_profanity():
with urlopen("http://www.wdylike.appspot.com/?q=shit") as f:
output = f.read().decode('utf-8')
if output:
if "true" in output:
print("There is a profane word in the document")
check_profanity()
Using with will close the urlopen connection automatically.

Getting <generator object <genexpr> at 0x1193417d8> as output

#Reading files with txt extension
def get_sentences():
for root, dirs, files in os.walk("/Users/Documents/test1"):
for file in files:
if file.endswith(".txt"):
x_ = codecs.open(os.path.join(root,file),"r", "utf-8-sig")
for lines in x_.readlines():
yield lines
formoreprocessing = get_sentences()
#Tokenizing sentences of the text files
from nltk.tokenize import sent_tokenize
for i in formoreprocessing:
raw_docs = sent_tokenize(i)
tokenized_docs = [sent_tokenize(i) for sent in raw_docs]
'''Removing Stop Words'''
stopword_removed_sentences = []
from nltk.corpus import stopwords
stopset = set(stopwords.words("English"))
def strip_stopwords(sentence):
return ' '.join(word for word in sentence.split() if word not in stopset)
stopword_removed_sentences = (strip_stopwords(sentence) for sentence in raw_docs)
print(stopword_removed_sentences)
The above code is not printing what it is supposed to be. Instead it is throwing:
at 0x1193417d8>
as output. What is the mistake here?
I am using python 3.5.
Try print(list(stopword_removed_sentences)). This turns the generator into a list before printing it
This is the final answer, the provides the best result resolving the problem that i have mentioned in my previous comment.
from nltk.tokenize import sent_tokenize
raw_docs = sent_tokenize(''.join(formoreprocessing))
#print(raw_docs)
tokenized_docs = [sent_tokenize(''.join(formoreprocessing)) for sent in raw_docs]
#Removing Stop Words
stopword_removed_sentences = []
from nltk.corpus import stopwords
stopset = set(stopwords.words("English"))
def strip_stopwords(sentence):
return ' '.join(word for word in sentence.split() if word not in stopset)
stopword_removed_sentences = (strip_stopwords(sentence) for sentence in raw_docs)
print(list(stopword_removed_sentences))

Resources