Changing utf-8 string to cp1251 (Python) - python-3.x

I'm trying to convert an Excel file with Polish characters such as "ęśążćółń" to plain letters "esazcoln". First I managed to convert the xlsx file to txt, then:
# Asker's script as posted: read a text file, try to strip Polish diacritics,
# then print the text after a UTF-8 -> cp1252 round trip.
f = open("PATH_TO_TXT_FILE")
r = f.read()
# NOTE: str.upper() returns a new string; the result is discarded here, so r
# keeps its original case and the uppercase comparisons below miss lowercase
# letters.
r.upper()
new_word = ""
for char in r:
    if char == "Ą":
        new_word += "A"
    elif char == "Ć":
        new_word += "C"
    elif char == "Ę":
        new_word += "E"
    elif char == "Ł":
        new_word += "L"
    elif char == "Ó":
        new_word += "O"
    # NOTE: adjacent literals are concatenated, so this compares one character
    # against the two-character string "ŻŹ" and never matches.
    elif char == "Ż" "Ź":
        new_word += "Z"
    elif char == "Ź":
        new_word += "Z"
    elif char == "Ś":
        new_word += "S"
    else:
        new_word += char
# NOTE: the original text r is encoded here, not new_word, so the loop above
# has no effect on what is printed.
encoded_bytes = r.encode('utf-8', "replace")
decoded = encoded_bytes.decode(
    "cp1252", "replace")
print(decoded)
in file is written : asdżółć
Output: asdżółć
I'd like to receive: asdzolc
Is there anybody who can help me?

I can't find the stack overflow page from which I got the pattern/sub template, but this is the general idea:
#!/usr/bin/env python3
# coding: UTF-8
"""Replace Polish diacritic letters with their plain ASCII counterparts."""
import re

# Maps every Polish diacritic letter (upper and lower case) to the ASCII
# letter that replaces it.
mapping = {
    'Ą': 'A',
    'Ć': 'C',
    'Ę': 'E',
    'Ł': 'L',
    'Ń': 'N',
    'Ó': 'O',
    'Ż': 'Z',
    'Ź': 'Z',
    'Ś': 'S',
    'ą': 'a',
    'ć': 'c',
    'ę': 'e',
    'ł': 'l',
    'ń': 'n',
    'ó': 'o',
    'ż': 'z',
    'ź': 'z',
    'ś': 's',
}

# Escaping belongs on the pattern side: each key is escaped when the
# alternation is built, so the matched text can be looked up unchanged.
pattern = re.compile("|".join(re.escape(key) for key in mapping))


def replace_by_mapping(text):
    """Return *text* with every character listed in ``mapping`` replaced.

    Characters that are not keys of ``mapping`` are left untouched.
    """
    return pattern.sub(lambda m: mapping[m.group(0)], text)


if __name__ == '__main__':
    # The input holds non-ASCII text, so do not depend on the platform's
    # default encoding.
    with open('polish_test.txt', 'r', encoding='utf-8') as f:
        contents = f.read()
    contents = replace_by_mapping(contents)
    print(contents)

Related

Trying to get the longest decreasing substring from a given string, keeping the case sensitivity in mind

I have been trying to get below result out of this Program but for some reason it is not giving the required output.
Required Results:
Input1 : bbaasssrppoccbaaacbaba Output1 : ['bbaa','sssrppoccbaaa','cba','ba']
Input2: hjgAvjhjKLhbfvbZSF Output2 :['h', 'jgA', 'vjh', 'jK', 'L', 'hb', 'f','vbZSF']
What i am getting
Output: ['bbaa', 'sssrppoccbaaa', 'cba'] & Output: ['h', 'jgA', 'vjh', 'jK', 'L', 'hb', 'f'] from below code which is not getting last substring "ba" & "vbZSF".
# Asker's script as posted: split s1 into runs whose character codes never
# increase, collecting the runs in listSub.
s1 = 'bbaasssrppoccbaaacbaba'
# s1 = 'hjgAvjhjKLhbfvbZSF'
decSub = ''
listSub = []
i= 0
while i < len(s1):
    current = s1[i]
    # The first character is compared with itself, so it always extends the run.
    previous = s1[i] if i == 0 else s1[i-1]
    if ord(current) <= ord(previous):
        decSub += current
    else:
        # NOTE: a run is stored only when the next run begins, so the run
        # still held in decSub when the loop ends is never appended.
        listSub.append(decSub)
        decSub = ''
        decSub += current
    i +=1
print(listSub)
It would be great if somebody could suggest a fix or a better way to achieve this result. Thanks in advance.
You just need to append the missing decSub in the list.
Updated Code:
# Split s1 into runs whose character codes never increase.
s1 = 'bbaasssrppoccbaaacbaba'
# s1 = 'hjgAvjhjKLhbfvbZSF'
decSub = ''
listSub = []
i = 0
while i < len(s1):
    current = s1[i]
    # The first character is compared with itself, so it always extends the run.
    previous = s1[i] if i == 0 else s1[i-1]
    if ord(current) <= ord(previous):
        decSub += current
    else:
        # A larger character ends the current run and starts the next one.
        listSub.append(decSub)
        decSub = current
    i += 1
# The last run is still pending when the loop ends; store it as well.
# Nothing is pending for an empty s1, so no empty string is added then.
if decSub:
    listSub.append(decSub)
print(listSub)
Output:
# s1 = 'bbaasssrppoccbaaacbaba'
['bbaa', 'sssrppoccbaaa', 'cba', 'ba']
# s1 = 'hjgAvjhjKLhbfvbZSF'
['h', 'jgA', 'vjh', 'jK', 'L', 'hb', 'f', 'vbZSF']
A while loop is the better choice for iterations without predetermined boundaries; in your case a for loop is probably the better choice.
Try the following instead:
from typing import List
def split_special(txt: str) -> List[str]:
    """Split *txt* into consecutive runs whose characters never increase.

    A new run starts whenever a character has a higher code point than the
    character before it. Returns an empty list for an empty string.
    """
    if not txt:
        return []
    res = [txt[0]]
    prev = txt[0]
    for ch in txt[1:]:
        # Single characters compare by code point, so no ord()/chr() is needed.
        if prev >= ch:
            res[-1] += ch
        else:
            res.append(ch)
        prev = ch
    return res
Outputs:
>>> print(Input1)
bbaasssrppoccbaaacbaba
>>> print(split_special(Input1))
['bbaa', 'sssrppoccbaaa', 'cba', 'ba']
>>> print(Input2)
hjgAvjhjKLhbfvbZSF
>>> print(split_special(Input2))
['h', 'jgA', 'vjh', 'jK', 'L', 'hb', 'f', 'vbZSF']

Homework with strings

Hello guys i got a homework where i get a string and basically i should change the letters in it then return it backward:
A --> T
T --> A
G --> C
C --> G
Here is my code :
def dnaComplement(s):
    # Asker's attempt as posted: meant to build the complement of s and
    # return it reversed.
    newWord = ""
    for x in s:
        # NOTE: str.join returns a new string and the result is discarded on
        # every branch, so newWord is still "" when the loop ends.
        if x == "T":
            newWord.join('A')
        elif x == "A":
            newWord.join('T')
        elif x == "C":
            newWord.join('G')
        elif x == "G":
            newWord.join('C')
    return newWord[::-1]
the input is: ACCGGGTTTT
Your effort so far has got a minor issue with it.
You are using newWord.join('X') in an attempt to add the new character to the string. This doesn't work in the way you are attempting to use it. Read again how join functions in the official documentation.
Instead, you can use the += operator to append the characters to the end of your newWord string:
newWord += 'X'
Your code then becomes:
def dnaComplement(s):
    """Return the complement of DNA string *s*, reversed.

    A and T are swapped, as are C and G. Any other character is skipped.
    """
    newWord = ""
    for x in s:
        if x == "T":
            newWord += 'A'
        elif x == "A":
            newWord += 'T'
        elif x == "C":
            newWord += 'G'
        elif x == "G":
            newWord += 'C'
    return newWord[::-1]


print(dnaComplement('ACCGGGTTTT'))
Output:
AAAACCCGGT
This is the reverse of TGGCCCAAAA, which is stored in newWord until you return it from dnaComplement.
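newWord.join(...) doesn't change the value of newWord, but rather returns a new string.
So to begin with, you would need to assign the result, e.g. newWord = newWord.join(...) (although join uses newWord as a separator rather than appending to it, so += is still what you want here).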
That being said, here is a cleaner way IMO:
# Complement of each DNA base.
d = {'T': 'A',
     'A': 'T',
     'C': 'G',
     'G': 'C'
     }


def dnaComplement(s):
    """Return the complement of DNA string *s*, reversed.

    Raises KeyError if *s* contains a character that is not a key of ``d``.
    """
    return ''.join(d[x] for x in s[::-1])

How to delete the vowel from a given string

How to delete the vowel from the given string?
# Asker's script as posted: copy the characters of letter into a list, remove
# the vowels from that list, then join what is left.
letter = 'raeiou'
new_string = []
for i in letter:
    new_string.append(i)
# NOTE: the list is modified while it is being iterated, so the element that
# slides into a removed slot is skipped and some vowels survive.
for j in new_string:
    if j == 'a' or j == 'e' or j == 'i' or j == 'o' or j == 'u':
        new_string.remove(j)
final = ''.join(new_string)
print('The string after removing the vowels is {}'.format(final))
Expected output: r, but I get reo.
When you do:
for j in new_string:
    ...
    new_string.remove(...)
you are modifying a list while looping on it (see e.g. strange result when removing item from a list).
You could simply skip vowels when you create new_string in the first place:
# Keep only the non-vowels while the list is being built, so nothing has to
# be removed afterwards.
for i in letter:
    if i not in 'aeiou':
        new_string.append(i)
final = ''.join(new_string)
Here is an alternative suggestion:
def func(s):
    """Return *s* with every vowel (a, e, i, o, u in either case) removed."""
    for c in 'aeiouAEIOU':
        # replace() drops every occurrence of c in a single call.
        s = s.replace(c, '')
    return s
You don't need two loops for this!
letter = 'raeiou'
new_string = letter
vowels = ('a', 'e', 'i', 'o', 'u')
# One pass over the vowels is enough: replace() removes every occurrence of
# a vowel at once, so the input does not have to be walked character by
# character.
for vowel in vowels:
    new_string = new_string.replace(vowel, "")
print('The string after removing the vowels is {}'.format(new_string))

Invalid Syntax because of commas

I was trying to do a program that counts vowels, consonants and other chars. The problem is in the returning because of commas and I can't understand why. It is supposed to return, for example, "2 vowels, 3 consonants and 5 others".
This is my program (sorry for any english mistakes):
def count1(word):
vowels = 0
consonants = 0
others = 0
l1 = ['A', 'E', 'I', 'O', 'U', 'a', 'e', 'i', 'o', 'u']
for i in range(len(word)):
if word[i] in l1:
vowels = vowels + 1
elif (word[i] >= 'a' and word[i] <= 'z') or (word[i] >=
'A' and word[i] <= 'Z'):
consonants = consonants + 1
else :
others = others+ 1
return(vowels "vowels," consonants "consonants," outros "others")
You should format the string before you return it:
return '{} vowels, {} consonants, {} others'.format(vowels, consonants, others)
Or using f-string in Python 3.6+
return f'{vowels} vowels, {consonants} consonants, {others} others'

NameError: name '' is not defined Python

i would like to make a blackjack algorithm and i have almost finished the code. Although i get all the time the error NameError: name 'pointCoint' is not defined. I found out on the internet that i should change raw_input into input due to the python version 3.6 which i am using. Can somebody help me and have a look at my code whether i am missing something? dealerCount = pointCoint(dealer)
NameError: name 'pointCoint' is not defined
Thanks
You have created a function called pointCount(...), not pointCoint. Change pointCoint to pointCount.
Complete code:
from random import shuffle
def deck():
    """Build and return a shuffled list of 52 cards.

    Each card is a two-character string: the suit (H, D, S or C) followed by
    the rank (A, 2-9, T, J, Q or K).
    """
    # Named `cards` so the list does not shadow this function's own name.
    cards = []
    for suit in ['H', 'D', 'S', 'C']:
        for rank in ['A', '2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K']:
            cards.append(suit+rank)
    shuffle(cards)
    return cards
def pointCount(myCards):
    """Return the blackjack value of a hand.

    myCards is an iterable of two-character card strings whose second
    character is the rank (A, 2-9, T, J, Q or K). T, J, Q and K count 10,
    number cards count their face value, and every ace counts 1, with one
    ace promoted to 11 when that does not take the hand over 21.
    """
    myCount = 0
    aceCount = 0
    for card in myCards:
        rank = card[1]
        if rank in ('T', 'J', 'Q', 'K'):
            myCount += 10
        elif rank != 'A':
            myCount += int(rank)
        else:
            aceCount += 1
    # Count every ace as 1 first, then promote a single ace to 11 only if the
    # hand stays at 21 or below. At most one ace can ever be worth 11, since
    # two of them would already total 22.
    myCount += aceCount
    if aceCount and myCount + 10 <= 21:
        myCount += 10
    return myCount
def createPlayingHands(myDeck):
    """Deal the opening hands from myDeck and return [dealerHand, playerHand].

    Cards are popped from the end of myDeck, which is modified in place: two
    for the dealer, then two for the player. The dealer then keeps drawing
    while pointCount() of the dealer's hand is 16 or less.
    """
    dealerHand = []
    playerHand = []
    dealerHand.append(myDeck.pop())
    dealerHand.append(myDeck.pop())
    playerHand.append(myDeck.pop())
    playerHand.append(myDeck.pop())
    while(pointCount(dealerHand) <= 16):
        dealerHand.append(myDeck.pop())
    return [dealerHand, playerHand]
# Game loop: deal once, then let the player hit until they stand, bust or
# reach 21.
game = ""
myDeck = deck()
hands = createPlayingHands(myDeck)
dealer = hands[0]
player = hands[1]
while(game != "exit"):
    # Recomputed on every pass because the player's hand grows after a hit.
    dealerCount = pointCount(dealer)
    playerCount = pointCount(player)
    print("Dealer has:")
    # Only the dealer's first card is shown while the round is in progress.
    print(dealer[0])
    print("Player1, you have:")
    print(player)
    if(playerCount == 21):
        print("Blackjack Player wins")
        break
    elif(playerCount > 21):
        print("player Busts with " + str(playerCount) + "points")
        break
    # This branch reports a dealer bust, so it must test the dealer's total;
    # testing playerCount again made it unreachable.
    elif(dealerCount > 21):
        print("Dealer Busts with " + str(dealerCount) + "points")
        break
    # The prompt names 'H', the key the check below actually accepts.
    game = input("What would you like to do? H: Hit me, S: Stand? ")
    if(game == 'H'):
        player.append(myDeck.pop())
    # Any answer other than 'H' is treated as standing: compare totals and
    # end the round.
    elif(playerCount > dealerCount):
        print("Player wins with " + str(playerCount) + "points")
        print("Dealer has: " + str(dealer) + "or" + str(dealerCount) + "points")
        break
    else:
        print("Dealer wins")
        print("Dealer has: " + str(dealer) + "or" + str(dealerCount) + "points")
        break

Resources