I have this code
for letters in itertools.product(charset, repeat=47):
string = "".join(letters)
print(string)
and out from that is
aaaaaaaaaaaa
aaaaaaaaaaab
aaaaaaaaaaac
but im wondering how can I make it not generate same three characters in row so that out put is
dddcccbbbaaa
dddcccbbbaab
dddcccbbbaac
and so on without using something like this
for letters in itertools.product(charset, repeat=47):
string = "".join(letters)
for i in range(1,len(string)-1):
if string[i] is not string[i+1] is not string[i-1]:
print(string)
else:
pass
Here's a slightly modified version of your code:
import itertools
def version1(charset, N):
result = []
for letters in itertools.product(charset, repeat=N):
string = "".join(letters)
for i in range(0, N-2):
if string[i] == string[i+1] == string[i+2]:
break
else: # did not find any ZZZ sequence
result.append(string)
return result
>>> charset = "abc"
>>> N = 5
>>> version1(charset, N)
['aabaa', 'aabab', 'aabac', 'aabba', 'aabbc', 'aabca', 'aabcb', 'aabcc', 'aacaa', 'aacab', 'aacac', 'aacba', 'aacbb', 'aacbc', 'aacca', 'aaccb', 'abaab', 'abaac', 'ababa', 'ababb', 'ababc', 'abaca', 'abacb', 'abacc', 'abbaa', 'abbab', 'abbac', 'abbca', 'abbcb', 'abbcc', 'abcaa', 'abcab', 'abcac', 'abcba', 'abcbb', 'abcbc', 'abcca', 'abccb', 'acaab', 'acaac', 'acaba', 'acabb', 'acabc', 'acaca', 'acacb', 'acacc', 'acbaa', 'acbab', 'acbac', 'acbba', 'acbbc', 'acbca', 'acbcb', 'acbcc', 'accaa', 'accab', 'accac', 'accba', 'accbb', 'accbc', 'baaba', 'baabb', 'baabc', 'baaca', 'baacb', 'baacc', 'babaa', 'babab', 'babac', 'babba', 'babbc', 'babca', 'babcb', 'babcc', 'bacaa', 'bacab', 'bacac', 'bacba', 'bacbb', 'bacbc', 'bacca', 'baccb', 'bbaab', 'bbaac', 'bbaba', 'bbabb', 'bbabc', 'bbaca', 'bbacb', 'bbacc', 'bbcaa', 'bbcab', 'bbcac', 'bbcba', 'bbcbb', 'bbcbc', 'bbcca', 'bbccb', 'bcaab', 'bcaac', 'bcaba', 'bcabb', 'bcabc', 'bcaca', 'bcacb', 'bcacc', 'bcbaa', 'bcbab', 'bcbac', 'bcbba', 'bcbbc', 'bcbca', 'bcbcb', 'bcbcc', 'bccaa', 'bccab', 'bccac', 'bccba', 'bccbb', 'bccbc', 'caaba', 'caabb', 'caabc', 'caaca', 'caacb', 'caacc', 'cabaa', 'cabab', 'cabac', 'cabba', 'cabbc', 'cabca', 'cabcb', 'cabcc', 'cacaa', 'cacab', 'cacac', 'cacba', 'cacbb', 'cacbc', 'cacca', 'caccb', 'cbaab', 'cbaac', 'cbaba', 'cbabb', 'cbabc', 'cbaca', 'cbacb', 'cbacc', 'cbbaa', 'cbbab', 'cbbac', 'cbbca', 'cbbcb', 'cbbcc', 'cbcaa', 'cbcab', 'cbcac', 'cbcba', 'cbcbb', 'cbcbc', 'cbcca', 'cbccb', 'ccaab', 'ccaac', 'ccaba', 'ccabb', 'ccabc', 'ccaca', 'ccacb', 'ccacc', 'ccbaa', 'ccbab', 'ccbac', 'ccbba', 'ccbbc', 'ccbca', 'ccbcb', 'ccbcc']
Your algorithm is not optimal. Look at the first string:
aaaaa
You know that you need len(charset) - 1 iterations (aaaab, aaaac) to arrive to:
aaaba
And then again len(charset) - 1 iterations to arrive to:
aaaca
But you can skip all those iterations, because of the aaa beginning.
Actually, when you find sequence aaa, you can skip len(charset)^K - 1 where
K is the number of remaining chars. This does not change the big O complexity,
but will reduce the time of computation for long sequences, depending on the
size of the charset and the number of characters of the strings.
Intuitively, if the charset has few chars, you will spare a lot of computations.
First, you need to find the first letter after a ZZZ sequence:
def first_after_ZZZ(string):
for i in range(0, len(string)-2):
if string[i] == string[i+1] == string[i+2]:
return i+3
return -1
>>> first_after_ZZZ("ababa")
-1
>>> first_after_ZZZ("aaaba")
3
>>> first_after_ZZZ("aaabaaabb")
3
We use this function in the previous code (intermediate step):
def version2(charset, N):
result = []
for letters in itertools.product(charset, repeat=N):
string = "".join(letters)
f = first_after_ZZZ(string)
if f == -1:
result.append(string)
return result
>>> version2(charset, N) == version1(charset, N)
True
Now, we can skip some elements:
def version3(charset, N):
result = []
it = itertools.product(charset, repeat=N)
for letters in it:
string = "".join(letters)
f = first_after_ZZZ(string)
if f == -1:
result.append(string)
elif f < N:
K = N - f # K > 1
to_skip = len(charset)**K-1
next(itertools.islice(it, to_skip, to_skip), None) # this will skip to_skip tuples
return result
>>> version3(charset, N) == version1(charset, N)
True
Benchmark:
>>> from timeit import timeit as ti
>>> ti(lambda: version1(charset, 15), number=1)
13.14919564199954
>>> ti(lambda: version3(charset, 15), number=1)
6.94705574299951
This is impressive because the charset is small, but may be insignificant with a whole alphabet.
Of course, if you write your own implementation of product, you can skip the
tuples without generating them and this could be faster.
I'm trying to figure out why my list looks ugly when printed:
alfa = []
alfa.append([])
alfa.append([])
a = 0
a = float(a)
print("Input the points, one per line as x,y.\nStop by entering an empty line.")
while a == 0:
start = input()
if start == '':
a = a + 1
else:
alfa[0].append(start.split(",")[0:1])
alfa[1].append(start.split(",")[1:2])
print(alfa)
with input of:
2,3
12,56
1,2
a
I get this:
[[['2'], ['12'], ['1']], [['3'], ['56'], ['2']]]
While if i try this simple Program found online:
elements = []
elements.append([])
elements.append([])
elements[0].append(1)
elements[0].append(2)
elements[1].append(3)
elements[1].append(4)
print(elements[0][0])
print(elements)
I get this:
[[1, 2], [3, 4]]
Why is this result much tidier than mine?
Try:
alfa[0].append(int(start.split(",")[0]))
alfa[1].append(int(start.split(",")[1]))
>>>[[2, 12, 1], [3, 56, 2]]
You're getting the quote marks because input() is interpreting the input as a string. It doesn't know that you want what you've typed to be a number, so it has to handle it in a default way. You have to tell the code that the input should be interpreted as an int.
Secondly, you're slicing the arrays when you use [0:1] to get an array consisting of the entries from 0 to 0, which is the same as getting element 0 directly, except you get an array with one element rather than just the element you want. Essentially, you are inserting [2] rather than 2.
The data from your input is strings, as shown by the quotation marks. Cast your strings to integers after the input. If you want to have the [1, 2] formatting without the extra brackets then you need to place numbers in alfa[0] and alfa[1] etc..
alfa = []
alfa.append([])
alfa.append([])
a = 0
a = float(a)
print("Input the points, one per line as x,y.\nStop by entering an empty line.")
while a == 0:
start = input()
if start == '':
a = a + 1
else:
alfa[0].append(int(start.split(",")[0]))
alfa[1].append(int(start.split(",")[1]))
print(alfa)
Oh, I see #Andrew McDowell has beat me to this. Well here you go anyway...
I'm trying to create a matrix with 4 rows and 10 columns and display the leading 0 for all the single digit numbers that will randomly get generated later. This is what I would like it to look like: My teacher gave me this snippet as a way to format the numbers:
print('{:02}'.format(variable))
But when I use this in my function, it gives me the error: unsupported format string passed to list.__format__
I reworked my code and was able to get the leading zero, but now the 4x10 matrix is just 40 ints side by side. Anyone able to give me some help and an explanation?
My code:
def printMatrix(matrix):
for r in range(ROWS):
for c in range(COLS):
print('{:02}'.format(matrix[r][c]), end=' ')
def main():
matrix = [0]*ROWS
for i in range(ROWS):
matrix[i] = [0]*COLS
printMatrix(matrix)
You're really close, looks like you may just need another print() after the for-loop to put a newline after each row. Try this:
def printMatrix(matrix):
for r in range(ROWS):
for c in range(COLS):
print('{:02}'.format(matrix[r][c]), end=' ')
print()
Demo
you need a 0 in front .. i.e. {0:02}
print('{0:02}'.format(variable))
This 0 refer to the index of the parameters passed in e.g. this should work too:
print('{2:02}'.format("x", "y", variable))
Your code:
def printMatrix(matrix):
for r in range(ROWS):
for c in range(COLS):
print('{0:02}'.format(matrix[r][c]), end=' ')
def main():
matrix = [0]*ROWS
for i in range(ROWS):
matrix[i] = [0]*COLS
printMatrix(matrix)
Is there a method that converts a string of text such as 'you' to a number other than
y = tuple('you')
for k in y:
k = ord(k)
which only converts one character at a time?
In order to convert a string to a number (and the reverse), you should first always work with bytes. Since you are using Python 3, strings are actually Unicode strings and as such may contain characters that have a ord() value higher than 255. bytes however just have a single byte per character; so you should always convert between those two types first.
So basically, you are looking for a way to convert a bytes string (which is basically a list of bytes, a list of numbers 0–255) into a single number, and the inverse. You can use int.to_bytes and int.from_bytes for that:
import math
def convertToNumber (s):
return int.from_bytes(s.encode(), 'little')
def convertFromNumber (n):
return n.to_bytes(math.ceil(n.bit_length() / 8), 'little').decode()
>>> convertToNumber('foo bar baz')
147948829660780569073512294
>>> x = _
>>> convertFromNumber(x)
'foo bar baz'
Treat the string as a base-255 number.
# Reverse the digits to make reconstructing the string more efficient
digits = reversed(ord(b) for b in y.encode())
n = reduce(lambda x, y: x*255 + y, digits)
new_y = ""
while n > 0:
n, b = divmod(n, 255)
new_y += chr(b)
assert y == new_y.decode()
(Note this is essentially the same as poke's answer, but written explicitly rather than using available methods for converting between a byte string and an integer.)
You don't need to convert the string into tuple
k is overwritten. Collect items using something like list comprehension:
>>> text = 'you'
>>> [ord(ch) for ch in text]
[121, 111, 117]
To get the text back, use chr, and join the characters using str.join:
>>> numbers = [ord(ch) for ch in text]
>>> ''.join(chr(n) for n in numbers)
'you'
Though there are a number of ways to fulfill this task, I prefer the hashing way because it has the following nice properties
it ensures that the number you get is highly random, actually uniformly random
it ensures that even a small change in your input string will lead to a significant difference in output integer.
it is an irreversible process, i.e., you can't tell which string is the input based on the integer output.
import hashlib
# there are a number of hashing functions you can pick, and they provide tags of different lengths and security levels.
hashing_func = hashlib.md5
# the lambda func does three things
# 1. hash a given string using the given algorithm
# 2. retrive its hex hash tag
# 3. convert hex to integer
str2int = lambda s : int(hashing_func(s.encode()).hexdigest(), 16)
To see how the resulting integers are uniform randomly distributed, we first need to have some random string generator
import string
import numpy as np
# candidate characters
letters = string.ascii_letters
# total number of candidates
L = len(letters)
# control the seed or prng for reproducible results
prng = np.random.RandomState(1234)
# define the string prng of length 10
prng_string = lambda : "".join([letters[k] for k in prng.randint(0, L, size=(10))])
Now we generate sufficient number of random strings and obtain corresponding integers
ss = [prng_string() for x in range(50000)]
vv = np.array([str2int(s) for s in ss])
Let us check the randomness by comparing the theoretical mean and standard deviation of a uniform distribution and those we observed.
for max_num in [256, 512, 1024, 4096] :
ints = vv % max_num
print("distribution comparsions for max_num = {:4d} \n\t[theoretical] {:7.2f} +/- {:8.3f} | [observed] {:7.2f} +/- {:8.3f}".format(
max_num, max_num/2., np.sqrt(max_num**2/12), np.mean(ints), np.std(ints)))
Finally, you will see the results below, which indicates that the number you got are very uniform.
distribution comparsions for max_num = 256
[theoretical] 128.00 +/- 73.901 | [observed] 127.21 +/- 73.755
distribution comparsions for max_num = 512
[theoretical] 256.00 +/- 147.802 | [observed] 254.90 +/- 147.557
distribution comparsions for max_num = 1024
[theoretical] 512.00 +/- 295.603 | [observed] 512.02 +/- 296.519
distribution comparsions for max_num = 4096
[theoretical] 2048.00 +/- 1182.413 | [observed] 2048.67 +/- 1181.422
It is worthy to call out that other posted answers may not attain these these properties.
For example, #poke's convertToNumber solution will give
distribution comparsions for max_num = 256
[theoretical] 128.00 +/- 73.901 | [observed] 93.48 +/- 17.663
distribution comparsions for max_num = 512
[theoretical] 256.00 +/- 147.802 | [observed] 220.71 +/- 129.261
distribution comparsions for max_num = 1024
[theoretical] 512.00 +/- 295.603 | [observed] 477.67 +/- 277.651
distribution comparsions for max_num = 4096
[theoretical] 2048.00 +/- 1182.413 | [observed] 1816.51 +/- 1059.643
I was trying to find a way to convert a numpy character array into a unique numeric array in order to do some other stuff. I have implemented the following functions including the answers by #poke and #falsetrue (these methods were giving me some trouble when the strings were too large). I have also added the hash method (a hash is a fixed sized integer that identifies a particular value.)
import numpy as np
def str_to_num(x):
"""Converts a string into a unique concatenated UNICODE representation
Args:
x (string): input string
Raises:
ValueError: x must be a string
"""
if isinstance(x, str):
x = [str(ord(c)) for c in x]
x = int(''.join(x))
else:
raise ValueError('x must be a string.')
return x
def chr_to_num(x):
return int.from_bytes(x.encode(), 'little')
def char_arr_to_num(arr, type = 'hash'):
"""Converts a character array into a unique hash representation.
Args:
arr (np.array): numpy character array.
"""
if type == 'unicode':
vec_fun = np.vectorize(str_to_num)
elif type == 'byte':
vec_fun = np.vectorize(chr_to_num)
elif type == 'hash':
vec_fun = np.vectorize(hash)
out = np.apply_along_axis(vec_fun, 0, arr)
out = out.astype(float)
return out
a = np.array([['x', 'y', 'w'], ['x', 'z','p'], ['y', 'z', 'w'], ['x', 'w','y'], ['w', 'z', 'q']])
char_arr_to_num(a, type = 'unicode')
char_arr_to_num(a, type = 'byte')
char_arr_to_num(a, type = 'hash')
gets the length of the common prefix of two words i.e-the common prefix of "global" and "glossary" is "glo"(length 3)
a= input("Enter string: ")
b= input("Enter string: ")
count=0
c=a.startswith(b)
while count<=c:
if c:
count=count+1
print(count)
what im not sure is how to get the length of the common prefix
You can do:
def pre(s1, s2):
if any(bool(s.strip())==False for s in (s1, s2)):
return 0
for i, (c1, c2) in enumerate(zip(s1, s2)):
if c1!=c2:
return i
return i+1
Testing:
>>> pre("global", "glossary")
3
>>> pre("global", "global")
6
>>> pre("global", "")
0
You can "cheat" (as in - it's not the way it's meant to be used, but oh well) and use os.path.commonprefix which does a char by char comparison on all elements passed, eg:
from os.path import commonprefix
a = 'global'
b = 'glossary'
length = len(commonprefix([a, b]))
# 3