Rewrite word count function as OOP in python 3 - python-3.x

I have written a simple function for counting the occurrences of words in a sentence or phrase.
def count(phrase):
phrase = phrase.lower()
lst = phrase.split()
dct = dict()
for word in lst:
if word not in dct:
dct[word] = 1
else:
dct[word] += 1
return dct
Note that right now I'm not concerned with punctuation, numbers, or stopwords.
What I want to do is write a class to take an object and do the same thing. Here's what I have so far, but now I'm stuck on how to pass the list on to the loop that counts and creates the dictionary:
class Count:
dct = dict()
def __init__(self, phrase):
self.phrase = phrase
def lst(self):
return self.phrase.lower().split()

You can use defaultdict for convenience.
from collections import defaultdict as ddict
class Count:
def __init__(self, phrase):
self.phrase = phrase
self.dct = ddict(int)
def lst(self):
return self.phrase.lower().split()
# you can do it this way
def make_dict(self):
lst_words = self.lst()
for word in lst_words:
self.dct[word] += 1
c = Count("What I want to do is write a class to take an object and do the same thing. Here's what I have so far, but now I'm stuck on how to pass the list on to the loop that counts and creates the dictionary")
c.make_dict() # make a dictonary out of words
c.dct # display the contents stored in the dict
Output:
defaultdict(int,
{'what': 2,
'i': 2,
'want': 1,
'to': 4,
'do': 2,
'is': 1,
'write': 1,
'a': 1,
'class': 1,
'take': 1,
'an': 1,
'object': 1,
'and': 2,
'the': 4,
'same': 1,
'thing.': 1,
"here's": 1,
'have': 1,
'so': 1,
'far,': 1,
'but': 1,
'now': 1,
"i'm": 1,
'stuck': 1,
'on': 2,
'how': 1,
'pass': 1,
'list': 1,
'loop': 1,
'that': 1,
'counts': 1,
'creates': 1,
'dictionary': 1})
Update:
By the courtesy Roelant of there is one more way to do the same thing, using Counter.
from collections import Counter
class Count:
def __init__(self, phrase):
self.phrase = phrase
self.dct = None
def lst(self):
return self.phrase.lower().split()
# you can do it this way also.
def make_dict(self):
lst_words = self.lst()
self.dct = Counter(lst_words)

Just note that you can use collections.Counter for this; maybe you know this allready, but I'll put this option here anyways.
>>> import collections
>>> phrase = "What I want to do is write a class to take an object and do the same thing. Here's what I have so far, but now I'm stuck on how to pass the list on to the loop that counts and creates the dictionary."
>>> c = collections.Counter(phrase.lower().split())
>>> for k, v in c.most_common():
... print(v, k)
4 to
4 the
2 what
2 i
2 do
2 and
2 on
1 want
1 is
1 write
1 a
1 class
1 take
1 an
1 object
1 same
1 thing.
1 here's
1 have
1 so
1 far,
1 but
1 now
1 i'm
1 stuck
1 how
1 pass
1 list
1 loop
1 that
1 counts
1 creates
1 dictionary.
There is no big advantage of wrapping this in a class, but you could have something simple like this that subclasses Counter:
import collections
import string
class MyCounter(collections.Counter):
def __init__(self, phrase):
# do some preprocessing, for example removing puntuation
phrase = ''.join(
c.lower()
for c in phrase
if c not in string.punctuation)
super().__init__(phrase.split())
The use would be similar as before:
>>> mc = MyCounter(phrase) # 'lower' and 'split' is done inside
>>> for k, v in mc.most_common():
... print(v, k)
4 to
4 the
2 what
2 i
2 do
2 and
2 on
1 want
1 is
1 write
1 a
1 class
1 take
1 an
1 object
1 same
1 thing
1 heres
1 have
1 so
1 far
1 but
1 now
1 im
1 stuck
1 how
1 pass
1 list
1 loop
1 that
1 counts
1 creates
1 dictionary

Related

Python: Print stuff in dictionary line by line

I have an assignment where If I input a string for example
food food games hi food
it would print out like this:
food: 3
games: 1
hi: 1
the code I made right now is
def count_word(string1):
counts = {}
words = string1.split()
for word in words:
if word in counts:
counts[word] += 1
else:
counts[word] = 1
return counts
string1 = str(input())
print(count_word(string1))
If I input the same string as above it prints out:
{'food': 3, 'games': 1, 'hi': 1}
how do I make it so it prints out like this:
food: 3
games: 1
hi: 1
Following should work:
d = {'food': 3, 'games': 1, 'hi': 1} # generated by counter function
for word, count in d.items():
print(f'{word}: {count}')
if you want this sorted alphabetically replace d.items() with sorted(d.items()).
dsic = {'food': 3, 'games': 1, 'hi': 1}
you could try somthing like this maybe:
import json
print(json.dumps(dsic, sort_keys=False, indent=4))
or this one:
for i,x in dsic.items():
print(str(i)+': ' + str(x))
You should do your research before posting on stackoverflow.
For now, Hint is to use two loops. One for the list and one for printing the string.

How to find highest number from the vector provided?

Say, a dictionary is provided with certain values.
How to find the highest number ?
Input
d1 = {1: 1, 2: 6, 3: 7, 4: 1, 5: 3}
vector = 5
d1 = {1: 1, 2: 6, 3: 7, 4: 1, 5: 3}
vector = 5
l1 = list(td.values())
Based on vector value, it should print output.
vector is 5, so sum of the dict-values to form vector is 3,1,1
Corresponding keys are 5,4,1
so, the output should be 541 but slight change here.
Since value '1' is associated with multiple keys, it should pick up highest key,
so, output should be 544 instead of 541 (For above input, to brief about combinations without considering '1+1+1+1+1' to '44444')
Another example
d1 = {1: 1, 2: 6, 3: 7, 4: 1, 5: 3}
vector = 7
Possible combinations:
3 # --> Key of 7
21 # --> Key of 6 & 1 (6+1 = 7)
24 # --> Key of 6 & 1 (6+1 = 7)
12 # --> Key of 1 & 6 (1+6 = 7)
42 # --> Key of 1 & 6 (1+6 = 7)
Output : 42 (Highest number)
Another
d1 = {1:9,2:4,3:2,4:2,5:6,6:3,7:2,8:2,9:1}
vector = 5
here, it would be 1+2+2 (988).
But, '1' can also be added 5 times to form vector 5,
which would be '99999'
Since #Patrick Artner requested for minimal reproducible example, posting this though doesn't work as expected.
from itertools import combinations
def find_sum_with_index(l1, vector):
index_vals = [iv for iv in enumerate(l1) if iv[1] < target]
for r in range(1, len(index_vals) + 1):
for perm in combinations(index_vals, r):
if sum([p[1] for p in perm]) == target:
yield perm
d1 = {1: 1, 2: 6, 3: 7, 4: 1, 5: 3}
vector=5
l1=list(d1.values())
for match in find_sum_with_index(l1, vector):
print(dict(match))
Is there any specific algorithm to be chosen for these kind of stuffs ?
Similar to the other answer but allowing repeatedly using the same keys to get the max number of keys which values sum up to vector:
d1 = {1: 1, 2: 6, 3: 7, 4: 1, 5: 3}
vector = 7
#create a dict that contains value -> max-key for that value
d2 = {}
for k,v in d1.items():
d2[v] = max(d2.get(v,-1), k)
def mod_powerset(iterable,l):
# uses combinations_with_replacement to allow multiple usages of one value
from itertools import chain, combinations_with_replacement
s = list(set(iterable))
return chain.from_iterable(combinations_with_replacement(s, r) for r in range(l))
# create all combinations that sum to vector
p = [ s for s in mod_powerset(d1.values(),vector//min(d1.values())+1) if sum(s) == vector]
print(p)
# sort combinations by length then value descending and take the max one
mp = max( (sorted(y, reverse=True) for y in p), key=lambda x: (len(x),x))
# get the correct keys to be used from d2 dict
rv = [d2[num] for num in mp]
# sort by values, biggest first
rv.sort(reverse=True)
# solution
print(''.join(map(str,rv)))
Original powerset - see itertools-recipes.
There are some steps involved, see documentation in comments in code:
d1 = {1: 1, 2: 6, 3: 7, 4: 1, 5: 3}
vector = 7
# create a dict that contains value -> sorted key-list, used to get final keys
from collections import defaultdict
d2 = defaultdict(list)
for k,v in d1.items():
d2[v].append(k)
for k,v in d2.items():
d2[k] = sorted(v, reverse=True)
from itertools import chain, combinations
def powerset(iterable):
"see itertools: powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"
s = list(iterable)
return chain.from_iterable(combinations(s, r) for r in range(len(s)+1))
# create all combinations that sum to vector
p = [ s for s in powerset(d1.values()) if sum(s) == vector]
# sort combinations by length then value descending and take the max one
mp = max( (sorted(y, reverse=True) for y in p), key=lambda x: (len(x),x))
# get the correct keys to be used from d2 dict
rv = []
for num in mp:
rv.append(d2[num][0])
# remove used key from list
d2[num][:] = d2[num][1:]
# sort by values, biggest first
rv.sort(reverse=True)
# solution
print(''.join(map(str,rv)))
For powerset - see itertools-recipes.

How to make combination, if any one of the element exists that can be added to make sum?

To find all possible combinations that can be added to make given sum.
Combinations can be formed with multiple elements and also if any single element exists.
Input:
l1 = [9,1, 2, 7, 6, 1, 5]
target = 8
**Constraints**
1<=(len(l1))<=500000
1<=each_list_element<=1000
Output:
Format : {index:element}
{1:1, 5:1, 4:6} #Indices : 1,5,4 Elements : 1,1,6
{1:1, 2:2, 6:5}
{5:1, 2:2, 6:5}
{1:1, 3:7}
{5:1, 3:7}
{2:2, 4:6}
More Scenarios:
Input = [4,6,8,5,3]
target = 3
Output {4:3}
Input = [4,6,8,3,5,3]
target = 3
Output {5:3,3:3}
Input = [1,2,3,15]
target = 15
Output {3:15}
Below code covers for all above scenarios.
Scenarios to be handled, along with above.
Input =[1,6,7,1,3]
target=5
Output={0:1,3:1,4:3} , {0:1,0:1,4:3}, {3:1,3:1,4:3}
Input=[9,6,8,1,7]
target=5
Output={3:1,3:1,3:1,3:1,3:1}
As suggested by #Chris Doyle in previous question, will be using that code.
(How to find indices and combinations that adds upto given sum?)
Code:
from itertools import combinations
def find_sum_with_index(l1, target):
index_vals = [iv for iv in enumerate(l1) if iv[1] < target]
for r in range(1, len(index_vals) + 1):
for perm in combinations(index_vals, r):
if sum([p[1] for p in perm]) == target:
yield perm
l1 = [9, 1, 2, 7, 6, 1, 5]
target = 8
for match in find_sum_with_index(l1, target):
print(dict(match))
You can use dictionary comprehension
from itertools import combinations
l1 = [9,1, 2, 7, 6, 1, 5]
target = 8
for i in range(len(l1)):
for c in combinations(l1,i):
if sum(c) == target:
res = { i:x for i,x in enumerate(c)}
print(res)

Using python need to get the substrings

Q)After executing the code Need to print the values [1, 12, 123, 2, 23, 3, 13], but iam getting [1, 12, 123, 2, 23, 3]. I have missing the letter 13. can any one tell me the reason to overcome that error?
def get_all_substrings(string):
length = len(string)
list = []
for i in range(length):
for j in range(i,length):
list.append(string[i:j+1])
return list
values = get_all_substrings('123')
results = list(map(int, values))
print(results)
count = 0
for i in results:
if i > 1 :
if (i % 2) != 0:
count += 1
print(count)
Pretty straight forward issue in your nested for loops within get_all_substrings(), lets walk it!
You are iterating over each element of your string 123:
for i in range(length) # we know length to be 3, so range is 0, 1, 2
You then iterate each subsequent element from the current i:
for j in range(i,length)
Finally you append a string from position i to j+1 using the slice operator:
list.append(string[i:j+1])
But what exactly is happening? Well we can step through further!
The first value of i is 0, so lets skip the first for, go to the second:
for j in range(0, 3): # i.e. the whole string!
# you would eventually execute all of the following
list.append(string[0:0 + 1]) # '1'
list.append(string[0:1 + 1]) # '12'
list.append(string[0:2 + 1]) # '123'
# but wait...were is '13'???? (this is your hint!)
The next value of i is 1:
for j in range(1, 3):
# you would eventually execute all of the following
list.append(string[1:1 + 1]) # '2'
list.append(string[1:2 + 1]) # '23'
# notice how we are only grabbing values of position i or more?
Finally you get to i is 2:
for j in range(2, 3): # i.e. the whole string!
# you would eventually execute all of the following
list.append(string[2:2 + 1]) # '3'
I've shown you what is happening (as you've asked in your question), I leave it to you to devise your own solution. A couple notes:
You need to look at all index combinations from position i
Dont name objects by their type (i.e. dont name a list object list)
I would try something like this using itertools and powerset() recipe
from itertools import chain, combinations
def powerset(iterable):
s = list(iterable)
return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))
output = list(map(''.join, powerset('123')))
output.pop(0)
Here is another option, using combinations
from itertools import combinations
def get_sub_ints(raw):
return [''.join(sub) for i in range(1, len(raw) + 1) for sub in combinations(raw, i)]
if __name__ == '__main__':
print(get_sub_ints('123'))
>>> ['1', '2', '3', '12', '13', '23', '123']

Counter class extension

I am having a problem finding an elegant way to create a Counter() class that can:
Feed in arbitrary number of keys and return a nested dictionary based on this list of keys.
Increment for this nested dictionary is arbitrary as well.
For example:
counter = Counter()
for line in fin:
if a:
counter.incr(key1, 1)
else:
counter.incr(key2, key3, 2)
print counter
Ideally I am hoping to get the result looks like: {key1 : 20, {key2 : {key3 : 40}}}. But I am stuck in creating this arbitrary nested dictionary from list of keys. Any help is appreciated.
you can subclass dict and create your own nested structure.
here's my attempt at writing such class :
class Counter(dict):
def incr(self, *args):
if len(args) < 2:
raise TypeError, "incr() takes at least 2 arguments (%d given)" %len(args)
curr = self
keys, count = args[:-1], args[-1]
for depth, key in enumerate(keys, 1):
if depth == len(keys):
curr[key] = curr.setdefault(key, 0) + count
else:
curr = curr.setdefault(key, {})
counter = Counter()
counter.incr('key1', 1)
counter.incr('key2', 'key3', 2)
counter.incr('key1', 7)
print counter #{'key2': {'key3': 2}, 'key1': 8}
There are two possibilities.
First, you can always fake the nested-keys thing by using a flat Counter with a "key path" made of tuples:
counter = Counter()
for line in fin:
if a:
counter.incr((key1,), 1)
else:
counter.incr((key2, key3), 2)
But then you'll need to write a str-replacement—or, better, a wrapper class that implements __str__. And while you're at it, you can easily write an incr wrapper that lets you use exactly the API you wanted:
def incr(self, *args):
super().incr(args[:-1], args[-1])
Alternatively, you can build your own Counter-like class on top of a nested dict. The code for Counter is written in pure Python, and the source is pretty simple and readable.
From, your code, it looks like you don't have any need to access things like counter[key2][key3] anywhere, which means the first is probably going to be simpler and more appropriate.
The only type of value that can exist in a Counter object is an int, you will not be able to represent a nested dictionary with a Counter.
Here is one way to do this with a normal dictionary (counter = {}). First, to update increment the value for a single key:
counter[key1] = counter.setdefault(key1, 0) + 1
Or for an arbitrary list of keys to create the nested structure:
tmp = counter
for key in key_list[:-1]:
tmp = tmp.setdefault(key, {})
tmp[key_list[-1]] = tmp.setdefault(key_list[-1], 0) + 1
I would probably turn this into the following function:
def incr(counter, val, *keys):
tmp = counter
for key in keys[:-1]:
tmp = tmp.setdefault(key, {})
tmp[keys[-1]] = tmp.setdefault(keys[-1], 0) + val
Example:
>>> counter = {}
>>> incr(counter, 1, 'a')
>>> counter
{'a': 1}
>>> incr(counter, 2, 'a')
>>> counter
{'a': 3}
>>> incr(counter, 2, 'b', 'c', 'd')
>>> counter
{'a': 3, 'b': {'c': {'d': 2}}}
>>> incr(counter, 3, 'b', 'c', 'd')
>>> counter
{'a': 3, 'b': {'c': {'d': 5}}}

Resources