Python: Subclassing a dict to have two keys and a defaultvalue - python-3.x

following the two very readable tutorials 1 and 2, I would like to create a dictionary with two keys that gives a defaultvalue in case the key-pair does not exist.
I managed two fullfill the first condition with
from collections import defaultdict
class DictX(dict):
def __getattr__(self, key1 = None, key2 = None):
try:
return self[(key1,key2)]
# This in idea of how to implement the defaultdict. But it does not seem to work
# except KeyError as k::
# self[(key1,key2)] = 0.
# return self[(key1,key2)]
## or just return 0
except KeyError as k:
raise AttributeError(k)
def __setattr__(self, key1, key2, value):
self[(key1,key2)] = value
def __delattr__(self, key):
try:
del self[key]
except KeyError as k:
raise AttributeError(k)
def __repr__(self):
return '<DictX ' + dict.__repr__(self) + '>'
sampledict = DictX()
sampledict[3,5] = 5
sampledict[1,4] = 4
print("Checking the dict ",sampledict[1,4])
# This line is going to throw an error
print("Checking the default dict ",sampledict[3,6])
How do I code the defaultvalue behaviour?
Pro-Question:
If I just give one value sampledict[1,] or sampledict[1,:], I would like to get a list of all key - value pairs that start with 1. Is that possible?

Related

Overriding the `[]` operator in a dictionary of dictionaries

I am trying to implement a class which provides a dictionary with a default value:
from copy import deepcopy
class Dict:
def __init__(self, default) -> None:
self.default = default
self.values = {}
def __getitem__(self, key):
return self.values[key] if key in self.values else deepcopy(self.default)
def __setitem__(self, key, value):
self.values[key] = value
It works as expected when the default value is "plain" (42 in the example below):
KEY = 'k'
d = Dict(42)
print(d[KEY]) # prints 42
d[KEY] = 53
print(d[KEY]) # prints 53
But it doesn't work as expected when the default value is by itself a Dict object:
KEY1 = 'k1'
KEY2 = 'k2'
d = Dict(Dict(42))
print(d[KEY1][KEY2]) # prints 42
d[KEY1][KEY2] = 53
print(d[KEY1][KEY2]) # prints 42
I have tried to debug that by adding various printouts within the class functions, but I haven't been able to figure it out.
What exactly am I doing wrong here?
The immediate problem is in your __getitem__ method:
def __getitem__(self, key):
return self.values[key] if key in self.values else deepcopy(self.default)
Because you're only returning a value here, but not actually setting it, the returned value isn't useful. If you request a key that doesn't exist, the method is equivalent to:
def __getitem__(self, key):
return deepcopy(self.default)
So when you write:
d[KEY1][KEY2] = 53
You're successfully setting a value for KEY2, but only in the dictionary returned by __getitem__. You probably want to use the dictionary setdefault method, which will set the key in self.values if it doesn't exist (in addition to returning it):
def __getitem__(self, key):
return self.values.setdefault(key, deepcopy(self.default))
With this implementation:
>>> KEY1 = 'k1'
>>> KEY2 = 'k2'
>>> d = Dict(Dict(42))
>>> print(d[KEY1][KEY2])
42
>>> d[KEY1][KEY2] = 53
>>> print(d[KEY1][KEY2])
53
But as I mentioned in my comment, a better solution is just to use the existing defaultdict implementation:
>>> from collections import defaultdict
>>> d = defaultdict(lambda: defaultdict(lambda: 42))
>>> d[KEY1][KEY2]
42
>>> d[KEY1][KEY2]=53
>>> d[KEY1][KEY2]
53
(The difference between defaultdict and the class you implemented is that the default must be a callable. Here's I've used lambda expressions, but you could also use actual functions, classes, etc).
Since you are using deepcopy so it creates a copy without reference.
You have to return the object without deepcopy.
def __getitem__(self, key):
return self.values[key] if key in self.values else self.default
Now it should work as expected.

Problem retrieving individual objects in pickled dictionary (Python 3)

My program stores "food" objects that are pickled into a dictionary and stored in a csv file, which acts as a database. I want to retrieve individual food objects on command from the dictionary, but when I attempt to I seem to only retrieve the last object in the dictionary.
import pickle
class Food(object):
fooddict = dict({})
def __init__(self, name, weight, calories, time):
self.name = name
self.weight = weight
self.calories = calories
self.time = time
def __str__(self):
return '{self.name}s'.format(self=self) + \
' weigh {self.weight}'.format(self=self) + \
' ounces, contain {self.calories}'.format(self=self) + \
' calories, and stay fresh for {self.time}'.format(self=self) + \
' days.'
#classmethod
def createFoodInput(cls):
name = str(input("Enter the name: "))
weight = float(input("Enter the weight: "))
calories = float(input("Enter the calories: "))
time = float(input("Enter how many days it can store for: "))
return cls(name, weight, calories, time)
def storeFoodDict(f):
fooddict = Food.retreiveFoodDict()
if fooddict == "Empty File":
fooddict = dict({f.name: f})
with open("food.csv", 'wb') as filewriter:
try:
pickle.dump(fooddict, filewriter)
except:
print("Error storing pickled dictionary")
else:
food_found = False
for key in list(fooddict):
if key.__eq__(f.name):
print("Food already stored!")
food_found = True
if not food_found:
fooddict.update({f.name: f})
with open("food.csv", 'wb') as filewriter:
try:
pickle.dump(fooddict, filewriter)
except:
print("Error storing pickled dictionary")
#classmethod
def retreiveFoodDict(cls):
with open("food.csv", 'rb') as filereader:
try:
fooddict = pickle.load(filereader)
return fooddict
except EOFError:
return("Empty File")
def findFood(title):
fooddict = Food.retreiveFoodDict()
for key in list(fooddict):
if key.__eq__(title):
continue
return fooddict[key]
s = "apple"
n = findFood(s) #does not work, it returns banana instead of apple
#which is really just grabbing whatever is the
#last object in the dictionary
m = findFood("banana") #seems to work, but only because banana is the
#last object in the dictionary
print(n) #should print an apple "food object" but instead prints a banana
print(str(m.calories)) #works, but if I said n.calories it would still print
#m.calories instead
p = Food.retreiveFoodDict() #seems to work and retrieve the dictionary
print(str(p)) #also seems to work of course
Console Output:
bananas weigh 5.0 ounces, contain 120.0 calories, and stay fresh for 3.0 days.
120.0
{'apple': <main.Food object at 0x00D2C2E0>, 'banana': <main.Food object at 0x00D36D00>}
The dictionary contains 2 food objects (apple and banana), but the print(n) statement shows a banana, not an apple. Can anyone point out why this is or what I am misunderstanding? Thank you so much!
I found the answer to my own problem. I was misusing the continue in my findFood function.
This code solved my issues.
def getFood(food_name):
fooddict = Food.retreiveFoodDict()
for key in list(fooddict):
if key.__eq__(food_name):
return fooddict[key]
What this function does is simply retrieve a dictionary of objects in a csv file and iterates through the keys until the passed key name is located. If found, the proper key name will be returned as a food object. My original mistake was using the "continue" keyword to stop the for-loop, which was returning the object directly after the one we wanted.

pd.rename key KeyError: 'New_Name'

Edit 12/07/19: The problem was not in fact with pd.rename fuction but the fact that I did not return from the function the pandas dataframe and as a result the column change did not exist when printing. i.e.
def change_column_names(as_pandas, old_name, new_name):
as_pandas.rename(columns={old_name: new_name}, inplace=)
return as_pandas <- This was missing*
Please see the user comment below to uptick them for finding this error for me.
Alternatively, you can continue reading.
The data can be downloaded from this link, yet I have added a sample dataset. The formatting of the file is not a typical CSV file and I believe this may have been an assessment piece and is related to Hidden Decision Tree article. I have given the portion of the code as it solves the issues surrounding the format of the text file as mentioned above and allows the user to rename the column.
The problem occured when I tried to assign create a re-naming function:
def change_column_names(as_pandas, old_name, new_name):
as_pandas.rename(columns={old_name: new_name}, inplace=)
However, it seem to work when I set the variable names inside rename function.
def change_column_names(as_pandas):
as_pandas.rename(columns={'Unique Pageviews': 'Page_Views'}, inplace=True)
return as_pandas
Sample Dataset
Title URL Date Unique Pageviews
oupUrl=tutorials 18-Apr-15 5608
"An Exclusive Interview with Data Expert, John Bottega" http://www.datasciencecentral.com/forum/topics/an-exclusive-interview-with-data-expert-john-bottega?groupUrl=announcements 10-Jun-14 360
Announcing Composable Analytics http://www.datasciencecentral.com/forum/topics/announcing-composable-analytics 15-Jun-14 367
Announcing the release of Spark 1.5 http://www.datasciencecentral.com/forum/topics/announcing-the-release-of-spark-1-5 12-Sep-15 156
Are Extreme Weather Events More Frequent? The Data Science Answer http://www.datasciencecentral.com/forum/topics/are-extreme-weather-events-more-frequent-the-data-science-answer 5-Oct-15 204
Are you interested in joining the University of California for an empiricalstudy on 'Big Data'? http://www.datasciencecentral.com/forum/topics/are-you-interested-in-joining-the-university-of-california-for-an 7-Feb-13 204
Are you smart enough to work at Google? http://www.datasciencecentral.com/forum/topics/are-you-smart-enough-to-work-at-google 11-Oct-15 3625
"As a software engineer, what's the best skill set to have for the next 5-10years?" http://www.datasciencecentral.com/forum/topics/as-a-software-engineer-what-s-the-best-skill-set-to-have-for-the- 12-Feb-16 2815
A Statistician's View on Big Data and Data Science (Updated) http://www.datasciencecentral.com/forum/topics/a-statistician-s-view-on-big-data-and-data-science-updated-1 21-May-14 163
A synthetic variance designed for Hadoop and big data http://www.datasciencecentral.com/forum/topics/a-synthetic-variance-designed-for-hadoop-and-big-data?groupUrl=research 26-May-14 575
A Tough Calculus Question http://www.datasciencecentral.com/forum/topics/a-tough-calculus-question 10-Feb-16 937
Attribution Modeling: Key Analytical Strategy to Boost Marketing ROI http://www.datasciencecentral.com/forum/topics/attribution-modeling-key-concept 24-Oct-15 937
Audience expansion http://www.datasciencecentral.com/forum/topics/audience-expansion 6-May-13 223
Automatic use of insights http://www.datasciencecentral.com/forum/topics/automatic-use-of-insights 27-Aug-15 122
Average length of dissertations by higher education discipline. http://www.datasciencecentral.com/forum/topics/average-length-of-dissertations-by-higher-education-discipline 4-Jun-15 1303
This is the full code that produces the Key Error:
def change_column_names(as_pandas):
as_pandas.rename(columns={'Unique Pageviews': 'Page_Views'}, inplace=True)
def change_column_names(as_pandas, old_name, new_name):
as_pandas.rename(columns={old_name: new_name}, inplace=True)
def change_column_names(as_pandas):
as_pandas.rename(columns={'Unique Pageviews': 'Page_Views'},
inplace=True)
def open_as_dataframe(file_name_in):
reader = pd.read_csv(file_name_in, encoding='windows-1251')
return reader
# Get each column of data including the heading and separate each element
i.e. Title, URL, Date, Page Views
# and save to string_of_rows with comma separator for storage as a csv
# file.
def get_columns_of_data(*args):
# Function that accept variable length arguments
string_of_rows = str()
num_cols = len(args)
try:
if num_cols > 0:
for number, element in enumerate(args):
if number == (num_cols - 1):
string_of_rows = string_of_rows + element + '\n'
else:
string_of_rows = string_of_rows + element + ','
except UnboundLocalError:
print('Empty file \'or\' No arguments received, cannot be zero')
return string_of_rows
def open_file(file_name):
try:
with open(file_name) as csv_file_in, open('HDT_data5.txt', 'w') as csv_file_out:
csv_read = csv.reader(csv_file_in, delimiter='\t')
for row in csv_read:
try:
row[0] = row[0].replace(',', '')
csv_file_out.write(get_columns_of_data(*row))
except TypeError:
continue
print("The file name '{}' was successfully opened and read".format(file_name))
except IOError:
print('File not found \'OR\' Not in current directory\n')
# All acronyms used in variable naming correspond to the function at time
# of return from function.
# csv_list being a list of the v file contents the remainder i.e. 'st' of
# csv_list_st = split_title().
def main():
open_file('HDTdata3.txt')
multi_sets = open_as_dataframe('HDT_data5.txt')
# change_column_names(multi_sets)
change_column_names(multi_set, 'Old_Name', 'New_Name')
print(multi_sets)
main()
I cleaned up your code so it would run. You were changing the column names but not returning the result. Try the following:
import pandas as pd
import numpy as np
import math
def set_new_columns(as_pandas):
titles_list = ['Year > 2014', 'Forum', 'Blog', 'Python', 'R',
'Machine_Learning', 'Data_Science', 'Data',
'Analytics']
for number, word in enumerate(titles_list):
as_pandas.insert(len(as_pandas.columns), titles_list[number], 0)
def title_length(as_pandas):
# Insert new column header then count the number of letters in 'Title'
as_pandas.insert(len(as_pandas.columns), 'Title_Length', 0)
as_pandas['Title_Length'] = as_pandas['Title'].map(str).apply(len)
# Although it is log, percentage of change is inverse linear comparison of
#logX1 - logX2
# therefore you could think of it as the percentage change in Page Views
# map
# function allows for function to be performed on all rows in column
# 'Page_Views'.
def log_page_view(as_pandas):
# Insert new column header
as_pandas.insert(len(as_pandas.columns), 'Log_Page_Views', 0)
as_pandas['Log_Page_Views'] = as_pandas['Page_Views'].map(lambda x: math.log(1 + float(x)))
def change_to_numeric(as_pandas):
# Check for missing values then convert the column to numeric.
as_pandas = as_pandas.replace(r'^\s*$', np.nan, regex=True)
as_pandas['Page_Views'] = pd.to_numeric(as_pandas['Page_Views'],
errors='coerce')
def change_column_names(as_pandas):
as_pandas.rename(columns={'Unique Pageviews': 'Page_Views'}, inplace=True)
return as_pandas
def open_as_dataframe(file_name_in):
reader = pd.read_csv(file_name_in, encoding='windows-1251')
return reader
# Get each column of data including the heading and separate each element
# i.e. Title, URL, Date, Page Views
# and save to string_of_rows with comma separator for storage as a csv
# file.
def get_columns_of_data(*args):
# Function that accept variable length arguments
string_of_rows = str()
num_cols = len(args)
try:
if num_cols > 0:
for number, element in enumerate(args):
if number == (num_cols - 1):
string_of_rows = string_of_rows + element + '\n'
else:
string_of_rows = string_of_rows + element + ','
except UnboundLocalError:
print('Empty file \'or\' No arguments received, cannot be zero')
return string_of_rows
def open_file(file_name):
import csv
try:
with open(file_name) as csv_file_in, open('HDT_data5.txt', 'w') as csv_file_out:
csv_read = csv.reader(csv_file_in, delimiter='\t')
for row in csv_read:
try:
row[0] = row[0].replace(',', '')
csv_file_out.write(get_columns_of_data(*row))
except TypeError:
continue
print("The file name '{}' was successfully opened and read".format(file_name))
except IOError:
print('File not found \'OR\' Not in current directory\n')
# All acronyms used in variable naming correspond to the function at time
# of return from function.
# csv_list being a list of the v file contents the remainder i.e. 'st' of
# csv_list_st = split_title().
def main():
open_file('HDTdata3.txt')
multi_sets = open_as_dataframe('HDT_data5.txt')
multi_sets = change_column_names(multi_sets)
change_to_numeric(multi_sets)
log_page_view(multi_sets)
title_length(multi_sets)
set_new_columns(multi_sets)
print(multi_sets)
main()

Is there a way to extract a key from a dictionary by using a filtering method

I set up a class of collection, and I created an object containing a dictionary.
I created a method called pluck(self, key) that should return a new Collection with all the values of the key I sent, and I'm used with another method's that I created before (map and filter - Both methods of collection).
class Collection(object):
def __init__(self,iterable = None):
# param iterable: imutable collection
if iterable == None:
self.Iterable = ()
else:
self.Iterable = tuple(iterable)
return None
def map(self, *callbacks):
'''
:param callbacks: List of function to apply on each element in 'self.Iterable'
:return: New mapped collection
'''
c =Collection(self.Iterable)
tmp = Collection()
for item in callbacks:
for item2 in c.Iterable:
tmp = tmp.append(item(item2))
c = Collection(tmp.Iterable)
return c
def filter(self, *callbacks):
'''
:param callbacks: List of function to apply on each element in 'self.Iterable'
:return: New filtered collection
'''
return Collection(item for item in self.Iterable if CallbacksFilter(item, callbacks) == True)
def CallbacksFilter(item, callback):
for f in callback:
if f(item) == False:
return False
return True
when I try to run pluck method:
def pluck(self, key):
return self.values() if type(self.first()) is not dict else Collection(self.Iterable).filter(self.map(lambda x, y: dict([(i,x[i]) for i in x if i in set(y)])))
c3 = Collection([{'name': 'Joe', 'age': 20}, {'name': 'Jane', 'age': 13}])
c3.pluck('age')
I expect to output " Collection(20,13)" but I'm getting this error:
TypeError: () missing 1 required positional argument: 'y'
How can I fix this error?
Note: If the internal elements are not dictionaries, then a copy of the current collection returned.
The method I wrote is incorrect and does not return any result, as I mentioned above.
def pluck(self, key):
return self.values() if type(self.first()) is not dict else Collection(self.Iterable).filter(self.map(lambda x, y: dict([(i,x[i]) for i in x if i in set(y)])))
The Map method returns the result by applying the lambda function given to it on each dictionary of the collection.
def map(self, *callbacks):
'''
:param callbacks: List of function to apply on each element in 'self.Iterable'
:return: New mapped collection
'''
c =Collection(self.Iterable)
tmp = Collection()
for item in callbacks:
for item2 in c.Iterable:
tmp = tmp.append(item(item2))
c = Collection(tmp.Iterable)
return c
So when we run the following code:
def pluck(self, key):
'''
:param key: Dictionary key (13)
:return: Return a new Collection with value of each key.
'''
return "Collection{}".format(Collection(self.map(lambda index: index[key])).Iterable)
c3 = Collection([{'name': 'Joe', 'age': 20}, {'name': 'Jane', 'age': 13}])
c3.pluck('age')
we get the right result:
Collection(20,13)

How to print a dictionary made up of lines from a file in python3?

Any help is much appreciated! Thanks
I have a dictionary made up of lines extracted from a file like this:
Danny Shalev, 050-1111111, aaa#aaa.com
Gil Rom, 050-2222222, bbb#bbb.com
Tal Yakir, 050-3333333, ccc#ccc.com
Edit: my goal is for the dict to be printed out like this:
Danny Shalev - 050-1111111 - aaa#aaa.com
Gil Rom - 050-2222222 - bbb#bbb.com
Tal Yakir - 050-3333333 - ccc#ccc.com
The first name is the key, and the rest are the values.
I have written the code for converting the file lines into a dict, and I want to print out all values from my dictionary in a specific format, which would be line by line, separated by "-". I have already written the function print_person, to print it out in this format, I just want to apply this function (from the previous class) into my dict.
Here's the code:
class Person:
def __init__(self, name, phone,email):
self.name = name
self.phone = phone
self.email = email
def print_person(self):
return (str(self.name)+" - "+str(self.phone)+" - "+str(self.email))
class AddressBook:
def __init__ (self):
self.contactsdict = {}
def add(self, newContact):
self.contactsdict[newContact.name] = newContact.phone + " - " + newContact.email
def search(self, name):
return (self.contactsdict.get(name))
def addFromFile(self, fileName):
f = open("contacts.txt")
for line in f:
(key, val, val2) = line.split(",")
self.contactsdict[key] = val + " - " + val2
f.close
def printAddressBook(self):
for key, val in self.contactsdict.items():
Person.print_person
address = AddressBook() # make an instance
p1=Person("Danny Shalev","050-1111111","aaa#aaa.com")
print (p1.print_person())
address.add(p1)
address.addFromFile("contacts.txt")
address.printAddressBook()
I believe the problem is in this section, since I don't know how to use the method:
def printAddressBook(self):
for key, val in self.contactsdict.items():
Person.print_person
This
for key, val in self.contactsdict.items():
Person.print_person
deconstructs all your dictionary entries into 2 variables, one the key, the other the value. The second line is incorrrect - Person is your class, you need an instance of the class to use the defined print method on it.
You can call val.print_person() on each instance of the class Person to print each IF you store Persons in your inner dictionary. Classes are "templates" how a class is constructed - the instance must be used to call its functions. Currently your code only stores string in the internal dictionary.
To add persons to your internal Dict replace
for line in f:
(key, val, val2) = line.split(",")
self.contactsdict[key] = val + " - " + val2
with
for line in f:
(key, val, val2) = line.split(",")
self.contactsdict[key] = Person(key,val,val2) # create instances of Persons
# and store them in the dictionary by there name
# you get collisions if your file contains persons with identical names
Fixed code (this and some other errors marked with comments):
class Person:
def __init__(self, name, phone,email):
self.name = name
self.phone = phone
self.email = email
def print_person(self):
return (str(self.name) + " - " + str(self.phone) + " - " + str(self.email))
class AddressBook:
def __init__(self):
self.contactsdict = {}
def add(self, newContact):
self.contactsdict[newContact.name] = newContact # store the Person instance
# under its name as key
def search(self, name):
return (self.contactsdict.get(name))
def addFromFile(self, fileName):
f = open("contacts.txt")
for line in f:
(key, val, val2) = line.split(",")
self.add(Person(key,val,val2)) # create Person and use own add-Function
# to add it to internal dictionary
f.close
def printAddressBook(self):
for key, val in self.contactsdict.items():
print( val.print_person() ) # you need to print the output
# of print_person() - it currently only
# returns the string and does not print it
address = AddressBook() # make an instance
p1 = Person("Danny Shalev","050-1111111","aaa#aaa.com")
print(p1.print_person())
address.add(p1)
address.addFromFile("contacts.txt")
address.printAddressBook()
Search returns a person, so you can use it to change persons inside your dict:
print("")
p = address.search("Danny Shalev")
p.name = "Hallo" # change the name of the found person (the key will still be "Danny Shalev")
address.printAddressBook()
Output:
Danny Shalev - 050-1111111 - aaa#aaa.com
Gil Rom - 050-2222222 - bbb#bbb.com
Tal Yakir - 050-3333333 - ccc#ccc.com
Hallo - 050-1111111 - aaa#aaa.com # after change of searched person
Gil Rom - 050-2222222 - bbb#bbb.com
Tal Yakir - 050-3333333 - ccc#ccc.com

Resources