Question:
names = ['Edogawa, Conan, 100', 'Kaitu, kid, 90', 'Mouri, Ran, 70']
I need to store this information in a dictionary like this:
{'Edogawa Conan': 100, 'Kaitu kid': 90, 'Mouri Ran': 70}
I tried this code but it's too long and not efficient.
# Split each "Family, Given, Score" record into its three fields. Every field
# is stripped because splitting on ',' alone leaves the space that follows each
# comma attached to the next field, which would put a double space in the key.
names1 = [field.strip() for field in names[0].split(',')]
names2 = [field.strip() for field in names[1].split(',')]
names3 = [field.strip() for field in names[2].split(',')]
names_dict = {}
# Key is "Family Given"; value is the score converted to int.
names_dict[names1[0] + ' ' + names1[1]] = int(names1[2])
names_dict[names2[0] + ' ' + names2[1]] = int(names2[2])
# The third entry reads its own score (names3), not the second record's.
names_dict[names3[0] + ' ' + names3[1]] = int(names3[2])
>>> dict((x + y, int(z)) for (x, y, z) in (w.split(',') for w in ['Edogawa, Conan, 100', 'Kaitu, kid, 90', 'Mouri, Ran, 70']))
{'Mouri Ran': 70, 'Edogawa Conan': 100, 'Kaitu kid': 90}
Related
I need to create a bar chart with time series values on the x-axis but they should be in layers, the example of the graph below.[![desired version][1]][1]
Right now I managed to get this for now: [![my version][2]][2]
[1]: https://i.stack.imgur.com/PQnc5.png
[2]: https://i.stack.imgur.com/1Z1aC.png
The code for the graph is here:
# Creating traces for the bar chart: one bar series per space type.
trace_desks_popl = go.Bar(
    x=df_estate_desks['Time Slot'],
    y=df_estate_desks['Occupancy x Hour'],
    name='Desks',
    marker_color='#26A2ED'
)
trace_meeting_popl = go.Bar(
    x=df_estate_meet['Time Slot'],
    y=df_estate_meet['Population x Hour'],
    name='Meeting Rooms, Offices & Breakout Spaces',
    marker_color='#41C572'
)

# Creating the stacked bar chart from the two traces.
data_estate_popl = [trace_desks_popl, trace_meeting_popl]
layout_estate_popl = go.Layout(
    barmode='stack',
    title='Total Estate Population',
    xaxis=dict(title='Time'),
    yaxis=dict(title='Number of People'),
    template="simple_white",
    height=700,
    width=1000,
    bargap=0.03
)
fig_estate_popl = go.Figure(data=data_estate_popl, layout=layout_estate_popl)

# Adding the line for average population. A second x axis (x2) overlaying the
# first and fixed to the range [0, 2] is used so the reference lines can be
# drawn from x=0 to x=2 independently of the time-slot categories.
avg_estate_popl = round((df_estate_meet['Population x Hour'] + df_estate_desks['Occupancy x Hour']).mean())
fig_estate_popl.layout.xaxis2 = go.layout.XAxis(overlaying='x', range=[0, 2], showticklabels=False)
fig_estate_popl.add_scatter(
    x=[0, 2],
    y=[avg_estate_popl, avg_estate_popl],
    mode='lines+text',
    xaxis='x2',
    line_dash='dot',
    marker_color='Grey',
    text='Avg: ' + str(avg_estate_popl),
    textposition='top left',
    textfont=dict(color='Black'),
    name='citations',
    showlegend=False)

# Adding the line for minimum population.
min_estate_popl = round((df_estate_meet['Population x Hour'] + df_estate_desks['Occupancy x Hour']).min())
fig_estate_popl.add_scatter(
    x=[0, 2],
    y=[min_estate_popl, min_estate_popl],
    mode='lines+text',
    xaxis='x2',
    line_dash='dot',
    marker_color='Grey',
    text='Min: ' + str(min_estate_popl),
    textposition='top left',
    textfont=dict(color='Black'),
    name='citations',
    showlegend=False)

# Adding the line for maximum population.
max_estate_popl = round((df_estate_meet['Population x Hour'] + df_estate_desks['Occupancy x Hour']).max())
fig_estate_popl.add_scatter(
    x=[0, 2],
    y=[max_estate_popl, max_estate_popl],
    mode='lines+text',
    xaxis='x2',
    line_dash='dot',
    marker_color='Grey',
    text='Max: ' + str(max_estate_popl),
    textposition='top left',
    textfont=dict(color='Black'),
    name='citations',
    showlegend=False)

fig_estate_popl.show()
I can add the dictionary of the dataframes I used if needed. Thank you!
I have a dataset with images and another dataset as its description:
There are a lot of pictures: people with and without sunglasses, smiles and other attributes. What I want to do is be able to add smiles to photos where people are not smiling.
I've started like this:
# Index labels of the rows at positions 100..124 after sorting 'Smiling' in
# descending order (the top 100 rows are skipped).
# NOTE(review): the labels are used directly to index `data`, so this assumes
# attrs' index lines up with positions in `data` — confirm.
smile_ids = attrs['Smiling'].sort_values(ascending=False).iloc[100:125].index.values
smile_data = data[smile_ids]
# Index labels of the 5 rows with the lowest 'Smiling' values.
no_smile_ids = attrs['Smiling'].sort_values(ascending=True).head(5).index.values
no_smile_data = data[no_smile_ids]
# Index labels of the 25 rows with the highest 'Eyeglasses' values.
eyeglasses_ids = attrs['Eyeglasses'].sort_values(ascending=False).head(25).index.values
eyeglasses_data = data[eyeglasses_ids]
# Index labels of the 5 rows with the highest 'Sunglasses' values.
sunglasses_ids = attrs['Sunglasses'].sort_values(ascending=False).head(5).index.values
sunglasses_data = data[sunglasses_ids]
When I print them they are fine:
plot_gallery(smile_data, IMAGE_H, IMAGE_W, n_row=5, n_col=5, with_title=True, titles=smile_ids)
Plot gallery looks like this:
def plot_gallery(images, h, w, n_row=3, n_col=6, with_title=False, titles=[]):
    """Draw images on an ``n_row`` x ``n_col`` grid of matplotlib subplots.

    Parameters:
        images: indexable collection; ``images[i]`` must support
            ``.reshape((h, w, 3))``.
        h, w: height and width each image is reshaped to (with 3 channels).
        n_row, n_col: grid dimensions; ``n_row * n_col`` subplot slots are
            created.
        with_title: when true, ``titles[i]`` is set as the title of slot ``i``.
        titles: indexable collection of titles, read only when ``with_title``
            is true. The default list is never mutated here.

    Drawing is best effort: a slot whose image or title cannot be drawn (for
    example because ``images`` has fewer entries than the grid has slots) is
    left as it is and the loop moves on to the next slot.
    """
    plt.figure(figsize=(1.5 * n_col, 1.7 * n_row))
    plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)
    for i in range(n_row * n_col):
        plt.subplot(n_row, n_col, i + 1)
        try:
            plt.imshow(images[i].reshape((h, w, 3)), cmap=plt.cm.gray, vmin=-1, vmax=1, interpolation='nearest')
            if with_title:
                plt.title(titles[i])
            plt.xticks(())
            plt.yticks(())
        except Exception:
            # Deliberately tolerant, but no longer a bare ``except``:
            # KeyboardInterrupt and SystemExit now propagate instead of being
            # silently swallowed while plotting.
            pass
Then I do:
def to_latent(pic):
    """Encode a batch of images with the global ``autoencoder``.

    ``pic`` is reshaped to rows of length 45*45*3, converted to a float tensor
    on the CPU, and passed to ``autoencoder.encode`` with gradient tracking
    disabled. The autoencoder is switched to eval mode before encoding.

    Returns whatever ``autoencoder.encode`` returns for the flattened batch.
    """
    with torch.no_grad():
        flat_batch = torch.FloatTensor(pic.reshape(-1, 45 * 45 * 3)).to('cpu')
        autoencoder.eval()
        return autoencoder.encode(flat_batch)
def from_latent(vec):
    """Decode latent vectors with the global ``autoencoder``.

    ``vec`` is moved to the CPU and passed to ``autoencoder.decode`` with
    gradient tracking disabled. The autoencoder is switched to eval mode
    before decoding.

    Returns whatever ``autoencoder.decode`` returns.
    """
    with torch.no_grad():
        latent_on_cpu = vec.to('cpu')
        autoencoder.eval()
        return autoencoder.decode(latent_on_cpu)
After that:
# Mean latent code of each image group (mean taken over axis 0 of the encoder output).
smile_latent = to_latent(smile_data).mean(axis=0)
no_smile_latent = to_latent(no_smile_data).mean(axis=0)
sunglasses_latent = to_latent(sunglasses_data).mean(axis=0)
# Attribute directions in latent space, formed as differences of group means.
smile_vec = smile_latent-no_smile_latent
# NOTE(review): the baseline subtracted here is smile_latent, not the mean of a
# "no sunglasses" group — confirm this is intended.
sunglasses_vec = sunglasses_latent - smile_latent
And finally:
def add_smile(ids):
    """For each id, shift the image's latent code by ``smile_vec`` and plot it.

    Each image is taken as the one-row slice ``data[id:id+1]``, encoded with
    ``to_latent``, offset by the global ``smile_vec``, decoded with
    ``from_latent``, and shown next to the original via ``plot_gallery``.
    """
    for idx in ids:
        original = data[idx:idx + 1]
        shifted_latent = to_latent(original)
        shifted_latent[0] += smile_vec
        decoded = from_latent(shifted_latent).view(-1, 45, 45, 3).cpu()
        plot_gallery([original, decoded], IMAGE_H, IMAGE_W, n_row=1, n_col=2)
def add_sunglasses(ids):
    """For each id, shift the image's latent code by ``sunglasses_vec`` and plot it.

    Each image is taken as the one-row slice ``data[id:id+1]``, encoded with
    ``to_latent``, offset by the global ``sunglasses_vec``, decoded with
    ``from_latent``, and shown next to the original via ``plot_gallery``.
    """
    for idx in ids:
        original = data[idx:idx + 1]
        shifted_latent = to_latent(original)
        shifted_latent[0] += sunglasses_vec
        decoded = from_latent(shifted_latent).view(-1, 45, 45, 3).cpu()
        plot_gallery([original, decoded], IMAGE_H, IMAGE_W, n_row=1, n_col=2)
But when I execute this line I don't get any faces:
add_smile(no_smile_ids)
The output:
Could someone please explain where is my mistake or why it can happen? Thanks for any help.
Added: checking the shape of pic_output:
Wild guess, but it seems you are broadcasting your images instead of permuting the axes. The former will have the undesired effect of mixing information across the batches/channels.
pic_output = pic_output.view(-1, 45, 45, 3).cpu()
should be replaced with
pic_output = pic_output.permute(0, 2, 3, 1).cpu()
Assuming tensor pic_output is already shaped like (-1, 3, 45, 45).
I have the following strategy for creating dataframes with genomics data:
from hypothesis.extra.pandas import columns, data_frames, column
import hypothesis.strategies as st
def mysort(tp):
    """Return the elements of a 4-item row reordered by paired sort keys.

    Element 0 is paired with the key -1, elements 1 and 2 are paired with
    their own values, and element 3 is paired with the key 1e10. The pairs are
    sorted and the row elements are returned as a list in that order, so for
    middle values between -1 and 1e10 the first and last elements stay in
    place and the two middle elements come out in ascending order.
    """
    sort_keys = (-1, tp[1], tp[2], int(1e10))
    ordered_pairs = sorted(zip(sort_keys, tp))
    return [element for _key, element in ordered_pairs]
# Integer coordinates between 0 and 1e7 inclusive.
positions = st.integers(min_value=0, max_value=int(1e7))
# One of the two strand symbols.
strands = st.sampled_from(["+", "-"])
# Chromosome labels chr1..chr22 plus chrX, chrY and chrM.
chromosomes = st.sampled_from(
    elements=["chr{}".format(str(label)) for label in [*range(1, 23), "X", "Y", "M"]]
)
# Each row is drawn as (chromosome, position, position, strand) and passed
# through mysort before being placed in the frame.
genomics_data = data_frames(
    columns=columns(["Chromosome", "Start", "End", "Strand"], dtype=int),
    rows=st.tuples(chromosomes, positions, positions, strands).map(mysort),
)
I am not really interested in empty dataframes as they are invalid, and I would also like to produce some really long dfs. How do I change the sizes of the dataframes created for test cases? I.e. min size 1, avg size large?
You can give the data_frames constructor an index argument which has min_size and max_size options:
from hypothesis.extra.pandas import data_frames, columns, range_indexes
import hypothesis.strategies as st
def mysort(tp):
    """Reorder a 4-item row by sorting (key, element) pairs.

    The keys are -1 for element 0, the values of elements 1 and 2 themselves,
    and 1e10 for element 3. The elements are returned as a list in the sorted
    order of their keys, which for middle values between -1 and 1e10 keeps
    the outer elements fixed and puts the two middle elements in ascending
    order.
    """
    keyed = zip((-1, tp[1], tp[2], int(1e10)), tp)
    result = []
    for _key, element in sorted(keyed):
        result.append(element)
    return result
# Chromosome labels chr1..chr22 plus chrX, chrY and chrM.
chromosomes = st.sampled_from(
    ["chr{}".format(str(label)) for label in [*range(1, 23), "X", "Y", "M"]]
)
# Integer coordinates between 0 and 1e7 inclusive.
positions = st.integers(min_value=0, max_value=int(1e7))
# One of the two strand symbols.
strands = st.sampled_from(["+", "-"])
# range_indexes(min_size=5) sets a lower bound of 5 on the index length, which
# is what rules out empty frames.
dfs = data_frames(
    index=range_indexes(min_size=5),
    columns=columns(["Chromosome", "Start", "End", "Strand"], dtype=int),
    rows=st.tuples(chromosomes, positions, positions, strands).map(mysort),
)
Produces dfs like:
Chromosome Start End Strand
0 chr11 1411202 8025685 +
1 chr18 902289 5026205 -
2 chr12 5343877 9282475 +
3 chr16 2279196 8294893 -
4 chr14 1365623 6192931 -
5 chr12 4602782 9424442 +
6 chr10 136262 1739408 +
7 chr15 521644 4861939 +
I was not expecting this error ("AttributeError: module 'plotly' has no attribute 'plot'") and have not been able to find anything about this exact error. I suspect it is not the real problem, because plotly obviously has plotting abilities, and that somewhere along the way my data isn't formatted correctly for this particular exercise.
I am open to suggestions on new methods. This is just what I used because it was easy to follow, it's what I want in the end, and it's centralized.
Error occurs on last line py.plot( fig, filename='d3-cloropleth-map' )
I have copied the code from the example: United States Choropleth Map
And here is my code:
import plotly as py
import pandas as pd
import numpy as np
py.tools.set_credentials_file(username='user', api_key='xxxXxxXxxx')

df = pd.DataFrame.from_csv("C:/Users/d/Documents/Personal/Python Scripts/Python/Connect.csv")

# Cast every column to str, then convert the numeric and date columns back.
for col in df.columns:
    df[col] = df[col].astype(str)
df[['Open Balance', 'Amount', 'Aging']] = (
    df[['Open Balance', 'Amount', 'Aging']].apply(pd.to_numeric, errors='ignore')
)
df[['Date', 'Due Date']] = df[['Date', 'Due Date']].apply(pd.to_datetime)

# Aggregate per (State, Item); `sti` is an alias of the same frame, so the
# in-place reset_index below affects both names.
state_total_byitem = df.groupby(by=['State', 'Item']).agg({
    'Open Balance': 'sum',
    'Amount': 'sum',
    'Paid': 'count',
    'Aging': 'mean',
})
sti = state_total_byitem
sti.reset_index(level=['State', 'Item'], inplace=True)

# All columns become strings so they can be concatenated into the hover text.
for col in sti.columns:
    sti[col] = sti[col].astype(str)

sti['text'] = (
    'State ' + sti['State'] + ' Item ' + sti['Item'] + '<br>'
    + ' Open Balance ' + sti['Open Balance'] + ' Paid ' + sti['Paid'] + '<br>'
    + ' Amount ' + sti['Amount'] + ' Aging ' + sti['Aging']
)

# Colour scale as [fraction, colour] stops from 0.0 to 1.0.
scl = [
    [0.0, 'rgb(220,224,225)'],
    [0.2, 'rgb(204,220,224)'],
    [0.4, 'rgb(158,192,200)'],
    [0.6, 'rgb(100,166,184)'],
    [0.8, 'rgb(60,175,206)'],
    [1.0, 'rgb(10,206,255)'],
]

data = [{
    'type': 'choropleth',
    'colorscale': scl,
    'autocolorscale': False,
    'locations': sti['State'],
    # 'Amount' was cast to str above, so it is converted back to float here.
    'z': sti['Amount'].astype(float),
    'locationmode': 'USA-states',
    'text': sti['text'],
    'marker': {
        'line': {
            'color': 'rgb(255,255,255)',
            'width': 2,
        },
    },
    'colorbar': {
        'title': "$ in USD",
    },
}]

layout = {
    'title': 'Invoices by State<br>(Hover for breakdown)',
    'geo': {
        'scope': 'usa',
        'projection': {'type': 'albers usa'},
        'showlakes': True,
        'lakecolor': 'rgb(255, 255, 255)',
    },
}

fig = {'data': data, 'layout': layout}
py.plot(fig, filename='d3-cloropleth-map')
Instead of
import plotly as py
You should have
import plotly.plotly as py
For future reference, try to get a MVCE before posting. Oftentimes it will help you find the error on your own.
I have some data,
# Example data in dput() form: a data.frame with 8 rows and the columns
# Nominal, Run, PAR, PredLin and PredQuad. Run is a factor with levels
# "1", "2", "3" in which every row has level "1".
# NOTE(review): PredLin and PredQuad presumably hold previously computed linear
# and quadratic predictions — confirm against the source of the data.
calvarbyruno.1<-structure(list(Nominal = c(1, 3, 6, 10, 30, 50, 150, 250), Run = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1", "2", "3"), class = "factor"),
PAR = c(1.25000000000000e-05, 0.000960333333333333, 0.00205833333333334,
0.00423333333333333, 0.0322333333333334, 0.614433333333334,
1.24333333333333, 1.86333333333333), PredLin = c(-0.0119152187070942,
0.00375925114245899, 0.0272709559167888, 0.0586198956158952,
0.215364594111427, 0.372109292606959, 1.15583278508462, 1.93955627756228
), PredQuad = c(-0.0615895732702735, -0.0501563307416599,
-0.0330831368244257, -0.0104619953693943, 0.100190275883806,
0.20675348710041, 0.6782336426345, 1.04748729725370)), .Names = c("Nominal",
"Run", "PAR", "PredLin", "PredQuad"), row.names = c(NA, 8L), class = "data.frame")
# Exponent applied to Nominal when building the regression weights
# (used as Nominal^calweight in the model fits and plots).
calweight <- -2
for which I've created both a linear and a quadratic lm model
# Weighted fits of PAR on Nominal: one linear, one with an added quadratic
# term. Each observation is weighted by Nominal^calweight. The argument is
# written out as `weights` instead of relying on partial matching of `weight`.
callin.1 <- lm(PAR ~ Nominal, data = calvarbyruno.1,
               weights = Nominal^calweight)
calquad.1 <- lm(PAR ~ Nominal + I(Nominal^2), data = calvarbyruno.1,
                weights = Nominal^calweight)
I can then plot my data values using ggplot2
# Scatter plot of the raw data with PAR on the x axis and Nominal on the y axis.
# Written with ggplot() + geom_point() because qplot() is deprecated in
# current ggplot2 releases.
ggplot(calvarbyruno.1, aes(x = PAR, y = Nominal)) +
  geom_point()
But can't work out how to overlay a line representing the two lm objects... Any ideas ?
The easiest option is to use geom_smooth() and let ggplot2 fit the model for you.
# Let ggplot2 fit both models: a default linear smooth and a red degree-2
# polynomial smooth, each using the Nominal^calweight weights from the plot
# aesthetics. coord_flip() then swaps the axes for display.
ggplot(
  data = calvarbyruno.1,
  mapping = aes(x = Nominal, y = PAR, weight = Nominal^calweight)
) +
  geom_smooth(method = "lm") +
  geom_smooth(method = "lm", formula = y ~ poly(x, 2), colour = "red") +
  geom_point() +
  coord_flip()
Or you can create a new dataset with the predicted values.
# Build a grid of Nominal values and predict both fitted models on it.
newdata <- data.frame(Nominal = pretty(calvarbyruno.1$Nominal, 100))
newdata$Linear <- predict(callin.1, newdata = newdata)
newdata$Quadratic <- predict(calquad.1, newdata = newdata)
# library() stops with an error if reshape2 is missing; require() would only
# return FALSE and let the melt() call below fail instead.
library(reshape2)
# Long format: one row per (Nominal, Model) with the prediction in `value`.
newdata <- melt(newdata, id.vars = "Nominal", variable.name = "Model")
ggplot(calvarbyruno.1, aes(x = PAR, y = Nominal, weight = Nominal^calweight)) +
  geom_line(data = newdata, aes(x = value, colour = Model)) +
  geom_point()
Earlier I asked a related question and Hadley had this good answer. Using the predict function from that post you can add two columns to your data. One for each model:
# Store each model's fitted values as a new column of the data frame.
calvarbyruno.1[["calQuad"]] <- predict(calquad.1)
calvarbyruno.1[["callin"]] <- predict(callin.1)
Then it's a matter of plotting the point and adding each model in as a line:
# Observed points in green, with the quadratic (red) and linear (blue) fitted
# values drawn as lines against Nominal. theme() replaces opts(), which is no
# longer available in ggplot2.
ggplot() +
  geom_point(data = calvarbyruno.1, aes(PAR, Nominal), colour = "green") +
  geom_line(data = calvarbyruno.1, aes(calQuad, Nominal), colour = "red") +
  geom_line(data = calvarbyruno.1, aes(callin, Nominal), colour = "blue") +
  theme(aspect.ratio = 1)
And that results in this nice picture (yeah the colors could use some work):
(source: cerebralmastication.com)