Related
i'm trying to plot a hbar_stack with datetimes in x axis with no luck. i've done normal hbar plots with datetimes before with no problems so it's has to be something with the hbar_stack.
Here is the code with some static data:
start_date = datetime.datetime(2020, 7, 10, 10, 26, 15, 240666)
end_date = datetime.datetime(2020, 7, 10, 13, 27, 33, 741238)
tasks = ['task 1', 'task 2', 'task 3', 'task 4']
status = ['status_1', 'status_2', 'status_3', 'status_4']
exports = {'tasks': tasks, 'status_1': [datetime.datetime(2020, 7, 10, 13, 26, 59, 531234),
datetime.datetime(2020, 7, 10, 13, 25, 16, 666837),
datetime.datetime(2020, 7, 10, 10, 37, 16, 368927),
datetime.datetime(2020, 7, 10, 10, 26, 15, 240666)],
'status_2': [None, datetime.datetime(2020, 7, 10, 13, 27, 33, 741238),
datetime.datetime(2020, 7, 10, 11, 37, 7, 629667),
datetime.datetime(2020, 7, 10, 10, 27, 5, 540767)],
'status_3': [None, None, None, datetime.datetime(2020, 7, 10, 10, 54, 17, 738024)],
'status_4': [None, None, None, datetime.datetime(2020, 7, 10, 11, 2, 15, 196620)]}
p = figure(y_range=tasks, x_range=[start_date, end_date], x_axis_type='datetime', title="Tasks timeline",
tools=["hover,pan,reset,save,wheel_zoom"], tooltips=None)
p.xaxis.formatter = DatetimeTickFormatter(
days=["%m-%d-%Y"],
months=["%m-%d-%Y"],
years=["%m-%d-%Y"],
)
p.xaxis.major_label_orientation = radians(30)
p.hbar_stack(status, y='tasks', height=0.2, color=Spectral[11][:len(status)], source=ColumnDataSource(exports))
As one can see from the data the datetimes are minutes apart but it renders with years of difference. On hovering the data(x, y) the x value is not showing a date, instead it's showing a big number like 1.589e+12. Any help is appreciated.
enter image description here
dts = [
datetime(2020, 7, 10, 13, 26, 59, 531234),
datetime(2020, 7, 10, 13, 25, 16, 666837),
datetime(2020, 7, 10, 10, 37, 16, 368927),
datetime(2020, 7, 10, 10, 26, 15, 240666)
]
# because the datetimes are in reverse order
ends = dts[0:-1]
starts = dts[1:]
p = figure(plot_height=350, x_axis_type="datetime", y_range=["a", "b", "c"])
p.hbar(y=["b", "b", "b"], left=starts, right=ends,
line_color="white", fill_color=["red", "blue", "orange"])
p.xaxis.formatter.hours = ["%b %Y %H:%M"]
show(p)
which yields:
output of predicted_classes
array([ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 4, 4, 2, 4, 4, 4, 4, 5, 4, 4, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 7, 7, 7, 7, 7, 7, 7, 13, 7, 7, 8, 11, 8, 8, 8,
11, 8, 11, 11, 8, 11, 9, 9, 9, 9, 9, 9, 9, 9, 8, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 8, 11, 11, 11,
11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 3, 13, 3,
3, 13, 13, 13, 14, 14, 14, 14, 14, 14, 2, 14, 14, 14, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 20, 16, 16,
17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 19, 19, 19, 19, 8, 19, 19, 19, 19, 19, 20, 20, 20, 20,
20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23,
23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25,
25, 25, 25, 25, 25])
output of y_true
0 0
1 0
2 0
3 0
4 0
..
255 25
256 25
257 25
258 25
259 25
Name: label, Length: 260, dtype: int64
I want to get the indices with this code, and getting this value error.
predicted_classes = model.predict_classes(X_test)
y_true = data_test.iloc[:, 0]
correct = np.nonzero(predicted_classes==y_true)[0]
incorrect = np.nonzero(predicted_classes!=y_true)[0]
trace of error
ValueError Traceback (most recent call last)
in
4 #get the indices to be plotted
5 y_true = data_test.iloc[:, 0]
----> 6 correct = np.nonzero(predicted_classes!=y_true)[0]
7 incorrect = np.nonzero(predicted_classes==y_true)[0]
in nonzero(*args, **kwargs)
//anaconda3/lib/python3.7/site-packages/numpy/core/fromnumeric.py in nonzero(a)
1894
1895 """
-> 1896 return _wrapfunc(a, 'nonzero')
1897
1898
//anaconda3/lib/python3.7/site-packages/numpy/core/fromnumeric.py in _wrapfunc(obj, method, *args, **kwds)
56 bound = getattr(obj, method, None)
57 if bound is None:
---> 58 return _wrapit(obj, method, *args, **kwds)
59
60 try:
//anaconda3/lib/python3.7/site-packages/numpy/core/fromnumeric.py in _wrapit(obj, method, *args, **kwds)
49 if not isinstance(result, mu.ndarray):
50 result = asarray(result)
---> 51 result = wrap(result)
52 return result
53
//anaconda3/lib/python3.7/site-packages/pandas/core/generic.py in __array_wrap__(self, result, context)
1916 return result
1917 d = self._construct_axes_dict(self._AXIS_ORDERS, copy=False)
-> 1918 return self._constructor(result, **d).__finalize__(self)
1919
1920 # ideally we would define this to avoid the getattr checks, but
//anaconda3/lib/python3.7/site-packages/pandas/core/series.py in __init__(self, data, index, dtype, name, copy, fastpath)
290 if len(index) != len(data):
291 raise ValueError(
--> 292 f"Length of passed values is {len(data)}, "
293 f"index implies {len(index)}."
294 )
ValueError: Length of passed values is 1, index implies 260.
Please let me know where I am going wrong.
A quick search reveals that an old version of the documentation advises to use .to_numpy().nonzero() as a replacement for Series.nonzero().
I have a list_of_data_lists that consists of:
[datetime.datetime(2019, 12, 22, 0, 49, 7), 26.3]
[datetime.datetime(2019, 12, 22, 2, 6, 24), 25.5]
[datetime.datetime(2019, 12, 22, 2, 36, 57), 26.7]
[datetime.datetime(2019, 12, 22, 3, 2, 49), 25.7]
[datetime.datetime(2019, 12, 22, 5, 26, 30), 27.9]
[datetime.datetime(2019, 12, 22, 5, 32, 4), 27.8]
[datetime.datetime(2019, 12, 22, 5, 50, 23), 23.5]
[datetime.datetime(2019, 12, 22, 6, 31, 3), 27.7]
[datetime.datetime(2019, 12, 22, 8, 9, 4), 26.1]
[datetime.datetime(2019, 12, 22, 9, 30, 40), 27.1]
[datetime.datetime(2019, 12, 22, 12, 27, 49), 26.5]
[datetime.datetime(2019, 12, 22, 16, 6, 10), 24.1]
[datetime.datetime(2019, 12, 22, 17, 21, 35), 26.3]
[datetime.datetime(2019, 12, 22, 17, 28, 54), 28.0]
[datetime.datetime(2019, 12, 22, 18, 4, 52), 26.7]
[datetime.datetime(2019, 12, 22, 22, 23, 24), 23.5]
[datetime.datetime(2019, 12, 22, 22, 51, 10), 26.0]
[datetime.datetime(2019, 12, 22, 23, 30, 14), 23.6]
And want to put this into pandas dataframe, under columns date_and_time and data_value for data_list[0] and data_list[1] in the list_of_data_lists correspond to. How can this be done?
IIUC, just pass list_of_data_lists to dataframe constructor
import datetime
list_of_data_lists = [
[datetime.datetime(2019, 12, 22, 0, 49, 7), 26.3],
[datetime.datetime(2019, 12, 22, 2, 6, 24), 25.5],
[datetime.datetime(2019, 12, 22, 2, 36, 57), 26.7],
[datetime.datetime(2019, 12, 22, 3, 2, 49), 25.7],
[datetime.datetime(2019, 12, 22, 5, 26, 30), 27.9],
[datetime.datetime(2019, 12, 22, 5, 32, 4), 27.8],
[datetime.datetime(2019, 12, 22, 5, 50, 23), 23.5],
[datetime.datetime(2019, 12, 22, 6, 31, 3), 27.7],
[datetime.datetime(2019, 12, 22, 8, 9, 4), 26.1],
[datetime.datetime(2019, 12, 22, 9, 30, 40), 27.1],
[datetime.datetime(2019, 12, 22, 12, 27, 49), 26.5],
[datetime.datetime(2019, 12, 22, 16, 6, 10), 24.1],
[datetime.datetime(2019, 12, 22, 17, 21, 35), 26.3],
[datetime.datetime(2019, 12, 22, 17, 28, 54), 28.0],
[datetime.datetime(2019, 12, 22, 18, 4, 52), 26.7],
[datetime.datetime(2019, 12, 22, 22, 23, 24), 23.5],
[datetime.datetime(2019, 12, 22, 22, 51, 10), 26.0],
[datetime.datetime(2019, 12, 22, 23, 30, 14), 23.6],
]
df = pd.DataFrame(list_of_data_lists, columns=['date_and_time', 'data_value'])
Out[155]:
date_and_time data_value
0 2019-12-22 00:49:07 26.3
1 2019-12-22 02:06:24 25.5
2 2019-12-22 02:36:57 26.7
3 2019-12-22 03:02:49 25.7
4 2019-12-22 05:26:30 27.9
5 2019-12-22 05:32:04 27.8
6 2019-12-22 05:50:23 23.5
7 2019-12-22 06:31:03 27.7
8 2019-12-22 08:09:04 26.1
9 2019-12-22 09:30:40 27.1
10 2019-12-22 12:27:49 26.5
11 2019-12-22 16:06:10 24.1
12 2019-12-22 17:21:35 26.3
13 2019-12-22 17:28:54 28.0
14 2019-12-22 18:04:52 26.7
15 2019-12-22 22:23:24 23.5
16 2019-12-22 22:51:10 26.0
17 2019-12-22 23:30:14 23.6
I have this nested list:
a = [[1, 3, 6, 11, 16, 21, 25, 28, 31, 32, 33, 34, 35, 36],
[1, 2, 5, 9, 15, 20, 24, 26, 30, 36],
[1, 3, 6, 11, 16, 21, 25, 29, 31, 32, 33, 34, 35, 36],
[1, 2, 4, 8, 14, 18, 23, 36],
[1, 2, 5, 9, 15, 20, 24, 27, 30, 36],
[1, 3, 6, 11, 16, 22, 25, 28, 31, 32, 33, 34, 35, 36],
[1, 3, 7, 12, 17, 36],
[1, 2, 4, 8, 14, 19, 23, 36],
[1, 2, 5, 10, 15, 20, 24, 26, 30, 36],
[1, 3, 6, 11, 16, 22, 25, 29, 31, 32, 33, 34, 35, 36],
[1, 2, 5, 10, 15, 20, 24, 27, 30, 36],
[1, 3, 6, 11, 16, 21, 25, 28, 31, 32, 33, 35, 36],
[1, 3, 6, 11, 16, 21, 25, 28, 31, 33, 34, 35,36],
[1, 3, 6, 11, 16, 21, 25, 29, 31, 32, 33, 35, 36]]
I need to choose max length of sublist in nested list, than compare item of sublist with nested list. If item in sublist equal then same item in nested list remove and in final print nested list without this item.
I hope I understand your question correctly.
You want input to be:
a = [[1, 3, 6, 11, 16, 21, 25, 28, 31, 32, 33, 34, 35, 36],
[1, 2, 5, 9, 15, 20, 24, 26, 30, 36],
[1, 3, 6, 11, 16, 21, 25, 29, 31, 32, 33, 34, 35, 36],
[1, 2, 4, 8, 14, 18, 23, 36],
[1, 2, 5, 9, 15, 20, 24, 27, 30, 36],
[1, 3, 6, 11, 16, 22, 25, 28, 31, 32, 33, 34, 35, 36],
[1, 3, 7, 12, 17, 36],
[1, 2, 4, 8, 14, 19, 23, 36],
[1, 2, 5, 10, 15, 20, 24, 26, 30, 36],
[1, 3, 6, 11, 16, 22, 25, 29, 31, 32, 33, 34, 35, 36],
[1, 2, 5, 10, 15, 20, 24, 27, 30, 36],
[1, 3, 6, 11, 16, 21, 25, 28, 31, 32, 33, 35, 36],
[1, 3, 6, 11, 16, 21, 25, 28, 31, 33, 34, 35, 36],
[1, 3, 6, 11, 16, 21, 25, 29, 31, 32, 33, 35, 36]]
We are removing
[1, 3, 6, 11, 16, 22, 25, 29, 31, 32, 33, 34, 35, 36]
and
[1, 3, 6, 11, 16, 21, 25, 29, 31, 32, 33, 34, 35, 36]
since they are of the same length.
The output should be:
a = [[1, 2, 5, 9, 15, 20, 24, 26, 30, 36],
[1, 2, 4, 8, 14, 18, 23, 36],
[1, 2, 5, 9, 15, 20, 24, 27, 30, 36],
[1, 3, 7, 12, 17, 36],
[1, 2, 4, 8, 14, 19, 23, 36],
[1, 2, 5, 10, 15, 20, 24, 26, 30, 36],
[1, 2, 5, 10, 15, 20, 24, 27, 30, 36],
[1, 3, 6, 11, 16, 21, 25, 28, 31, 32, 33, 35, 36],
[1, 3, 6, 11, 16, 21, 25, 28, 31, 33, 34, 35, 36],
[1, 3, 6, 11, 16, 21, 25, 29, 31, 32, 33, 35, 36]]
with the previous lists removed.
Your question was not worded clearly, but I hope this is what you wanted. Here is the code:
# assume a is not empty
d = {} # list of the max length -> number of occurrences in 2d array
# find the length of the longest list
maxLen = len(a[0])
for l in a:
if len(l) > maxLen:
maxLen = len(l)
# add lists of the same max length and their count to the dictionary
for l in a:
if len(l) == maxLen:
#convert list to string because python does not support list being key of a dictionary
l_string = str(l)
if l_string in d:
d[l_string] += 1
else:
d[l_string] = 1
# remove
for l_string in d:
while d[l_string] > 0:
# convert string back to list and remove
a.remove(eval(l_string))
d[l_string] -= 1
# test result if you want
for row in a:
print(row)
I'm trying to build my own speech recognition network. I understood how to pre-process audio. But I can't figure out the pre-processing of the text.
I have a alphabet:
alphabet = {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12, 'm': 13, 'n': 14,'o': 15, 'p': 16, 'q': 17, 'r': 18, 's': 19, 't': 20, 'u': 21, 'v': 22, 'w': 23, 'x': 24, 'y': 25, 'z': 26}
And I encode each letter of the sentence into a number (27 is a space):
array([list([27, 23, 8, 5, 14, 27, 8, 5, 27, 19, 16, 5, 1, 11, 19, 27, 9, 14, 27, 15, 21, 18, 27, 12, 1, 14, 7, 21, 1, 7, 5, 27, 9, 27, 3, 1, 14, 27, 9, 14, 20, 5, 18, 16, 18, 5, 20, 27, 23, 8, 1, 20, 27, 8, 5, 27, 8, 1, 19, 27, 19, 1, 9, 4, 27]),
list([27, 19, 15, 27, 14, 15, 23, 27, 9, 27, 6, 5, 1, 18, 27, 14, 15, 20, 8, 9, 14, 7, 27, 2, 5, 3, 1, 21, 19, 5, 27, 9, 20, 27, 23, 1, 19, 27, 20, 8, 15, 19, 5, 27, 15, 13, 5, 14, 19, 27, 20, 8, 1, 20, 27, 2, 18, 15, 21, 7, 8, 20, 27, 25, 15, 21, 27, 20, 15, 27, 13, 5, 27]),
list([27, 14, 9, 7, 8, 20, 27, 6, 5, 12, 12, 27, 1, 14, 4, 27, 1, 14, 27, 1, 19, 19, 15, 18, 20, 13, 5, 14, 20, 27, 15, 6, 27, 6, 9, 7, 8, 20, 9, 14, 7, 27, 13, 5, 14, 27, 1, 14, 4, 27, 13, 5, 18, 3, 8, 1, 14, 20, 19, 27, 5, 14, 20, 5, 18, 5, 4, 27, 1, 14, 4, 27, 5, 24, 9, 20, 5, 4, 27, 20, 8, 5, 27, 20, 5, 14, 20, 27]),
list([27, 9, 27, 8, 5, 1, 18, 4, 27, 1, 27, 6, 1, 9, 14, 20, 27, 13, 15, 22, 5, 13, 5, 14, 20, 27, 21, 14, 4, 5, 18, 27, 13, 25, 27, 6, 5, 5, 20, 27]),
list([27, 25, 15, 21, 27, 3, 1, 13, 5, 27, 19, 15, 27, 20, 8, 1, 20, 27, 25, 15, 21, 27, 3, 15, 21, 12, 4, 27, 12, 5, 1, 18, 14, 27, 1, 2, 15, 21, 20, 27, 25, 15, 21, 18, 27, 4, 18, 5, 1, 13, 19, 27, 19, 1, 9, 4, 27, 20, 8, 5, 27, 15, 12, 4, 27, 23, 15, 13, 1, 14, 27])],
dtype=object)
Here are 5 sentences.
I just create one network layer and try to transfer this data there in order to get a number corresponding to the letter.
model = Sequential()
model.add(Dense(27, input_shape=(20,), activation='softmax'))
model.compile(loss='mean_squared_error',optimizer='Adam', metrics=['accuracy'])
for X, y in batch(X_train, y_train, 5):
model.train_on_batch(X, y)
batch() just breaks X_train, y_train into batch.
5 is size of batch.
But when I try to start the network I get an error
Error when checking target: expected dense_25 to have shape (27,) but got array with shape (1,)
UPD:
I'm using MFCC for X
audio, sr = librosa.load(pathTrain+"\\"+str(file), mono=True, sr=None)
fileMFCC = librosa.feature.mfcc(audio)
mean_scale = np.mean(fileMFCC, axis=0)
std_scale = np.std(fileMFCC, axis=0)
fileMFCC = (fileMFCC - mean_scale[np.newaxis, :]) / std_scale[np.newaxis, :]
X is
[array([[-4.35889894, -4.35889894, -4.35455134, ..., -3.95851777,
-3.99308173, -4.05261022],
[ 0.22941573, 0.22941573, 0.31913073, ..., 1.87189324,
1.7987301 , 1.66804349],
[ 0.22941573, 0.22941573, 0.31165866, ..., -0.27962786,
-0.19009062, -0.13788484],
...,
[ 0.22941573, 0.22941573, 0.18657944, ..., 0.14699792,
0.12751924, 0.16724807],
[ 0.22941573, 0.22941573, 0.18478513, ..., 0.00674492,
-0.04570105, 0.01231168],
[ 0.22941573, 0.22941573, 0.18232521, ..., 0.2571599 ,
0.22477036, 0.09153304]])
etc.