IllegalArgumentException error for running a pyspark mllib example - apache-spark

I am following the Spark ML example here:
from pyspark.mllib.linalg import Vectors
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.param import Param, Params
# Prepare training data from a list of (label, features) tuples.
training = sqlContext.createDataFrame(
    [
        (1.0, Vectors.dense([0.0, 1.1, 0.1])),
        (0.0, Vectors.dense([2.0, 1.0, -1.0])),
        (0.0, Vectors.dense([2.0, 1.3, 1.0])),
        (1.0, Vectors.dense([0.0, 1.2, -0.5])),
    ],
    ["label", "features"],
)
# Create a LogisticRegression instance. This instance is an Estimator.
lr = LogisticRegression(maxIter=10, regParam=0.01)
# Print out the parameters, documentation, and any default values.
# print() with a single parenthesised argument behaves the same on Python 2
# and Python 3, whereas the bare print statement is a SyntaxError on Python 3.
print("LogisticRegression parameters:\n" + lr.explainParams() + "\n")
# Learn a LogisticRegression model. This uses the parameters stored in lr.
model1 = lr.fit(training)
However, model1 = lr.fit(training) gives the following error message.
---------------------------------------------------------------------------
IllegalArgumentException Traceback (most recent call last)
<ipython-input-14-3e398ce8c8bd> in <module>
1 # Learn a LogisticRegression model. This uses the parameters stored in lr.
----> 2 model1 = lr.fit(training)
C:\spark\spark-3.0.2-bin-hadoop2.7\python\pyspark\ml\base.py in fit(self, dataset, params)
127 return self.copy(params)._fit(dataset)
128 else:
--> 129 return self._fit(dataset)
130 else:
131 raise ValueError("Params must be either a param map or a list/tuple of param maps, "
C:\spark\spark-3.0.2-bin-hadoop2.7\python\pyspark\ml\wrapper.py in _fit(self, dataset)
319
320 def _fit(self, dataset):
--> 321 java_model = self._fit_java(dataset)
322 model = self._create_model(java_model)
323 return self._copyValues(model)
C:\spark\spark-3.0.2-bin-hadoop2.7\python\pyspark\ml\wrapper.py in _fit_java(self, dataset)
316 """
317 self._transfer_params_to_java()
--> 318 return self._java_obj.fit(dataset._jdf)
319
320 def _fit(self, dataset):
C:\spark\spark-3.0.2-bin-hadoop2.7\python\lib\py4j-0.10.9-src.zip\py4j\java_gateway.py in __call__(self, *args)
1303 answer = self.gateway_client.send_command(command)
1304 return_value = get_return_value(
-> 1305 answer, self.gateway_client, self.target_id, self.name)
1306
1307 for temp_arg in temp_args:
C:\spark\spark-3.0.2-bin-hadoop2.7\python\pyspark\sql\utils.py in deco(*a, **kw)
132 # Hide where the exception came from that shows a non-Pythonic
133 # JVM exception message.
--> 134 raise_from(converted)
135 else:
136 raise
C:\spark\spark-3.0.2-bin-hadoop2.7\python\pyspark\sql\utils.py in raise_from(e)
IllegalArgumentException: requirement failed: Column features must be of type struct<type:tinyint,size:int,indices:array<int>,values:array<double>> but was actually struct<type:tinyint,size:int,indices:array<int>,values:array<double>>.

Related

Colab IndexError: Target 255 is out of bounds

I'm trying to perform an image semantic segmentation (segment mining fields) using lightning-flash. My images are all RGB/uint8/512x512 and the masks are L/uint8/512x512.
When I run the code, I get an error when fitting.
My code is this one:
import torch
import flash
from flash.image import SemanticSegmentation, SemanticSegmentationData
import os
from google.colab import drive
import ssl
drive.mount("/content/drive")
DATA_DIR = '/content/drive/MyDrive/data/'
x_train_dir = os.path.join(DATA_DIR, 'train_images')
y_train_dir = os.path.join(DATA_DIR, 'train_masks')
x_valid_dir = os.path.join(DATA_DIR, 'val_images')
y_valid_dir = os.path.join(DATA_DIR, 'val_masks')
x_test_dir = os.path.join(DATA_DIR, 'test_images')
y_test_dir = os.path.join(DATA_DIR, 'test_masks')
datamodule = SemanticSegmentationData.from_folders(
train_folder=x_train_dir,
train_target_folder=y_train_dir,
val_folder=x_valid_dir,
val_target_folder=y_valid_dir,
test_folder=x_test_dir,
test_target_folder=y_test_dir,
transform_kwargs=dict(image_size=(256, 256)),
num_classes=1,
batch_size=16,
)
#avoid ssl error
ssl._create_default_https_context = ssl._create_unverified_context
model = SemanticSegmentation(
head="unetplusplus",
backbone="densenet169",
pretrained="imagenet",
num_classes=datamodule.num_classes
)
# Train on every visible CUDA device when at least one exists; otherwise
# build the trainer without the `gpus` argument.
GPUS = torch.cuda.device_count()
if GPUS > 0:
    # Reuse the count obtained above instead of querying CUDA a second time.
    trainer = flash.Trainer(max_epochs=2, gpus=GPUS)
else:
    trainer = flash.Trainer(max_epochs=2)
trainer.finetune(model, datamodule=datamodule, strategy="freeze")
trainer.save_checkpoint("semantic_segmentation_model.pt")
When I run the code, I get this error:
IndexError Traceback (most recent call last)
<ipython-input-7-11e2ce087ca0> in <module>
6
7 #trainer.fit(model, datamodule=datamodule)
----> 8 trainer.finetune(model, datamodule=datamodule, strategy="freeze")
9 trainer.save_checkpoint("semantic_segmentation_model.pt")
19 frames
/usr/local/lib/python3.7/dist-packages/flash/core/trainer.py in finetune(self, model, train_dataloader, val_dataloaders, datamodule, strategy, train_bn)
162 """
163 self._resolve_callbacks(model, strategy, train_bn=train_bn)
--> 164 return super().fit(model, train_dataloader, val_dataloaders, datamodule)
165
166 def predict(
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in fit(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)
695 self.strategy.model = model
696 self._call_and_handle_interrupt(
--> 697 self._fit_impl, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path
698 )
699
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in _call_and_handle_interrupt(self, trainer_fn, *args, **kwargs)
648 return self.strategy.launcher.launch(trainer_fn, *args, trainer=self, **kwargs)
649 else:
--> 650 return trainer_fn(*args, **kwargs)
651 # TODO(awaelchli): Unify both exceptions below, where `KeyboardError` doesn't re-raise
652 except KeyboardInterrupt as exception:
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in _fit_impl(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)
735 ckpt_path, model_provided=True, model_connected=self.lightning_module is not None
736 )
--> 737 results = self._run(model, ckpt_path=self.ckpt_path)
738
739 assert self.state.stopped
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in _run(self, model, ckpt_path)
1166 self._checkpoint_connector.resume_end()
1167
-> 1168 results = self._run_stage()
1169
1170 log.detail(f"{self.__class__.__name__}: trainer tearing down")
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in _run_stage(self)
1252 if self.predicting:
1253 return self._run_predict()
-> 1254 return self._run_train()
1255
1256 def _pre_training_routine(self):
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in _run_train(self)
1274
1275 with isolate_rng():
-> 1276 self._run_sanity_check()
1277
1278 # enable train mode
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in _run_sanity_check(self)
1343 # run eval step
1344 with torch.no_grad():
-> 1345 val_loop.run()
1346
1347 self._call_callback_hooks("on_sanity_check_end")
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/loops/loop.py in run(self, *args, **kwargs)
198 try:
199 self.on_advance_start(*args, **kwargs)
--> 200 self.advance(*args, **kwargs)
201 self.on_advance_end()
202 self._restarting = False
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/loops/dataloader/evaluation_loop.py in advance(self, *args, **kwargs)
153 if self.num_dataloaders > 1:
154 kwargs["dataloader_idx"] = dataloader_idx
--> 155 dl_outputs = self.epoch_loop.run(self._data_fetcher, dl_max_batches, kwargs)
156
157 # store batch level output per dataloader
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/loops/loop.py in run(self, *args, **kwargs)
198 try:
199 self.on_advance_start(*args, **kwargs)
--> 200 self.advance(*args, **kwargs)
201 self.on_advance_end()
202 self._restarting = False
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py in advance(self, data_fetcher, dl_max_batches, kwargs)
141
142 # lightning module methods
--> 143 output = self._evaluation_step(**kwargs)
144 output = self._evaluation_step_end(output)
145
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/loops/epoch/evaluation_epoch_loop.py in _evaluation_step(self, **kwargs)
238 """
239 hook_name = "test_step" if self.trainer.testing else "validation_step"
--> 240 output = self.trainer._call_strategy_hook(hook_name, *kwargs.values())
241
242 return output
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in _call_strategy_hook(self, hook_name, *args, **kwargs)
1704
1705 with self.profiler.profile(f"[Strategy]{self.strategy.__class__.__name__}.{hook_name}"):
-> 1706 output = fn(*args, **kwargs)
1707
1708 # restore current_fx when nested context
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/strategies/strategy.py in validation_step(self, *args, **kwargs)
368 with self.precision_plugin.val_step_context():
369 assert isinstance(self.model, ValidationStep)
--> 370 return self.model.validation_step(*args, **kwargs)
371
372 def test_step(self, *args: Any, **kwargs: Any) -> Optional[STEP_OUTPUT]:
/usr/local/lib/python3.7/dist-packages/flash/image/segmentation/model.py in validation_step(self, batch, batch_idx)
151 def validation_step(self, batch: Any, batch_idx: int) -> Any:
152 batch = (batch[DataKeys.INPUT], batch[DataKeys.TARGET])
--> 153 return super().validation_step(batch, batch_idx)
154
155 def test_step(self, batch: Any, batch_idx: int) -> Any:
/usr/local/lib/python3.7/dist-packages/flash/core/model.py in validation_step(self, batch, batch_idx)
423
424 def validation_step(self, batch: Any, batch_idx: int) -> None:
--> 425 output = self.step(batch, batch_idx, self.val_metrics)
426 log_kwargs = {"batch_size": output.get(OutputKeys.BATCH_SIZE, None)} if _PL_GREATER_EQUAL_1_5_0 else {}
427 self.log_dict(
/usr/local/lib/python3.7/dist-packages/flash/core/model.py in step(self, batch, batch_idx, metrics)
360 output = {OutputKeys.OUTPUT: y_hat}
361 y_hat = self.to_loss_format(output[OutputKeys.OUTPUT])
--> 362 losses = {name: l_fn(y_hat, y) for name, l_fn in self.loss_fn.items()}
363
364 y_hat = self.to_metrics_format(output[OutputKeys.OUTPUT])
/usr/local/lib/python3.7/dist-packages/flash/core/model.py in <dictcomp>(.0)
360 output = {OutputKeys.OUTPUT: y_hat}
361 y_hat = self.to_loss_format(output[OutputKeys.OUTPUT])
--> 362 losses = {name: l_fn(y_hat, y) for name, l_fn in self.loss_fn.items()}
363
364 y_hat = self.to_metrics_format(output[OutputKeys.OUTPUT])
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in cross_entropy(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)
3012 if size_average is not None or reduce is not None:
3013 reduction = _Reduction.legacy_get_string(size_average, reduce)
-> 3014 return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
3015
3016
IndexError: Target 255 is out of bounds.
How can I solve this problem? I researched other issues on Stack Overflow, and they were all related to the number of classes. But in my case, I only want to segment mining fields.

K-NN in Pyspark

The following code classifies the poker data set, which has 10 features (all numeric) and 10 class labels (all numeric). I used sklearn's K-NN classifier in PySpark with a custom distance function. It throws an error when broadcasting the K-NN model and predicting the test labels. When I do not use a custom function, it does not show any error. Why is this happening?
x=sc.textFile("/home/ritesh/Spark/poker100.txt")
def parseLine(line):
    """Parse one comma-separated record into a ``(label, vector)`` pair.

    The last field is returned unchanged (as a string) as the label; every
    preceding field is converted to a Python ``float``.

    Raises:
        ValueError: if a feature field cannot be converted to ``float``.
    """
    cols = line.split(',')  # split the txt file with ','
    # label is the last column
    label = cols[-1]
    # vector is every column except the label, converted from string to float.
    # The builtin float is used because the ``np.float`` alias no longer
    # exists in current NumPy releases.
    vector = [float(element) for element in cols[:-1]]
    return (label, vector)
x= x.map(parseLine)
train,test=x.randomSplit([0.7,0.3],seed=100)
train=train.map(lambda x: (x[0], x[1]))
test=test.map(lambda x: (x[0],x[1]))
X=train.map(lambda x: x[1])
#collect traing data
X=X.collect()
Y=train.map(lambda x: x[0])
#collect training label
Y=Y.collect()
y=test.map(lambda x: x[0])
# collect testing label
y=y.collect()
import math
def dist(x, y):
    """Return the Euclidean distance between the points ``x`` and ``y``.

    Used as the custom distance between a training and a testing sample.
    Both arguments are coerced with ``np.asarray``, so NumPy arrays and plain
    numeric sequences of equal length are accepted.

    NOTE(review): a model that stores this function as its metric is pickled
    with a reference to it by name, so it presumably has to live in a module
    the Spark workers can import -- confirm.
    """
    difference = np.asarray(x) - np.asarray(y)
    return np.sqrt(np.sum(difference ** 2))
import numpy as np
from sklearn.neighbors.ball_tree import BallTree
BallTree.valid_metrics
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
knn=KNeighborsClassifier(n_neighbors=3,algorithm='ball_tree', metric= dist)
model=knn.fit(X,Y) # fit KNN model
model=sc.broadcast(model)
testdata=test.map(lambda x: model.value.predict(np.array(x[1],dtype="float64").reshape(1,-1))) #predict test data
y_pred=testdata.collect()
Running it gives this error:
Py4JJavaError
Traceback (most recent call last)
<ipython-input-113-a20ddffd3048> in <module>()
1 model=sc.broadcast(model)
2 testdata=test.map(lambda x: model.value.predict(np.array(x[1],dtype="float64").reshape(1,-1)))
----> 3 y_pred=testdata.collect()
/apps/spark-2.4.3/python/pyspark/rdd.py in collect(self)
814 """
815 with SCCallSiteSync(self.context) as css:
--> 816 sock_info = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd())
817 return list(_load_from_socket(sock_info, self._jrdd_deserializer))
818
/apps/spark-2.4.3/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py in __call__(self, *args)
1255 answer = self.gateway_client.send_command(command)
1256 return_value = get_return_value(
-> 1257 answer, self.gateway_client, self.target_id, self.name)
1258
1259 for temp_arg in temp_args:
/apps/spark-2.4.3/python/pyspark/sql/utils.py in deco(*a, **kw)
61 def deco(*a, **kw):
62 try:
---> 63 return f(*a, **kw)
64 except py4j.protocol.Py4JJavaError as e:
65 s = e.java_exception.toString()
/apps/spark-2.4.3/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
--> 328 format(target_id, ".", name), value)
329 else:
330 raise Py4JError(
Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.collectAndServe.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 43.0 failed 1 times, most recent failure: Lost task 1.0 in stage 43.0 (TID 87, localhost, executor driver): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
File "/apps/spark-2.4.3/python/lib/pyspark.zip/pyspark/worker.py", line 377, in main
process()
File "/apps/spark-2.4.3/python/lib/pyspark.zip/pyspark/worker.py", line 372, in process
serializer.dump_stream(func(split_index, iterator), outfile)
File "/apps/spark-2.4.3/python/lib/pyspark.zip/pyspark/serializers.py", line 393, in dump_stream
vs = list(itertools.islice(iterator, batch))
File "/apps/spark-2.4.3/python/lib/pyspark.zip/pyspark/util.py", line 99, in wrapper
return f(*args, **kwargs)
File "<ipython-input-113-a20ddffd3048>", line 2, in <lambda>
File "/apps/spark-2.4.3/python/lib/pyspark.zip/pyspark/broadcast.py", line 148, in value
self._value = self.load_from_path(self._path)
File "/apps/spark-2.4.3/python/lib/pyspark.zip/pyspark/broadcast.py", line 125, in load_from_path
return self.load(f)
File "/apps/spark-2.4.3/python/lib/pyspark.zip/pyspark/broadcast.py", line 131, in load
return pickle.load(file)
AttributeError: Can't get attribute 'dist' on <module 'pyspark.daemon' from '/apps/spark-2.4.3/python/lib/pyspark.zip/pyspark/daemon.py'>

keras.backend is not defined for variational autoencoder model

I created a Variational Autoencoder model. To do the sampling, I created the following method:
from keras import backend as k
def sampling(args):
    """Draw a latent sample from the pair ``(z_mean, z_log_var)``.

    ``args`` is unpacked into ``z_mean`` and ``z_log_var``. A random normal
    tensor ``epsilon`` is created with the batch size and second dimension of
    ``z_mean``, and ``z_mean + exp(0.5 * z_log_var) * epsilon`` is returned.

    Relies on the module-level alias ``k`` (``from keras import backend as k``).
    """
    z_mean, z_log_var = args
    batch = k.shape(z_mean)[0]      # batch size taken from the tensor itself
    dim = k.int_shape(z_mean)[1]    # second dimension of z_mean
    epsilon = k.random_normal(shape=(batch, dim))
    return z_mean + k.exp(0.5 * z_log_var) * epsilon
this is the model architecture:
def create_variationalModel(original_dim):
    """Build and compile a dense variational autoencoder.

    The encoder passes an ``original_dim``-wide input through a stack of tanh
    ``Dense`` layers and produces ``z_mean``, ``z_log_var`` and a sampled ``z``
    (via the ``sampling`` Lambda layer), each of width 3. The decoder maps a
    latent vector back through tanh ``Dense`` layers to a sigmoid output of
    width ``original_dim``.

    Side effects: calls ``summary()`` on the encoder and the decoder, and
    writes the model diagram to ``vae.png`` through ``plot_model``.

    Args:
        original_dim: width of the input (and reconstructed output) vectors.

    Returns:
        The compiled ``vae`` model.
    """
    latent_dim = 3

    # ---- Encoder ----
    inputs = Input(shape=(original_dim,), name="encoder_input")
    hidden = Dense(units=original_dim, activation="tanh")(inputs)
    hidden = Dense(units=int(original_dim / 2), activation="tanh")(hidden)
    hidden = Dense(units=int(original_dim / 4), activation="tanh")(hidden)
    hidden = Dense(units=int(original_dim / 8), activation="tanh")(hidden)
    hidden = Dense(units=10, activation="tanh")(hidden)
    z_mean = Dense(latent_dim, name="z_mean")(hidden)
    z_log_var = Dense(latent_dim, name="z_log_var")(hidden)
    z = Lambda(sampling, output_shape=(latent_dim,), name="z")([z_mean, z_log_var])
    encoder = Model(inputs, [z_mean, z_log_var, z], name="encoder")
    encoder.summary()

    # ---- Decoder ----
    latent_inputs = Input(shape=(latent_dim,), name="z_sampling")
    decoded = Dense(units=10, activation="tanh")(latent_inputs)
    decoded = Dense(units=int(original_dim / 8), activation="tanh")(decoded)
    decoded = Dense(units=int(original_dim / 4), activation="tanh")(decoded)
    decoded = Dense(units=int(original_dim / 2), activation="tanh")(decoded)
    decoded = Dense(units=original_dim, activation="tanh")(decoded)
    outputs = Dense(units=original_dim, activation="sigmoid")(decoded)
    decoder = Model(latent_inputs, outputs, name="decoder")
    decoder.summary()

    # ---- Full VAE: feed the sampled z (third encoder output) to the decoder ----
    outputs = decoder(encoder(inputs)[2])
    vae = Model(inputs, outputs, name="vae_mlp")

    # Reconstruction term, scaled by the input width.
    reconstruction_loss = mse(inputs, outputs)
    reconstruction_loss *= original_dim
    # KL term computed from z_mean and z_log_var.
    kl_loss = 1 + z_log_var - k.square(z_mean) - k.exp(z_log_var)
    kl_loss = k.sum(kl_loss, axis=-1)
    kl_loss *= -0.5
    vae_loss = k.mean(reconstruction_loss + kl_loss)
    vae.add_loss(vae_loss)

    plot_model(vae, to_file='vae.png', show_shapes=True)
    vae.compile(optimizer=RMSprop(), loss="mean_squared_error", metrics=["mae"])
    return vae
Then, after training the model and testing it, I decided to save it like this:
vae.save("./models/vae.h5")
but when I tried to load the model like this:
model = load_model("./models/vae.h5")
I have this issue:
--------------------------------------------------------------------------- NameError Traceback (most recent call
last) in
1 #load model
----> 2 model = load_model("./models/vae.h5")
3 # summarize model.
4 model.summary()
5 with open("./models/LabelEncoders_dic.pickle","rb") as f:
~/anaconda3/envs/myenv/lib/python3.8/site-packages/keras/engine/saving.py
in load_wrapper(*args, **kwargs)
490 os.remove(tmp_filepath)
491 return res
--> 492 return load_function(*args, **kwargs)
493
494 return load_wrapper
~/anaconda3/envs/myenv/lib/python3.8/site-packages/keras/engine/saving.py
in load_model(filepath, custom_objects, compile)
582 if H5Dict.is_supported_type(filepath):
583 with H5Dict(filepath, mode='r') as h5dict:
--> 584 model = _deserialize_model(h5dict, custom_objects, compile)
585 elif hasattr(filepath, 'write') and callable(filepath.write):
586 def load_function(h5file):
~/anaconda3/envs/myenv/lib/python3.8/site-packages/keras/engine/saving.py
in _deserialize_model(h5dict, custom_objects, compile)
272 raise ValueError('No model found in config.')
273 model_config = json.loads(model_config.decode('utf-8'))
--> 274 model = model_from_config(model_config, custom_objects=custom_objects)
275 model_weights_group = h5dict['model_weights']
276
~/anaconda3/envs/myenv/lib/python3.8/site-packages/keras/engine/saving.py
in model_from_config(config, custom_objects)
625 'Sequential.from_config(config)?')
626 from ..layers import deserialize
--> 627 return deserialize(config, custom_objects=custom_objects)
628
629
~/anaconda3/envs/myenv/lib/python3.8/site-packages/keras/layers/init.py
in deserialize(config, custom_objects)
163 globs['Model'] = models.Model
164 globs['Sequential'] = models.Sequential
--> 165 return deserialize_keras_object(config,
166 module_objects=globs,
167 custom_objects=custom_objects,
~/anaconda3/envs/myenv/lib/python3.8/site-packages/keras/utils/generic_utils.py
in deserialize_keras_object(identifier, module_objects,
custom_objects, printable_module_name)
142 custom_objects = custom_objects or {}
143 if has_arg(cls.from_config, 'custom_objects'):
--> 144 return cls.from_config(
145 config['config'],
146 custom_objects=dict(list(_GLOBAL_CUSTOM_OBJECTS.items()) +
~/anaconda3/envs/myenv/lib/python3.8/site-packages/keras/engine/network.py
in from_config(cls, config, custom_objects) 1054 # First,
we create all layers and enqueue nodes to be processed 1055
for layer_data in config['layers']:
-> 1056 process_layer(layer_data) 1057 1058 # Then we process nodes in order of layer depth.
~/anaconda3/envs/myenv/lib/python3.8/site-packages/keras/engine/network.py
in process_layer(layer_data) 1039 from ..layers import
deserialize as deserialize_layer 1040
-> 1041 layer = deserialize_layer(layer_data, 1042 custom_objects=custom_objects) 1043
created_layers[layer_name] = layer
~/anaconda3/envs/myenv/lib/python3.8/site-packages/keras/layers/init.py
in deserialize(config, custom_objects)
163 globs['Model'] = models.Model
164 globs['Sequential'] = models.Sequential
--> 165 return deserialize_keras_object(config,
166 module_objects=globs,
167 custom_objects=custom_objects,
~/anaconda3/envs/myenv/lib/python3.8/site-packages/keras/utils/generic_utils.py
in deserialize_keras_object(identifier, module_objects,
custom_objects, printable_module_name)
142 custom_objects = custom_objects or {}
143 if has_arg(cls.from_config, 'custom_objects'):
--> 144 return cls.from_config(
145 config['config'],
146 custom_objects=dict(list(_GLOBAL_CUSTOM_OBJECTS.items()) +
~/anaconda3/envs/myenv/lib/python3.8/site-packages/keras/engine/network.py
in from_config(cls, config, custom_objects) 1073
node_data = node_data_list[node_index] 1074
try:
-> 1075 process_node(layer, node_data) 1076 1077 # If the node does not have all
inbound layers
~/anaconda3/envs/myenv/lib/python3.8/site-packages/keras/engine/network.py
in process_node(layer, node_data) 1023 # and building
the layer if needed. 1024 if input_tensors:
-> 1025 layer(unpack_singleton(input_tensors), **kwargs) 1026 1027 def process_layer(layer_data):
~/anaconda3/envs/myenv/lib/python3.8/site-packages/keras/backend/tensorflow_backend.py
in symbolic_fn_wrapper(*args, **kwargs)
73 if _SYMBOLIC_SCOPE.value:
74 with get_graph().as_default():
---> 75 return func(*args, **kwargs)
76 else:
77 return func(*args, **kwargs)
~/anaconda3/envs/myenv/lib/python3.8/site-packages/keras/engine/base_layer.py
in call(self, inputs, **kwargs)
487 # Actually call the layer,
488 # collecting output(s), mask(s), and shape(s).
--> 489 output = self.call(inputs, **kwargs)
490 output_mask = self.compute_mask(inputs, previous_mask)
491
~/anaconda3/envs/myenv/lib/python3.8/site-packages/keras/layers/core.py
in call(self, inputs, mask)
714 else:
715 self._input_dtypes = K.dtype(inputs)
--> 716 return self.function(inputs, **arguments)
717
718 def compute_mask(self, inputs, mask=None):
~/anaconda3/envs/myenv/lib/python3.8/site-packages/keras/layers/core.py
in sampling(args)
NameError: name 'k' is not defined
k comes from "from keras import backend as k". Even after adding this import, I get the same error. Does anyone know how to fix this?

mllib KernelDensity error

I'm trying to use pyspark.mllib.stat.KernelDensity this way:
data = sc.parallelize([0, 1, 2, 2, 1, 1, 1, 1, 1, 2, 0, 0])
kd = KernelDensity()
kd.setSample(data)
kd.setBandwidth(3)
densities = kd.estimate([-1.0, 2.0, 5.0])
but eventually get this error:
--------------------------------------------------------------------------- Py4JError Traceback (most recent call
last) in ()
8
9 # Find density estimates for the given values
---> 10 densities = kd.estimate([-1.0, 2.0, 5.0])
/home/user10215193/anaconda3/lib/python3.6/site-packages/pyspark/mllib/stat/KernelDensity.py
in estimate(self, points)
56 points = list(points)
57 densities = callMLlibFunc(
---> 58 "estimateKernelDensity", self._sample, self._bandwidth, points)
59 return np.asarray(densities)
/home/user10215193/anaconda3/lib/python3.6/site-packages/pyspark/mllib/common.py
in callMLlibFunc(name, *args)
129 api = getattr(sc._jvm.PythonMLLibAPI(), name)
130 print(api)
--> 131 return callJavaFunc(sc, api, *args)
132
133
/home/user10215193/anaconda3/lib/python3.6/site-packages/pyspark/mllib/common.py
in callJavaFunc(sc, func, *args)
121 """ Call Java Function """
122 args = [_py2java(sc, a) for a in args]
--> 123 return _java2py(sc, func(*args))
124
125
/home/user10215193/anaconda3/lib/python3.6/site-packages/py4j/java_gateway.py
in call(self, *args) 1131 answer =
self.gateway_client.send_command(command) 1132 return_value
= get_return_value(
-> 1133 answer, self.gateway_client, self.target_id, self.name) 1134 1135 for temp_arg in temp_args:
/home/user10215193/anaconda3/lib/python3.6/site-packages/py4j/protocol.py
in get_return_value(answer, gateway_client, target_id, name)
321 raise Py4JError(
322 "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
--> 323 format(target_id, ".", name, value))
324 else:
325 raise Py4JError(
Py4JError: An error occurred while calling o19.estimateKernelDensity.
Trace: py4j.Py4JException: Method estimateKernelDensity([class
org.apache.spark.api.java.JavaRDD, class java.lang.Integer, class
java.util.ArrayList]) does not exist at
py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:318)
at
py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:326)
at py4j.Gateway.invoke(Gateway.java:272) at
py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79) at
py4j.GatewayConnection.run(GatewayConnection.java:214) at
java.lang.Thread.run(Thread.java:748)
I couldn't find anything similar here so if somebody can help me with this I would much appreciate it.
You have to be careful about the types:
bandwidth has to be float
sample has to be RDD[float]
So replace your code with:
kd.setSample(data.map(float))
kd.setBandwidth(3.0)
densities = kd.estimate([-1.0, 2.0, 5.0])
and you'll be fine.

has training error using pyspark ALS

I run Spark on a virtual machine and use the ALS library to train on my data.
rawRatings = sc.textFile('data/ratings.csv').map(lambda x: x.replace('\t', ','))
parsedRatings = rawRatings.map(lambda x: x.split(',')).map(lambda x: Rating(int(x[0]), int(x[1]), float(x[2])))
trainData, valData, testData = parsedRatings.randomSplit([0.6, 0.2, 0.2], seed=42)
model = ALS.train(trainData, rank=8, iterations=5, lambda_=0.1)
It works. But if I set iterations=10, it shows this error message:
Py4JJavaError Traceback (most recent call last)
<ipython-input-181-e64eb91ba0eb> in <module>()
6 regularization_parameter = 0.1
7 tolerance = 0.02
----> 8 model = ALS.train(trainData, rank=8, seed=seed, iterations=7, lambda_=regularization_parameter)
/usr/local/bin/spark-1.3.1-bin-hadoop2.6/python/pyspark/mllib/recommendation.py in train(cls, ratings, rank, iterations, lambda_, blocks, nonnegative, seed)
138 seed=None):
139 model = callMLlibFunc("trainALSModel", cls._prepare(ratings), rank, iterations,
--> 140 lambda_, blocks, nonnegative, seed)
141 return MatrixFactorizationModel(model)
142
/usr/local/bin/spark-1.3.1-bin-hadoop2.6/python/pyspark/mllib/common.py in callMLlibFunc(name, *args)
118 sc = SparkContext._active_spark_context
119 api = getattr(sc._jvm.PythonMLLibAPI(), name)
--> 120 return callJavaFunc(sc, api, *args)
121
122
/usr/local/bin/spark-1.3.1-bin-hadoop2.6/python/pyspark/mllib/common.py in callJavaFunc(sc, func, *args)
111 """ Call Java Function """
112 args = [_py2java(sc, a) for a in args]
--> 113 return _java2py(sc, func(*args))
114
115
/usr/local/bin/spark-1.3.1-bin-hadoop2.6/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py in __call__(self, *args)
536 answer = self.gateway_client.send_command(command)
537 return_value = get_return_value(answer, self.gateway_client,
--> 538 self.target_id, self.name)
539
540 for temp_arg in temp_args:
/usr/local/bin/spark-1.3.1-bin-hadoop2.6/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
298 raise Py4JJavaError(
299 'An error occurred while calling {0}{1}{2}.\n'.
--> 300 format(target_id, '.', name), value)
301 else:
302 raise Py4JError(
Py4JJavaError: An error occurred while calling o7508.trainALSModel.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 14882.0 failed 1 times, most recent failure: Lost task 0.0 in stage 14882.0 (TID 3699, localhost): java.lang.StackOverflowError
at java.io.ObjectInputStream$PeekInputStream.peek(ObjectInputStream.java:2293)
at java.io.ObjectInputStream$BlockDataInputStream.peek(ObjectInputStream.java:2586)
at java.io.ObjectInputStream$BlockDataInputStream.peekByte(ObjectInputStream.java:2596)
at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1505)
.....
I am just wondering what is wrong with that. Setting iterations=6 works,
but iterations=7 starts producing this error message again. I am using
IPython with Python 3.x. Thanks for any generous answers!

Resources