DELETE query in Databricks showing a parse exception

DELETE FROM (
  SELECT
    id,
    firstname,
    middlename,
    lastname,
    gender,
    row_number() OVER (
      PARTITION BY firstname, middlename, lastname, gender
      ORDER BY id
    ) AS dupcount
  FROM delete_people10m
)
WHERE dupcount > 1;
Please note that the inner query runs fine on its own.
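The parse exception is expected: Spark SQL's DELETE FROM takes a table name as its target, not a derived subquery. One common workaround for this kind of deduplication, sketched below on the assumption that delete_people10m is a Delta table and that id uniquely identifies rows, is a MERGE that deletes every row ranked after the first in its duplicate group:

MERGE INTO delete_people10m AS t
USING (
  SELECT id
  FROM (
    SELECT
      id,
      row_number() OVER (
        PARTITION BY firstname, middlename, lastname, gender
        ORDER BY id
      ) AS dupcount
    FROM delete_people10m
  )
  WHERE dupcount > 1  -- keep the first row of each group, delete the rest
) AS d
ON t.id = d.id
WHEN MATCHED THEN DELETE;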

Related

Same code with a different table name gives an error in Cassandra

Working with Apache Cassandra in a Jupyter notebook: creating a table and inserting data both work fine, but after I change the table name, the same code gives an error!
The first, working code:
session.execute("""CREATE TABLE IF NOT EXISTS table2
    (artist text, song text, firstname text, lastname text, userId int, sessionid int, iteminsession int,
    PRIMARY KEY ((userId, sessionId), itemInSession))""")

import csv  # needed for csv.reader below

file = 'event_datafile_new.csv'
with open(file, encoding='utf8') as f:
    csvreader = csv.reader(f)
    next(csvreader)  # skip header
    for line in csvreader:
        # build the insert statement
        query = "INSERT INTO table2 (artist, song, firstname, lastname, userId, sessionId, itemInSession) "
        query = query + "VALUES (%s, %s, %s, %s, %s, %s, %s)"
        # execute the insertion
        session.execute(query, (line[0], line[9], line[1], line[4], int(line[10]), int(line[8]), int(line[3])))

query = "select artist, song, firstname, lastname from table2 WHERE userId = 10 and sessionId = 182"
try:
    rows = session.execute(query)
except Exception as e:
    print(e)
for row in rows:
    print(f'artist: {row.artist}, song: {row.song}, user first name: {row.firstname}, user last name: {row.lastname}')
After changing the table name from 'table2' to 'artist_song', I get the error: InvalidRequest: Error from server: code=2200 [Invalid query] message="Undefined column name firstname"
session.execute("""CREATE TABLE IF NOT EXISTS artist_song
    (artist text, song text, firstname text, lastname text, userId int, sessionid int, iteminsession int,
    PRIMARY KEY ((userId, sessionId), itemInSession))""")

file = 'event_datafile_new.csv'
with open(file, encoding='utf8') as f:
    csvreader = csv.reader(f)
    next(csvreader)  # skip header
    for line in csvreader:
        # build the insert statement
        query = "INSERT INTO artist_song (artist, song, firstname, lastname, userId, sessionId, itemInSession) "
        query = query + "VALUES (%s, %s, %s, %s, %s, %s, %s)"
        # execute the insertion
        session.execute(query, (line[0], line[9], line[1], line[4], int(line[10]), int(line[8]), int(line[3])))

query = "select artist, song, firstname, lastname from artist_song WHERE userId = 10 and sessionId = 182"
try:
    rows = session.execute(query)
except Exception as e:
    print(e)
for row in rows:
    print(f'artist: {row.artist}, song: {row.song}, user first name: {row.firstname}, user last name: {row.lastname}')
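One likely cause, offered here as an assumption since the question does not show it: CREATE TABLE IF NOT EXISTS silently does nothing when the table already exists, so if artist_song was created in an earlier run with different column names (for example first_name instead of firstname), the old schema survives and the INSERT fails with "Undefined column name". A sketch of how to check and reset from cqlsh:

-- Inspect the schema the server actually has
DESCRIBE TABLE artist_song;

-- If it differs, drop and recreate (this discards any existing data)
DROP TABLE IF EXISTS artist_song;
CREATE TABLE artist_song
    (artist text, song text, firstname text, lastname text, userId int, sessionid int, iteminsession int,
    PRIMARY KEY ((userId, sessionId), itemInSession));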

SQL: write a query to get the full name, email, and phone of tenants who are married and paying rent > 9000, using subqueries

I have two tables to query. Below is the query I have written; I need help linking the two subqueries together.
SELECT FIRST_NAME + ' ' + LAST_NAME AS FULL_NAME, PHONE, EMAIL
FROM PROFILES
WHERE PROFILE_ID IN
    ((SELECT PROFILE_ID
      FROM PROFILES
      WHERE MARITIAL_STATUS = 'Y') and
     (SELECT PROFILE_ID
      FROM TENANCY_HISTORIES
      WHERE RENT > '9000'));
You are using and between two subqueries inside a single IN clause, which is not valid SQL; each subquery needs its own IN predicate. Since the requirement is tenants who are married and paying rent > 9000, combine the two predicates with AND (and compare RENT as a number, not a string):
SELECT FIRST_NAME + ' ' + LAST_NAME AS FULL_NAME, PHONE, EMAIL
FROM PROFILES
WHERE PROFILE_ID IN
      (SELECT PROFILE_ID
       FROM PROFILES
       WHERE MARITIAL_STATUS = 'Y')
  AND PROFILE_ID IN
      (SELECT PROFILE_ID
       FROM TENANCY_HISTORIES
       WHERE RENT > 9000);
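An equivalent form, sketched here on the assumption that PROFILE_ID is the column linking the two tables, joins them directly instead of using IN subqueries:

SELECT p.FIRST_NAME + ' ' + p.LAST_NAME AS FULL_NAME, p.PHONE, p.EMAIL
FROM PROFILES p
JOIN TENANCY_HISTORIES th
  ON th.PROFILE_ID = p.PROFILE_ID
WHERE p.MARITIAL_STATUS = 'Y'  -- married
  AND th.RENT > 9000;          -- paying rent above 9000

If a profile can have several tenancy rows, add DISTINCT to avoid duplicates in the result.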

Cassandra insert data into table column with data type set<text>

We're trying to set up an INSERT statement on a table with a set<text> column in Cassandra, but have had no luck. Here's an example:
INSERT INTO test_table
(my_items)
VALUES
([{"test1":"test val 1","test2":"test val 2"}])
The result is always something like:
no viable alternative at input for [test val ]2
Enclose the values in curly brackets, separated by commas.
For example, say we have this schema:
CREATE TABLE users (
    user_id text PRIMARY KEY,
    first_name text,
    last_name text,
    emails set<text>
);
Insert:
INSERT INTO users (user_id, first_name, last_name, emails)
VALUES ('frodo', 'Frodo', 'Baggins', {'f@baggins.com', 'baggins@gmail.com'});
Update:
UPDATE users SET emails = emails + {'fb@friendsofmordor.org'}
WHERE user_id = 'frodo';
For more, see "Using the set type" in the DataStax documentation.
Edit: if you want to insert the values "test1":"test val 1" and "test2":"test val 2" into the set, enclose each value in single quotes. Example:
INSERT INTO users (user_id, first_name, last_name, emails)
VALUES('2011331035', 'Md Ashraful', 'Islam', {'"test1":"test val 1"', '"test2":"test val 2"'});
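If the data is really key/value pairs rather than opaque strings, a map<text, text> column (the topic of the next question) may model it more naturally. A sketch using a hypothetical users_kv table:

CREATE TABLE users_kv (
    user_id text PRIMARY KEY,
    attrs map<text, text>
);

INSERT INTO users_kv (user_id, attrs)
VALUES ('frodo', {'test1' : 'test val 1', 'test2' : 'test val 2'});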

Cassandra Data Modeling: dynamic columns

I have a table user_activity:
user_activity {
    user_id text
    user_name text
    email_id text
    password text
    activity1 text
}
Is it possible to add columns dynamically?
For example:
user1 (user_id : row key)
user_name, emai_id, password, activity1, activity2
user2 (user_id : row key)
user_name, emai_id, password, activity1, activity2, activity3
user3 (user_id : row key)
user_name, emai_id, password, activity1
There could be any number of activities.
Thanks in advance :)
You can use a map.
For example:
CREATE TABLE user_activity (
    user_id text PRIMARY KEY,
    emai_id text,
    password text,
    activity map<text, text>
);
Insert:
INSERT INTO user_activity (user_id, emai_id, password, activity)
VALUES ('1', 'a1@b.com', 'password1',
        {'activity1' : 'test activity 1', 'activity2' : 'test activity 2'});
Update:
UPDATE user_activity SET activity['activity1'] = 'updated test activity 1'
WHERE user_id = '1';
Delete:
DELETE activity['activity2'] FROM user_activity WHERE user_id = '1';
Note: keep the collection (map) small. In Cassandra a map can hold at most 65,535 keys, and each value is limited to 65,535 bytes.
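For completeness, reading the row back is a plain SELECT; the whole map comes back as a single column value:

SELECT user_id, emai_id, activity
FROM user_activity
WHERE user_id = '1';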

Order by is currently only supported on the clustered columns of the PRIMARY KEY

Cassandra 2.0.7, CQL 3.1.1
CREATE TABLE playlists (
    id uuid,
    song_order int,
    song_id uuid,
    title text,
    album text,
    artist text,
    PRIMARY KEY (id, song_order)
);
INSERT INTO playlists (id, song_order, song_id, title, artist, album)
VALUES (62c36092-82a1-3a00-93d1-46196ee77204, 1,
a3e64f8f-bd44-4f28-b8d9-6938726e34d4, 'La Grange', 'ZZ Top', 'Tres Hombres');
INSERT INTO playlists (id, song_order, song_id, title, artist, album)
VALUES (62c36092-82a1-3a00-93d1-46196ee77204, 2,
8a172618-b121-4136-bb10-f665cfc469eb, 'Moving in Stereo', 'Fu Manchu', 'We Must Obey');
INSERT INTO playlists (id, song_order, song_id, title, artist, album)
VALUES (62c36092-82a1-3a00-93d1-46196ee77204, 3,
2b09185b-fb5a-4734-9b56-49077de9edbf, 'Outside Woman Blues', 'Back Door Slam', 'Roll Away');
SELECT * FROM playlists WHERE id = 62c36092-82a1-3a00-93d1-46196ee77204
ORDER BY song_order DESC LIMIT 2;
error: Order by is currently only supported on the clustered columns
of the PRIMARY KEY, got song_order
This demo is from: http://www.datastax.com/documentation/cql/3.1/cql/cql_reference/select_r.html
Can anyone tell me why?
Thanks!
Try this when you create your table:
CREATE TABLE playlists (
    id uuid,
    song_order int,
    song_id uuid,
    title text,
    album text,
    artist text,
    PRIMARY KEY (id, song_order)
) WITH CLUSTERING ORDER BY (song_order ASC);
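Note that a table's primary key cannot be changed with ALTER TABLE, so if playlists already exists (perhaps from an earlier attempt with a different key definition), it has to be dropped first; this discards its data. After recreating it and re-running the INSERTs, the original ordered query should work:

-- discards existing data; re-run the CREATE and INSERTs afterwards
DROP TABLE IF EXISTS playlists;

-- with song_order as the clustering column, this now succeeds
SELECT * FROM playlists
WHERE id = 62c36092-82a1-3a00-93d1-46196ee77204
ORDER BY song_order DESC
LIMIT 2;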
