How to transfer data from Elasticsearch to SQLite? - node.js

For a Node.js project that currently uses Elasticsearch, I have to switch the DBMS to SQLite. I chose Sequelize as the ORM for the Node.js models.
I was able to create a new SQLite database and the models, but I don't know how to migrate the data stored in Elasticsearch over to SQLite.

I believe the best option would be to use Logstash, with the elasticsearch input plugin and the jdbc output plugin. For example:
input {
  elasticsearch {
    hosts => "https://localhost:9200/"
    user => "yourUser"
    password => "yourPassword"
    index => "yourIndex"
    query => '{"query":{"match_all": {}}}'
    scroll => "5m"
    size => 5000
  }
}
output {
  # the jdbc output is a community plugin; install it with bin/logstash-plugin install logstash-output-jdbc
  jdbc {
    # for SQLite, point these at the Xerial sqlite-jdbc driver jar and your database file
    jdbc_connection_string => "jdbc:sqlite:/path/to/your/database.sqlite"
    jdbc_user => ""
    jdbc_password => ""
    jdbc_validate_connection => true
    jdbc_driver_library => "/path/to/sqlite-jdbc.jar"
    jdbc_driver_class => "org.sqlite.JDBC"
    # the statement is parameterized: the SQL comes first, followed by the event fields
    # bound to each ? placeholder (just an example, adapt the table and fields to your data)
    statement => [ "INSERT INTO table_name (column1, column2, column3) VALUES (?, ?, ?)", "field1", "field2", "field3" ]
  }
}
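Alternatively, since the project is already on Node.js with Sequelize, you could skip Logstash entirely and copy the data with a small script that scrolls through the index and bulk-inserts each page into SQLite. A minimal sketch, assuming an @elastic/elasticsearch v8 client, a hypothetical index called yourIndex, and a hypothetical Sequelize model Note whose attributes match the fields in _source:

const { Client } = require('@elastic/elasticsearch')   // npm i @elastic/elasticsearch
const { Sequelize, DataTypes } = require('sequelize')  // npm i sequelize sqlite3

const es = new Client({ node: 'https://localhost:9200' }) // add auth/TLS options as needed
const sequelize = new Sequelize({ dialect: 'sqlite', storage: './database.sqlite' })

// Hypothetical model: adjust the attributes to match the fields of your documents
const Note = sequelize.define('Note', {
  title: DataTypes.STRING,
  body: DataTypes.TEXT
})

async function migrate () {
  await sequelize.sync()

  // open a scroll over the whole index, 1000 documents per page
  let response = await es.search({
    index: 'yourIndex',
    scroll: '1m',
    size: 1000,
    query: { match_all: {} }
  })

  while (response.hits.hits.length > 0) {
    // insert the current page into SQLite in one multi-row INSERT
    await Note.bulkCreate(response.hits.hits.map(hit => hit._source))
    // fetch the next page of results
    response = await es.scroll({ scroll_id: response._scroll_id, scroll: '1m' })
  }
}

migrate().catch(console.error)

This is only a sketch of the idea, not a drop-in solution; the model definition and the mapping from _source to table columns depend entirely on your index.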

Related

logstash add array in document

I would like to import data from my PostgreSQL database into my Elasticsearch database.
I have an appointments index, and in this index I would like to add a persons field (the list of people in an appointment).
Here is my Logstash configuration file and a sample document.
Thank you.
input {
  jdbc {
    jdbc_connection_string => "jdbc:postgresql://localhost:5432/app"
    jdbc_user => "postgres"
    jdbc_password => "admin"
    jdbc_driver_class => "org.postgresql.Driver"
    jdbc_driver_library => "postgresql-42.2.21.jar"
    statement => "select id::text,trim(firstname),trim(lastname) from persons"
  }
}
filter {
  ruby {
    code => "
      event['persons'].each{|subdoc| subdoc['persons'] = subdoc['persons']['firstname']}
    "
  }
}
output {
  #stdout { codec => json_lines }
  elasticsearch {
    hosts => "127.0.0.1"
    index => "appointments"
    doc_as_upsert => true
    document_id => "%{id}"
  }
}
{
  "_index" : "appointments",
  "_type" : "_doc",
  "_id" : "41",
  "_score" : 1.0,
  "_source" : {
    ... other fields, plus the array field I would like to add, e.g.:
    "persons": [
      { "firstname": "firstname1" },
      { "firstname": "firstname2" }
    ]
  }
}
UPDATE 2:
I made a mistake: I was modifying the wrong document. I changed the document_id and added appointment_id to my query.
It still does not work; it replaces my document with what is in the request.
input {
  jdbc {
    jdbc_connection_string => "jdbc:postgresql://localhost:5432/app"
    jdbc_user => "postgres"
    jdbc_password => "admin"
    jdbc_driver_class => "org.postgresql.Driver"
    jdbc_driver_library => "postgresql-42.2.21.jar"
    statement => "select id::text, appointment_id::text,trim(firstname),trim(lastname) from appointments_persons order by created_at"
  }
}
filter {
  aggregate {
    task_id => "%{appointment_id}"
    code => "
      map['persons'] ||= []
    "
    push_map_as_event_on_timeout => true
    timeout_task_id_field => "appointment_id"
    timeout => 10
  }
}
output {
  #stdout { codec => json_lines }
  elasticsearch {
    hosts => "127.0.0.1"
    index => "appointments"
    action => update
    doc_as_upsert => true
    document_id => "%{appointment_id}"
  }
}
Unless you are running a very old version of logstash (prior to 5.0), you cannot reference or modify the event by treating it as a hash.
The jdbc input creates one event for each row in the result set. If you want to combine all the events that have the same [id] you could use an aggregate filter. Note the warning about setting pipeline.workers to 1 so that all events go through the same instance of the filter. In your case I do not think you need to preserve event order, so pipeline.ordered can be ignored.
You will need to do something similar to example 3 in the documentation.
aggregate {
  task_id => "%{id}"
  # note the double quotes inside the single-quoted code block, so the config string does not terminate early
  code => '
    map["persons"] ||= []
    map["persons"] << { "firstname" => event.get("firstname"), "lastname" => event.get("lastname") }
  '
  push_map_as_event_on_timeout => true
  timeout_task_id_field => "id"
  timeout => 10
}
If you are using document_id => "%{appointment_id}", the event read from the database will be written to elasticsearch with that document id. Then, when the aggregate timeout fires, a second document will overwrite it. You might want to add event.cancel to the aggregate code option so that the event from the db does not cloud things.

Problem when recreating tables and inserting data into BigQuery using the Node API

I get unexpected behaviour when loading data into BigQuery just after creating the schema.
I'm using the Node API to insert data with the BigQuery streaming API.
In order to reset the data, I delete and recreate the tables before loading any data.
My problem: the first time it works fine, but if I execute it again it fails.
The process always deletes and recreates the table schema, but it does not insert the data unless I wait a while before running it again.
This is the code which reproduces the case:
async function loadDataIntoBigquery() {
  const {BigQuery} = require('@google-cloud/bigquery')
  const tableName = "users"
  const dataset = "data_analisis"
  const schemaUsers = "name:string,date:string,type:string"
  const userData = [{name: "John", date: "20/08/93", type: "reader"}, {
    name: "Marie",
    date: "20/08/90",
    type: "owner"
  }]
  try {
    const bigquery = new BigQuery()
    await bigquery.createDataset(dataset).then(() => console.log("dataset created successfully")).catch(err => {
      console.log("warn: maybe the dataset already exists")
    })
    await bigquery.dataset(dataset).table(tableName).delete().then(() => console.log("table deleted successfully")).catch((err) => {
      console.log("Error: maybe the table does not exist")
    })
    await bigquery.dataset(dataset).createTable(tableName, {schema: schemaUsers}).then(() => console.log("table created successfully")).catch(err => console.log("Error: maybe the table already exists"))
    await bigquery.dataset(dataset).table(tableName).insert(userData).then((data) => console.log("Ok inserted ", data)).catch(err => console.log("Error: can't insert "))
  } catch (err) {
    console.log("err", err)
  }
}
To verify that the data was inserted, I'm using this query:
select * from `data_analisis.users`
I have the same issue. As a workaround, I insert the data with a query instead:
const query = "INSERT INTO `" + dataset + "." + tableName + "` (name, date, type) VALUES ('" + name + "', '" + date + "', '" + type + "')";
await bigQuery.query({
  query: query,
  useLegacySql: false,
  location: 'EU'
}).catch(err => {
  console.log("Insertion error: ", err);
});
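Two notes on this. First, the underlying problem is probably that streaming inserts into a table that was just deleted and recreated under the same name can be dropped for a short period, which would explain why waiting a moment before running the code again makes it work. Second, if you use the query workaround, it is safer to let BigQuery bind the values as query parameters instead of concatenating strings. A small sketch, assuming the same bigQuery, dataset and tableName variables as above:

// Parameterized version of the same workaround: BigQuery binds @name, @date and @type
// server-side, so the values do not need to be quoted or escaped by hand.
await bigQuery.query({
  query: 'INSERT INTO `' + dataset + '.' + tableName + '` (name, date, type) VALUES (@name, @date, @type)',
  params: { name: 'John', date: '20/08/93', type: 'reader' },
  location: 'EU'
});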

Find key contain specific key value pair in Puppet Hash

I am still a beginner in Puppet, so please bear with me. Let's assume I have this hash created in Puppet through some module:
account = {
  user#desktop1 => {
    owner => john,
    type => ssh-rsa,
    public => SomePublicKey
  },
  user#desktop2 => {
    owner => mary,
    type => ssh-rsa,
    public => SomePublicKey
  },
  user#desktop3 => {
    owner => john,
    type => ssh-rsa,
    public => SomePublicKey
  },
  user#desktop4 => {
    owner => matt,
    type => ssh-rsa,
    public => SomePublicKey
  }
}
How can I find the keys for a specific key/value pair inside the hash? In this case, for example, I want to find all the keys owned by john, so the expected result would be something like:
[user#desktop1, user#desktop3]
Thanks in advance.
The question asks about how to do this in Puppet, although, confusingly, the Hash is a Ruby Hash and the question also has a Ruby tag.
Anyway, this is how you do it in Puppet:
$account = {
  'user#desktop1' => {
    'owner' => 'john',
    'type' => 'ssh-rsa',
    'public' => 'SomePublicKey',
  },
  'user#desktop2' => {
    'owner' => 'mary',
    'type' => 'ssh-rsa',
    'public' => 'SomePublicKey',
  },
  'user#desktop3' => {
    'owner' => 'john',
    'type' => 'ssh-rsa',
    'public' => 'SomePublicKey',
  },
  'user#desktop4' => {
    'owner' => 'matt',
    'type' => 'ssh-rsa',
    'public' => 'SomePublicKey',
  }
}
$users = $account.filter |$k, $v| { $v['owner'] == 'john' }.keys
notice($users)
Puppet applying that leads to:
Notice: Scope(Class[main]): [user#desktop1, user#desktop3]
In Ruby you can use Hash#select (https://ruby-doc.org/core-2.5.1/Hash.html#method-i-select):
account.select {|key, value| value['owner'] == 'john'}.keys
Another option using Enumerable#each_with_object:
account.each_with_object([]) { |(k, v), a| a << k if v['owner'] == 'john'}
#=> ["user#desktop1", "user#desktop3"]
This assumes the keys and values are Strings.

Can I get newly inserted id with sequelize insertOrUpdate?

How can I get the newly inserted id with the following Sequelize code? It currently returns a boolean.
exports.update = async note => entityChildModel.insertOrUpdate(note)
.catch(e => 'Error');
You can use returning: true (to get back auto-generated values), but it only works with Postgres:
exports.update = async note => entityChildModel
.insertOrUpdate(note , { returning : true }) // <--- HERE
.catch(e => 'Error');
For more detail, see the Sequelize docs for upsert / insertOrUpdate.
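On other dialects (such as SQLite or MySQL), one option is to upsert and then read the row back by a unique key to get its id. A minimal sketch, assuming note carries some unique attribute such as a hypothetical externalId that the model defines as unique:

exports.update = async note => {
  try {
    // insertOrUpdate/upsert only reports whether a row was created on non-Postgres dialects...
    await entityChildModel.insertOrUpdate(note);
    // ...so fetch the row back by a unique attribute to get its id
    const saved = await entityChildModel.findOne({ where: { externalId: note.externalId } });
    return saved.id;
  } catch (e) {
    return 'Error';
  }
};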

Kibana Index Pattern showing wrong results

I am using the ELK stack, in which I have used the jdbc input in Logstash.
I have created 2 indexes:
users
employees
Both indexes have one column in common, objid.
Logstash config file:
input {
  jdbc {
    jdbc_driver_library => "/opt/application/cmt/ELK/logstash-5.3.0/ojdbc14.jar"
    jdbc_driver_class => "Java::oracle.jdbc.driver.OracleDriver"
    jdbc_connection_string => "jdbc:oracle:thin:@xx.xxx.xx.xx:xxxx:abc"
    jdbc_user => "xxxx"
    jdbc_password => "xxxxx"
    schedule => "*/2 * * * *"
    statement => "select * from table_employee"
  }
}
output {
  elasticsearch {
    index => "employees"
    document_type => "employee"
    document_id => "%{objid}"
    hosts => "xx.xxx.xxx.xx:9200"
  }
}
input {
  jdbc {
    jdbc_driver_library => "/opt/application/cmt/ELK/logstash-5.3.0/ojdbc14.jar"
    jdbc_driver_class => "Java::oracle.jdbc.driver.OracleDriver"
    jdbc_connection_string => "jdbc:oracle:thin:@xx.xxx.xx.xx:xxxx:abc"
    jdbc_user => "xx"
    jdbc_password => "xxxxxxx"
    schedule => "*/2 * * * *"
    statement => "select A.OBJID,A.LOGIN_NAME,A.STATUS,A.USER_ACCESS2PRIVCLASS,A.USER_DEFAULT2WIPBIN,A.SUPVR_DEFAULT2MONITOR,A.USER2RC_CONFIG,A.OFFLINE2PRIVCLASS,A.WIRELESS_EMAIL from table_user a where A.STATUS=1"
  }
}
output {
  elasticsearch {
    index => "users"
    document_type => "user"
    document_id => "%{objid}%{login_name}"
    hosts => "xx.xxx.xxx.xx:9200"
  }
}
The 1st jdbc input ('employees') contains 26935 records.
The 2nd jdbc input ('users') contains 10619 records.
Common records: 9635 (objid matches).
1st problem: when I create an index pattern in Kibana as 'users', it shows a count of 37554. Why? It should show only 10619.
2nd problem: when I create an index pattern as 'employees', it shows a count of 27919. Why? It should show only 26935.
Also, I have created a different document id for the 'users' index: %{objid}%{login_name}.
If your users and employees inputs and outputs are in the same file / executed at the same time, as your example shows, you need to use conditionals to route your data to the correct Elasticsearch index. Logstash concatenates your files into one pipeline, so all your inputs run through all of the filters and outputs, which is likely why you're getting unexpected results (note that 26935 + 10619 = 37554, which is exactly the count you see for the users index pattern). See this discussion.
You will need to do something like this:
input {
  jdbc {
    statement => "SELECT * FROM users"
    type => "users"
  }
}
input {
  jdbc {
    statement => "SELECT * FROM employees"
    type => "employees"
  }
}
output {
  if [type] == "users" {
    elasticsearch {
      index => "users"
      document_type => "user"
      document_id => "%{objid}%{login_name}"
      hosts => "xx.xxx.xxx.xx:9200"
    }
  }
  if [type] == "employees" {
    elasticsearch {
      index => "employees"
      document_type => "employee"
      document_id => "%{objid}"
      hosts => "xx.xxx.xxx.xx:9200"
    }
  }
}
