For a Node.js project I have to switch the DBMS from Elasticsearch to SQLite. I chose Sequelize as the ORM for the Node.js models.
I was able to create a new SQLite database and model, but I don't know how to migrate the data currently stored in Elasticsearch over to SQLite.
I believe the best option would be to use Logstash with the JDBC output plugin. For example:
input {
  elasticsearch {
    hosts => "https://localhost:9200/"
    user => "yourUser"
    password => "yourPassword"
    index => "yourIndex"
    query => '{"query":{"match_all": {}}}'
    scroll => "5m"
    size => "5000"
  }
}
output {
  jdbc {
    jdbc_connection_string => ""
    jdbc_user => ""
    jdbc_password => ""
    jdbc_validate_connection => true
    jdbc_driver_library => ""
    jdbc_driver_class => "Java::oracle.jdbc.driver.OracleDriver"
    statement => "INSERT INTO table_name VALUES (value1, value2, value3, ...)"   # just a query example
  }
}
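Since the target here is SQLite, the output settings would need the SQLite JDBC driver rather than the Oracle driver shown in the placeholder above. A rough sketch (the jar path, database path, and the notes table/columns are placeholders, not taken from your project; the array form of statement is the parameterized style from the logstash-output-jdbc plugin's documentation):
output {
  jdbc {
    jdbc_connection_string => "jdbc:sqlite:/path/to/your-database.sqlite"   # placeholder path
    jdbc_driver_library => "/path/to/sqlite-jdbc.jar"                       # xerial sqlite-jdbc jar, placeholder path
    jdbc_driver_class => "org.sqlite.JDBC"
    # placeholder table/columns; each "?" is bound to the event field named after the statement
    statement => [ "INSERT INTO notes (id, title, body) VALUES(?, ?, ?)", "id", "title", "body" ]
  }
}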
Related
I would like to import data from my PostgreSQL database into my Elasticsearch database.
I have an appointments index, and in this index I would like to add a persons field (the list of people in an appointment).
Here is my Logstash configuration file and a sample document.
Thank you.
input {
  jdbc {
    jdbc_connection_string => "jdbc:postgresql://localhost:5432/app"
    jdbc_user => "postgres"
    jdbc_password => "admin"
    jdbc_driver_class => "org.postgresql.Driver"
    jdbc_driver_library => "postgresql-42.2.21.jar"
    statement => "select id::text,trim(firstname),trim(lastname) from persons"
  }
}
filter {
  ruby {
    code => "
      event['persons'].each{|subdoc| subdoc['persons'] = subdoc['persons']['firstname']}
    "
  }
}
output {
  #stdout { codec => json_lines }
  elasticsearch {
    hosts => "127.0.0.1"
    index => "appointments"
    doc_as_upsert => true
    document_id => "%{id}"
  }
}
{
  "_index" : "appointments",
  "_type" : "_doc",
  "_id" : "41",
  "_score" : 1.0,
  "_source" : {
    ... other fields ...
    (plus the persons array field I would like to add, e.g.:)
    "persons": [{
      "firstname": "firstname1"
    }, {
      "firstname": "firstname2"
    }]
  }
}
UPDATE 2:
I made a mistake: I was modifying the wrong document. I changed the document_id and added appointment_id to my query.
It still does not work; it replaces my document with what is in the request.
input {
  jdbc {
    jdbc_connection_string => "jdbc:postgresql://localhost:5432/app"
    jdbc_user => "postgres"
    jdbc_password => "admin"
    jdbc_driver_class => "org.postgresql.Driver"
    jdbc_driver_library => "postgresql-42.2.21.jar"
    statement => "select id::text, appointment_id::text,trim(firstname),trim(lastname) from appointments_persons order by created_at"
  }
}
filter {
  aggregate {
    task_id => "%{appointment_id}"
    code => "
      map['persons'] ||= []
    "
    push_map_as_event_on_timeout => true
    timeout_task_id_field => "appointment_id"
    timeout => 10
  }
}
output {
  #stdout { codec => json_lines }
  elasticsearch {
    hosts => "127.0.0.1"
    index => "appointments"
    action => update
    doc_as_upsert => true
    document_id => "%{appointment_id}"
  }
}
Unless you are running a very old version of Logstash (prior to 5.0), you cannot reference or modify the event by treating it as a hash; you have to use the event API (event.get / event.set) instead.
The jdbc input creates one event for each row in the result set. If you want to combine all the events that have the same [id], you could use an aggregate filter. Note the warning about setting pipeline.workers to 1 so that all events go through the same instance of the filter. In your case I do not think you need to preserve event order, so pipeline.ordered can be ignored.
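For reference, the single-worker setting can go in logstash.yml or on the command line when starting the pipeline (the config file name below is just a placeholder):
# in logstash.yml
pipeline.workers: 1

# or when starting Logstash
bin/logstash -w 1 -f appointments.conf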
You will need to do something similar to example 3 in the documentation.
aggregate {
  task_id => "%{id}"
  code => '
    map["persons"] ||= []
    map["persons"] << { "firstname" => event.get("firstname"), "lastname" => event.get("lastname") }
  '
  push_map_as_event_on_timeout => true
  timeout_task_id_field => "id"
  timeout => 10
}
If you are using document_id => "%{appointment_id}", each event read from the database will be written to elasticsearch with that document id, and then, when the aggregate timeout fires, a second document will overwrite it. You might want to add event.cancel to the aggregate code option so that the events from the db do not cloud things.
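Putting those pieces together, the aggregate filter for your second attempt could look roughly like this (a sketch based only on the snippets above; the field names are unchanged, and event.cancel drops the per-row events so only the aggregated document is indexed):
filter {
  aggregate {
    task_id => "%{appointment_id}"
    code => '
      map["persons"] ||= []
      map["persons"] << { "firstname" => event.get("firstname"), "lastname" => event.get("lastname") }
      event.cancel   # drop the per-row event so it does not overwrite the aggregated document
    '
    push_map_as_event_on_timeout => true
    timeout_task_id_field => "appointment_id"
    timeout => 10
  }
}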
I get unexpected behaviour when loading data into BigQuery just after creating the schema.
I'm using the Node.js API to insert data with the BigQuery streaming API.
In order to reset the data, I delete and recreate the tables before loading any data.
My problem: the first time it works fine, but if I execute it again it fails.
The process always deletes and recreates the table schema, but does not insert the data unless I wait a moment before executing it again.
This is the code which reproduces the case:
async function loadDataIntoBigquery() {
  const {BigQuery} = require('@google-cloud/bigquery')
  const tableName = "users"
  const dataset = "data_analisis"
  const schemaUsers = "name:string,date:string,type:string"
  const userData = [{name: "John", date: "20/08/93", type: "reader"}, {
    name: "Marie",
    date: "20/08/90",
    type: "owner"
  }]
  try {
    const bigquery = new BigQuery()
    await bigquery.createDataset(dataset).then(() => console.log("dataset created successfully")).catch(err => {
      console.log("warn: maybe the dataset already exists")
    })
    await bigquery.dataset(dataset).table(tableName).delete().then(() => console.log("table deleted successfully")).catch((err) => {
      console.log("Error: maybe the table does not exist")
    })
    await bigquery.dataset(dataset).createTable(tableName, {schema: schemaUsers}).then(() => console.log("table created successfully")).catch(err => console.log("Error: maybe the table already exists"))
    await bigquery.dataset(dataset).table(tableName).insert(userData).then((data) => console.log("Ok inserted ", data)).catch(err => console.log("Error: can't insert "))
  } catch (err) {
    console.log("err", err)
  }
}
To verify that the data was inserted, I'm using this query:
select * from `data_analisis.users`
I have the same issue. As a workaround, I insert the data with a query instead:
const query = "INSERT INTO `" + dataset + "." + tableName + "` (name, date, type) VALUES ('" + name + "', '" + date + "', '" + type + "')";
await bigQuery.query({
  query: query,
  useLegacySql: false,
  location: 'EU'
}).catch(err => {
  console.log("Insertion error: ", err);
});
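A slightly safer variant of the same workaround is to let the client bind the values as named query parameters instead of concatenating them into the SQL string. A sketch, reusing the dataset and tableName variables from the question (the literal values are just the first row of userData):
await bigQuery.query({
  query: "INSERT INTO `" + dataset + "." + tableName + "` (name, date, type) VALUES (@name, @date, @type)",
  params: { name: "John", date: "20/08/93", type: "reader" },
  useLegacySql: false,
  location: 'EU'
}).catch(err => console.log("Insertion error: ", err));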
I am still a beginner in Puppet, so please bear with me. Let's assume I have this hash created in Puppet through some module:
account = {
  user#desktop1 => {
    owner => john,
    type => ssh-rsa,
    public => SomePublicKey
  },
  user#desktop2 => {
    owner => mary,
    type => ssh-rsa,
    public => SomePublicKey
  },
  user#desktop3 => {
    owner => john,
    type => ssh-rsa,
    public => SomePublicKey
  },
  user#desktop4 => {
    owner => matt,
    type => ssh-rsa,
    public => SomePublicKey
  }
}
How can I find the keys for a specific key/value pair inside the hash? In this case, for example, I want to find all the keys owned by john, so the expected result would be something like:
[user#desktop1, user#desktop3]
Thanks in advance.
The question asks about how to do this in Puppet, although, confusingly, the Hash is a Ruby Hash and the question also has a Ruby tag.
Anyway, this is how you do it in Puppet:
$account = {
  'user#desktop1' => {
    'owner' => 'john',
    'type' => 'ssh-rsa',
    'public' => 'SomePublicKey',
  },
  'user#desktop2' => {
    'owner' => 'mary',
    'type' => 'ssh-rsa',
    'public' => 'SomePublicKey',
  },
  'user#desktop3' => {
    'owner' => 'john',
    'type' => 'ssh-rsa',
    'public' => 'SomePublicKey',
  },
  'user#desktop4' => {
    'owner' => 'matt',
    'type' => 'ssh-rsa',
    'public' => 'SomePublicKey',
  }
}
$users = $account.filter |$k, $v| { $v['owner'] == 'john' }.keys
notice($users)
Puppet applying that leads to:
Notice: Scope(Class[main]): [user#desktop1, user#desktop3]
In plain Ruby you can use Hash#select (https://ruby-doc.org/core-2.5.1/Hash.html#method-i-select):
account.select {|key, value| value['owner'] == 'john'}.keys
Another option using Enumerable#each_with_object:
account.each_with_object([]) { |(k, v), a| a << k if v['owner'] == 'john'}
#=> ["user#desktop1", "user#desktop3"]
This supposes the keys and values are Strings.
How can I get the newly inserted id with the following Sequelize call? It currently returns a boolean.
exports.update = async note => entityChildModel.insertOrUpdate(note)
.catch(e => 'Error');
You can use returning: true (to get back auto-generated values), but it only works on Postgres:
exports.update = async note => entityChildModel
  .insertOrUpdate(note, { returning: true }) // <--- HERE
  .catch(e => 'Error');
For more detail, see the Sequelize documentation on upsert.
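A rough sketch of reading the result (hypothetical handling; the exact return shape of insertOrUpdate/upsert with returning: true varies between Sequelize versions, so check the docs for yours):
exports.update = async note => {
  try {
    const result = await entityChildModel.insertOrUpdate(note, { returning: true });
    // On Postgres with returning: true the resolved value includes the upserted record
    // (e.g. an [instance, created] pair in newer versions) instead of just a boolean.
    const instance = Array.isArray(result) ? result[0] : result;
    return instance && instance.id;   // "id" assumes your model's primary key attribute name
  } catch (e) {
    return 'Error';
  }
};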
I am using the ELK stack, in which I have used the jdbc input in Logstash.
I have created 2 indexes:
users
employees
Both indexes have one column in common, objid.
Logstash config file
input {
  jdbc {
    jdbc_driver_library => "/opt/application/cmt/ELK/logstash-5.3.0/ojdbc14.jar"
    jdbc_driver_class => "Java::oracle.jdbc.driver.OracleDriver"
    jdbc_connection_string => "jdbc:oracle:thin:@xx.xxx.xx.xx:xxxx:abc"
    jdbc_user => "xxxx"
    jdbc_password => "xxxxx"
    schedule => "*/2 * * * *"
    statement => "select * from table_employee"
  }
}
output {
  elasticsearch {
    index => "employees"
    document_type => "employee"
    document_id => "%{objid}"
    hosts => "xx.xxx.xxx.xx:9200"
  }
}
input {
  jdbc {
    jdbc_driver_library => "/opt/application/cmt/ELK/logstash-5.3.0/ojdbc14.jar"
    jdbc_driver_class => "Java::oracle.jdbc.driver.OracleDriver"
    jdbc_connection_string => "jdbc:oracle:thin:@xx.xxx.xx.xx:xxxx:abc"
    jdbc_user => "xx"
    jdbc_password => "xxxxxxx"
    schedule => "*/2 * * * *"
    statement => "select A.OBJID,A.LOGIN_NAME,A.STATUS,A.USER_ACCESS2PRIVCLASS,A.USER_DEFAULT2WIPBIN,A.SUPVR_DEFAULT2MONITOR,A.USER2RC_CONFIG,A.OFFLINE2PRIVCLASS,A.WIRELESS_EMAIL from table_user a where A.STATUS=1"
  }
}
output {
  elasticsearch {
    index => "users"
    document_type => "user"
    document_id => "%{objid}%{login_name}"
    hosts => "xx.xxx.xxx.xx:9200"
  }
}
The 1st jdbc input, 'employees', contains 26935 records.
The 2nd jdbc input, 'users', contains 10619 records.
Common records: 9635 (where objid matches).
1st problem: when I create an index pattern in Kibana for users, it shows a count of 37554. Why? It should show only 10619.
2nd problem: when I create an index pattern for employees, it shows a count of 27919. Why? It should show only 26935.
Also, I have created a different document id for the 'users' index: %{objid}%{login_name}.
If your users and employees inputs and outputs are in the same file or executed at the same time, as your example shows, you need to use conditionals to route your data to the correct Elasticsearch index. Logstash concatenates your file(s) into one pipeline, so all of your inputs run through all of the filters and outputs, which is likely why you're getting unexpected results. See this discussion.
You will need to do something like this:
input {
  jdbc {
    statement => "SELECT * FROM users"
    type => "users"
  }
}
input {
  jdbc {
    statement => "SELECT * FROM employees"
    type => "employees"
  }
}
output {
  if [type] == "users" {
    elasticsearch {
      index => "users"
      document_type => "user"
      document_id => "%{objid}%{login_name}"
      hosts => "xx.xxx.xxx.xx:9200"
    }
  }
  if [type] == "employees" {
    elasticsearch {
      index => "employees"
      document_type => "employee"
      document_id => "%{objid}"
      hosts => "xx.xxx.xxx.xx:9200"
    }
  }
}
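Alternatively, if you can run Logstash 6.0 or later, you can keep the two configurations in separate files and declare them as separate pipelines in pipelines.yml, so their events never pass through each other's outputs (a sketch; the paths are placeholders):
- pipeline.id: employees
  path.config: "/etc/logstash/conf.d/employees.conf"
- pipeline.id: users
  path.config: "/etc/logstash/conf.d/users.conf"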