For a project I'm working on we get some data delivered in an Excel sheet which I convert to CSV through Excel.
These files contain measurements with different categories but the same ID.
Example
readingId; category; result;
1 ; cat 1 ; A
1 ; cat 2 ; B
2 ; cat1 ; C
I've then converted the CSV to JSON and wrote a function to output the data into different objects
const fs = require('fs');
const path = require('path');
exports.convertJson = (file) => {
let rawData = fs.readFileSync(file);
let jsonData = JSON.parse(rawData);
let rawOutput = [];
for (output of jsonData) {
rawOutput.push({
locationId: output.Meetlocatienummer,
date: output.Aanmaakdatum_score,
subCategorie: output.Bestekspost,
score: output.Score,
scoreNumber: output.Cijfer,
categories: output.Categorie,
coordinates: output.Coordinaten,
neighbourhoodIndex: output.BUURTCODE,
quality: output.KWALITEIT,
district: output.STADSDEEL,
distrcitIndex: output.STADSDLCD,
street: output.STRAATNAAM,
neighbourhood: output.WIJK,
cluster: output.Cluster,
});
}
return rawOutput;
};
Which outputs the following results
[
{
locationId: 10215,
date: undefined,
subCategorie: 'Meubilair-afvalbak-vullingsgraad',
score: '',
scoreNumber: 8,
categories: 'Meubilair',
coordinates: '52.072843, 4.287723',
neighbourhoodIndex: 10,
quality: 'residentiekwaliteit',
district: 'Segbroek',
distrcitIndex: 3,
street: 'Xaverystraat',
neighbourhood: 'Regentessekwartier',
cluster: 'WRF'
},
{
locationId: 10215,
date: undefined,
subCategorie: 'Meubilair-container-bijgeplaatst afval rondom container',
score: 'A+',
scoreNumber: 10,
categories: 'Meubilair',
coordinates: '52.072843, 4.287723',
neighbourhoodIndex: 10,
quality: 'residentiekwaliteit',
district: 'Segbroek',
distrcitIndex: 3,
street: 'Xaverystraat',
neighbourhood: 'Regentessekwartier',
cluster: 'WRF'
},
{
locationId: 10215,
date: undefined,
subCategorie: 'Riolering-kolk-belemmering inlaat',
score: 'A+',
scoreNumber: 10,
categories: 'Riolering',
coordinates: '52.072843, 4.287723',
neighbourhoodIndex: 10,
quality: 'residentiekwaliteit',
district: 'Segbroek',
distrcitIndex: 3,
street: 'Xaverystraat',
neighbourhood: 'Regentessekwartier',
cluster: 'WRF'
},
{
locationId: 10215,
date: undefined,
subCategorie: 'Verharding-open verharding-elementenverharding-onkruid',
score: 'A',
scoreNumber: 8,
categories: 'Verharding',
coordinates: '52.072843, 4.287723',
neighbourhoodIndex: 10,
quality: 'residentiekwaliteit',
district: 'Segbroek',
distrcitIndex: 3,
street: 'Xaverystraat',
neighbourhood: 'Regentessekwartier',
cluster: 'WRF'
},
{
locationId: 10215,
date: undefined,
subCategorie: 'Verharding-natuurlijk afval',
score: 'A',
scoreNumber: 8,
categories: 'Verharding',
coordinates: '52.072843, 4.287723',
neighbourhoodIndex: 10,
quality: 'residentiekwaliteit',
district: 'Segbroek',
distrcitIndex: 3,
street: 'Xaverystraat',
neighbourhood: 'Regentessekwartier',
cluster: 'WRF'
},
{
locationId: 10215,
date: undefined,
subCategorie: 'Verharding-uitwerpselen',
score: 'A+',
scoreNumber: 10,
categories: 'Verharding',
coordinates: '52.072843, 4.287723',
neighbourhoodIndex: 10,
quality: 'residentiekwaliteit',
district: 'Segbroek',
distrcitIndex: 3,
street: 'Xaverystraat',
neighbourhood: 'Regentessekwartier',
cluster: 'WRF'
},
{
locationId: 10215,
date: undefined,
subCategorie: 'Verharding-zwerfafval grof',
score: 'A',
scoreNumber: 8,
categories: 'Verharding',
coordinates: '52.072843, 4.287723',
neighbourhoodIndex: 10,
quality: 'residentiekwaliteit',
district: 'Segbroek',
distrcitIndex: 3,
street: 'Xaverystraat',
neighbourhood: 'Regentessekwartier',
cluster: 'WRF'
},
{
locationId: 10215,
date: undefined,
subCategorie: 'Verharding-veegvuil goten',
score: 'A',
scoreNumber: 8,
categories: 'Verharding',
coordinates: '52.072843, 4.287723',
neighbourhoodIndex: 10,
quality: 'residentiekwaliteit',
district: 'Segbroek',
distrcitIndex: 3,
street: 'Xaverystraat',
neighbourhood: 'Regentessekwartier',
cluster: 'WRF'
},
{
locationId: 10215,
date: undefined,
subCategorie: 'Verharding-onkruid rondom obstakels',
score: 'B',
scoreNumber: 6,
categories: 'Verharding',
coordinates: '52.072843, 4.287723',
neighbourhoodIndex: 10,
quality: 'residentiekwaliteit',
district: 'Segbroek',
distrcitIndex: 3,
street: 'Xaverystraat',
neighbourhood: 'Regentessekwartier',
cluster: 'WRF'
},
{
locationId: 10215,
date: undefined,
subCategorie: 'Verharding-grof vuil',
score: 'A+',
scoreNumber: 10,
categories: 'Verharding',
coordinates: '52.072843, 4.287723',
neighbourhoodIndex: 10,
quality: 'residentiekwaliteit',
district: 'Segbroek',
distrcitIndex: 3,
street: 'Xaverystraat',
neighbourhood: 'Regentessekwartier',
cluster: 'WRF'
},
{
locationId: 10215,
date: undefined,
subCategorie: 'Verharding-zwerfafval fijn',
score: 'A',
scoreNumber: 8,
categories: 'Verharding',
coordinates: '52.072843, 4.287723',
neighbourhoodIndex: 10,
quality: 'residentiekwaliteit',
district: 'Segbroek',
distrcitIndex: 3,
street: 'Xaverystraat',
neighbourhood: 'Regentessekwartier',
cluster: 'WRF'
},
{
locationId: 7466,
date: undefined,
subCategorie: 'Meubilair-afvalbak-vullingsgraad',
score: 'B',
scoreNumber: 6,
categories: 'Meubilair',
coordinates: '52.072647, 4.288656',
neighbourhoodIndex: 10,
quality: 'residentiekwaliteit',
district: 'Segbroek',
distrcitIndex: 3,
street: 'Jan Krosstraat',
neighbourhood: 'Regentessekwartier',
cluster: 'WRF'
}
]
In the end I would like to write this information to MongoDB and I had the following scheme in mind to reduce the loads of duplicated data
{
locationId: output.Meetlocatienummer,
date: output.Aanmaakdatum_score,
subCategories: [
{
subCategory: output.Bestekspost,
score: output.Score,
scoreNumber: output.Cijfer,
},
],
categories: [{ category: output.Categorie }],
coordinates: output.Coordinaten,
neighbourhoodIndex: output.BUURTCODE,
quality: output.KWALITEIT,
district: output.STADSDEEL,
distrcitIndex: output.STADSDLCD,
street: output.STRAATNAAM,
neighbourhood: output.WIJK,
cluster: output.Cluster,
}
This project is a hobby project while learning NodeJS. The actual data are readings of how much the streets of the city I work for are polluted with litter. It's tedious to read thousands of lines in Excel to find the hotspots of the city, and just reading scores and graphs is a bit dull, so I thought it would be nice to import the data into Leaflet through NodeJS.
The actual backend will contain more functionality as I learn Node and maybe, in the future, React; that's why I'm trying to write it myself rather than importing the data into Google Maps, which works okay but lacks detailed category filtering.
I hope my idea is a bit clear and someone can point me in the right direction.
Edit 1
I got a bit further with lodash.
// Group the flat records by locationId, then merge each group back into a
// single object; assignWith's customizer keeps the first truthy value seen
// for each key.
// NOTE(review): this collapses the duplicates but loses per-row data —
// only one subCategory/score survives the merge per location.
return _(rawOutput)
.groupBy('locationId')
.map((obj) => _.assignWith({}, ...obj, (val1, val2) => val1 || val2))
.value();
I found the above snippet, and now I only get one output per unique locationId, but I'm stuck on constructing the final output with the subcategories.
I was also playing around a bit with csv-parser to directly go from the csv to a proper json output, which would be ideal because I don't have to convert it manually then.
I'll get back to it tomorrow :-)
If you take that JSON and mongoimport into MongoDB, you can use the following pipeline to transform it -- although honestly, a little python script on the outside could construct the structure just as easily and then you would still import the condensed data.
// Mongo shell pipeline: collapse the flat per-measurement documents into a
// single document per locationId, then rewrite the "lat, long" string as a
// GeoJSON Point, and write the result to a new collection.
db.foo.aggregate([
{$group: {_id: "$locationId",
subCategories: {$push: {subCategory: "$subCategorie", score:"$score", scoreNumber: "$scoreNumber"}},
categories: {$push: "$categories"},
// Just take the first occurrence of each of these since they are claimed
// to be the same for every row of a location.
date: {$first: "$date"},
neighbourhoodIndex: {$first: "$neighbourhoodIndex"},
quality: {$first: "$quality"},
district: {$first: "$district"},
distrcitIndex: {$first: "$distrcitIndex"},
street: {$first: "$street"},
neighbourhood: {$first: "$neighbourhood"},
cluster: {$first: "$cluster"},
coordinates: {$first: "$coordinates"}
}}
// Now that we have a single doc with locationId x and a coordinate, convert
// the string lat,long "52.072843, 4.287723" into a GeoJSON Point which is
// a long,lat array of doubles. We convert by using $addFields to
// overwrite the original coordinates field:
,{$addFields: {"coordinates": {$let: {
vars: {pt: {$split:["$coordinates",","]}},
in: {"type": "Point", "coordinates": [
{$toDouble: {$trim: {input:{$arrayElemAt:["$$pt",1]}}}},
{$toDouble: {$trim: {input:{$arrayElemAt:["$$pt",0]}}}}
]
}
}}
}}
// Put the whole transformed thing into a new collection named "foo2":
,{$out: "foo2"}
]);
Alright, in the end the code from Buzz Moschetti was exactly what I needed to get rid of the duplicated data. I hadn't heard about aggregates yet, so thanks for that.
I ended up using the CSV Parse library to convert the CSV to JSON, drop that into the database and then query out the duplicates with the code from Buzz.
I haven't written the code yet to write the cleaned-up data back to the database, but that shouldn't be too hard, so I'll just post what I have now as a reference for others.
First of all I have written a csv helper for the conversion.
const fs = require('fs');
const { parse } = require('csv');
const moment = require('moment');
exports.processFile = async (filePath) => {
const records = [];
const input = fs.createReadStream(filePath);
const parser = parse({
// CSV options
bom: true,
delimiter: ';',
cast: (value, context) => {
if (context.header) return value;
// Convert data
if (context.column === 'date') {
const dateString = moment(value, 'dd-mm-yyyy h:mm');
const date = dateString.toDate();
return date;
}
// Convert coordinates to GeoJSON
if (context.column === 'coordinates') {
const coordinate = value.split(',');
const geoData = {
type: 'Point',
coordinate: [coordinate[0], coordinate[1]],
};
return geoData;
}
// Output rest of the fields
return String(value);
},
columns: [
'locationId', // meetlocatienummer
'date', // aanmaakdatum score
'subCategory', //bestekpost
'category', // categorie
'score', // score
'coordinates', //coordinaten
undefined, // buurt
undefined, // buurtcode
undefined, // gebied
undefined, // id
'quality', //kwaliteit
undefined, // stadsdeel
'districtIndex', //stadsdlcd
'street', //straatnaam
undefined, //vaknr
'neighbourhood', //wijk
undefined, //wijkcode
'cluster', //cluster
'scoreNumber', //cijfer
undefined, // week
undefined, // maand
undefined, // jr-mnd
undefined, // jaar
],
trim: true,
from_line: 2,
skip_records_with_empty_values: true,
});
// parser.on('error', (err) => {
// console.log(err);
// const error = new Error(err);
// error.httpStatusCode = 500;
// throw error;
// });
//const transformer = transform((record, callback) => {});
input.pipe(parser).on('error', (err) => {
input.close();
});
for await (let record of parser) {
// Skip all lines without coordinates
if (record.coordinates.coordinate[1] === undefined) {
continue;
}
// Push filename to the record object
record.fileName = filePath;
// Push records for final output
records.push(record);
//console.log('Records converted');
}
return records;
};
I'm uploading the file with the Multer library. Here's the POST action in my import-data controller. After the file has been uploaded, the conversion starts. If an error occurs, the file gets deleted again and no records are written to the database. If the conversion succeeds, the records are written to importdatas in MongoDB; these are still the 'dirty' records — loads of duplicates — but without the useless rows, which get filtered out by the CSV Parse helper. (Basically, all the rows without coordinates.)
exports.postImportData = (req, res, next) => {
const uploadedCSV = req.file;
//console.log(uploadedCSV);
// Load imported CSV files from DB to be able to delete them
ImportedCSVFile.find()
.sort({ date: -1 })
.then((result) => {
// Check if there are already files imported
let hasFiles = null;
if (result.length > 0) {
hasFiles = 1;
}
// If there are any erros with the file being uploaded
if (req.fileValidationError) {
return res.render('admin/import-data/import-data', {
pageTitle: 'Importeer data',
path: '/admin/import-data',
files: result,
activeAdmin: true,
errorMessage: req.fileValidationError,
validationErrors: [],
hasFiles,
});
}
// If there's no file uploaded
if (!uploadedCSV) {
return res.render('admin/import-data/import-data', {
pageTitle: 'Importeer data',
path: '/admin/import-data',
files: result,
activeAdmin: true,
errorMessage: 'Geen bestand geselecteerd',
validationErrors: [],
hasFiles,
});
}
(async () => {
const csvFile = await fileHelper.hasFile(uploadedCSV);
try {
const records = await convert.processFile(csvFile);
// Write all CSV data to importdatas in MongoDB
await ImportData.insertMany(records)
.then((result) => {
console.log('Data imported');
// Push info about the uploaded file into 'importedcsvfiles'
const importedCSVFile = new ImportedCSVFile({
filePath: fileHelper.hasFile(uploadedCSV),
originalName: uploadedCSV.originalname,
});
return (
importedCSVFile
.save() // Save all CSV data into 'importedcsvfiles' in MongoDB
.then((result) => {
res.redirect('/admin/import-data');
})
// Catch save filepath error
.catch((err) => {
console.log('save failed');
const error = new Error(err);
error.httpStatusCode = 500;
return next(error);
})
);
})
// Catch insert CSV data into DB error
.catch((err) => {
console.log('insert many failed');
const error = new Error(err);
error.httpStatusCode = 500;
return next(error);
});
} catch (err) {
// console.log(error);
fileHelper.removeFile(csvFile);
return res.render('admin/import-data/import-data', {
pageTitle: 'Importeer data',
path: '/admin/import-data',
files: result,
activeAdmin: true,
errorMessage:
'Het geselecteerde bestand heeft niet de juiste indeling. Neem contact op met de beheerder.',
validationErrors: [],
hasFiles,
});
}
})();
});
};
Also wrote a delete option which removes the CSV file and all the database records which are linked to that file
exports.postDeleteData = (req, res, next) => {
const dataId = req.body.dataId;
ImportedCSVFile.findById(dataId)
.then((result) => {
// console.log('FilePath:');
// console.log(result.filePath);
const filePath = result.filePath;
const deleteData = async () => {
await ImportData.deleteMany({ filePath: filePath })
.then((result) => {})
.catch((err) => {
const error = new Error(err);
error.httpStatusCode = 500;
return next(error);
});
await ImportedCSVFile.findByIdAndDelete(dataId)
.then((result) => {
console.log('Data deleted');
fileHelper.removeFile(filePath);
res.redirect('/admin/import-data');
})
.catch((err) => {
console.log('here');
const error = new Error(err);
error.httpStatusCode = 500;
return next(error);
});
};
return deleteData();
})
.catch((err) => {
const error = new Error(err);
error.httpStatusCode = 500;
return next(error);
});
};
And for now the Aggregate code from Buzz to clean up the data and drop it into Leaflet so I get 1 point with all the different categories.
const { ImportData, OutputData } = require('../models/importData.model');
// Main controller for the homepage
exports.getMainController = (req, res, next) => {
ImportData.aggregate([
{
$group: {
_id: '$coordinates',
subCategories: {
$push: {
subCategory: '$subCategory',
score: '$score',
scoreNumber: '$scoreNumber',
},
},
categories: { $push: '$category' },
// Just take the first occurance of each of these since they are claimed
// to be the same.
date: { $first: '$date' },
quality: { $first: '$quality' },
districtIndex: { $first: '$districtIndex' },
street: { $first: '$street' },
neighbourhood: { $first: '$neighbourhood' },
cluster: { $first: '$cluster' },
},
},
]).exec((err, locations) => {
if (err) {
throw next(err);
}
//console.log(locations);
res.render('index.ejs', {
pageTitle: 'Kaart',
path: '/kaart',
activeAdmin: true,
data: locations,
errorMessage: null,
});
});
};
As of now I just query this data like I said in the beginning. As I am still learning a lot about Javascript and Node I now started to build a frontend with React. Once I got on going there I will convert all this code to an API and I'll finish this part of the project.
I like to update an entry in mongodb. But lodash only update one value in the array. I send this object to my node.js server:
{ _id: 5593df7c087e59a00c04cda3,
name: 'blueberry',
uuid: 'b9407f30-f5f8-466e-aff9-25556b57fe6d',
major: '12345',
minor: '12345',
position: 'Kantine',
__v: 18,
messages:
[ { _id: 5593df7c087e59a00c04cda4,
timeRange: [Object],
url: '',
message: 'j',
title: 'jv',
messageType: 'text' },
{ _id: 5593df7c087e59a00c04cda4,
timeRange: [Object],
url: '',
message: 'j',
title: 'jv',
messageType: 'text' } ] }
Here is the code for the update of the mongodb-entry:
// Updates an existing ibeacons in the DB.
exports.update = function(req, res) {
Ibeacons.findById(req.params.id, function (err, ibeacons) {
if (err) { return handleError(res, err); }
if(!ibeacons) { return res.send(404); }
// Recursively merge the request body into the fetched document.
// NOTE(review): with lodash 2.4.1 the `messages` array was merged
// element-by-element (causing the duplicate entries shown above);
// upgrading to lodash 3.1.0 resolved it, as noted below.
var updated = _.merge(ibeacons, req.body);
updated.save(function (err) {
if (err) { return handleError(res, err); }
// `updated` and `ibeacons` are the same (mutated) object.
return res.json(200, ibeacons);
});
});
};
But I get this as result:
{ _id: 5593df7c087e59a00c04cda3,
name: 'blueberry',
uuid: 'b9407f30-f5f8-466e-aff9-25556b57fe6d',
major: '12345',
minor: '12345',
position: 'Kantine',
__v: 18,
messages:
[ { _id: 5593df7c087e59a00c04cda4,
timeRange: [Object],
url: '',
message: 'j',
title: 'jv',
messageType: 'text' },
{ _id: 5593df7c087e59a00c04cda4,
timeRange: [Object],
url: '',
message: 'j',
title: 'jv',
messageType: 'text' } ] }
Maybe someone can help me.
OK, I get it: the version of lodash was 2.4.1; I updated it to 3.1.0 and now it works. :D
I try to update a document with mongoose and it fails. The query I can successful execute directly in Mongo is like:
// Mongo shell command that works: match both the order and the embedded
// item, so the positional `$` operator in the update path resolves to the
// matched array element.
db.orders.update(
{
orderId: 1014428,
'delivery.items.id': '5585d77c714a90fe0fc2fcb4'
},
{
$inc: {
"delivery.items.$.quantity" : 1
}
}
)
When I try to run the following update command with mongoose:
// Called on a document instance, so this resolves to Document#update:
// the first argument is treated as the update doc (not as a query),
// which triggers the traversal error shown below — see the answer's
// Model.update vs Document.update explanation.
this.update(
{
orderId: this.orderId ,
"delivery.items.id": product.id
},
{
$inc: {
"delivery.items.$.quantity" : 1
}
}, function (err, raw) {
if (err) {
console.log(err);
}
console.log('The raw response from Mongo was ', raw);
}
);
I see the following error:
{ [MongoError: cannot use the part (items of delivery.items.id) to traverse the element ({items: [ { quantity: 1, price: 6.9, name: "Manipulationstechniken", brand: null, id: "5585d77c714a90fe0fc2fcb4" } ]})]
name: 'MongoError',
message: 'cannot use the part (items of delivery.items.id) to traverse the element ({items: [ { quantity: 1, price: 6.9, name: "Manipulationstechniken", brand: null, id: "5585d77c714a90fe0fc2fcb4" } ]})',
index: 0,
code: 16837,
errmsg: 'cannot use the part (items of delivery.items.id) to traverse the element ({items: [ { quantity: 1, price: 6.9, name: "Manipulationstechniken", brand: null, id: "5585d77c714a90fe0fc2fcb4" } ]})' }
The raw response from Mongo was { ok: 0, n: 0, nModified: 0 }
I tried so many things. Any advice on this?
As requested the schema:
// Mongoose schema for an order.
// NOTE(review): `delivery.items` is a plain Array, so its elements are
// schemaless subdocuments (the quantity/price/name/id objects seen in
// the error message are not validated by this schema).
var Order = new Schema({
orderId: Number,
orderDate: String,
customerName: String,
state: Number,
delivery: {
items: {type: Array, default: []},
state: { type: Number, default: 0 }
}
});
TL;DR: use your model Order instead of an instance this when doing more advanced queries:
// Using the model: Model.update maps (conditions, doc) straight onto
// db.collection.update(query, update), so the first argument is a real
// query and the positional `$` operator works as in the shell.
Orders.update(
{
orderId: this.orderId ,
"delivery.items.id": product.id
},
{
$inc: {
"delivery.items.$.quantity" : 1
}
}, function (err, raw) {
if (err) {
console.log(err);
}
console.log('The raw response from Mongo was ', raw);
}
);
Explanation:
Mapping differences between Model.update() and Document.update().
When using the model, Model.update() will be used and
Model.update(conditions, doc, options, callback)
will be mapped to:
db.collection.update(query = conditions, update = doc, options)
When using an instance instead, you're calling Document.update() and
Document.update(doc, options, callback)
will be mapped to the following:
db.collection.update(query = {_id: _id}, update = doc, options)
Don't know if this helps, but in this question the O.P. had a similar issue with mongoose 3.6.15, and claimed it was solved in 3.8.22
EDIT: In the linked question, the O.P. had the following working on mongodb
// Working shell command from the linked question: $elemMatch selects the
// array element so the positional `$` in the $push path can resolve.
// NOTE(review): the outer parentheses form a JS comma expression — only
// the last object is actually passed as the query argument.
db.orchards.update(
({"orchardId": ObjectId("5391c137722b051908000000")},
{"trees" : { $elemMatch: {"name":"apple"}}}),
{ $push: { "trees.$.fruits": ObjectId("54c542c9d900000000001234") }})
But this not working in mongoose:
orchards.update(
({"orchardId": ObjectId.fromString(orchard.id)},
{"trees" : {$elemMatch: {"name": "apple"}}}),
{$push: {"trees.$.fruits": ObjectId("54c542c9d900000000001234") }},function(err, data){ ...
In a comment, he said the issue was solved switching to mongoose 3.8.22