Delete Documents from CosmosDB based on condition through Query Explorer - azure

What's the query or some other quick way to delete all the documents matching the where condition in a collection?
I want something like DELETE * FROM c WHERE c.DocumentType = 'EULA' but, apparently, it doesn't work.
Note: I'm not looking for any C# implementation for this.

This is a bit old, but I just had the same requirement and found a concrete example of what @Gaurav Mantri wrote about.
The stored procedure script is here:
https://social.msdn.microsoft.com/Forums/azure/en-US/ec9aa862-0516-47af-badd-dad8a4789dd8/delete-multiple-docdb-documents-within-the-azure-portal?forum=AzureDocumentDB
Go to the Azure portal, grab the script from above and make a new stored procedure in the database->collection you need to delete from.
Then, right at the bottom of the stored procedure pane, underneath the script textarea, is a place to put in the parameter. In my case I just wanted to delete everything, so I used:
SELECT c._self FROM c
I guess yours would be:
SELECT c._self FROM c WHERE c.DocumentType = 'EULA'
Then hit 'Save and Execute'. Voilà, some documents get deleted. After I got it working in the Azure Portal I switched over to Azure DocumentDB Studio and got a better view of what was happening, i.e. I could see I was throttled to deleting 18 at a time (returned in the results). For some reason I couldn't see this in the Azure Portal.
Anyway, pretty handy even if limited to a certain number of deletes per execution. Executing the sproc is also throttled, so you can't just mash the keyboard. I think I would just delete and recreate the collection unless I had a manageable number of documents to delete (thinking <500).
Props to Mimi Gentz @Microsoft for sharing the script in the link above.
HTH

I want something like DELETE * FROM c WHERE c.DocumentType = 'EULA'
but, apparently, it doesn't work.
Deleting documents this way is not supported. You would need to first select the documents using a SELECT query and then delete them separately. If you want, you can write the code for fetching & deleting in a stored procedure and then execute that stored procedure.
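A server-side stored procedure along those lines might look like this minimal sketch (the function name and the response shape are mine; the MSDN script linked in the answer above is a fuller version). You pass it a query that selects the _self links of the documents to delete:
// Minimal sketch of a bulk-delete stored procedure (CosmosDB server-side JavaScript).
// Pass a query such as: SELECT c._self FROM c WHERE c.DocumentType = 'EULA'
function bulkDeleteSproc(query) {
    var collection = getContext().getCollection();
    var response = getContext().getResponse();
    var deleted = 0;

    queryAndDelete();

    function queryAndDelete() {
        var accepted = collection.queryDocuments(collection.getSelfLink(), query, {},
            function (err, docs) {
                if (err) throw err;
                if (docs.length > 0) {
                    deleteDoc(docs, 0);
                } else {
                    response.setBody({ deleted: deleted }); // nothing left to delete
                }
            });
        if (!accepted) response.setBody({ deleted: deleted }); // out of time/RUs, report progress
    }

    function deleteDoc(docs, index) {
        if (index >= docs.length) {
            queryAndDelete(); // this batch is done, look for more matches
            return;
        }
        var accepted = collection.deleteDocument(docs[index]._self, {},
            function (err) {
                if (err) throw err;
                deleted++;
                deleteDoc(docs, index + 1);
            });
        if (!accepted) response.setBody({ deleted: deleted }); // out of time/RUs, report progress
    }
}
Because the sproc bails out when it runs low on time or RUs, the caller re-executes it until a run reports zero deletions.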

I wrote a script to list all the documents and to delete all the documents; it can be modified to delete only selected documents as well.
var docdb = require("documentdb");
var async = require("async");

var config = {
  host: "https://xxxx.documents.azure.com:443/",
  auth: {
    masterKey: "xxxx"
  }
};

var client = new docdb.DocumentClient(config.host, config.auth);
var messagesLink = docdb.UriFactory.createDocumentCollectionUri("xxxx", "xxxx");

var listAll = function(callback) {
  var spec = {
    query: "SELECT * FROM c",
    parameters: []
  };
  client.queryDocuments(messagesLink, spec).toArray((err, results) => {
    callback(err, results);
  });
};

var deleteAll = function() {
  listAll((err, results) => {
    if (err) {
      console.log(err);
    } else {
      async.forEach(results, (message, next) => {
        client.deleteDocument(message._self, err => {
          if (err) {
            console.log(err);
            next(err);
          } else {
            next();
          }
        });
      });
    }
  });
};

var task = process.argv[2];
switch (task) {
  case "listAll":
    listAll((err, results) => {
      if (err) {
        console.error(err);
      } else {
        console.log(results);
      }
    });
    break;
  case "deleteAll":
    deleteAll();
    break;
  default:
    console.log("Commands:");
    console.log("listAll deleteAll");
    break;
}

And if you want to do it in C#/Dotnet Core, this project may help: https://github.com/lokijota/CosmosDbDeleteDocumentsByQuery. It's a simple Visual Studio project where you specify a SELECT query, and all the matches will be a) backed up to file; b) deleted, based on a set of flags.

Create a stored procedure in the collection and execute it, passing a SELECT query with the delete condition. The major reason to use this stored procedure is the continuation token, which reduces RUs to a huge extent and costs less.
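A minimal sketch of driving such a stored procedure from the Node documentdb client, assuming the sproc (like the sketch earlier) responds with the number of documents it deleted so the caller knows when to stop; the "continuation" signal here is simply that count. Links and keys are placeholders:
var docdb = require("documentdb");

var client = new docdb.DocumentClient("https://xxxx.documents.azure.com:443/", { masterKey: "xxxx" });
var sprocLink = "dbs/xxxx/colls/xxxx/sprocs/bulkDeleteSproc";

function deleteUntilDone(query) {
  client.executeStoredProcedure(sprocLink, [query], function (err, result) {
    if (err) throw err;
    console.log("deleted " + result.deleted + " documents this round");
    // The sproc stops early when it runs low on time/RUs,
    // so keep re-executing until a round deletes nothing.
    if (result.deleted > 0) {
      deleteUntilDone(query);
    }
  });
}

deleteUntilDone("SELECT c._self FROM c WHERE c.DocumentType = 'EULA'");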

Here is a Python script which can be used to delete data from a partitioned Cosmos collection. It deletes documents id by id based on the result-set data.
Identify the data that needs to be deleted before the step below (here the ids come from a Spark SQL query via sqlContext):
# Old pydocumentdb SDK; sqlContext is provided by the Spark runtime (e.g. a Databricks notebook)
import pydocumentdb.document_client as document_client

res_list = "select id from id_del"
res_id = [{'id': x["id"]} for x in sqlContext.sql(res_list).rdd.collect()]

config = {
    "Endpoint": "Use EndPoint",
    "Masterkey": "UseKey",
    "WritingBatchSize": "5000",
    "DOCUMENTDB_DATABASE": "Database",
    "DOCUMENTDB_COLLECTION": "collection-core"
}

# Initialize the Python DocumentDB client
client = document_client.DocumentClient(config['Endpoint'], {'masterKey': config['Masterkey']})

for row in res_id:
    # Use a SQL-based query to get the document, looping through partitions to delete
    query = {'query': "SELECT c.id FROM c where c.id = " + "'" + row['id'] + "'"}
    print(query)
    options = {}
    options['enableCrossPartitionQuery'] = True
    options['maxItemCount'] = 1000
    result_iterable = client.QueryDocuments('dbs/Database/colls/collection-core', query, options)
    results = list(result_iterable)
    print('DOCS TO BE DELETED : ' + str(len(results)))
    if len(results) > 0:
        for i in range(0, len(results)):
            docID = results[i]['id']
            print("docID :" + docID)
            options = {}
            options['enableCrossPartitionQuery'] = True
            options['maxItemCount'] = 1000
            options['partitionKey'] = docID  # here the partition key value happens to be the document id
            client.DeleteDocument('dbs/Database/colls/collection-core/docs/' + docID, options=options)
            print('deleted Partition:' + docID)

Related

Queries being run twice on sql.js

I'm using SQLite with sql.js on my project and I have been having some trouble with my implementation. It seems like the queries are being run on the database twice, because for the INSERT statements I get 2 records in the DB.
The way I do it, I create the SQL and then pass it on to this method (the opts variable contains all of the data being put into the database):
prepareStatementAndCompileResults(db, sql, opts) {
  const stmt = db.prepare(sql);
  const result = stmt.getAsObject(opts);
  var rows = [];
  if (!this.isEmpty(result)) { // isEmpty is a simple method that checks for empty objects
    rows.push(result);
  }
  while (stmt.step()) {
    var row = stmt.getAsObject();
    rows.push(row);
  }
  this.saveToFile(db);
  stmt.free();
  return rows;
},
Here is a sample SQL INSERT that is being run twice
INSERT OR IGNORE INTO tag_event (tag_id, event_id, unique_string)
VALUES (:tag_id,:event_id, :unique);
Here is what the opts variable would look like for this query:
var opts = {
  [':tag_id']: 1,
  [':event_id']: 1,
  [':unique']: '1-1'
}
Because you're pushing it into rows twice.
// if not empty will add to row
if (!this.isEmpty(result)) { // isEmpty is a simple method that checks for empty objects
  rows.push(result);
}
// not sure what step() does but I'm assuming this will also run
while (stmt.step()) {
  var row = stmt.getAsObject();
  rows.push(row);
}
Verify by using a debugger, or just console.log(rows) after the while loop, before the save.
So, what it turns out I needed to do was bind the variables to the prepared statement before getting the results, rather than binding them through getAsObject. This is much more efficient: my API response time on a local test went from 785ms to 14.5ms.
prepareStatementAndCompileResults(db, sql, opts) {
  const rows = [];
  const stmt = db.prepare(sql);
  stmt.bind(opts);
  while (stmt.step()) {
    var row = stmt.getAsObject();
    rows.push(row);
  }
  this.saveToFile(db);
  stmt.free();
  return rows;
},

Saving & Testing Stored Procedures/Triggers (maybe User Defined Functions) For Partitioned Collections

I'm receiving the following error when attempting save modifications to a Stored Procedure that has been created within a partitioned collection:
Failed to save the script
Here is the details from within the Azure Portal:
Operation name: Failed to save the script
Time stamp: Fri Feb 17 2017 08:46:32 GMT-0500 (Eastern Standard Time)
Event initiated by: -
Description: Database Account: MyDocDbAccount, Script: bulkImport, Message: {"code":400,"body":"{\"code\":\"BadRequest\",\"message\":\"Replaces and upserts for scripts in collections with multiple partitions are not supported.
The Stored Procedure in question is the example "bulkImport" script that can be found here.
There is a known missing capability (bug, if you prefer) in DocumentDB right now where you cannot update existing stored procedures in a partitioned collection. The workaround is to delete it first and then recreate it under the same name/id.
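If you're scripting that workaround, a minimal sketch with the Node documentdb client could look like this (links, ids, and the script body are placeholders):
var docdb = require("documentdb");

var client = new docdb.DocumentClient("https://xxxx.documents.azure.com:443/", { masterKey: "xxxx" });
var collLink = "dbs/mydb/colls/mycoll";
var sprocDef = { id: "bulkImport", body: "function bulkImport(docs) { /* script body */ }" };

// Delete the existing stored procedure (ignore 404 if it doesn't exist yet),
// then recreate it under the same id.
client.deleteStoredProcedure(collLink + "/sprocs/" + sprocDef.id, function (err) {
  if (err && err.code !== 404) throw err;
  client.createStoredProcedure(collLink, sprocDef, function (err) {
    if (err) throw err;
    console.log("stored procedure recreated");
  });
});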
Contrary to the error message, it turns out that _client.ReplaceStoredProcedureAsync(...) does work (as of June 2018) on partitioned collections. So you can do something like this:
try
{
    await _client.CreateStoredProcedureAsync(...);
}
catch (DocumentClientException dex) when (dex.StatusCode == HttpStatusCode.Conflict)
{
    await _client.ReplaceStoredProcedureAsync(...);
}
Once your SP gets created the first time, there will never be a window of time when it isn't available (as there would be with deletion + recreation).
This extension method can handle add or update of a stored procedure.
public static async Task AddOrUpdateProcedure(this DocumentClient client,
    string databaseId,
    string collectionId,
    string storedProcedureId,
    string storedProcedureBody)
{
    try
    {
        var documentCollectionUri = UriFactory.CreateDocumentCollectionUri(databaseId, collectionId);
        await client.CreateStoredProcedureAsync(documentCollectionUri, new StoredProcedure
        {
            Id = storedProcedureId,
            Body = storedProcedureBody
        });
    }
    catch (DocumentClientException ex) when (ex.StatusCode == HttpStatusCode.Conflict)
    {
        var storedProcedureUri = UriFactory.CreateStoredProcedureUri(databaseId, collectionId, storedProcedureId);
        var storedProcedure = await client.ReadStoredProcedureAsync(storedProcedureUri);
        storedProcedure.Resource.Body = storedProcedureBody;
        await client.ReplaceStoredProcedureAsync(storedProcedure);
    }
}
As of now, updating a stored procedure still does not work in the Azure Portal / Cosmos DB Data Explorer. There is a Cosmos DB extension for Visual Studio Code where this works. However, I don't see a way of executing the procedure from the extension like I can from Data Explorer.
try
{
    var spResponse = await dbClient.CreateStoredProcedureAsync($"/dbs/{dataRepoDatabaseId}/colls/{collectionName}", new StoredProcedure
    {
        Id = sp.Item1,
        Body = sp.Item2
    }, new RequestOptions { PartitionKey = new PartitionKey(partitionKey) });
}
catch (DocumentClientException dex) when (dex.StatusCode == HttpStatusCode.Conflict)
{
    // Fetch the resource to be updated
    StoredProcedure sproc = dbClient.CreateStoredProcedureQuery($"/dbs/{dataRepoDatabaseId}/colls/{collectionName}")
        .Where(r => r.Id == sp.Item1)
        .AsEnumerable()
        .SingleOrDefault();
    if (!sproc.Body.Equals(sp.Item2))
    {
        sproc.Body = sp.Item2;
        StoredProcedure updatedSPResponse = await dbClient.ReplaceStoredProcedureAsync(sproc);
    }
}

Distinct values in Azure Search Suggestions?

I am offloading my search feature on a relational database to Azure Search. My Products table contains columns like serialNumber, partNumber, etc. (there can be multiple serialNumbers with the same partNumber).
I want to create a suggestor that can autocomplete partNumbers. But in my scenario I am getting a lot of duplicates in the suggestions because the partNumber match was found in multiple entries.
How can I solve this problem ?
The Suggest API suggests documents, not queries. If you repeat the partNumber information for each serialNumber in your index and then suggest based on partNumber, you will get a result for each matching document. You can see this more clearly by including the key field in the $select parameter. Azure Search will eliminate duplicates within the same document, but not across documents. You will have to do that on the client side, or build a secondary index of partNumbers just for suggestions.
See this forum thread for a more in-depth discussion.
Also, feel free to vote on this UserVoice item to help us prioritize improvements to Suggestions.
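If you go the client-side route, the de-duplication itself is straightforward; a minimal sketch, assuming each suggestion result exposes the partNumber it matched on:
// Minimal sketch of client-side de-duplication of suggestions,
// keeping only the first result seen for each partNumber.
function distinctByPartNumber(results, limit) {
  var seen = {};
  var unique = [];
  for (var i = 0; i < results.length && unique.length < limit; i++) {
    var partNumber = results[i].partNumber;
    if (!seen[partNumber]) {
      seen[partNumber] = true;
      unique.push(results[i]);
    }
  }
  return unique;
}
Note that deduplicating after the fact can leave you with fewer than the requested number of suggestions; the answer below works around that by re-querying with a filter.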
I'm facing this problem myself. My solution does not involve a new index (this would only get messy and cost us money).
My take on this is a while-loop that adds 'UserIdentity' (in your case, 'partNumber') to a filter and re-searches until my take/top limit is met or no more suggestions exist:
public async Task<List<MachineSuggestionDTO>> SuggestMachineUser(string searchText, int take, string[] searchFields)
{
    var indexClientMachine = _searchServiceClient.Indexes.GetClient(INDEX_MACHINE);
    var suggestions = new List<MachineSuggestionDTO>();
    var sp = new SuggestParameters
    {
        UseFuzzyMatching = true,
        Top = 100 // Get the maximum result for a chance to reduce search calls.
    };

    // Add search fields if set
    if (searchFields != null && searchFields.Count() != 0)
    {
        sp.SearchFields = searchFields;
    }

    // Loop until you get the desired amount of suggestions, or, if under the desired amount, the maximum available.
    while (suggestions.Count < take)
    {
        if (!await DistinctSuggestMachineUser(searchText, take, searchFields, suggestions, indexClientMachine, sp))
        {
            // If no more suggestions are found, we break the while-loop
            break;
        }
    }

    // Since the list might be bigger than the take, we return a narrowed list
    return suggestions.Take(take).ToList();
}

private async Task<bool> DistinctSuggestMachineUser(string searchText, int take, string[] searchFields, List<MachineSuggestionDTO> suggestions, ISearchIndexClient indexClientMachine, SuggestParameters sp)
{
    var response = await indexClientMachine.Documents.SuggestAsync<MachineSearchDocument>(searchText, SUGGESTION_MACHINE, sp);
    if (response.Results.Count > 0)
    {
        // Extend the filter if the search is triggered once more
        if (!string.IsNullOrEmpty(sp.Filter))
        {
            sp.Filter += " and ";
        }
        foreach (var result in response.Results.DistinctBy(r => new { r.Document.UserIdentity, r.Document.UserName, r.Document.UserCode }).Take(take))
        {
            var d = result.Document;
            suggestions.Add(new MachineSuggestionDTO { Id = d.UserIdentity, Namn = d.UserNamn, Hkod = d.UserHkod, Intnr = d.UserIntnr });
            // Add the found UserIdentity to the filter
            sp.Filter += $"UserIdentity ne '{d.UserIdentity}' and ";
        }
        // Remove the trailing " and " in case the search runs once more
        if (sp.Filter.EndsWith(" and "))
        {
            sp.Filter = sp.Filter.Substring(0, sp.Filter.LastIndexOf(" and ", StringComparison.Ordinal));
        }
    }
    // Returns false if no more suggestions are found
    return response.Results.Count > 0;
}
public async Task<List<string>> SuggestionsAsync(bool highlights, bool fuzzy, string term)
{
    SuggestParameters sp = new SuggestParameters()
    {
        UseFuzzyMatching = fuzzy,
        Top = 100
    };
    if (highlights)
    {
        sp.HighlightPreTag = "<em>";
        sp.HighlightPostTag = "</em>";
    }
    var suggestResult = await searchConfig.IndexClient.Documents.SuggestAsync(term, "mysuggestion", sp);
    // Convert the suggest query results to a list that can be displayed in the client.
    return suggestResult.Results.Select(x => x.Text).Distinct().Take(10).ToList();
}
After getting top 100 and using distinct it works for me.
You can use the Autocomplete API for that, which does the grouping by default. However, if you need more fields together with the result (like the partNo plus a description), it doesn't support that. The partNo will be distinct though.
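A minimal sketch of calling the Autocomplete REST API from Node (service name, index name, suggester name, api-key, and api-version are placeholders; a suggester must already be defined on the index):
var https = require("https");

var body = JSON.stringify({
  search: "partNumber prefix here",
  suggesterName: "sg" // hypothetical suggester defined on the index
});

var req = https.request({
  hostname: "myservice.search.windows.net",
  path: "/indexes/products/docs/autocomplete?api-version=2019-05-06",
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    "api-key": "xxxx"
  }
}, function (res) {
  var data = "";
  res.on("data", function (chunk) { data += chunk; });
  res.on("end", function () {
    // The value array holds the completed terms, already grouped/distinct.
    console.log(JSON.parse(data).value);
  });
});
req.end(body);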

Azure Mobile server update script w/ complex field type

I've got a complex data type "AzureTemplate" containing a list of children "AzureField". I've implemented my read and insert on the server side according to this article. Works great.
Needing an update as well, I copy/pasted the insert into the update so it does the same thing, but using update instead. So my update looks like this:
function update(item, user, request) {
    // remove complex child object, make copy first
    var fields = item.fields;
    if (fields) {
        delete item.fields;
    }
    request.execute({
        success: function () {
            var templateId = item.id; // "foreign key"
            var fieldsTable = tables.getTable('AzureFields');
            if (fields) {
                // update the child fields
                var updateNextField = function (index) {
                    if (index >= fields.length) {
                        // done updating fields, respond to client
                        request.respond();
                    } else {
                        var field = fields[index];
                        field.templateId = templateId;
                        // *** THE ID LOGGED HERE LOOKS FINE ***
                        console.log("updating field w/ id ", field.id);
                        fieldsTable.update(field, {
                            success: function () {
                                updateNextField(index + 1);
                            }
                        });
                    }
                };
                // kick off the loop saving each field
                updateNextField(0);
            } else {
                // no fields. no need to do anything else
                request.respond();
            }
        }
    });
}
The log that prints the ID of the child "field" shows a valid field id (I save them on the client side when reading them). But I get an error that says:
Error in script '/table/AzureTemplate.update.js'. Error: Invalid id value specified. AzureTemplate/update Tue Jan 27 2015, 10:11:31 AM
I put a console.log() at the top of the AzureField.update, but that never shows up, so it's not getting in there. Also, when I update a single child "Field" directly from the client it works fine. So the AzureField.update is working. Any ideas?
var fieldsTable = tables.getTable('AzureFields');
...my table name is AzureField, not AzureFields. With the table name corrected, the above code works; hopefully it helps someone.
I have misnamed a table before and got a meaningful error about the table not existing. Not sure why the error in this case is totally unrelated.

Retrieving all Documents from couchdb using Node.js

I am writing a simple test app to experiment with the functionality of node.js and couchdb. So far I am loving it, but I ran into a snag. I have looked far and wide but can't seem to find an answer. My test server (a simple address book) does 2 things:
if the user goes to localhost:8000/{id} then my app returns the name and address of the user with that id.
if the user goes to localhost:8000/ then my app needs to return a list of names that are hyperlinks which take them to the page localhost:8000/{id}.
I was able to get the first requirement working. I can't seem to find out how to retrieve a list of all names from my couchdb. That is what I need help with. Here is my code:
var http = require('http');
var cradle = require('cradle');

var conn = new(cradle.Connection)();
var db = conn.database('users');

function getUserByID(id) {
    var rv = "";
    db.get(id, function(err, doc) {
        rv = doc.name;
        rv += " lives at " + doc.Address;
    });
    return rv;
}

function GetAllUsers() {
    var rv = "";
    return rv;
}

var server = http.createServer(function(req, res) {
    res.writeHead(200, {'Content-Type': 'text/plain'});
    var rv = "";
    var id = req.url.substr(1);
    if (id != "")
        rv = getUserByID(id);
    else
        rv = GetAllUsers();
    res.end(rv);
});

server.listen(8000);
console.log("server is running");
As you can see, I need to fill in the GetAllUsers() function. Any help would be appreciated. Thanks in advance.
I would expect you to be doing something like (using nano, which is a library I authored):
var db = require('nano')('http://localhost:5984/my_db')
, per_page = 10
, params = {include_docs: true, limit: per_page, descending: true}
;
db.list(params, function(error,body,headers) {
console.log(body);
});
I'm not quite sure what you are trying to accomplish with http over there, but feel free to head to my blog if you are looking for some more examples. I just wrote a blog post for people getting started with node and couch.
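Applied to the server in the question, the listing route could look something like this sketch (nano's list callback returns rows with an id and, when include_docs is set, the doc itself; the database name and markup are placeholders):
var http = require('http');
var db = require('nano')('http://localhost:5984/users');

var server = http.createServer(function (req, res) {
  if (req.url === '/') {
    // List every user and render one hyperlink per document id.
    db.list({ include_docs: true }, function (err, body) {
      if (err) {
        res.writeHead(500);
        return res.end(err.message);
      }
      res.writeHead(200, { 'Content-Type': 'text/html' });
      var links = body.rows.map(function (row) {
        return '<a href="/' + row.id + '">' + row.doc.name + '</a>';
      });
      res.end(links.join('<br>'));
    });
  }
  // ... the /{id} route from the question goes here
});

server.listen(8000);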
As said above, there will come a time when you need to create your own view. Check out the CouchDB API wiki, then scan through the book, check what design documents are, and then, if you like, you can go and check the test code I have for view generation and querying.
You can create a CouchDB view which will list the users. Here are several resources on CouchDB views which you should read in order to get a bigger picture on this topic:
Introduction to CouchDB Views
Finding Your Data with Views
View Cookbook for SQL Jockeys
HTTP View API
So let's say you have documents structured like this:
{
    "_id": generated by CouchDB,
    "_rev": generated by CouchDB,
    "type": "user",
    "name": "Johny Bravo",
    "isHyperlink": true
}
Then you can create a CouchDB view (the map part) which would look like this:
// view map function definition
function(doc) {
    // first check if the doc has type and isHyperlink fields
    if (doc.type && doc.isHyperlink) {
        // now check if the type is user and isHyperlink is true (this could also be included in the statement above)
        if ((doc.type === "user") && (doc.isHyperlink === true)) {
            // if the above statements are true then emit the name as the key and the document as the value
            // (you can change what is emitted to whatever you want, this is just an example)
            emit(doc.name, doc);
        }
    }
}
When a view is created you can query it from your node.js application:
// query a view
db.view('location of your view', function (err, res) {
    // loop through each row returned by the view
    res.forEach(function (row) {
        // print its name and isHyperlink flag to the console
        console.log(row.name + " - " + row.isHyperlink);
    });
});
This is just an example. First I would recommend going through the resources above and learning the basics of CouchDB views and their capabilities.
