All goroutines deadlock - multithreading
I ran into a "fatal error: all goroutines are asleep - deadlock!" problem.
I have a data structure holding a cars array with a limited number of cars in it. Worker goroutines start and try to remove cars from the data structure; if the array is empty and the main goroutine hasn't finished writing data into it, a worker sleeps until the main goroutine adds more cars to the array. The worker then wakes up, removes a car object from the data structure, does its calculations, and moves it into a results structure. At some point it sometimes deadlocks. I've also noticed that even when the program finishes without an error, some data (sometimes more, sometimes less) is still missing.
Code:
package main

import (
    "crypto/sha256"
    "encoding/json"
    "fmt"
    "io/ioutil"
    "log"
    "os"
    "strconv"
    "sync"
)

type Car struct {
    Make         string  `json:"Make"`
    Year         int     `json:"Year"`
    Displacement float64 `json:"Displacement"`
    Hash         string
}

type Cars struct {
    Auto   []Car
    count  int
    MaxLen int
    mutex  *sync.Mutex
    cond   *sync.Cond
    end    bool
}

func (a *Cars) Insert(aut Car) {
    a.mutex.Lock() // lock so other threads couldn't use the Data structure
    for a.count == a.MaxLen {
        a.cond.Wait() // wait if the current count of cars equals the maximum allowed in the cars array
    }
    a.Auto[a.count] = aut
    a.count++
    a.mutex.Unlock()
    a.cond.Broadcast()
}

func (a *Cars) Remove(group *sync.WaitGroup) Car {
    a.mutex.Lock()
    for a.count == 0 {
        a.cond.Wait() // if there are no cars to remove from the car array, sleep
    }
    result := a.Auto[a.count-1] // get the last car from the car array
    var tmp Car
    a.Auto[a.count-1] = tmp // remove the last car from the car array
    a.count--
    a.mutex.Unlock()   // unlock this method and let other threads use it
    a.cond.Broadcast() // tell all threads that removing has been finished
    return result
}

func (a *Cars) InsertSort(aut Car) {
    a.mutex.Lock()
    for a.count == a.MaxLen {
        a.cond.Wait()
    }
    j := 0
    for i := 0; i < a.count; i++ {
        if a.Auto[i].Displacement < aut.Displacement {
            j = i // finds where to insert the new item in the sorted list
        }
    }
    if j != 0 {
        for i := a.count; i >= j; i-- {
            a.Auto[i+1] = a.Auto[i] // moves items from j to the right
        }
    }
    a.Auto[j] = aut
    a.count++
    a.mutex.Unlock()
    a.cond.Broadcast()
}

var Auto []Car

func main() {
    CurrentWD, err := os.Getwd()
    if err != nil {
        log.Println(err)
    }
    path := CurrentWD + "\\Auto.json"
    jsonFile, err := os.Open(path)
    byteValue, _ := ioutil.ReadAll(jsonFile)
    json.Unmarshal(byteValue, &Auto)
    var mutex = sync.Mutex{}
    var cond = sync.NewCond(&mutex) // tying the cond to the mutex
    MaxLength := 5                  // max length of the data array
    var A = make([]Car, 5)
    Auto1 := Cars{count: 0, MaxLen: MaxLength, cond: cond, mutex: &mutex, Auto: A} // data struct
    var B = make([]Car, 40)
    Auto2 := Cars{count: 0, MaxLen: 40, cond: cond, mutex: &mutex, Auto: B} // results struct
    var waitGroup = sync.WaitGroup{}
    ThreadsAmt := 8
    waitGroup.Add(ThreadsAmt)
    for i := 0; i < ThreadsAmt; i++ {
        go execute(&Auto1, &waitGroup, &Auto2)
    }
    for _, s := range Auto {
        Auto1.Insert(s)
    }
    Auto1.end = true // finished writing to the data struct
    waitGroup.Wait()
    var RLoc = CurrentWD + "\\Results.txt"
    f, err := os.Create(RLoc)
    defer f.Close()
    f.WriteString(fmt.Sprintf("%15s|%4s|%12s|%50s \n", "Make", "Year", "Displacement", "Hash"))
    for i := 0; i < Auto2.count-1; i++ {
        f.WriteString(fmt.Sprintf("%3d %15s|%4d|%12.2f|%50s \n", i, Auto2.Auto[i].Make,
            Auto2.Auto[i].Year, Auto2.Auto[i].Displacement, Auto2.Auto[i].Hash))
    }
    fmt.Println("Program finished execution")
}

func execute(Data *Cars, group *sync.WaitGroup, res *Cars) {
    hash := sha256.New()
    for Data.end == false && Data.count != 0 {
        carTemp := Data.Remove(group) // removes and returns a car object from the data struct
        if carTemp.Displacement > 0 { // checks if the returned car's displacement is bigger than 0
            var ss string
            ss = carTemp.Make + strconv.Itoa(carTemp.Year) + fmt.Sprint(carTemp.Displacement) // string used to calculate the hash
            sum := hash.Sum([]byte(ss))
            for i := 0; i < len(sum); i++ {
                ss += string(sum[i]) // joining the hash byte array into a string
            }
            carTemp.Hash = ss
            res.InsertSort(carTemp) // inserts the car into the results struct
        }
    }
    defer group.Done()
}
Data: Auto.json
[{
"Make": "Chrysler",
"Year": 1997,
"Displacement": 3.6
}, {
"Make": "Honda",
"Year": 2016,
"Displacement": 1.4
}, {
"Make": "Aston Martin",
"Year": 2009,
"Displacement": 4.1
}, {
"Make": "Geo",
"Year": 2011,
"Displacement": 4.9
}, {
"Make": "Buick",
"Year": 2001,
"Displacement": 6.3
}, {
"Make": "Chevrolet",
"Year": 2001,
"Displacement": 2.7
}, {
"Make": "Suzuki",
"Year": 2004,
"Displacement": 4.5
}, {
"Make": "Studebaker",
"Year": 2001,
"Displacement": 7.5
}, {
"Make": "Chevrolet",
"Year": 2020,
"Displacement": 1.1
}, {
"Make": "Volkswagen",
"Year": 1996,
"Displacement": 6.2
}, {
"Make": "Mercedes-Benz",
"Year": 2009,
"Displacement": 2.9
}, {
"Make": "Nissan",
"Year": 2019,
"Displacement": 7.2
}, {
"Make": "Subaru",
"Year": 2010,
"Displacement": 2.6
}, {
"Make": "Hummer",
"Year": 1991,
"Displacement": 8.8
}, {
"Make": "Subaru",
"Year": 2017,
"Displacement": 8.0
}, {
"Make": "Mitsubishi",
"Year": 2010,
"Displacement": 6.6
}, {
"Make": "Mercedes-Benz",
"Year": 1996,
"Displacement": 2.0
}, {
"Make": "Lincoln",
"Year": 1991,
"Displacement": 9.9
}, {
"Make": "Chevrolet",
"Year": 1998,
"Displacement": 3.4
}, {
"Make": "Dodge",
"Year": 2010,
"Displacement": 5.8
}, {
"Make": "GMC",
"Year": 2016,
"Displacement": 6.8
}, {
"Make": "Chevrolet",
"Year": 2013,
"Displacement": 3.4
}, {
"Make": "Ford",
"Year": 2010,
"Displacement": 5.1
}, {
"Make": "Toyota",
"Year": 2017,
"Displacement": 9.6
}, {
"Make": "Hyundai",
"Year": 2015,
"Displacement": 3.8
}, {
"Make": "Mercedes-Benz",
"Year": 2016,
"Displacement": 4.3
}, {
"Make": "Chevrolet",
"Year": 2019,
"Displacement": 2.2
}, {
"Make": "Dodge",
"Year": 2009,
"Displacement": 1.8
}, {
"Make": "Pontiac",
"Year": 2006,
"Displacement": 4.6
}, {
"Make": "Chevrolet",
"Year": 2008,
"Displacement": 9.2
}]
Error:
Fatal error: all goroutines are asleep - deadlock!
goroutine 1 [sync.Cond.Wait]:
runtime.goparkunlock(...)
E:/Program Files (x86)/Go projects/go1.15.2/src/runtime/proc.go:312
sync.runtime_notifyListWait(0xc00003c050, 0x3)
E:/Program Files (x86)/Go projects/go1.15.2/src/runtime/sema.go:513 +0x117
sync.(*Cond).Wait(0xc00003c040)
E:/Program Files (x86)/Go projects/go1.15.2/src/sync/cond.go:56 +0xa5
main.(*Cars).Insert(0xc00003c080, 0xc000014343, 0x3, 0x7e0, 0x401b333333333333, 0x0, 0x0)
c:/Users/Justas/OneDrive - Kaunas University of Technology/5 Semestras/Lygiagretusis programavimas/Lab1/main.go:32 +0x57
main.main()
c:/Users/Justas/OneDrive - Kaunas University of Technology/5 Semestras/Lygiagretusis programavimas/Lab1/main.go:109 +0x53c
exit status 2
Process exiting with code: 1 signal: false
Welcome to StackOverflow & Go development! Go concurrency is powerful - but also very hard to master. Don't get discouraged!
Issues:
While the other functions employ sync locking, the execute() function does not, leading to the bulk of the data-race conditions.
To build a race-detector-enabled binary, run go build -race (or go run -race); standard error will then show where the concurrent reads/writes occur. Read more in the Go race detector documentation.
The InsertSort function is broken in a lot of edge cases.
The use of the hasher is incorrect:
Create the hash, write the input bytes to it, then compute the digest via h.Sum(nil).
Hashes are binary, so when printing them with fmt, hex formatting (%x) is recommended - see the sketch below this list.
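For example, the hashing step in execute() could look like the sketch below. It reuses the ss and carTemp names from the question's code and is only an illustration of the New/Write/Sum(nil) pattern, not a drop-in patch:
h := sha256.New()
h.Write([]byte(ss))                   // feed the input bytes to the hasher
sum := h.Sum(nil)                     // compute the 32-byte digest
carTemp.Hash = fmt.Sprintf("%x", sum) // hex-encode the binary digest for printing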
Design:
As @Adrian mentioned, using a channel is much easier to coordinate when feeding work items in; a slice is tedious to control for concurrent access here.
Your output requires a sorted slice, so in that case a results channel does not make sense.
Go slices already track their length, which you can retrieve with the built-in len() function, so there is no need to track it with your own count field.
Slice entries do not need to be zeroed out when reducing the length of a slice.
To remove the last element of a slice: s = s[:len(s)-1]
There is too much quirky slice manipulation to pin down the exact cause of the deadlock.
The Go standard library has sort.Slice to do the heavy lifting for you - see the short sketch after this list.
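For illustration, assuming cars and results are plain []Car slices (hypothetical names, not from the question's code; needs the fmt and sort imports), the two idioms above look like this:
last := cars[len(cars)-1] // read the last element
cars = cars[:len(cars)-1] // shrink the slice by one; no need to zero the removed entry
fmt.Println("removed", last.Make)

sort.Slice(results, func(i, j int) bool { // sort results by displacement, ascending
    return results[i].Displacement < results[j].Displacement
})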
I applied all of the above suggestions and rolled them into a channel-based solution, with a mutex on the output slice so the results stay sorted at runtime:
https://play.golang.org/p/ewR3zHxirL8
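The playground link above has the full program. The sketch below only shows the general shape of such a solution - the names (process, hashCar, workers) are illustrative and not necessarily the ones used in the playground code; it assumes the Car struct from the question and the crypto/sha256, fmt, sort and sync imports:
func hashCar(c Car) string {
    s := fmt.Sprintf("%s%d%v", c.Make, c.Year, c.Displacement) // same input string the question builds
    sum := sha256.Sum256([]byte(s))                            // one-shot SHA-256
    return fmt.Sprintf("%x", sum)                              // hex-encode the binary digest
}

func process(cars []Car, workers int) []Car {
    jobs := make(chan Car)

    var mu sync.Mutex
    var results []Car

    var wg sync.WaitGroup
    for i := 0; i < workers; i++ {
        wg.Add(1)
        go func() {
            defer wg.Done()
            for car := range jobs { // the loop ends when the channel is closed
                car.Hash = hashCar(car)
                mu.Lock()
                results = append(results, car)
                sort.Slice(results, func(i, j int) bool { // keep the output sorted at runtime
                    return results[i].Displacement < results[j].Displacement
                })
                mu.Unlock()
            }
        }()
    }

    for _, c := range cars {
        jobs <- c // feed the work items in
    }
    close(jobs) // tell the workers there is no more work
    wg.Wait()
    return results
}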
This can be improved by having the workers write results to an output channel and having a single results goroutine read from that channel, so the sort happens only once at the end. This removes the need for any sync.Mutexes and for any custom structs:
https://play.golang.org/p/0T1fFaP0iml
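Again, only a rough sketch of that variant's shape (same assumed hashCar helper and Car struct as above, not the exact playground code):
func processWithCollector(cars []Car, workers int) []Car {
    jobs := make(chan Car)
    out := make(chan Car)

    var wg sync.WaitGroup
    for i := 0; i < workers; i++ {
        wg.Add(1)
        go func() {
            defer wg.Done()
            for car := range jobs {
                car.Hash = hashCar(car)
                out <- car // hand the result to the collector instead of touching shared state
            }
        }()
    }

    go func() { // feed the work in, then signal that no more is coming
        for _, c := range cars {
            jobs <- c
        }
        close(jobs)
    }()

    go func() { // close the output channel once every worker has finished
        wg.Wait()
        close(out)
    }()

    var results []Car
    for car := range out { // single reader, so no mutex is needed
        results = append(results, car)
    }
    sort.Slice(results, func(i, j int) bool { // sort once, at the very end
        return results[i].Displacement < results[j].Displacement
    })
    return results
}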
Sorry, I haven't looked at it in detail. But one thing that immediately stands out is the data race where you check Data.end in the goroutine running execute() while modifying it with Auto1.end = true in main(). Similarly, the read of Data.count in execute() is not protected by the mutex.
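For example, one way to make that loop condition race-free is to read both fields under the same mutex the writers use - a minimal sketch only; it removes the race but does not by itself fix the shutdown logic:
func (a *Cars) finished() bool {
    a.mutex.Lock()
    defer a.mutex.Unlock()
    return a.end && a.count == 0 // both fields read under the lock that protects them
}
execute() would then loop with for !Data.finished() { ... } instead of reading the fields directly.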
Another bug I noticed is in InsertSort: if the displacement is less than that of the first element, j stays zero and the new element overwrites the existing one instead of shifting it along. This might explain your lost data.
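A sketch of a correct sorted insert, assuming a.Auto is treated as a growable slice rather than a fixed-size array with a separate count (it uses the standard append/copy insert idiom plus the sort package):
j := sort.Search(len(a.Auto), func(i int) bool { // first index whose displacement is >= the new one
    return a.Auto[i].Displacement >= aut.Displacement
})
a.Auto = append(a.Auto, Car{}) // grow the slice by one
copy(a.Auto[j+1:], a.Auto[j:]) // shift the tail one position to the right
a.Auto[j] = aut                // insert at the found position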
Have you tried using the race detector? It can be useful in finding problems, some of which can cause deadlocks. However, this problem sounds like it would be better suited (like most concurrency problems) to channels; in my experience it's much harder to get things right using a mutex.
There are a lot of other little things that could be tidied up to make the code easier to understand. E.g. the parameter to the Remove() method is not used, and defer group.Done() should be at the top of the execute() function rather than the bottom. Maybe you could fix the above problems, test it, and post the new code if it still has problems. One thing you could try yourself is adding log messages at critical points to try to understand what is happening.