vote.go 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. package cluster
import (
	"errors"
	"fmt"
	"net"
	"net/http"
	"net/url"
	"sort"
	"time"

	"github.com/langgenius/dify-plugin-daemon/internal/utils/cache"
	"github.com/langgenius/dify-plugin-daemon/internal/utils/http_requests"
)
  12. func (c *Cluster) voteIps() error {
  13. var total_errors error
  14. add_error := func(err error) {
  15. if err != nil {
  16. if total_errors == nil {
  17. total_errors = err
  18. } else {
  19. total_errors = errors.Join(total_errors, err)
  20. }
  21. }
  22. }
  23. // get all nodes status
  24. nodes, err := cache.GetMap[node](CLUSTER_STATUS_HASH_MAP_KEY)
  25. if err == cache.ErrNotFound {
  26. return nil
  27. }
  28. for node_id, node_status := range nodes {
  29. if node_id == c.id {
  30. continue
  31. }
  32. // vote for ips
  33. ips_voting := make(map[string]bool)
  34. for _, ip := range node_status.Ips {
  35. // skip ips which have already been voted by current node in the last 5 minutes
  36. for _, vote := range ip.Votes {
  37. if vote.NodeID == c.id {
  38. if time.Since(time.Unix(vote.VotedAt, 0)) < time.Minute*5 && !vote.Failed {
  39. continue
  40. } else if time.Since(time.Unix(vote.VotedAt, 0)) < time.Minute*30 && vote.Failed {
  41. continue
  42. }
  43. }
  44. }
  45. ips_voting[ip.Address] = c.voteIp(ip) == nil
  46. }
  47. // lock the node status
  48. if err := c.LockNodeStatus(node_id); err != nil {
  49. add_error(err)
  50. c.UnlockNodeStatus(node_id)
  51. continue
  52. }
  53. // get the node status again
  54. node_status, err := cache.GetMapField[node](CLUSTER_STATUS_HASH_MAP_KEY, node_id)
  55. if err != nil {
  56. add_error(err)
  57. c.UnlockNodeStatus(node_id)
  58. continue
  59. }
  60. // update the node status
  61. for i, ip := range node_status.Ips {
  62. // update voting time
  63. if success, ok := ips_voting[ip.Address]; ok {
  64. // check if the ip has already voted
  65. already_voted := false
  66. for j, vote := range ip.Votes {
  67. if vote.NodeID == c.id {
  68. node_status.Ips[i].Votes[j].VotedAt = time.Now().Unix()
  69. node_status.Ips[i].Votes[j].Failed = !success
  70. already_voted = true
  71. break
  72. }
  73. }
  74. // add a new vote
  75. if !already_voted {
  76. node_status.Ips[i].Votes = append(node_status.Ips[i].Votes, vote{
  77. NodeID: c.id,
  78. VotedAt: time.Now().Unix(),
  79. Failed: !success,
  80. })
  81. }
  82. }
  83. }
  84. // sync the node status
  85. if err := cache.SetMapOneField(CLUSTER_STATUS_HASH_MAP_KEY, node_id, node_status); err != nil {
  86. add_error(err)
  87. }
  88. // unlock the node status
  89. if err := c.UnlockNodeStatus(node_id); err != nil {
  90. add_error(err)
  91. }
  92. }
  93. return total_errors
  94. }
  95. func (c *Cluster) voteIp(ip ip) error {
  96. type healthcheck struct {
  97. Status string `json:"status"`
  98. }
  99. healthcheck_endpoint, err := url.JoinPath(fmt.Sprintf("http://%s:%d", ip.Address, c.port), "health/check")
  100. if err != nil {
  101. return err
  102. }
  103. resp, err := http_requests.GetAndParse[healthcheck](
  104. http.DefaultClient,
  105. healthcheck_endpoint,
  106. http_requests.HttpWriteTimeout(500),
  107. http_requests.HttpReadTimeout(500),
  108. )
  109. if err != nil {
  110. return err
  111. }
  112. if resp.Status != "ok" {
  113. return errors.New("health check failed")
  114. }
  115. return nil
  116. }
  117. func (c *Cluster) SortIps(node_status node) []ip {
  118. // sort by votes
  119. sort.Slice(node_status.Ips, func(i, j int) bool {
  120. return len(node_status.Ips[i].Votes) > len(node_status.Ips[j].Votes)
  121. })
  122. return node_status.Ips
  123. }