dhcp_scope_watch.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. #!/usr/bin/env python
  2. #
  3. # Copyright (c) 2017-2023 Joe Clarke <jclarke@cisco.com>
  4. # All rights reserved.
  5. #
  6. # Redistribution and use in source and binary forms, with or without
  7. # modification, are permitted provided that the following conditions
  8. # are met:
  9. # 1. Redistributions of source code must retain the above copyright
  10. # notice, this list of conditions and the following disclaimer.
  11. # 2. Redistributions in binary form must reproduce the above copyright
  12. # notice, this list of conditions and the following disclaimer in the
  13. # documentation and/or other materials provided with the distribution.
  14. #
  15. # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  16. # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18. # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  19. # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20. # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  21. # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  22. # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  23. # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  24. # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  25. # SUCH DAMAGE.
  26. import sys
  27. import json
  28. from sparker import Sparker, MessageType # type: ignore
  29. from subprocess import Popen, PIPE
  30. import re
  31. import shlex
  32. import os
  33. from multiprocessing import Pool
  34. import time
  35. import CLEUCreds # type: ignore
  36. from cleu.config import Config as C # type: ignore
# Room name handed to Sparker.post_to_spark() for every alert message.
SPARK_ROOM = "DHCP Scope Alarms"
# Utilization percentage at or above which a scope is alerted on.  Stored as
# a string and converted with float() at the point of comparison.
THRESHOLD = "75"
# JSON file holding the per-scope alert state; read at startup and rewritten
# at the end of each run.
CACHE_FILE = "/home/jclarke/dhcp_scope.dat"
# JSON file that receives the utilization percentage of every scope seen in
# the current run.
STATS_FILE = "/home/jclarke/dhcp_scope_stats.dat"
  41. def parse_result(out):
  42. matches = re.findall(r"([\w-]+=[^;]+);(?=\s|$)", out)
  43. res = {}
  44. for m in matches:
  45. if m == "":
  46. continue
  47. k, v = m.split("=")
  48. res[k] = v
  49. return res
  50. def get_results(scope):
  51. global DHCP_SERVER
  52. scope = scope.strip()
  53. if scope != "100 Ok" and scope != "":
  54. for _ in range(2):
  55. proc = Popen(
  56. shlex.split(f"ssh -2 root@{C.DHCP_SERVER} /root/nrcmd.sh -r scope {scope} getUtilization"), stdout=PIPE, stderr=PIPE
  57. )
  58. out, err = proc.communicate()
  59. outs = out.decode("utf-8")
  60. errs = err.decode("utf-8")
  61. if re.search(r"^100", outs):
  62. break
  63. time.sleep(1)
  64. if not re.search(r"^100", outs):
  65. sys.stderr.write(f"Error getting scope utilization for {scope}: {outs} {errs}\n")
  66. return None
  67. outd = parse_result(outs)
  68. if "active-dynamic" not in outd or "total-dynamic" not in outd or "free-dynamic" not in outd:
  69. return None
  70. util = (float(outd["active-dynamic"]) / float(outd["total-dynamic"])) * 100.0
  71. # print('Util for {0} is {1:.2f}% utilized'.format(scope, util))
  72. return (
  73. scope,
  74. {
  75. "util": util,
  76. "free-dynamic": outd["free-dynamic"],
  77. "active-dynamic": outd["active-dynamic"],
  78. "total-dynamic": outd["total-dynamic"],
  79. },
  80. )
  81. def get_metrics(pool):
  82. global DHCP_SERVER
  83. response = {}
  84. proc = Popen(shlex.split("ssh -2 root@{} /root/nrcmd.sh -r scope listnames".format(C.DHCP_SERVER)), stdout=PIPE, stderr=PIPE)
  85. out, err = proc.communicate()
  86. outs = out.decode("utf-8")
  87. errs = err.decode("utf-8")
  88. if not re.search(r"^100", outs):
  89. sys.stderr.write(f"Error getting scopes: {outs} {errs}\n")
  90. sys.exit(0)
  91. scopes = outs.split("\n")
  92. results = [pool.apply_async(get_results, [s]) for s in scopes[1:]]
  93. for res in results:
  94. retval = res.get()
  95. if retval is not None:
  96. response[retval[0]] = retval[1]
  97. return response
  98. if __name__ == "__main__":
  99. prev_state = {}
  100. curr_state = {}
  101. stats = {}
  102. spark = Sparker(token=CLEUCreds.SPARK_TOKEN)
  103. if os.path.exists(CACHE_FILE):
  104. with open(CACHE_FILE, "r") as fd:
  105. prev_state = json.load(fd)
  106. pool = Pool(20)
  107. metrics = get_metrics(pool)
  108. for scope, stat in metrics.items():
  109. stats[scope] = {"perc": stat["util"]}
  110. if stat["util"] >= float(THRESHOLD):
  111. curr_state[scope] = stat["util"]
  112. if scope not in prev_state or (scope in prev_state and stat["util"] - prev_state[scope] >= 1.0):
  113. curr_state[scope] = stat["util"]
  114. spark.post_to_spark(
  115. C.WEBEX_TEAM,
  116. SPARK_ROOM,
  117. "Scope **{0}** is now **{1:.2f}%** utilized ({2} of {3} free addresses remain); suppressing future alerts until resolved or utilization increases".format(
  118. scope, stat["util"], stat["free-dynamic"], stat["total-dynamic"]
  119. ),
  120. MessageType.WARNING,
  121. )
  122. else:
  123. curr_state[scope] = False
  124. if scope in prev_state and prev_state[scope]:
  125. spark.post_to_spark(
  126. C.WEBEX_TEAM,
  127. SPARK_ROOM,
  128. "Scope **{0}** is now only **{1:.2f}%** utilized ({2} free addresses out of {3})".format(
  129. scope, stat["util"], stat["free-dynamic"], stat["total-dynamic"]
  130. ),
  131. MessageType.GOOD,
  132. )
  133. with open(CACHE_FILE, "w") as fd:
  134. json.dump(curr_state, fd, indent=4)
  135. with open(STATS_FILE, "w") as fd:
  136. json.dump(stats, fd, indent=4)