poll_macs.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305
  1. #!/usr/bin/env python
  2. #
  3. # Copyright (c) 2017-2023 Joe Clarke <jclarke@cisco.com>
  4. # All rights reserved.
  5. #
  6. # Redistribution and use in source and binary forms, with or without
  7. # modification, are permitted provided that the following conditions
  8. # are met:
  9. # 1. Redistributions of source code must retain the above copyright
  10. # notice, this list of conditions and the following disclaimer.
  11. # 2. Redistributions in binary form must reproduce the above copyright
  12. # notice, this list of conditions and the following disclaimer in the
  13. # documentation and/or other materials provided with the distribution.
  14. #
  15. # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  16. # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18. # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  19. # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20. # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  21. # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  22. # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  23. # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  24. # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  25. # SUCH DAMAGE.
  26. from builtins import str
  27. from builtins import range
  28. import os
  29. import re
  30. import sys
  31. import time
  32. import json
  33. import paramiko
  34. from multiprocessing import Pool
  35. import traceback
  36. import CLEUCreds # type: ignore
  37. CACHE_FILE = "/home/jclarke/mac_counts.dat"
  38. CACHE_FILE_TMP = CACHE_FILE + ".tmp"
  39. IDF_FILE = "/home/jclarke/idf-devices.json"
  40. commands = {
  41. "macCore": {
  42. "command": "show mac address-table count | inc Dynamic Address Count",
  43. "pattern": r"Dynamic Address Count:\s+(\d+)",
  44. "metric": "totalMacs",
  45. },
  46. "macIdf": {
  47. "command": "show mac address-table dynamic | inc Total",
  48. "pattern": r"Total.*: (\d+)",
  49. "metric": "totalMacs",
  50. },
  51. "arpEntries": {
  52. "command": "show ip arp summary | inc IP ARP",
  53. "pattern": r"(\d+) IP ARP entries",
  54. "metric": "arpEntries",
  55. },
  56. "ndEntries": {
  57. "command": "show ipv6 neighbors statistics | inc Entries",
  58. "pattern": r"Entries (\d+),",
  59. "metric": "ndEntries",
  60. },
  61. "natTrans": {
  62. "command": "show ip nat translations total",
  63. "pattern": r"Total number of translations: (\d+)",
  64. "metric": "natTranslations",
  65. },
  66. "umbrella1Trans": {
  67. "command": "show platform hardware qfp active feature nat datapath limit",
  68. "pattern": r"limit_type 5 limit_id 0xa64fd06.*curr_count (\d+)",
  69. "metric": "umbrella1NatTrans",
  70. },
  71. "umbrella2Trans": {
  72. "command": "show platform hardware qfp active feature nat datapath limit",
  73. "pattern": r"limit_type 5 limit_id 0xa64fe06.*curr_count (\d+)",
  74. "metric": "umbrella2NatTrans",
  75. },
  76. "natPoolDefault1": {
  77. "command": "show ip nat statistics | begin NAT-POOL-DEFAULT-1",
  78. "pattern": r"total addresses (\d+), allocated (\d+)[^,]+, misses (\d+)",
  79. "metrics": ["natPoolDefault1Addresses", "natPoolDefault1Allocated", "natPoolDefault1Misses"],
  80. },
  81. "natPoolDefault2": {
  82. "command": "show ip nat statistics | begin NAT-POOL-DEFAULT-2",
  83. "pattern": r"total addresses (\d+), allocated (\d+)[^,]+, misses (\d+)",
  84. "metrics": ["natPoolDefault2Addresses", "natPoolDefault2Allocated", "natPoolDefault2Misses"],
  85. },
  86. "natPoolDns": {
  87. "command": "show ip nat statistics | begin NAT-POOL-DNS",
  88. "pattern": r"total addresses (\d+), allocated (\d+)[^,]+, misses (\d+)",
  89. "metrics": ["natPoolDnsAddresses", "natPoolDnsAllocated", "natPoolDnsMisses"],
  90. },
  91. "natPoolLabs": {
  92. "command": "show ip nat statistics | begin NAT-POOL-LABS",
  93. "pattern": r"total addresses (\d+), allocated (\d+)[^,]+, misses (\d+)",
  94. "metrics": ["natPoolLabsAddresses", "natPoolLabsAllocated", "natPoolLabsMisses"],
  95. },
  96. "natPoolWLC": {
  97. "command": "show ip nat statistics | begin NAT-ACL-WLC",
  98. "pattern": r"total addresses (\d+), allocated (\d+)[^,]+, misses (\d+)",
  99. "metrics": ["natPoolWLCAddresses", "natPoolWLCAllocated", "natPoolWLCMisses"],
  100. },
  101. "natGatewayStatsIn": {
  102. "command": "show platform hardware qfp active feature nat datapath gatein activity",
  103. "pattern": r"Hits ([^,]+), Miss ([^,]+), Aged ([^ ]+) Added ([^ ]+) Active ([0-9]+)",
  104. "metrics": ["natGateInHits", "natGateInMisses", "natGateInAged", "natGateInAdded", "natGateInActive"],
  105. },
  106. "natGatewayStatsOut": {
  107. "command": "show platform hardware qfp active feature nat datapath gateout activity",
  108. "pattern": r"Hits ([^,]+), Miss ([^,]+), Aged ([^ ]+) Added ([^ ]+) Active ([0-9]+)",
  109. "metrics": ["natGateOutHits", "natGateOutMisses", "natGateOutAged", "natGateOutAdded", "natGateOutActive"],
  110. },
  111. "natHealthStats": {
  112. "command": "show ip nat statistics | begin In-to-out",
  113. "pattern": r"In-to-out-drops: (\d+)\s+Out-to-in-drops: (\d+).*Pool stats drop: (\d+)\s+Mapping stats drop: (\d+).*Port block alloc fail: (\d+).*IP alias add fail: (\d+).*Limit entry add fail: (\d+)",
  114. "metrics": [
  115. "natHealthInOutDrops",
  116. "natHealthOutInDrops",
  117. "natHealthStatsDrops",
  118. "natHealthPortBlockAllocFail",
  119. "natHealthAliasAddFail",
  120. "natHealthEntryAddFail",
  121. ],
  122. },
  123. "qfpUtil": {
  124. "command": "show platform hardware qfp active datapath utilization summary",
  125. "pattern": r"Processing: Load \(pct\)\s+(\d+)",
  126. "metric": "qfpUtil",
  127. },
  128. }
  129. devices = [
  130. {
  131. "pattern": "CORE{}-CORE",
  132. "range": {"min": 1, "max": 2},
  133. "commands": ["arpEntries", "ndEntries"],
  134. },
  135. {
  136. "file": IDF_FILE,
  137. "commands": ["macIdf", "arpEntries", "ndEntries"],
  138. },
  139. {
  140. "pattern": "CORE{}-WA",
  141. "range": {"min": 1, "max": 2},
  142. "commands": ["macIdf", "arpEntries", "ndEntries"],
  143. },
  144. {
  145. "pattern": "CORE{}-EDGE",
  146. "range": {"min": 1, "max": 2},
  147. "commands": [
  148. "natTrans",
  149. "qfpUtil",
  150. "umbrella1Trans",
  151. "umbrella2Trans",
  152. "natPoolDefault1",
  153. "natPoolDefault2",
  154. "natPoolDns",
  155. "natPoolLabs",
  156. "natPoolWLC",
  157. "natHealthStats",
  158. "natGatewayStatsIn",
  159. "natGatewayStatsOut",
  160. ],
  161. },
  162. ]
  163. def send_command(chan, command):
  164. chan.sendall(command + "\n")
  165. time.sleep(0.5)
  166. output = ""
  167. i = 0
  168. while i < 60:
  169. r = chan.recv(65535)
  170. if len(r) == 0:
  171. raise EOFError("Remote host has closed the connection")
  172. r = r.decode("utf-8", "ignore")
  173. output += r
  174. if re.search(r"[#>]$", r.strip()):
  175. break
  176. time.sleep(1)
  177. return output
  178. def get_results(dev):
  179. global commands
  180. ssh_client = paramiko.SSHClient()
  181. ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
  182. response = []
  183. try:
  184. ssh_client.connect(
  185. dev["device"],
  186. username=CLEUCreds.NET_USER,
  187. password=CLEUCreds.NET_PASS,
  188. timeout=5,
  189. allow_agent=False,
  190. look_for_keys=False,
  191. )
  192. chan = ssh_client.invoke_shell()
  193. try:
  194. send_command(chan, "term width 0")
  195. send_command(chan, "term length 0")
  196. for command in dev["commands"]:
  197. cmd = commands[command]["command"]
  198. pattern = commands[command]["pattern"]
  199. metric = None
  200. if "metric" in commands[command]:
  201. metric = commands[command]["metric"]
  202. output = ""
  203. try:
  204. output = send_command(chan, cmd)
  205. except Exception as iie:
  206. response.append("")
  207. sys.stderr.write("Failed to get result for {} from {}: {}\n".format(cmd, dev["device"], iie))
  208. traceback.print_exc()
  209. m = re.search(pattern, output)
  210. if m:
  211. if metric:
  212. response.append('{}{{idf="{}"}} {}'.format(metric, dev["device"], m.group(1)))
  213. else:
  214. metrics = commands[command]["metrics"]
  215. i = 1
  216. for metric in metrics:
  217. response.append('{}{{idf="{}"}} {}'.format(metric, dev["device"], m.group(i)))
  218. i += 1
  219. else:
  220. # sys.stderr.write(
  221. # 'Failed to find pattern "{}" in "{}"\n'.format(pattern, output)
  222. # )
  223. if metric:
  224. response.append('{}{{idf="{}"}} {}'.format(metric, dev["device"], 0))
  225. else:
  226. metrics = commands[command]["metrics"]
  227. for metric in metrics:
  228. response.append('{}{{idf="{}"}} {}'.format(metric, dev["device"], 0))
  229. except Exception as ie:
  230. for command in dev["commands"]:
  231. response.append("")
  232. sys.stderr.write("Failed to setup SSH on {}: {}\n".format(dev["device"], ie))
  233. traceback.print_exc()
  234. except Exception as e:
  235. for command in dev["commands"]:
  236. response.append("")
  237. sys.stderr.write("Failed to connect to {}: {}\n".format(dev["device"], e))
  238. ssh_client.close()
  239. return response
  240. def get_metrics(pool):
  241. response = []
  242. targets = []
  243. for device in devices:
  244. if "list" in device:
  245. for dev in device["list"]:
  246. targets.append({"device": dev, "commands": device["commands"]})
  247. elif "range" in device or "subs" in device:
  248. if "range" in device:
  249. for i in range(device["range"]["min"], device["range"]["max"] + 1):
  250. targets.append(
  251. {
  252. "device": device["pattern"].format(str(i)),
  253. "commands": device["commands"],
  254. }
  255. )
  256. else:
  257. for sub in device["subs"]:
  258. targets.append(
  259. {
  260. "device": device["pattern"].format(sub),
  261. "commands": device["commands"],
  262. }
  263. )
  264. else:
  265. with open(device["file"]) as fd:
  266. for dev in json.load(fd):
  267. targets.append({"device": dev, "commands": device["commands"]})
  268. results = [pool.apply_async(get_results, [d]) for d in targets]
  269. for res in results:
  270. retval = res.get()
  271. if retval is not None:
  272. response += retval
  273. return response
  274. if __name__ == "__main__":
  275. pool = Pool(20)
  276. response = get_metrics(pool)
  277. with open(CACHE_FILE_TMP, "w") as fd:
  278. json.dump(response, fd, indent=4)
  279. os.rename(CACHE_FILE_TMP, CACHE_FILE)