# Azure/azure-support-scripts
# 
# Copyright (c) Microsoft Corporation
#
# All rights reserved.
# 
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this
# software and associated documentation files (the "Software"), to deal in the Software
# without restriction, including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons
# to whom the Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT 
# NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


import argparse
import os
import sys
import socket
import requests
import logging
import subprocess
import re
# for os-release (initially)
import csv
import pathlib
# network checking
import socket
import json
# For talking to the wire server and decoding responses
import http.client
from xml.etree import ElementTree

### COMMAND LINE ARGUMENT HANDLING
parser = argparse.ArgumentParser(
    description="stuff"
)
# -b carries the pipe-delimited KEY=VALUE string assembled by the bash wrapper (see example below)
parser.add_argument('-b', '--bash', required=True, type=str)
parser.add_argument('-r', '--report', action='store_true') # this is just to 'catch' a bash-side parameter, we don't use it
parser.add_argument('-d', '--debug', action='store_true')
parser.add_argument('-v', '--verbose', action='count', default=0)
# log file defaults to /var/log/azure/<this script's file name>.log
parser.add_argument('-l', '--log', type=str, required=False, default='/var/log/azure/'+os.path.basename(__file__)+'.log')
parser.add_argument('-t', '--noterm', action='store_true') # mainly used for coloring output
args=parser.parse_args()
# TODO: implement using verbosity level
# --debug implies at least one level of verbosity
if args.debug and args.verbose == 0:
  args.verbose = 1

# example bash value:
# bash="DISTRO=debian|SERVICE=walinuxagent.service|UNIT=active|PY=/usr/bin/python3.8|PYCOUNT=1|PYREQ=loaded|PYALA=loaded"
# Split each segment on the FIRST '=' only, so a value may itself contain '=';
# segments with no '=' at all (empty or malformed) are skipped instead of raising ValueError
bashArgs = dict(
  pair.split('=', 1) for pair in args.bash.split("|") if '=' in pair
)
# any value can be extracted with
#   bashArgs.get('NAME', "DefaultString")
#  ex:
#   bashArgs.get('PY',"N/A")
### END COMMAND LINE ARGUMENT HANDLING
### UTILS
#### UTIL VARs and OBJs
# Root logging goes to the file named by --log, at DEBUG level, with a timestamp prefix
logging.basicConfig(format='%(asctime)s %(message)s', filename=args.log, level=logging.DEBUG)
logger = logging.getLogger(__name__)
# start logging as soon as possible
logger.info(f"Python script started:{os.path.basename(__file__)}")
# with any verbosity requested, also send log records to stdout
if args.verbose > 0:
  consoleHandler = logging.StreamHandler(sys.stdout)
  logging.getLogger().addHandler(consoleHandler)
  logger.info("Debug on")
#### END UTIL VARS
#### UTIL FUNCTIONS
def colorPrint(color, strIn):
  # Return strIn prefixed with the escape sequence in `color` and followed by a space
  # and the reset sequence. When --noterm was given, strIn is returned untouched.
  # Despite the name nothing is printed here; the caller decides what to do with the string.
  if args.noterm:
    return strIn
  return color + f"{strIn} \033[00m"
def cRed(strIn):
  # wrap strIn using escape code 91 (red)
  return colorPrint("\033[91m", strIn)

def cGreen(strIn):
  # wrap strIn using escape code 92 (green)
  return colorPrint("\033[92m", strIn)

def cYellow(strIn):
  # wrap strIn using escape code 93 (yellow)
  return colorPrint("\033[93m", strIn)

def cBlack(strIn):
  # wrap strIn using escape code 98
  return colorPrint("\033[98m", strIn)
def colorString(strIn, redVal="dead", greenVal="active", yellowVal="inactive"):
  # Color strIn according to which marker it contains (case-insensitive substring match).
  # All arguments are coerced to str first. Returns the colored string; falls back to cBlack
  # when no marker is present.
  text = str(strIn)
  haystack = text.lower()
  # Order matters: errors first, then warnings, then "OK". With the defaults this also keeps
  # "inactive" from being reported green just because it contains "active".
  rules = (
    (str(redVal), cRed),
    (str(yellowVal), cYellow),
    (str(greenVal), cGreen),
  )
  for marker, paint in rules:
    if marker.lower() in haystack:
      return paint(text)
  return cBlack(text)

#### END UTIL FUNCS
### END UTILS
### MAIN CODE
#### Global vars setup
fullPercent=90
wireIP="168.63.129.16"
imdsIP="169.254.169.254"
# debug percentage
if ( args.verbose > 0 ):
  fullPercent=20
# parse out os-release and put the values into a dict

path = pathlib.Path("/etc/os-release")
with open(path) as stream:
  # Only KEY=VALUE lines are of interest; blank lines and '#' comment lines would otherwise
  # produce one-column rows and break the dict construction below
  kvLines = [line for line in stream if "=" in line and not line.lstrip().startswith("#")]
# csv takes care of the optional double quotes around values; if an unquoted value contained
# '=' the extra columns are glued back together
os_release = {row[0]: "=".join(row[1:]) for row in csv.reader(kvLines, delimiter="=") if len(row) >= 2}
# NOTE(review): ID_LIKE can hold several space separated names on some distros, in which case
# none of the simple equality tests against osrID will match - confirm whether that is intended
osrID=os_release.get("ID_LIKE", os_release.get("ID"))
# VERSION_ID may be missing, a bare major number ("12") or have more than two parts ("15.5.1").
# Take the first two numeric groups and fall back to 0 for whatever is not there.
versionParts = re.findall(r"\d+", os_release.get("VERSION_ID", ""))
osMajS,osMinS = (versionParts + ["0", "0"])[:2]
osMaj=int(osMajS)
osMin=int(osMinS)

# TODO: Add a family / major version check for 'supported' and "doesn't work" checks
# TODO: perhaps add a best-effort flag, wrap things that might not work in 'best effort' mode
# -- weird versions - OEL, Alma, Rocky

# holding dicts for all the different things we will validate
bins={}
services={}
checks={}
findings={}
# took out the part to put some default findings in, delete them if we find something bad

#### END Global vars
#### Main logic functions
def _pkgCmdOutput(cmd):
  # Run a package-manager command given as an argv list (no shell involved) and return its
  # stdout decoded and stripped. stderr is discarded. A non-zero exit raises
  # subprocess.CalledProcessError, a missing executable raises OSError.
  return subprocess.check_output(cmd, stderr=subprocess.DEVNULL).decode().strip()

def _pkgCmdError(err):
  # Readable text for a failed command: its captured stdout when there is any,
  # otherwise the exception's own message
  output = getattr(err, "output", None)
  if output:
    return output.decode(errors="replace").strip()
  return str(err)

def _repoField(pattern, text):
  # Find the first line fragment matching pattern (e.g. "Repository   : name") and return the
  # text after its first ':'. Returns None when the pattern or the ':' is not present.
  found = re.search(pattern, text)
  if not found:
    return None
  _, sep, value = found.group().partition(":")
  return value.strip() if sep else None

def validateBin(binPathIn):
  # usage: pass in a binary (or any file) to check, the following will be determined
  #  - absolute path (dereference links)
  #  - provided by what package
  #  - what repo provides the package
  # output: nothing is returned; the result is stored in the global dict as
  #   bins[binPathIn] = {"exe": binPathIn, "pkg": <package or explanation>, "repo": <repo or explanation>}
  # Both "pkg" and "repo" are always set, lookup failures are recorded as text rather than raised.
  logger.info("Validating " + binPathIn)
  # keep the passed value around, the dereferenced path is what gets queried first
  binPath=os.path.realpath(binPathIn)
  if ( binPath != binPathIn ):
    logger.info(f"Link found: {binPathIn} points to {binPath}, verify outputs if this returns empty data")
  thisBin={"exe":binPathIn}
  # a failing command (CalledProcessError) and a missing tool (OSError) are handled the same way
  cmdErrors=(subprocess.CalledProcessError, OSError)

  if (osrID == "debian"):
    pkg=None
    try:
      # Find what package owns the binary; dpkg prints "package: /path"
      pkg=_pkgCmdOutput(["dpkg", "-S", binPath]).split(":")[0]
    except cmdErrors:
      logger.info(f"issue validating {binPath}, reverting to original path: {binPathIn}")
      try:
        pkg=_pkgCmdOutput(["dpkg", "-S", binPathIn]).split(":")[0]
      except cmdErrors as e:
        logger.info(f"All attempts to validate {binPathIn} have failed. Likely a rogue file: {_pkgCmdError(e)}")
    if pkg:
      thisBin["pkg"]=pkg
      # find what repository the package came from
      origin=None
      try:
        origin=re.search("Origin.*", _pkgCmdOutput(["apt-cache", "show", "--no-all-versions", pkg]))
      except cmdErrors as e:
        logger.info(f"apt-cache lookup for {pkg} failed: {_pkgCmdError(e)}")
      if origin:
        # the whole "Origin: ..." line is kept, later checks search inside it
        thisBin["repo"]=origin.group()
      else:
        # we didn't get a match, probably a manual install (dpkg) or installed from source
        logger.info(f"package {pkg} does not appear to have come from a repository")
        thisBin["repo"]="no repo"
    else:
      # binary not found or may be source installed (no pkg)
      thisBin["pkg"]=f"no file or owning pkg for {binPathIn}"
      thisBin["repo"]="n/a"
  elif ( osrID == "fedora"):
    try:
      rpm=_pkgCmdOutput(["rpm", "-q", "--whatprovides", binPath])
    except cmdErrors as e:
      thisBin["pkg"]=f"no file or owning pkg: {_pkgCmdError(e)}"
      thisBin["repo"]="n/a"
    else:
      thisBin["pkg"]=rpm
      try:
        result=subprocess.run(["dnf","info",rpm], stdout=subprocess.PIPE, stderr=subprocess.PIPE,check=True)
      except subprocess.CalledProcessError as e:
        # we didn't get a match, probably a manual install (rpm), built from source, or a general DNF failure
        thisBin["repo"]=f"repo search failed: {e.stderr.decode()}"
      except OSError as e:
        thisBin["repo"]=f"repo search failed: {e}"
      else:
        # Repo line should look like "From repo   : [reponame]" so clean it up
        repo=_repoField("From repo.*", result.stdout.decode().strip())
        if repo is None:
          repo="repo search failed: no 'From repo' line in dnf info output"
        thisBin["repo"]=repo
  elif ( osrID == "suse" or osrID == "mariner" or osrID == "azurelinux"):
    # same rpm ownership query for all three, only the repo lookup tool and its field name differ
    if ( osrID == "suse" ):
      infoCmd=["zypper", "--quiet", "--no-refresh", "info"]
      repoPattern="Repository.*"
    else:
      infoCmd=["tdnf", "--installed", "info"]
      repoPattern="Repo.*"
    try:
      rpm=_pkgCmdOutput(["rpm", "-q", "--queryformat", "%{NAME}", "--whatprovides", binPath])
    except cmdErrors as e:
      thisBin["pkg"]=f"no file or owning pkg: {_pkgCmdError(e)}"
      thisBin["repo"]="n/a"
    else:
      thisBin["pkg"]=rpm
      repo=None
      try:
        repo=_repoField(repoPattern, _pkgCmdOutput(infoCmd + [rpm]))
      except cmdErrors as e:
        logger.info(f"repo lookup for {rpm} failed: {_pkgCmdError(e)}")
      # no match: probably a manual install (rpm) or from source
      thisBin["repo"]=repo if repo is not None else "not from a repo"
  else:
    print("Unable to determine OS family from os-release")
    thisBin["pkg"]="packaging system unknown"
    thisBin["repo"]="n/a"
  logString = binPath + " owned by package '" + thisBin["pkg"] + "' from repo '" + thisBin["repo"] + "'"
  logger.info(logString)
  bins[binPathIn]=thisBin
def _unitProperty(unitName, prop):
  # Return the value of one `systemctl show` property for unitName.
  # The output has the form PROP=value; everything after the first '=' is returned
  # ('' when the output holds no '=').
  shown=subprocess.check_output(["systemctl", "show", unitName, f"--property={prop}"]).decode().strip()
  return shown.partition("=")[2]

def checkService(unitName, package=False):
  # take in a unit name and check status, enabled state, unit file path and (optionally) owning package
  # output: nothing is returned; the result is stored in the global dict as
  #   services[unitName] = {"svc", "status", "config", "path", "pkg", "repo"}
  # Every key is always present, so callers can read them without checking; values that could
  # not be determined keep the defaults set below.
  # package=True additionally resolves the unit file's package/repo through validateBin()

  logger.info("Service/Unit check " + unitName)

  thisSvc={"svc":unitName, "status":"undef", "config":"undef", "path":"n/a", "pkg":"n/a", "repo":"n/a"}
  unitStat=0          # default service status return, we'll set this based on the 'systemctl status' RC
  # First off, let us check if the unit even exists
  try:
    subprocess.check_output(["systemctl", "status", unitName])
    #0 program is running or service is OK <<= default value of unitStat
    #1 program is dead and /var/run pid file exists
    #2 program is dead and /var/lock lock file exists
    #3 program is not running
    #4 program or service status is unknown
    #5-99  reserved for future LSB use
    #100-149   reserved for distribution use
    #150-199   reserved for application use
    #200-254   reserved
  except subprocess.CalledProcessError as sysctlErr:
    # we will be referencing this return code later, assuming it's not 4 - see table above
    unitStat=sysctlErr.returncode
    if ( unitStat == 4 ):
      thisSvc["status"]="nonExistantService"
    else:
      statusText=sysctlErr.output.decode(errors="replace") if sysctlErr.output else ""
      logger.info(f"Service {unitName} status returned unexpected value: {unitStat} with text: {statusText}")
  except OSError as sysctlErr:
    # systemctl itself could not be started; use the code a shell reports for "command not found"
    unitStat=127
    logger.info(f"Unable to run systemctl for {unitName}: {sysctlErr}")
  # Unit was determined to exist (not rc=4), so lets validate the service status and maybe some other files
  if ( unitStat < 4 ):
    # Process the configured, active and substate for the service.  Active/Sub could be inactive(dead) in an interactive console
    thisSvc["config"]=_unitProperty(unitName, "UnitFileState")
    active=_unitProperty(unitName, "ActiveState")
    sub=_unitProperty(unitName, "SubState")
    # make the 'status' look like the output of `systemctl status`
    thisSvc["status"]=f"{active}({sub})"

    # more integrity checks based on digging into the files
    unitPath=_unitProperty(unitName, "FragmentPath")
    if ( unitPath ):
      thisSvc["path"]=unitPath
    if ( package and unitPath ):
      # We need to process the owner and path of the unit if (package) was set by the caller
      logger.info(f"Checking owners for unit: {unitName} using validateBins")
      # No need to re-code all this, just call validateBin(binName)
      validateBin(unitPath)
      # take the entry back out of bins, it only exists because validateBin() stores its result there
      ownerInfo=bins.pop(unitPath)
      thisSvc["pkg"]=ownerInfo['pkg']
      thisSvc["repo"]=ownerInfo['repo']
    elif ( package ):
      logger.info(f"no unit file path reported for {unitName}, skipping package details")
    else:
      logger.info(f"package details for {unitName} not requested, skipping")
    logger.info(f"{unitName} unit file found at {thisSvc['path']} owned by package '{thisSvc['pkg']}' from repo: {thisSvc['repo']}")
  else:
    # the unit wasn't here (or its status is unknown), the defaults set at the top stay in place
    logger.info(f"{unitName} not inspected further, status: {thisSvc['status']} (rc={unitStat})")

  services[unitName]=thisSvc
  
def checkHTTPURL(urlIn):
  # GET urlIn (5 second timeout, 'Metadata: True' header) and summarise the outcome.
  # Returns the integer HTTP status code on success, or one of the strings
  # "Error:<code>", "MaxRetries", "Timeout", "ConnectErr", "UnexpectedErr" on failure.
  try:
    response = requests.get(urlIn, headers={'Metadata': 'True'}, timeout=5)
    response.raise_for_status()
  except requests.exceptions.HTTPError:
    # the server answered, but with a 4xx/5xx status
    return f"Error:{response.status_code}"
  except requests.exceptions.RetryError:
    return "MaxRetries"
  except requests.exceptions.Timeout:
    return "Timeout"
  except requests.exceptions.ConnectionError:
    return "ConnectErr"
  except requests.exceptions.RequestException:
    # anything else raised by requests
    return "UnexpectedErr"
  return response.status_code
def isOpen(ip, port):
  # Return True if a TCP connection to ip:port can be established within 2 seconds, else False.
  # Only takes an IP, no DNS lookup is intended. Never raises for connection problems or for a
  # port value that cannot be used; those all count as "not open".
  # The socket is always closed again, whatever the outcome.
  try:
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
      s.settimeout(2)
      # connect() returns None and raises on failure, so reaching the next line means "open"
      s.connect((ip, int(port)))
      try:
        s.shutdown(socket.SHUT_RDWR)
      except OSError:
        # the peer may already have dropped the connection; the port was still reachable
        pass
    return True
  except (OSError, ValueError, OverflowError, TypeError):
    return False

def getInterfaces():
  # Collect every interface reported by `ip -j address show`, except "lo".
  # Returns {ifname: {'mac': <address>, 'ip': <IPv4 local address>}}; 'ip' is only present when
  # the interface has an IPv4 entry, and with several IPv4 entries the last one listed wins
  # -- May have an issue with multiple VIPs on a NIC
  ipRun = subprocess.run(['ip', '-j', 'address', 'show'], stdout=subprocess.PIPE)
  nics = {}
  for entry in json.loads(ipRun.stdout.decode('utf-8')):
    name = entry.get('ifname')
    if name == "lo":
      continue
    record = {'mac': entry.get('address')}
    # Only IPv4 addresses
    v4Locals = [info.get('local') for info in entry.get('addr_info', []) if info.get('family') == 'inet']
    if v4Locals:
      record['ip'] = v4Locals[-1]
    nics[name] = record
  return nics
# TODO:: repackage this search function as a generic re: search, and call it with the defined re for MAC addresses
def searchDirForMAC(dirToSearch, returnDict, okMAC):
  # Record every file under dirToSearch (recursively) that defines MAC addresses.
  #   returnDict[file_path] = [list of MAC strings as written in the file]
  # accept an 'ok' MAC definition, because it would be ok for cloud-init managed
  #   configs to have a mac defined - CI will reset the configs if the mac changes.
  #   A file that mentions okMAC (compared case-insensitively) is not recorded at all.
  #   An empty/None okMAC disables that exemption.
  # Nothing is returned; results are added to the passed in dict.
  # Colon separated MACs only (e.g. 00:1A:2B:3C:4D:5E), upper or lower case
  mac_pattern = re.compile(r'([0-9a-f]{2}(?::[0-9a-f]{2}){5})', re.IGNORECASE)
  okLower = str(okMAC).lower() if okMAC else None
  for root, _, files in os.walk(dirToSearch):
    for file_name in files:
      file_path = os.path.join(root, file_name)
      # Only regular files (or links to them); sockets, pipes and dangling links are skipped
      if not os.path.isfile(file_path):
        continue
      try:
        with open(file_path, 'r') as file:
          # findall returns a list of the matched strings
          mac_addresses = mac_pattern.findall(file.read())
      except (UnicodeDecodeError, OSError):
        # Skip files that can't be read due to encoding, permission or other I/O issues
        continue
      if not mac_addresses:
        continue
      # config files and `ip` output do not always agree on letter case, so compare lowered
      if okLower is not None and any(mac.lower() == okLower for mac in mac_addresses):
        continue
      returnDict[file_path] = mac_addresses
              
def searchDirForString(dirToSearch, returnDict, string):
  # Look for `string` in every regular file under dirToSearch (recursively).
  # For each file with at least one hit:
  #   returnDict[file_path] = "<line number>: <stripped line>" entries joined by newlines
  # Nothing is returned; results are added to the passed in dict.
  for root, _, files in os.walk(dirToSearch):
    for file_name in files:
      file_path = os.path.join(root, file_name)
      # anything that is not a regular file (sockets, pipes, etc.) is ignored
      if not os.path.isfile(file_path):
        continue
      try:
        with open(file_path, 'r') as handle:
          # line numbers are 1-based
          hits = [
            f"{number}: {text.strip()}"
            for number, text in enumerate(handle, start=1)
            if string in text
          ]
        # only files with matches get an entry
        if hits:
          returnDict[file_path] = "\n".join(hits)
      except (UnicodeDecodeError, PermissionError):
        # Files that can't be read due to encoding or permission issues are silently skipped
        pass

#### END main logic funcs

#### START main processing flow

# ToDo list from bash logstring: (delete when completed)
# LOGSTRING="$LOGSTRING|SERVICE=$SERVICE"
# LOGSTRING="$LOGSTRING|PY=$PY"
# LOGSTRING="$LOGSTRING|PYVERS=$PYVERSION"
# LOGSTRING="$LOGSTRING|PYCOUNT=$PYCOUNT"
# LOGSTRING="$LOGSTRING|PYREQ=$PYREQ"
# LOGSTRING="$LOGSTRING|PYALA=$PYALA"

logger.info("args were "+str(parser.parse_args()))
# log anything we've determined above
logger.info(f"OS family determined as {osrID}")
logger.info(f"OS Major Version={osMaj}")
logger.info(f"OS Minor Version={osMin}")
# Oldest major release, per known OS family, that is not reported as "old".
# A family missing from this table is reported as untested instead.
minMajorByFamily = {"fedora": 8, "suse": 15, "debian": 20, "azurelinux": 3}
osFamOK = osrID in minMajorByFamily
osOld = osFamOK and osMaj < minMajorByFamily[osrID]

# the two findings share the 'osSup' key; at most one of them can apply
if osOld:
  logger.warning(f"OS family detected as {osrID} with major version of {osMaj} - this OS is too old too be reliably tested")
  findings['osSup']={'description': 'OS is Old', 'status': f"OS Family:{osrID} with Major Release:{osMaj} is too old to be reliably tested"}
if not osFamOK:
  logger.warning(f"Unsupported OS family detected:{osrID}")
  findings['osSup']={'description': 'OS family is minimally or completely untested', 'status': f"OS Family:{osrID}"}

# Seed this script from the values handed over by the bash wrapper
waaServiceIn=bashArgs.get('SERVICE', "waagent.service") # this may differ per-distro, but offer a default
pythonIn=bashArgs.get('PY', "/usr/bin/python3")
# locate the agent executable through the shell's `which`
waaBin=subprocess.check_output("which waagent", shell=True, stderr=subprocess.DEVNULL).decode().strip()
logger.info(f"using waagent location {waaBin}")


# collect every environment variable whose name mentions 'proxy', in any letter case
osEnv=dict(os.environ)
proxyVars = {name: setting for name, setting in osEnv.items() if "proxy" in name.lower()}
# the check is always recorded, a finding only when something was found
if not proxyVars:
  logger.info(f"No proxies found in env")
  checks['proxy']={"check":"proxy", "value":"None Found"}
else:
  logger.info(f"proxy definition found in env: {proxyVars}")
  findings['proxy']={'description': 'ProxyCheck', 'status': f"Found proxy environment vars:\n{proxyVars}"}
  checks['proxy']={"check":"proxy", "value":proxyVars}

# Check services and binaries
checkService(waaServiceIn, package=True)
# Check SSHD, Debian based distros started naming it ssh and launching on connect, sometime before Ubuntu 24.04
# TODO: make this version dependent - ubuntu 24.04+ uses JIT activation of sshd
sshUnit = "ssh.service" if osrID == "debian" else "sshd.service"
checkService(sshUnit, package=True)

validateBin(pythonIn)
# PoC for right now to show what we can do, also because changing SSL can cause problems for extensions talking outside wire/IMDS
validateBin("/usr/bin/openssl")
validateBin(waaBin) # just to create another easy-to-check test
# Pull the versions out of the 'normal' --version output, lower-cased and split into lines
waaVerOut=subprocess.check_output(f"{waaBin} --version", shell=True, stderr=subprocess.DEVNULL).decode().strip().lower().split('\n')
# if the output changes format we'll have to recode this block
# expected output:
#['walinuxagent-2.7.0.6 running on redhat 8.10',
# 'python: 3.6.8',
# 'goal state agent: 2.7.0.6']
waaVer = "0.0.0.0"
waaGoalVer = "0.0.0.0"
# v.v.v with an optional 4th section, since some versions only have 3
versionPattern = re.compile(r'\d+\.\d+\.\d+(\.\d+)?')
for line in waaVerOut:
  verSearch = versionPattern.search(line)
  if not verSearch:
    continue
  # string #1 carries the agent version, string #3 the goal state version
  if "walinuxagent" in line:
    waaVer = verSearch.group(0)
  elif "goal" in line:
    waaGoalVer = verSearch.group(0)
# log the check
checks["waaVersion"]={
    'description': 'Agent component versions',
    'check': 'waaVersion',
    'value': f"WAA:{waaVer}, Goal:{waaGoalVer}",
    'type': 'config'
    }
logger.info(f"Found agent:{waaVer} and extension handler: {waaGoalVer}")
# matching versions are a 'finding' - these will only match if autoUpg is false or the package is VERY new so likely from source
if waaGoalVer == waaVer:
  logger.info(f"PA and Goal match version {waaVer} - this is probably bad!")
  findings['waaVers']={'description': 'Agent/Goal versions', 'status': f"Agent version and goal state match = {waaVer} - this is unlikely"}

## turn service/bins checks into 'checks' and 'findings'
### Binaries
#### string for the console report
binReportString=""
# directory names that are not "normal" places for a distro-packaged binary
questionableDirs={"local", "opt", "home"}
for binName in bins:
  exe=bins[binName]['exe']
  repo=bins[binName]['repo']
  checks[exe] = {'check': exe,
                 'description': f"Binary check of {exe}",
                 'value': f"Package:{bins[binName]['pkg']}, source:{repo}"
                 }
  # check for alarms in the binaries and create findings as needed
  # - does the path run through a questionable directory - local, home, opt
  #   Whole directory names are compared (the file name itself is excluded), so something
  #   like /usr/bin/optipng is not flagged just because it contains "opt"
  if ( questionableDirs.intersection(exe.split("/")[:-1]) ):
    # this is bad, create a findings from this check
    findings[f"bp:{exe}"]={
      'description': f"binpath:{exe}",
      'status': "Path includes questionable directories",
      'type': "bin"
    }
    logger.warning(f"Checking path: {exe} found in a non-standard location")
    binReportString+=f"{cYellow(exe)} => check location\n"
  # - is the repository uncommon
  repoBad=False
  if osrID == "debian":
    # check if the repository is expected, this should usually say "Origin: Ubuntu"
    # We are blissfully ignoring *actual* Debian - which itself would be a cause for concern
    repoBad = "Origin: Ubuntu" not in repo
  elif ( osrID == "fedora" or osrID == "azurelinux" ) :
    # Bad when the 'repo' field includes the error indicator 'fail', or when the repository name
    #   is none of: @System or anaconda (initial install for RHEL or AL), 'rhui', 'azurelinux',
    #   or AppStream - which is ok-ish
    knownRepoMarkers=("@System", "anaconda", "rhui", "AppStream", "azurelinux")
    repoBad = "fail" in repo or not any(marker in repo for marker in knownRepoMarkers)
  elif osrID == "suse":
    # only 'SLE-Module' repositories are accepted here
    # NOTE(review): the earlier comment also mentioned 'SUSE' - confirm whether that should be accepted too
    repoBad = "SLE-Module" not in repo
  # all distro-specific checks finished, report if needed
  if ( repoBad ):
    findings[f"bs:{exe}"]={
      'description': f"binsource:{exe}",
      'status': f"Binary came from unusual source: {repo}",
      'type': "bin"
    }
    logger.warning(f"Checking {exe} found to be sourced from the repo {repo}")
    binReportString+=f"{exe} => {cRed(repo)} - verify repository\n"
if (len(binReportString) == 0 ):
  binReportString=cGreen("-- No issues with checked binaries")
  logger.info("No concerns found with binary checks")
### Services/Units
# Two independent findings per service: not in a 'running' state, and not 'enabled'
svcReportString=""
for svcName in services:
  svcEntry=services[svcName]
  unit=svcEntry['svc']
  runState=svcEntry['status']
  if "running" not in runState:
    findings[f"ss:{unit}"]={
      'description': f"service:{unit}",
      'status': f"Service not in 'running' state: {runState}",
      'type': "svc"
    }
    logger.warning(f"Checking {unit} found in state {runState}")
    svcReportString+=f"{unit} => {cRed(runState)} - check logs\n"
  enableState=svcEntry['config']
  if "enabled" not in enableState:
    findings[f"sc:{unit}"]={
      'description': f"service:{unit}",
      'status': f"Service not enabled: {enableState}",
      'type': "svc"
    }
    logger.warning(f"Checking {unit} not enabled: {enableState}")
    svcReportString+=f"{unit} => {cRed(enableState)} - check config\n"
# nothing appended above means nothing was wrong
if not svcReportString:
  svcReportString=cGreen("-- No issues with checked services")
  logger.info("No concerns found with service checks")

## Early version report code
  # print(f"Analysis of unit : {services[svcName]['svc']}:")
  # print(f"  Owning pkg     : {services[svcName]['pkg']}" )
  # print(f"  Repo for pkg   : {services[svcName]['repo']}" )
  # print( "  run state      : "+colorString(services[svcName]['status'], redVal="dead", greenVal="active"))
  # print( "  config state   : "+colorString(services[svcName]['config'], redVal="disabled", greenVal="enabled"))

# Connectivity checks
# Each probe always records an entry in 'checks'; a 'findings' entry is added only on failure.
## Wire server
wireCheck=checkHTTPURL(f"http://{wireIP}/?comp=versions")
checks['wire']={"check":"wire 80", "value":wireCheck}
if wireCheck != 200:
  findings['wire80']={
    'description': 'WireServer:80',
    'status': wireCheck,
    'type': "conn"
  }
  logger.warning(f"Wire server port 80 check returned {wireCheck} - check connectivity")
else:
  logger.info(f"Wire server port 80 check returned OK({wireCheck})")
## Wire server "extension" port
wireExt=isOpen(wireIP,32526)
# the label names the port that is actually probed (32526)
checks['wireExt']={"check":"wire 32526", "value":wireExt}
if not wireExt :
  # the findings key keeps its historical spelling so anything reading it elsewhere still works
  findings['wire23526']={
    'description': 'WireServer:32526',
    'status': wireExt,
    'type': "conn"
  }
  logger.warning(f"Wire server extension port (32526) test returned {wireExt} - check connectivity")
else:
  logger.info(f"Wire server extension port (32526) returned OK({wireExt})")

## IMDS
imdsCheck=checkHTTPURL(f"http://{imdsIP}/metadata/instance?api-version=2021-02-01")
# plain http is used, so the label says port 80
checks['imds']={"check":"imds 80", "value":imdsCheck}
if imdsCheck != 200:
  findings['imds']={
    'description': 'IMDS',
    'status': imdsCheck,
    'type': "conn"
  }
  logger.warning(f"IMDS port 80 check returned {imdsCheck} - check connectivity")
else:
  logger.info(f"IMDS port 80 check returned OK({imdsCheck})")

# Secondary test for ext. handler version/auto upgrade
# if the wire port state is 200(OK), query the wireserver for the latest goalstate (ext. handler) and check against the current goal state
def _wireXMLText(xmlDoc, xpath):
  # Parse xmlDoc and return the text of the element at xpath.
  # Raises ValueError with a readable message when the element (or its text) is missing.
  node = ElementTree.fromstring(xmlDoc).find(xpath)
  if node is None or node.text is None:
    raise ValueError(f"'{xpath}' missing from wire server response")
  return node.text

if wireCheck == 200:
  # We only use these modules in here - so far
  import xml.etree.ElementTree as ET
  from urllib.parse import urlparse
  # set up front, because the except/finally parts below report it even when a request fails early
  wireGSVersion="unknown"
  # one connection is reused for all three requests; the timeout keeps a stalled server from hanging the script
  conn = http.client.HTTPConnection(wireIP, timeout=10)
  try:
    # get the best API version *from the wire server
    endpoint="/?comp=versions"
    headers = {
      "Accept": "application/xml",  # Requesting XML response
      "User-Agent": "VM assist"  # Optional, helps identify the client
    }
    conn.request("GET", endpoint, headers=headers)
    response = conn.getresponse()
    xmlResp=response.read()
    apiVers=_wireXMLText(xmlResp, "./Preferred/Version")

    # Find the URLs for the different bits of the goal state
    endpoint="/machine/?comp=goalstate"
    headers = {
      "x-ms-version": apiVers,
      "Accept": "application/xml",  # Requesting XML response
      "User-Agent": "PythonTestHarness"  # Optional, helps identify the client
    }
    conn.request("GET", endpoint, headers=headers)
    response = conn.getresponse()
    xmlResp=response.read().decode()
    extConfURL=_wireXMLText(xmlResp, "./Container/RoleInstanceList/RoleInstance/Configuration/ExtensionsConfig")

    # only path and query are needed, the request goes over the connection that is already open
    parsedURL=urlparse(extConfURL)
    endpoint = parsedURL.path + "?" + parsedURL.query
    conn.request("GET", endpoint, headers=headers)
    response = conn.getresponse()
    xmlResp=response.read().decode()
    wireGSVersion=_wireXMLText(xmlResp, "./GuestAgentExtension/GAFamilies/GAFamily/Version")

    if wireGSVersion != waaGoalVer:
      findings['waaUpgStat']={'status': f"not up to date - Local:{waaGoalVer} Wire:{wireGSVersion}", 'description':"GoalState version mismatch to wireserver"}
  except Exception as goalErr:
    # Best effort check: any network, HTTP or XML problem becomes a finding instead of ending the script
    logger.warning(f"Goal state version check against the wire server failed: {goalErr}")
    findings['waaUpgStat']={'status': "failed during testing", 'description':f"Could not compare GoalState version on VM ({waaGoalVer}) with wire server ({wireGSVersion}): {goalErr}"}
  finally:
    conn.close()
    checks['waaUpgStat']={"check":"GoalVersion", "description":"Checking Goal State version against wire server", "value":wireGSVersion}
else:
  # flag that we skipped wireserver capability checks due to failing connectivity checks
  findings['waaUpgStat']={'status': "skipped", 'description':"Did not check GoalState version on wire server"}
  
  
# OS/config checks
## Agent config
# Ask the agent to print its effective configuration; the parsing below expects one "Key = Value" setting per line
waaConfigOut=subprocess.check_output(f"{waaBin} --show-configuration", shell=True, stderr=subprocess.DEVNULL).decode().strip().split('\n')
waaConfig={}
# put all output from the config command into a KVP
for line in waaConfigOut:
  # partition() never raises: a blank line, or stray text without an '=', is skipped instead of
  #   aborting the whole run on a failed tuple unpack
  key, sep, value = line.partition('=')
  if not sep:
    continue
  waaConfig[key.strip()] = value.strip()

# A setting missing from the agent output is recorded as 'undefined'. That is not 'True', so it is still
#   raised as a finding below rather than killing the script with a KeyError before any report is printed
## Extension handling
checks['waaExt']={"check":"WAA Extension", "value":waaConfig.get('Extensions.Enabled', 'undefined')}
if ( checks['waaExt']['value'] != 'True' ):
  findings['waaExt']={'status': checks['waaExt']['value'], 'description':"Extensions are disabled in WAA config"}
  logger.warning(f"Extensions potentially disabled: {checks['waaExt']['value']}")
else:
  logger.info(f"Extensions enabled in waagent config {checks['waaExt']['value']}")
## Agent (extension handler) auto-update
checks['waaUpg']={"check":"WAA AutoUpgrade", "value":waaConfig.get('AutoUpdate.Enabled', 'undefined')}
if ( checks['waaUpg']['value'] != 'True' ):
  findings['waaUpg']={'status': checks['waaUpg']['value'], 'description':"Agent extension handler auto-upgrade is disabled in WAA config"}
  logger.warning(f"Agent(ext handler) auto-update possibly disabled: {checks['waaUpg']['value']}")
else:
  logger.info(f"Agent(ext handler) auto-update enabled in waagent config {checks['waaUpg']['value']}")

# Checks against disks and objects
## results of disk space checks
### start from the 'no problems' record; its 'none' key is the marker that nothing has been flagged yet
checks['fullFS']={"check":"fullFS", "description": f"filesystem util over {fullPercent}%", "none":f"No filesystems over {fullPercent}% util"}
diskFind=""
## device 'id' of the filesystem holding /var/lib/waagent, compared against every mount below for the 'noexec' check
vlwaDev=os.stat("/var/lib/waagent").st_dev

# only the filesystem types named in the -t list are requested; --pairs yields one line of KEY="value" fields per mount
findmnt=subprocess.check_output("findmnt --evaluate -nb -o TARGET,SOURCE,FSTYPE,OPTIONS,USE% --pairs -t=ext2,ext3,ext4,btrfs,xfs,vfat", shell=True, stderr=subprocess.DEVNULL).decode().strip().split("\n")

# One dict per findmnt line, with the surrounding quotes and any '%' stripped from every value.
# this was initially done in psutils:
#  mounts = psutil.disk_partitions()
#  but was found that certain distros do not include psutils in their marketplace images, so re-wrote with generic python code
mounts=[
  {col: raw.strip('"%') for col, raw in (field.split('=',1) for field in row.split())}
  for row in findmnt
]

for mnt in mounts:
  logger.info(f"Checking {mnt['SOURCE']} mounted at {mnt['TARGET']}")
  # the following hack brought to you by SLES, where USE% is instead USE_PCT; fall back to 0 if neither is present
  usedPct = mnt['USE%'] if 'USE%' in mnt else mnt.get('USE_PCT', 0)

  if int(usedPct) >= fullPercent:
    logger.warning(f"Filesystem utilization for {mnt['TARGET']} is over {fullPercent}: {usedPct}")
    # first hit: swap the 'default empty set' record in 'checks' for one pointing at the findings
    if 'none' in checks["fullFS"]:
      checks['fullFS']={'check': 'fullFS', 'description':f'Look for filesystems utilized more than {fullPercent}','value':'see findings for details'}
      findings['fullFS']={}
    # the finding is created by the first full filesystem, later ones are appended to its status string
    if 'status' not in findings['fullFS']:
      findings['fullFS']={
        'description': f"Filesystems over{fullPercent}",
        'status': f"{mnt['TARGET']}:{usedPct}",
        'type':'os'
      }
    else:
      findings['fullFS']['status'] += f", {mnt['TARGET']}:{usedPct}"

  # only the mount that holds /var/lib/waagent gets its options inspected for 'noexec'
  if os.stat(mnt['TARGET']).st_dev != vlwaDev:
    continue
  logger.info(f"Found /var/lib/waagent based in filesystem {mnt['TARGET']} on device {mnt['SOURCE']}, checking mount options")
  # record that the check was performed, whatever the outcome
  checks['noexec']={
    'description': f"Checking mount options for noexec on {mnt['SOURCE']}",
    'check': 'noexec',
    'value': mnt['TARGET']
  }
  # add the 'findings' data only if it's bad
  if 'noexec' in mnt['OPTIONS']:
    logger.error(f"mountpoint {mnt['TARGET']} mounted with 'noexec'")
    findings['noexec']={
      'description':"Found /var/lib/waagent with noexec bit set",
      'status':True
    }

## Networking
# Collect every interface along with its addresses
ints=(getInterfaces())
# There is no reliable way to tell whether eth0 is static or dhcp, so instead search the usual
#   networking config directories for the address currently on eth0.
#   Any file containing that IP is reported as a possible static configuration.

### Checks for defined MAC addresses or IPs - pertinent if someone hard coded configs
# placeholder addresses, used for the searches when eth0 is missing
eth0MAC="de:ad:be:ef:4a:11"
eth0IP="128.0.128.255"
if 'eth0' not in ints:
  # without eth0 the checks below, and possibly the system itself, are in doubt, so log it
  #   loudly and raise a 'finding' (carrying the placeholder values)
  logger.error(f"Could not find a definition for eth0 - is networking sound?")
  findings['noETH0']={
    'description':"Could not locate an active eth0",
    'status': f"eth0 - MAC:{eth0MAC}|IP{eth0IP}",
    'type': 'os'
  }
else:
  # eth0 exists: use its real MAC and IP for the file searches
  eth0MAC = ints['eth0']['mac']
  eth0IP = ints['eth0']['ip']
  logger.info(f"Found {eth0MAC} on eth0, using this for config checks")

filesWithIP={}
# all of /etc would be a lot to scan and would catch unrelated service configs, so stick to the
#   usual network dirs
for netDir in ("/etc/sysconfig", "/etc/netplan"):
  searchDirForString(netDir, filesWithIP, eth0IP)
if not filesWithIP:
  checks['IPs']={"check":"Static IP addresses", "value":"No IP addresses found in configs"}
  logger.info(f"Did not find IP configured on eth0 listed in any config files")
else:
  checks['IPs']={"check":"Static IP addresses", "value":"IP found in files- see findings"}
  # comma separated list of every file that mentions the IP
  fileString=", ".join(f"{foundFile}" for foundFile in filesWithIP)
  # create the 'findings' entry
  findings['staticIP']={'description': 'eth0 IP found in files', 'status': fileString}
  logger.warning(f"Found eth0 IP:{eth0IP} defined in a config file, could be static - check findings report")

filesWithMACs={}
# /etc as a whole is not searched because certain SSL configs have "MAC looking strings"; these two
#   directories should cover all common distros
for netDir in ("/etc/sysconfig", "/etc/netplan"):
  searchDirForMAC(netDir, filesWithMACs,eth0MAC)
if not filesWithMACs:
  checks['MACs']={"check":"MAC addresses", "value":"No MAC addresses found in configs"}
  logger.info(f"Did not find MAC on eth0 listed in any config files")
else:
  checks['MACs']={"check":"MAC addresses", "value":"MACs found - see findings"}
  # comma separated "file=>first entry recorded for that file" list
  fileString=", ".join(f"{foundFile}=>{filesWithMACs[foundFile][0]}" for foundFile in filesWithMACs)
  # create the 'findings' entry
  findings['badMAC']={'description': 'MACs found', 'status': fileString}
  logger.warning(f"Found eth0 MAC:{eth0MAC} defined in a config file - check findings report")

# END ALL CHECKS

# START OUTPUT
print("------ vmassist.py results ------")
print("Please see https://aka.ms/vmassistlinux for guidance on the information in the above output")
print(f"OS family        : {osrID}")

# Items that are always reported, starting with the WAA service record held in services[waaServiceIn]
agentSvc=services[waaServiceIn]
print(f"Agent service    : {agentSvc['svc']}")
print(f"=> status        : {colorString(agentSvc['status'])}")
print(f"=> config state  : {colorString(agentSvc['config'], redVal='disabled', greenVal='enabled')}")
print(f"=> source pkg    : {agentSvc['pkg']}")
print(f"=> repository    : {agentSvc['repo']}")
print(f"Agent version from running {waaBin} --version")
print(f"=> Main version  : {waaVer}")
# colour arguments: the agent's own version is passed as the red value, the wire server's goal state version as the green value
print(f"=> Goal state    : {colorString(waaGoalVer,redVal=waaVer,greenVal=wireGSVersion)}")

# Connectivity results for the wire server and IMDS
print("Wire Server")
print(f"  port 80        : {colorString(checks['wire']['value'], redVal='404', yellowVal='timeout', greenVal='200')}")
print(f"  port 32526     : {colorString(checks['wireExt']['value'], redVal='false', greenVal='true')}")
print(f"IMDS             : {colorString(checks['imds']['value'], redVal='404', yellowVal='timeout', greenVal='200')}")

# Disk is always mentioned: the default 'no problems' text while the 'none' marker is still in
#   place, otherwise the list of over-threshold filesystems from the findings
diskSummary = checks['fullFS']['none'] if 'none' in checks["fullFS"] else findings['fullFS']['status']
print(f"Disk util > {fullPercent}%  : {diskSummary}")

# TODO: clean up and verify color on all core checks - wire server, waagent status
# TODO: optionally output all 'checks' objects
# Report strings for binaries and services were assembled earlier; print them as-is
print("- Binary check results:")
print(binReportString)
print("- Service check results:")
print(svcReportString)
# TODO: parse findings list
print("- Findings from all checks:")
if not findings:
  print(cGreen("-- No notable findings!"))
else:
  print(cYellow("-- All Findings (may duplicate Service and Binary checks) ---"))
  for detail in findings.values():
    print(f"--- {detail['description']} : {detail['status']}")
  print(cYellow("-- END Findings ---"))

# TODO: add the core checks not covered in findings, bins, and services, to the logs
### Raw data goes to the log only - nothing here is sent to the console
logger.info("--- verbose output of data structures ---")
# each entry pairs the heading line with the structure dumped beneath it
for heading, structure in (
  ("----- Binary check data structure:", bins),
  ("----- Service checks data structure:", services),
  ("----- All \"checks\" data structure:", checks),
  ("----- All \"findings\" data structure:", findings),
):
  logger.info(heading)
  logger.info(str(structure))
logger.info("--- END data structures ---")

# # DEBUG
# # Per-binary and per-service detail, printed only when -v/--verbose was given at least once
if args.verbose > 0:
  print("--- Verbose binary check output")
  for binInfo in bins.values():
    print(f"Analysis of      : {binInfo['exe']}:")
    print(f"  Owning pkg     : {binInfo['pkg']}" )
    print(f"  Repo for pkg   : {binInfo['repo']}" )
  print("--- Verbose service check output")
  for svcInfo in services.values():
    print(f"Analysis of unit : {svcInfo['svc']}:")
    print(f"  Owning pkg     : {svcInfo['pkg']}" )
    print(f"  Repo for pkg   : {svcInfo['repo']}" )
    # run and config state are coloured: red for dead/disabled, green for active/enabled
    runState=colorString(svcInfo['status'], redVal="dead", greenVal="active")
    configState=colorString(svcInfo['config'], redVal="disabled", greenVal="enabled")
    print( "  run state      : "+runState)
    print( "  config state   : "+configState)
  print("--- END Verbose output")
# END DEBUG

# closing banner on the console, and a final marker in the log
print("------ END vmassist.py output ------")
logger.info("Python ended")
#if ( args.debug ):
# print("------------ DATA STRUCTURE DUMP ------------")
# # For development testing, These are the last pprint calls
# from pprint import pprint
# print("bins")
# pprint(bins)
# print("services")
# pprint(services)
# print("findings")
# pprint(findings)
# print("checks")
# pprint(checks)
# print("args")
# pprint(args)
# print("---------- END DATA STRUCTURE DUMP ----------")