gcpdiag/lint/dataproc/warn_2023_001_job_throttling_too_many.py [30:77]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Filter clauses for the log query; prepare_rule joins them with ' AND '.
# MATCH_STR is defined outside this excerpt.
LOG_FILTER = ['severity=WARNING', f'jsonPayload.message=~"{MATCH_STR}"']

# Results of logs.query(), keyed by project id. Written by prepare_rule and
# read back by run_rule.
logs_by_project = {}
# Names of clusters found in matching log entries (appended by run_rule).
# NOTE(review): despite the name this is a flat module-level list, not keyed
# by project, and nothing visible here clears it between runs - confirm that
# entries cannot leak from one project's run into another's.
clusters_by_project = []


def prepare_rule(context: models.Context):
  """Issue the Dataproc agent log query for the project in `context`.

  The object returned by `logs.query` is stored in `logs_by_project` under the
  project id; `run_rule` later reads its `entries` attribute.

  Args:
    context: supplies the `project_id` used both for the query and as the
      key under which the result is stored.
  """
  project_id = context.project_id
  # All LOG_FILTER clauses must hold, hence the AND.
  combined_filter = ' AND '.join(LOG_FILTER)
  agent_log_query = logs.query(project_id=project_id,
                               resource_type='cloud_dataproc_cluster',
                               log_name='log_id("google.dataproc.agent")',
                               filter_str=combined_filter)
  logs_by_project[project_id] = agent_log_query


def run_rule(context: models.Context, report: lint.LintReportRuleInterface):
  project = crm.get_project(context.project_id)

  # skip entire rule if logging is disabled
  if not apis.is_enabled(context.project_id, 'logging'):
    report.add_skipped(project, 'logging api is disabled')
    return

  clusters = dataproc.get_clusters(context)
  name_to_cluster = {cluster.name: cluster for cluster in clusters}

  if not clusters:
    report.add_skipped(project, 'no clusters found')
    return


  if logs_by_project.get(context.project_id) and \
    logs_by_project[context.project_id].entries:
    for log_entry in logs_by_project[context.project_id].entries:
      # Filter out non-relevant log entries.
      if log_entry['severity'] != 'WARNING' or \
         CLASS_NAME not in get_path(log_entry,
                     ('jsonPayload', 'class'), default='') or \
         MATCH_STR not in get_path(log_entry,
                     ('jsonPayload',  'message'), default=''):
        continue

      cluster_name = get_path(log_entry, ('resource', 'labels', 'cluster_name'),
                              default='')

      if cluster_name and cluster_name not in clusters_by_project:
        clusters_by_project.append(cluster_name)

  for cluster_name in clusters_by_project:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


gcpdiag/lint/dataproc/warn_2023_002_high_system_memory_usage.py [31:78]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Filter clauses for the log query; prepare_rule joins them with ' AND '.
# MATCH_STR is defined outside this excerpt.
LOG_FILTER = ['severity=WARNING', f'jsonPayload.message=~"{MATCH_STR}"']

# Results of logs.query(), keyed by project id. Written by prepare_rule and
# read back by run_rule.
logs_by_project = {}
# Names of clusters found in matching log entries (appended by run_rule).
# NOTE(review): despite the name this is a flat module-level list, not keyed
# by project, and nothing visible here clears it between runs - confirm that
# entries cannot leak from one project's run into another's.
clusters_by_project = []


def prepare_rule(context: models.Context):
  """Query the Dataproc agent log for warnings matching LOG_FILTER.

  Stores whatever `logs.query` returns in `logs_by_project`, keyed by the
  context's project id, so that `run_rule` can inspect its `entries` later.

  Args:
    context: provides the `project_id` to query and to use as the storage key.
  """
  query_kwargs = {
      'project_id': context.project_id,
      'resource_type': 'cloud_dataproc_cluster',
      'log_name': 'log_id("google.dataproc.agent")',
      # Every clause in LOG_FILTER has to match.
      'filter_str': ' AND '.join(LOG_FILTER),
  }
  logs_by_project[context.project_id] = logs.query(**query_kwargs)


def run_rule(context: models.Context, report: lint.LintReportRuleInterface):
  project = crm.get_project(context.project_id)

  # skip entire rule if logging is disabled
  if not apis.is_enabled(context.project_id, 'logging'):
    report.add_skipped(project, 'logging api is disabled')
    return

  clusters = dataproc.get_clusters(context)
  name_to_cluster = {cluster.name: cluster for cluster in clusters}

  if not clusters:
    report.add_skipped(project, 'no clusters found')
    return


  if logs_by_project.get(context.project_id) and \
    logs_by_project[context.project_id].entries:
    for log_entry in logs_by_project[context.project_id].entries:
      # Filter out non-relevant log entries.
      if log_entry['severity'] != 'WARNING' or \
         CLASS_NAME not in get_path(log_entry,
                     ('jsonPayload', 'class'), default='') or \
         MATCH_STR not in get_path(log_entry,
                     ('jsonPayload',  'message'), default=''):
        continue

      cluster_name = get_path(log_entry, ('resource', 'labels', 'cluster_name'),
                              default='')

      if cluster_name and cluster_name not in clusters_by_project:
        clusters_by_project.append(cluster_name)

  for cluster_name in clusters_by_project:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -