def execute()

in gcpdiag/runbook/nat/generalized_steps.py [0:0]


  def execute(self):
    """Checking NATGW received_packets_dropped metric for elevated drops"""
    # NOTE(review): the one-line docstring above is left untouched in case the
    # runbook framework surfaces it to users -- confirm before expanding it.
    #
    # Flow:
    #   1. Query the gateway-level dropped_received_packets_count rate for the
    #      NAT gateway stored under the 'nat_gateway_name' operator key.
    #   2. If the gateway name or the metric data is missing, report an
    #      uncertain outcome and stop.
    #   3. For every returned series whose first value is >= 1, set the
    #      'natgw_rcv_pkt_drops' operator key, report an uncertain outcome and
    #      run a second, per-instance query to list VMs that also show drops.
    #   4. Otherwise report an OK outcome for the series.
    project_id = op.get(flags.PROJECT_ID)
    zone = op.get(flags.ZONE)
    vm = gce.get_instance(project_id=project_id,
                          zone=zone,
                          instance_name=op.get(flags.NAME))
    region = utils.region_from_zone(zone)
    gw_name = op.get('nat_gateway_name')

    # Upper bound on how many VMs are listed in the VM-level finding.
    max_reported_vms = 5

    metric_unavailable = ('Could not get dropped_received_packets_count '
                          f'metric for NATGW {gw_name}')

    if not gw_name:
      # Without a gateway name the metric query cannot be scoped.
      op.add_uncertain(vm, metric_unavailable)
      return

    gateway_series = monitoring.query(
        project_id,
        'fetch nat_gateway::router.googleapis.com/nat/dropped_received_packets_count '
        f'| filter (resource.gateway_name == \'{gw_name}\' && resource.region == \'{region}\')'
        '| align rate(5m) | within 5m | group_by [],'
        '[value_dropped_received_packets_count_aggregate:'
        'aggregate(value.dropped_received_packets_count)]')

    if not gateway_series:
      # Previously an empty result ended the step without any outcome; make
      # the missing data visible instead.
      op.add_uncertain(vm, metric_unavailable)
      return

    for series in gateway_series.values():
      # First value of the first returned point of this series.
      gw_drop_rate = series.get('values')[0][0]

      if gw_drop_rate < 1:
        op.add_ok(vm,
                  reason=op.prep_msg(op.SUCCESS_REASON,
                                     nat_gateway_name=gw_name))
        continue

      op.put('natgw_rcv_pkt_drops', True)

      op.add_uncertain(vm,
                       reason=op.prep_msg(op.UNCERTAIN_REASON,
                                          nat_gateway_name=gw_name,
                                          metric_value=gw_drop_rate),
                       remediation=op.prep_msg(op.UNCERTAIN_REMEDIATION))

      # Also check for received packet drops at the VM level.
      # NOTE(review): this query filters a gce_instance resource on
      # resource.gateway_name / resource.region -- confirm those labels exist
      # for this metric; the query text is intentionally left unchanged here.
      vm_series = monitoring.query(
          project_id,
          'fetch gce_instance::compute.googleapis.com/nat/dropped_received_packets_count '
          f'| filter (resource.gateway_name == \'{gw_name}\' '
          f'&& resource.region == \'{region}\')'
          '| align rate(5m)'
          '| every 5m'
          '| group_by [resource.instance_id], '
          '[value_dropped_received_packets_count_aggregate: '
          'aggregate(value.dropped_received_packets_count)]')

      if not vm_series:
        continue

      vm_drop_list = []
      for vm_entry in vm_series.values():
        vm_drop_rate = vm_entry.get('values')[0][0]
        if vm_drop_rate >= 1:
          vm_drop_list.append({
              'instance_id':
                  vm_entry.get('labels', {}).get('resource.instance_id'),
              'rcv_pkt_drp_count':
                  vm_drop_rate
          })
          # Stop once the cap is reached (the old `<= 5` check let a sixth
          # entry through).
          if len(vm_drop_list) >= max_reported_vms:
            break

      if vm_drop_list:
        op.add_uncertain(
            vm,
            reason='Elevated received_packet_drop_count metric noticed '
            f'for following VMs {vm_drop_list}',
            # Built from single-line literals so that no source indentation
            # leaks into the message and the KB link stays in one piece.
            remediation=(
                'VMs could be dropping packets for various reasons; however,\n'
                'the drops are not always indicative of an issue.\n'
                'See more on troubleshooting cloud NAT and reducing the drops '
                'here [1] and [2]:\n'
                'Open a case to GCP Support for justification for the packet '
                'drops.\n'
                '  [1] https://cloud.google.com/nat/docs/troubleshooting\n'
                '  [2] https://cloud.google.com/knowledge/kb/'
                'reduce-received-packets-dropped-count-on-cloud-nat-000006744'))
      else:
        op.add_ok(vm,
                  reason=op.prep_msg(op.SUCCESS_REASON,
                                     nat_gateway_name=gw_name))