internal/ip/ip.go (243 lines of code) (raw):

// Copyright (c) 2016 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

// Package ip defines the QoS DSCP values used by Arachne and a raw-socket
// Conn for sending and receiving Arachne echo packets.
package ip

import (
	"fmt"
	"net"
	"syscall"

	d "github.com/uber/arachne/defines"
	"github.com/uber/arachne/internal/log"

	"github.com/google/gopacket"
	"github.com/google/gopacket/layers"
	"github.com/pkg/errors"
	"go.uber.org/zap"
	"golang.org/x/net/bpf"
)

// DSCPValue represents a QoS DSCP value.
//
// The constants below hold the value as it appears in the TOS / Traffic Class
// byte, i.e. the 6-bit code point shown in each trailing comment shifted left
// by two bits (for example 001010 -> 40).
type DSCPValue uint8

// QoS DSCP values mapped to TOS.
//
// NOTE(review): DSCPTier0Low is annotated "EF", but its bit pattern 101000 is
// the CS5 code point; EF is 101110 (TOS 184). Confirm which class is intended
// before relying on the label.
const (
	DSCPBeLow     DSCPValue = 0   // 000000 BE
	DSCPBeHigh    DSCPValue = 4   // 000001 BE
	DSCPBulkLow   DSCPValue = 40  // 001010 AF11
	DSCPBulkHigh  DSCPValue = 56  // 001110 AF13
	DSCPTier2Low  DSCPValue = 72  // 010010 AF21
	DSCPTier2High DSCPValue = 88  // 010110 AF23
	DSCPTier1Low  DSCPValue = 104 // 011010 AF31
	DSCPTier1High DSCPValue = 120 // 011110 AF33
	DSCPTier0Low  DSCPValue = 160 // 101000 EF
	DSCPNc6       DSCPValue = 192 // 110000 CS6
	DSCPNc7       DSCPValue = 224 // 111000 CS7
)

// GetDSCP holds all the DSCP values in a slice.
var GetDSCP = DSCPSlice{ DSCPBeLow, DSCPBeHigh, DSCPBulkLow, DSCPBulkHigh, DSCPTier2Low, DSCPTier2High, DSCPTier1Low, DSCPTier1High, DSCPTier0Low, DSCPNc6, DSCPNc7, } // DSCPSlice represents a slice of DSCP values. type DSCPSlice []DSCPValue // Pos returns the index of the DSCP value in the DSCPSlice, not the actual DSCP value. func (slice DSCPSlice) Pos(value DSCPValue, logger *log.Logger) uint8 { for p, v := range slice { if v == value { return uint8(p) } } logger.Warn("QoS DSCP value not matching one of supported classes", zap.Any("DSCP_value", value), zap.String("supported_classes", fmt.Sprintf("%v", slice))) return 0 } // Text provides the text description of the DSCPValue. func (q DSCPValue) Text(logger *log.Logger) string { switch q { case DSCPBeLow: return "BE low" case DSCPBeHigh: return "BE high" case DSCPBulkLow: return "AF11" case DSCPBulkHigh: return "AF113" case DSCPTier2Low: return "AF21" case DSCPTier2High: return "AF23" case DSCPTier1Low: return "AF31" case DSCPTier1High: return "AF33" case DSCPTier0Low: return "EF" case DSCPNc6: return "CS6" case DSCPNc7: return "CS7" default: logger.Error("unhandled QoS DSCP value", zap.Any("DSCP_value", q)) return "unknown" } } type recvSource struct { fd int } // Conn represents the underlying functionality to send and recv Arachne echo requests. type Conn struct { SrcAddr net.IP AF int sendFD int recvSrc recvSource ListenPort layers.TCPPort } // Recvfrom mirrors the syscall of the same name, operating on a recvSource file descriptor. func (r *recvSource) Recvfrom(b []byte) (int, syscall.Sockaddr, error) { return syscall.Recvfrom(r.fd, b, 0) } // Close is used to close a Conn's send file descriptor and recv source file desciptor. 
func (c *Conn) Close(logger *log.Logger) { if err := syscall.Close(c.recvSrc.fd); err != nil { logger.Error("error closing Conn recv file descriptor", zap.Error(err)) } if err := syscall.Close(c.sendFD); err != nil { logger.Error("error closing Conn send file descriptor", zap.Error(err)) } } // NextPacket gets bytes of next available packet, and returns them in a decoded gopacket.Packet func (c *Conn) NextPacket() (gopacket.Packet, error) { buf := make([]byte, d.MaxPacketSizeBytes) if _, _, err := c.recvSrc.Recvfrom(buf); err != nil { return nil, err } switch c.AF { case d.AfInet: return gopacket.NewPacket(buf, layers.LayerTypeIPv4, gopacket.DecodeOptions{Lazy: true}), nil case d.AfInet6: return gopacket.NewPacket(buf, layers.LayerTypeIPv6, gopacket.DecodeOptions{Lazy: true}), nil } return nil, errors.New("no valid decoder available for packet") } func ipToSockaddr(family int, ip net.IP, port int) (syscall.Sockaddr, error) { switch family { case syscall.AF_INET: if len(ip) == 0 { ip = net.IPv4zero } ip4 := ip.To4() if ip4 == nil { return nil, &net.AddrError{Err: "non-IPv4 address", Addr: ip.String()} } sa := &syscall.SockaddrInet4{Port: port} copy(sa.Addr[:], ip4) return sa, nil case syscall.AF_INET6: if len(ip) == 0 || ip.Equal(net.IPv4zero) { ip = net.IPv6zero } ip6 := ip.To16() if ip6 == nil { return nil, &net.AddrError{Err: "non-IPv6 address", Addr: ip.String()} } sa := &syscall.SockaddrInet6{Port: port} copy(sa.Addr[:], ip6) return sa, nil } return nil, &net.AddrError{Err: "unhandled AF family", Addr: ip.String()} } // SendTo operates on a Conn file descriptor and mirrors the Sendto syscall. func (c *Conn) SendTo(b []byte, to net.IP) error { sockAddr, err := ipToSockaddr(c.AF, to, 0) if err != nil { return err } return syscall.Sendto(c.sendFD, b, 0, sockAddr) } // getSendSocket will create a raw socket for sending data. 
func getSendSocket(af int) (int, error) { fd, err := syscall.Socket(af, syscall.SOCK_RAW, syscall.IPPROTO_RAW) if err != nil { return 0, err } if err = syscall.SetsockoptInt(fd, syscall.IPPROTO_IP, syscall.IP_HDRINCL, 1); err != nil { return 0, err } return fd, nil } func getBPFFilter(ipHeaderOffset uint32, listenPort uint32) ([]bpf.RawInstruction, error) { // The Arachne BPF Filter reads values starting from the TCP Header by adding ipHeaderOffset to all // offsets. It filters for packets of destination port equal to listenPort, or src port equal to HTTP or HTTPS ports // and for packets containing a TCP SYN flag (SYN, or SYN+ACK packets) return bpf.Assemble([]bpf.Instruction{ bpf.LoadAbsolute{Off: ipHeaderOffset + 2, Size: 2}, // Starting from TCP Header, load DstPort (2nd word) bpf.JumpIf{Cond: bpf.JumpEqual, Val: listenPort, SkipTrue: 3}, // Return packet if DstPort is listen Port bpf.LoadAbsolute{Off: ipHeaderOffset, Size: 2}, // Starting from TCP Header, load SrcPort (1st word) bpf.JumpIf{Cond: bpf.JumpEqual, Val: d.PortHTTP, SkipTrue: 1}, // Return packet if SrcPort is HTTP Port bpf.JumpIf{Cond: bpf.JumpEqual, Val: d.PortHTTPS, SkipFalse: 2}, // Discard packet if not HTTPS bpf.LoadAbsolute{Off: ipHeaderOffset + 13, Size: 1}, // Starting from TCP Header, load Flags byte (not including NS bit) bpf.JumpIf{Cond: bpf.JumpBitsSet, Val: 2, SkipTrue: 1}, // AND Flags byte with 00000010 (SYN), and drop packet if 0 bpf.RetConstant{Val: 0}, // Drop packet bpf.RetConstant{Val: 4096}, // Return up to 4096 bytes from packet }) } func getRecvSource(af int, listenPort layers.TCPPort, intf string, logger *log.Logger) (recvSource, error) { var ( rs recvSource ipHeaderOffset uint32 ) fd, err := syscall.Socket(af, syscall.SOCK_RAW, syscall.IPPROTO_TCP) if err != nil { return rs, err } if err = bindToDevice(fd, intf); err != nil { return rs, err } rs.fd = fd switch af { case d.AfInet: ipHeaderOffset = d.IPv4HeaderLength case d.AfInet6: ipHeaderOffset = d.IPv6HeaderLength } 
filter, err := getBPFFilter(ipHeaderOffset, uint32(listenPort)) if err != nil { logger.Warn("Failed to compile BPF Filter", zap.Error(err)) return rs, nil } // Attempt to attach the BPF filter. // This is currently only supported on Linux systems. if err := rs.attachBPF(filter); err != nil { logger.Warn("Failed to attach BPF filter to recvSource. All incoming packets will be processed", zap.Error(err)) } return rs, nil } // NewConn returns a raw socket connection to send and receive packets. func NewConn(af int, listenPort layers.TCPPort, intf string, srcAddr net.IP, logger *log.Logger) *Conn { fdSend, err := getSendSocket(af) if err != nil { logger.Fatal("Error creating send socket", zap.Int("address_family", af), zap.Error(err)) } rs, err := getRecvSource(af, listenPort, intf, logger) if err != nil { logger.Fatal("Error creating recv source", zap.Any("listenPort", listenPort), zap.String("interface", intf), zap.Error(err)) } return &Conn{ SrcAddr: srcAddr, AF: af, sendFD: fdSend, recvSrc: rs, ListenPort: listenPort, } } func getIPHeaderLayerV6(tos DSCPValue, tcpLen uint16, srcIP net.IP, dstIP net.IP) *layers.IPv6 { return &layers.IPv6{ Version: 6, // IP Version 6 TrafficClass: uint8(tos), Length: tcpLen, NextHeader: layers.IPProtocolTCP, SrcIP: srcIP, DstIP: dstIP, } } // GetIPHeaderLayer returns the appriately versioned gopacket IP layer func GetIPHeaderLayer(af int, tos DSCPValue, tcpLen uint16, srcIP net.IP, dstIP net.IP) (gopacket.NetworkLayer, error) { switch af { case d.AfInet: return getIPHeaderLayerV4(tos, tcpLen, srcIP, dstIP), nil case d.AfInet6: return getIPHeaderLayerV6(tos, tcpLen, srcIP, dstIP), nil } return nil, errors.New("unhandled AF family") }