internal/redirects/matching.go (87 lines of code) (raw):
package redirects
import (
"fmt"
"net/url"
"regexp"
"strings"
netlifyRedirects "github.com/tj/go-redirects"
"gitlab.com/gitlab-org/labkit/log"
"gitlab.com/gitlab-org/gitlab-pages/internal/feature"
"gitlab.com/gitlab-org/gitlab-pages/internal/utils"
)
var (
	// regexPlaceholderOrSplats reports whether a string contains anything that
	// needs templating: a literal `*`, or a colon followed by one or more
	// ASCII letters (matched case-insensitively), e.g. `:year`.
	regexPlaceholderOrSplats = regexp.MustCompile(`(?i)\*|:[a-z]+`)

	// regexMultipleSlashes matches a run of two or more forward slashes together
	// with the single character that precedes it, provided that character is not
	// a colon. The preceding character is captured as group 1 so a replacement
	// can keep it, and the colon exclusion leaves a scheme separator such as
	// `https://` untouched.
	regexMultipleSlashes = regexp.MustCompile(`([^:])//+`)
)
// matchesRule reports whether the rule's "from" pattern matches the requested URL.
//
// The host part is checked first via utils.MatchHost. A request whose path is
// identical to the rule's "from" path (after normalizePath) is answered
// directly; everything else is delegated to matchesRuleWithPlaceholderOrSplats.
//
// For example, a "from" pattern such as
//
//	/a/*/url/with/:placeholders
//
// matches requests like
//
//	/a/nice/url/with/text
//	/a/super/extra/nice/url/with/matches
//
// When the first return value is `true`, the second return value is the path to
// redirect/rewrite to: the rule's "to" path with any placeholders filled in from
// the requested URL. When it is `false`, the second return value is empty.
func matchesRule(rule *netlifyRedirects.Rule, originalURL *url.URL) (bool, string) {
	hostOK, from := utils.MatchHost(originalURL, rule.From)
	if !hostOK {
		return false, ""
	}

	requested := originalURL.Path
	target := rule.To

	// Fast path: an exact match lets us skip building and compiling a regex.
	// It is only safe when the "to" path has nothing to template; otherwise we
	// would redirect/rewrite to a URL containing a literal `:placeholder`.
	exact := normalizePath(from) == normalizePath(requested)
	if exact && !regexPlaceholderOrSplats.MatchString(target) {
		return true, target
	}

	return matchesRuleWithPlaceholderOrSplats(requested, from, target, rule.Status)
}
// matchesRuleWithPlaceholderOrSplats reports whether requestPath matches the
// rule's "from" path when that path may contain placeholders and splats.
//
// For example, a "from" path such as
//
//	/a/*/url/with/:placeholders
//
// matches request paths like
//
//	/a/nice/url/with/text
//	/a/super/extra/nice/url/with/matches
//
// Matching is case-insensitive and tolerates trailing slashes on the request.
//
// When the first return value is `true`, the second return value is toPath with
// its placeholders expanded from the request. It returns `false` and an empty
// string when the placeholders feature flag is disabled, when fromPath yields no
// regex segments, when the generated regex fails to compile (a warning is
// logged; status is used only in that log entry), or when the request does not
// match.
func matchesRuleWithPlaceholderOrSplats(requestPath string, fromPath string, toPath string, status int) (bool, string) {
	// Everything below handles placeholders and splats, which are gated behind
	// the FF_ENABLE_PLACEHOLDERS feature flag.
	if !feature.RedirectsPlaceholders.Enabled() {
		return false, ""
	}

	segments := convertToRegexSegments(fromPath)
	if len(segments) == 0 {
		return false, ""
	}

	pattern := `(?i)^` + strings.Join(segments, "") + `/*$`
	matcher, err := regexp.Compile(pattern)
	if err != nil {
		fields := log.Fields{
			"fromRegexString": pattern,
			"rule.From":       fromPath,
			"rule.To":         toPath,
			"rule.Status":     status,
			"path":            requestPath,
		}
		log.WithFields(fields).WithError(err).Warnf("matchesRule generated an invalid regex: %q", pattern)

		return false, ""
	}

	// Turn the "to" path into a regexp expansion template.
	// NOTE(review): regexPlaceholderReplacement is defined elsewhere; presumably
	// this rewrites each `:name` token into a `${name}` reference — confirm.
	template := regexPlaceholderReplacement.ReplaceAllString(toPath, `${$placeholder}`)

	captures := matcher.FindStringSubmatchIndex(requestPath)
	if captures == nil {
		return false, ""
	}

	expanded := matcher.ExpandString(nil, template, requestPath, captures)

	// An empty capture can leave consecutive slashes behind: a "to" path like
	// `foo/:splat/bar` becomes `foo//bar` when the splat matched nothing.
	// Collapse each such run to a single slash. The regex captures the character
	// before the run (anything but a colon) and `$1` puts it back.
	expanded = regexMultipleSlashes.ReplaceAll(expanded, []byte("$1/"))

	return true, string(expanded)
}
// convertToRegexSegments splits path on "/" and translates every non-empty
// segment into a regex fragment:
//
//   - a segment matched by regexSplat becomes an optional `splat` named group
//     that captures the rest of the path;
//   - a segment matched by regexPlaceholder becomes a named group (named after
//     the segment with its first colon removed) that captures one path segment;
//   - any other segment is matched literally, with regex metacharacters escaped.
//
// Each fragment carries its own leading-slash matcher, so the fragments can be
// joined with an empty separator. The result is nil when path contains no
// non-empty segments.
func convertToRegexSegments(path string) []string {
	var fragments []string

	for _, part := range strings.Split(path, "/") {
		switch {
		case part == "":
			// Leading, trailing and repeated slashes yield empty parts; ignore them.
		case regexSplat.MatchString(part):
			fragments = append(fragments, `(/(?P<splat>.*))?/*`)
		case regexPlaceholder.MatchString(part):
			name := strings.Replace(part, ":", "", 1)
			fragments = append(fragments, fmt.Sprintf(`/+(?P<%s>[^/]+)`, name))
		default:
			fragments = append(fragments, "/+"+regexp.QuoteMeta(part))
		}
	}

	return fragments
}
// match scans the configured rules in order and returns:
//  1. the first valid redirect or rewrite rule that matches originalURL, and
//  2. the URL to redirect/rewrite to.
//
// Rules that fail validateRule are skipped, and only the first
// cfg.MaxRuleCount rules are considered. If nothing matches, match returns
// `nil` and an empty string.
func (r *Redirects) match(originalURL *url.URL) (*netlifyRedirects.Rule, string) {
	for idx := range r.rules {
		if idx >= cfg.MaxRuleCount {
			// Rule limit reached: the remaining rules are not processed.
			break
		}

		// Work on a per-iteration copy so the pointer handed out below does not
		// alias the loop state (gosec G601: implicit memory aliasing in for loop).
		candidate := r.rules[idx]

		if err := validateRule(candidate); err != nil {
			continue
		}

		matched, target := matchesRule(&candidate, originalURL)
		if matched {
			return &candidate, target
		}
	}

	return nil, ""
}