in webindex/modules/core/src/main/java/webindex/core/models/URL.java [84:150]
/**
 * Parses a raw URL string into a {@link URL}.
 *
 * <p>Processing steps, in order:
 * <ol>
 * <li>The raw URL is rejected if it contains {@code URI_SEP}.</li>
 * <li>The raw URL is trimmed and must be at least 8 characters long.</li>
 * <li>The trimmed URL must start (case-insensitively) with {@code HTTP_PROTO} or
 * {@code HTTPS_PROTO}; the default port is 80 or 443 respectively.</li>
 * <li>The remainder is split at the first match of {@code URL_SEP_REGEX} into a
 * {@code host[:port]} part and a path part. The path keeps its leading separator character and
 * its original case; when there is no separator the path is {@code "/"}.</li>
 * <li>The {@code host[:port]} part is lower-cased (locale-independently) and split at the first
 * {@code ':'}. An explicit port must parse as an integer in the range 0-65535.</li>
 * <li>The host must be non-empty. Unless {@code isValidIP(host)} is true, the host must be
 * accepted by {@code isValidHost} and the domain is taken from {@code domainFromHost}; for IP
 * hosts the domain is the host itself.</li>
 * </ol>
 *
 * <p>Every rejection is reported via {@code badUrl(false, message)}. NOTE(review): this method
 * relies on {@code badUrl} aborting the parse (presumably by throwing); its definition and the
 * meaning of its boolean argument are outside this block - confirm there.
 *
 * @param rawUrl the URL text to parse; surrounding whitespace is ignored. Passing {@code null}
 *        fails with a {@link NullPointerException} on the first access.
 * @param domainFromHost applied to a non-IP host to obtain the domain stored in the result
 * @param isValidHost applied to a non-IP host; a {@code false} result rejects the URL
 * @return a new {@link URL} built from the parsed domain, host, path, port, secure flag and
 *         IP-host flag
 */
public static URL from(String rawUrl, Function<String, String> domainFromHost,
    Function<String, Boolean> isValidHost) {
  if (rawUrl.contains(URI_SEP)) {
    badUrl(false, "Skipping raw URL as it contains '" + URI_SEP + "':" + rawUrl);
  }

  String trimUrl = rawUrl.trim();
  // Guarantees the substring(0, 7) / substring(0, 8) protocol checks below stay in bounds.
  if (trimUrl.length() < 8) {
    badUrl(false, "Raw URL is too short to start with valid protocol: " + rawUrl);
  }

  String urlNoProto = "";
  boolean secure = false;
  int port = 80;
  if (trimUrl.substring(0, 7).equalsIgnoreCase(HTTP_PROTO)) {
    urlNoProto = trimUrl.substring(7);
  } else if (trimUrl.substring(0, 8).equalsIgnoreCase(HTTPS_PROTO)) {
    urlNoProto = trimUrl.substring(8);
    secure = true;
    port = 443;
  } else {
    badUrl(false, "Raw URL does not start with valid protocol: " + rawUrl);
  }

  // Split "host[:port]" from the rest at the first separator. Host names are case-insensitive,
  // so they are normalized with Locale.ROOT; the default-locale toLowerCase() would corrupt
  // hosts containing 'I' under e.g. a Turkish default locale.
  String hostPort;
  String path;
  String[] hostAndRest = urlNoProto.split(URL_SEP_REGEX, 2);
  if (hostAndRest.length == 2) {
    hostPort = hostAndRest[0].toLowerCase(java.util.Locale.ROOT);
    // The separator removed by split() sits right after the host part; put it back in front of
    // the path. Assumes URL_SEP_REGEX matches exactly one character - TODO confirm.
    int sepIndex = hostAndRest[0].length();
    String sep = urlNoProto.substring(sepIndex, sepIndex + 1);
    path = sep + hostAndRest[1];
  } else {
    hostPort = urlNoProto.toLowerCase(java.util.Locale.ROOT);
    path = "/";
  }

  String host;
  String[] hostAndPort = hostPort.split(":", 2);
  if (hostAndPort.length == 2) {
    host = hostAndPort[0];
    try {
      port = Integer.parseInt(hostAndPort[1]);
    } catch (NumberFormatException e) {
      badUrl(false, "Raw URL (" + rawUrl + ") has invalid port: " + hostAndPort[1]);
    }
    // parseInt accepts any int, including negatives; ports are 16-bit unsigned values.
    if (port < 0 || port > 65535) {
      badUrl(false, "Raw URL (" + rawUrl + ") has invalid port: " + hostAndPort[1]);
    }
  } else {
    host = hostPort;
  }

  if (host.isEmpty()) {
    badUrl(false, "Raw URL cannot have empty host: " + rawUrl);
  }

  // IP hosts skip host-name validation and use the address itself as the domain.
  String domain = host;
  boolean ipHost = isValidIP(host);
  if (!ipHost) {
    if (!isValidHost.apply(host)) {
      badUrl(false, "Raw URL (" + rawUrl + ") has invalid host: " + host);
    }
    domain = domainFromHost.apply(host);
  }
  return new URL(domain, host, path, port, secure, ipHost);
}