go/cmd/aggregate-crls/aggregate-crls.go (399 lines of code) (raw):
package main
import (
"bufio"
"context"
"crypto/sha256"
"encoding/hex"
"flag"
"fmt"
"io/ioutil"
"net/url"
"os"
"os/signal"
"path"
"path/filepath"
"regexp"
"strings"
"sync"
"syscall"
"time"
"github.com/golang/glog"
"github.com/google/certificate-transparency-go/x509"
"github.com/google/certificate-transparency-go/x509/pkix"
"github.com/mozilla/crlite/go"
"github.com/mozilla/crlite/go/config"
"github.com/mozilla/crlite/go/downloader"
"github.com/mozilla/crlite/go/engine"
"github.com/mozilla/crlite/go/rootprogram"
)
// File-system permission bits used for everything this tool creates.
const (
// permMode is the mode passed to os.OpenFile when a revoked-serial file is created.
permMode = 0644
// permModeDir is the mode passed to os.MkdirAll for the CRL and output directories.
permModeDir = 0755
)
var (
	// Command-line flags. The literal "<path>" default doubles as the
	// "flag was not supplied" sentinel that checkPathArg and main compare
	// against.
	inccadb      = flag.String("ccadb", "<path>", "input CCADB CSV path")
	crlpath      = flag.String("crlpath", "<path>", "root of folders of the form /<path>/<issuer> containing .crl files to be updated")
	revokedpath  = flag.String("revokedpath", "<path>", "output folder of revoked serial files of the form <issuer>")
	enrolledpath = flag.String("enrolledpath", "<path>", "output JSON file of issuers with their enrollment status")
	auditpath    = flag.String("auditpath", "<path>", "output JSON audit report")

	// ctconfig is the shared configuration object: main calls Init on it and
	// the worker pools read NumThreads from it.
	ctconfig = config.NewCTConfig()

	// illegalPath matches any character other than an alphanumeric, '~', '-',
	// '.' or '/'. makeFilenameFromUrl removes every match from the file names
	// it builds.
	illegalPath = regexp.MustCompile(`[^[:alnum:]\~\-\./]`)

	// allowableAgeOfLocalCRL is the age (336 hours, i.e. 14 days) of the local
	// CRL file beyond which crlFetchWorkerProcessOne reports the CRL to the
	// auditor as old. Written as a constant expression so there is no parse
	// error to discard.
	allowableAgeOfLocalCRL = 336 * time.Hour
)
// AggregateEngine bundles the state shared by the CRL download workers and
// the CRL aggregation workers.
type AggregateEngine struct {
// rootPath is the directory into which StoreRevokedCertificateList writes
// one file per issuer.
rootPath string
// issuers is used to look up the certificate and subject of each issuer.
issuers *rootprogram.MozIssuers
// auditor is notified of per-CRL outcomes (old, failed, processed, ...).
auditor *CrlAuditor
}
// makeFilenameFromUrl derives the local file name used to store the CRL
// fetched from crlUrl.
//
// The name is "<hostname>-<last path element>", lower-cased, with every
// character matched by illegalPath removed and a trailing ".crl" trimmed,
// followed by "-<hex of the first 8 bytes of SHA-256(crlUrl.String())>.crl".
// The hash suffix keeps names distinct for URLs that share a host and base
// name.
//
// The result never contains a path separator, so it is always safe to join
// onto a directory.
func makeFilenameFromUrl(crlUrl url.URL) string {
	filename := fmt.Sprintf("%s-%s", crlUrl.Hostname(), path.Base(crlUrl.Path))
	filename = strings.ToLower(filename)
	filename = illegalPath.ReplaceAllString(filename, "")
	// illegalPath lets '/' through, and path.Base returns "/" when the URL
	// path is just "/". Without this step such a URL would yield a "file
	// name" that points into a sub-directory.
	filename = strings.ReplaceAll(filename, "/", "")
	filename = strings.TrimSuffix(filename, ".crl")

	hash := sha256.Sum256([]byte(crlUrl.String()))
	return fmt.Sprintf("%s-%s.crl", filename, hex.EncodeToString(hash[:8]))
}
// CrlVerifier checks a CRL file on disk against a single expected issuer
// certificate. crlFetchWorkerProcessOne passes one to
// downloader.DownloadAndVerifyFileSync as its verifier argument.
type CrlVerifier struct {
// expectedIssuerCert is the certificate IsValid hands to
// loadAndCheckSignatureOfCRL.
expectedIssuerCert *x509.Certificate
}
// IsValid loads the CRL stored at path and checks it against the verifier's
// expected issuer certificate via loadAndCheckSignatureOfCRL. It returns nil
// when that succeeds and the underlying error otherwise.
func (cv *CrlVerifier) IsValid(path string) error {
	if _, _, verifyErr := loadAndCheckSignatureOfCRL(path, cv.expectedIssuerCert); verifyErr != nil {
		return verifyErr
	}
	return nil
}
// crlFetchWorkerProcessOne refreshes the local copy of one CRL for one issuer
// and returns the path of the file that should be used for aggregation.
//
// The file lives at <crlpath>/<issuer ID>/<makeFilenameFromUrl(crlUrl)>; the
// issuer directory is created if needed. The download and verification is
// delegated to downloader.DownloadAndVerifyFileSync, which reports whether the
// file on disk is acceptable. If it is not, ("", error) is returned. A
// download error with an acceptable file on disk is only logged.
//
// A local file older than allowableAgeOfLocalCRL is reported to the auditor
// but still used. A missing issuer certificate is fatal for the process.
func (ae *AggregateEngine) crlFetchWorkerProcessOne(ctx context.Context, crlUrl url.URL, issuer types.Issuer) (string, error) {
	if mkErr := os.MkdirAll(filepath.Join(*crlpath, issuer.ID()), permModeDir); mkErr != nil {
		glog.Warningf("Couldn't make directory: %s", mkErr)
		return "", mkErr
	}

	finalPath := filepath.Join(*crlpath, issuer.ID(), makeFilenameFromUrl(crlUrl))

	issuerCert, certErr := ae.issuers.GetCertificateForIssuer(issuer)
	if certErr != nil {
		glog.Fatalf("[%s] Could not find certificate for issuer: %s", issuer.ID(), certErr)
	}
	verifier := &CrlVerifier{expectedIssuerCert: issuerCert}

	// NOTE(review): the trailing 3 and 300*time.Second are presumably a retry
	// count and a timeout -- confirm against the downloader package.
	usable, dlErr := downloader.DownloadAndVerifyFileSync(ctx, verifier, ae.auditor,
		&issuer, crlUrl, finalPath, 3, 300*time.Second)
	switch {
	case !usable:
		glog.Errorf("[%s] Could not download, and no local file, will not be populating the "+
			"revocations: %s", crlUrl.String(), dlErr)
		return "", dlErr
	case dlErr != nil:
		// The download failed, but the file on disk is still acceptable.
		glog.Errorf("[%s] Problem downloading: %s", crlUrl.String(), dlErr)
	}

	// Ensure the final path is acceptable
	localSize, localDate, statErr := downloader.GetSizeAndDateOfFile(finalPath)
	if statErr != nil {
		glog.Errorf("[%s] Unexpected error on local file, will not be populating the "+
			"revocations: %s", crlUrl.String(), statErr)
		return "", statErr
	}

	age := time.Since(localDate)
	if age > allowableAgeOfLocalCRL {
		ae.auditor.Old(&issuer, &crlUrl, age)
		glog.Warningf("[%s] CRL appears not very fresh, but proceeding with expiration check. Age: %s", crlUrl.String(), age)
	}

	glog.Infof("[%s] Updated CRL %s (path=%s) (sz=%d) (age=%s)", issuer.ID(), crlUrl.String(),
		finalPath, localSize, age)
	return finalPath, nil
}
// crlFetchWorker is the body of one download worker goroutine. It drains
// crlsChan; for each issuer it fetches every listed CRL URL with
// crlFetchWorkerProcessOne and sends one IssuerCrlUrlPaths on resultChan.
//
// A URL whose fetch failed is still recorded, with an empty Path, so the
// aggregation stage can tell that the issuer's data is incomplete. When ctx is
// cancelled the worker returns immediately and the issuer it was working on is
// not sent. wg.Done is called on exit.
func (ae *AggregateEngine) crlFetchWorker(ctx context.Context, wg *sync.WaitGroup,
	crlsChan <-chan types.IssuerCrlUrls, resultChan chan<- types.IssuerCrlUrlPaths) {
	defer wg.Done()

	for work := range crlsChan {
		fetched := []types.UrlPath{}

		for _, crlUrl := range work.Urls {
			if ctx.Err() != nil {
				return
			}

			localPath, fetchErr := ae.crlFetchWorkerProcessOne(ctx, crlUrl, work.Issuer)
			if fetchErr != nil {
				glog.Warningf("[%s] CRL %s path=%s had error=%s", work.Issuer.ID(), crlUrl.String(), localPath, fetchErr)
			}

			// localPath is blank when fetchErr is set
			fetched = append(fetched, types.UrlPath{Path: localPath, Url: crlUrl})
		}

		issuerDN, dnErr := ae.issuers.GetSubjectForIssuer(work.Issuer)
		if dnErr != nil {
			glog.Error(dnErr)
		}

		resultChan <- types.IssuerCrlUrlPaths{
			Issuer:      work.Issuer,
			IssuerDN:    issuerDN,
			CrlUrlPaths: fetched,
		}
	}
}
// loadAndCheckSignatureOfCRL reads the CRL file at aPath, parses it and
// verifies its signature against aIssuerCert.
//
// On success it returns the parsed CRL and the SHA-256 digest of the raw file
// contents. On a read, parse or signature failure it returns a nil CRL, an
// empty byte slice and a descriptive error. An expired CRL is only logged as
// a warning; it is still returned as valid.
func loadAndCheckSignatureOfCRL(aPath string, aIssuerCert *x509.Certificate) (*pkix.CertificateList, []byte, error) {
	raw, readErr := ioutil.ReadFile(aPath)
	if readErr != nil {
		return nil, []byte{}, fmt.Errorf("Error reading CRL, will not process revocations: %s", readErr)
	}

	parsed, parseErr := x509.ParseCRL(raw)
	if parseErr != nil {
		return nil, []byte{}, fmt.Errorf("Error parsing, will not process revocations: %s", parseErr)
	}

	if sigErr := aIssuerCert.CheckCRLSignature(parsed); sigErr != nil {
		return nil, []byte{}, fmt.Errorf("Invalid signature on CRL, will not process revocations: %s", sigErr)
	}

	if now := time.Now(); parsed.HasExpired(now) {
		glog.Warningf("[%s] CRL is expired, but proceeding anyway. (ThisUpdate=%s,"+
			" NextUpdate=%s)", aPath, parsed.TBSCertList.ThisUpdate, parsed.TBSCertList.NextUpdate)
	}

	digest := sha256.Sum256(raw)
	return parsed, digest[:], nil
}
// verifyCRL validates the CRL at aPath against aIssuerCert and, when a file
// exists at aPreviousPath, makes sure the new CRL is not older than that
// previous one.
//
// Returns:
//   - (crl, nil) when the CRL at aPath verifies; an expired CRL is reported to
//     the auditor and logged, but still returned.
//   - (nil, err) when either the CRL at aPath or the one at aPreviousPath
//     fails to load or verify; the auditor is notified.
//   - (previousCrl, err) when the previous CRL's ThisUpdate is after the new
//     one's; the auditor is notified.
//
// If os.Stat on aPreviousPath fails for any reason the comparison is skipped.
func (ae *AggregateEngine) verifyCRL(aIssuer types.Issuer, dlTracer *downloader.DownloadTracer, crlUrl *url.URL, aPath string, aIssuerCert *x509.Certificate, aPreviousPath string) (*pkix.CertificateList, error) {
	glog.V(1).Infof("[%s] Verifying CRL from URL %s", aPath, crlUrl)

	current, _, loadErr := loadAndCheckSignatureOfCRL(aPath, aIssuerCert)
	if loadErr != nil {
		ae.auditor.FailedVerifyUrl(&aIssuer, crlUrl, dlTracer, loadErr)
		return nil, loadErr
	}

	_, statErr := os.Stat(aPreviousPath)
	if previousExists := statErr == nil; previousExists {
		previous, _, prevErr := loadAndCheckSignatureOfCRL(aPreviousPath, aIssuerCert)
		if prevErr != nil {
			ae.auditor.FailedVerifyPath(&aIssuer, crlUrl, aPreviousPath, prevErr)
			return nil, prevErr
		}

		previousIssued := previous.TBSCertList.ThisUpdate
		currentIssued := current.TBSCertList.ThisUpdate
		if previousIssued.After(currentIssued) {
			ae.auditor.FailedOlderThanPrevious(&aIssuer, crlUrl, dlTracer, previousIssued, currentIssued)
			return previous, fmt.Errorf("[%s] CRL is older than the previous CRL (previous=%s, this=%s)",
				aPath, previousIssued, currentIssued)
		}
	}

	if current.HasExpired(time.Now()) {
		ae.auditor.Expired(&aIssuer, crlUrl, current.TBSCertList.NextUpdate)
		glog.Warningf("[%s] CRL is expired, but proceeding anyway. (ThisUpdate=%s,"+
			" NextUpdate=%s)", aPath, current.TBSCertList.ThisUpdate, current.TBSCertList.NextUpdate)
	}

	return current, nil
}
// processCRL extracts the serial number and reason code of every entry in
// aCRL.
//
// The raw TBSCertList is decoded with types.DecodeRawTBSCertList and each
// entry is converted with SerialAndReason. If the list or any single entry
// cannot be decoded, an empty slice and an error are returned; no partial
// result is handed back.
func processCRL(aCRL *pkix.CertificateList) ([]types.SerialAndReason, error) {
	revokedList, err := types.DecodeRawTBSCertList(aCRL.TBSCertList.Raw)
	if err != nil {
		return []types.SerialAndReason{}, fmt.Errorf("CRL list couldn't be decoded: %s", err)
	}

	// Size the result to the number of entries actually present. A fixed
	// 16Ki-entry buffer is wasted on small CRLs and still has to grow for
	// large ones.
	entries := revokedList.RevokedCertificates
	serials := make([]types.SerialAndReason, 0, len(entries))
	for _, ent := range entries {
		serial, err := ent.SerialAndReason()
		if err != nil {
			return []types.SerialAndReason{}, fmt.Errorf("CRL list couldn't be decoded: %s", err)
		}
		serials = append(serials, serial)
	}
	return serials, nil
}
// aggregateCRLWorker is the body of one aggregation worker goroutine. It
// drains workChan; for each issuer it loads and verifies every downloaded CRL,
// collects the revoked serials of all of them and stores the combined list
// with StoreRevokedCertificateList.
//
// The list is stored only when every CRL of the issuer was downloaded,
// verified and decoded; otherwise nothing is written for that issuer, so an
// incomplete list never replaces an earlier one. Each CRL outcome is reported
// to the auditor. A missing issuer certificate or a failure to store the list
// is fatal for the process. When ctx is cancelled the worker returns
// immediately. wg.Done is called on exit.
func (ae *AggregateEngine) aggregateCRLWorker(ctx context.Context, wg *sync.WaitGroup,
	workChan <-chan types.IssuerCrlUrlPaths) {
	defer wg.Done()

	for tuple := range workChan {
		anyCrlFailed := false

		cert, err := ae.issuers.GetCertificateForIssuer(tuple.Issuer)
		if err != nil {
			glog.Fatalf("[%s] Could not find certificate for issuer: %s", tuple.Issuer.ID(), err)
		}

		serialCount := 0
		serials := make([]types.SerialAndReason, 0, 128*1024)

		for _, crlUrlPath := range tuple.CrlUrlPaths {
			select {
			case <-ctx.Done():
				return
			default:
			}

			if crlUrlPath.Path == "" {
				anyCrlFailed = true
				// DownloadAndVerifyFileSync already notified the auditor.
				// The download error is not carried in crlUrlPath, so there
				// is no error value to print here.
				glog.Errorf("[%+v] Failed to download: no local CRL file is available", crlUrlPath)
				continue
			}

			crl, sha256sum, err := loadAndCheckSignatureOfCRL(crlUrlPath.Path, cert)
			if err != nil {
				anyCrlFailed = true
				ae.auditor.FailedVerifyPath(&tuple.Issuer, &crlUrlPath.Url, crlUrlPath.Path, err)
				glog.Errorf("[%+v] Failed to verify: %s", crlUrlPath, err)
				continue
			}

			revokedSerials, err := processCRL(crl)
			if err != nil {
				anyCrlFailed = true
				ae.auditor.FailedProcessLocal(&tuple.Issuer, &crlUrlPath.Url, crlUrlPath.Path, err)
				glog.Errorf("[%+v] Failed to process: %s", crlUrlPath, err)
				continue
			}

			revokedCount := len(revokedSerials)
			if revokedCount == 0 {
				// An empty CRL is not a failure; it just contributes nothing.
				ae.auditor.NoRevocations(&tuple.Issuer, &crlUrlPath.Url, crlUrlPath.Path)
				continue
			}

			age := time.Since(crl.TBSCertList.ThisUpdate)
			ae.auditor.ValidAndProcessed(&tuple.Issuer, &crlUrlPath.Url, crlUrlPath.Path, revokedCount, age, sha256sum)

			serials = append(serials, revokedSerials...)
			serialCount += revokedCount
		}

		if !anyCrlFailed {
			if err := ae.StoreRevokedCertificateList(ctx, tuple.Issuer, serials); err != nil {
				glog.Fatalf("[%s] Could not save revoked certificates file: %s", tuple.Issuer.ID(), err)
			}
			glog.Infof("[%s] %d total revoked serials for %s (len=%d, cap=%d)", tuple.Issuer.ID(),
				serialCount, tuple.IssuerDN, len(serials), cap(serials))
		} else {
			glog.Infof("May not have all revoked certificates for issuer %s", tuple.Issuer.ID())
		}
	}
}
// downloadCRLs fetches every CRL listed in issuerToUrls using
// *ctconfig.NumThreads crlFetchWorker goroutines.
//
// It blocks until all workers have finished, then returns a closed, buffered
// channel holding one IssuerCrlUrlPaths per processed issuer, plus the number
// of issuers that had at least one parseable URL. URLs that fail url.Parse
// are logged and skipped; issuers left with no URL are not queued. If ctx is
// cancelled the workers stop early and the channel may hold fewer results
// than the returned count.
func (ae *AggregateEngine) downloadCRLs(ctx context.Context, issuerToUrls types.IssuerCrlMap) (<-chan types.IssuerCrlUrlPaths, int64) {
	var wg sync.WaitGroup

	// All work is queued before any worker starts, so the buffer has to hold
	// every item. At most one item is queued per issuer, which makes
	// len(issuerToUrls) an exact upper bound. A fixed multi-million-slot buffer
	// would be allocated eagerly and would deadlock above that size.
	crlChan := make(chan types.IssuerCrlUrls, len(issuerToUrls))

	var count int64
	for issuer, crlMap := range issuerToUrls {
		urls := make([]url.URL, 0, len(crlMap))
		for iUrl := range crlMap {
			urlObj, err := url.Parse(strings.TrimSpace(iUrl))
			if err != nil {
				glog.Warningf("Ignoring URL %s: %s", iUrl, err)
				continue
			}
			urls = append(urls, *urlObj)
		}

		if len(urls) > 0 {
			crlChan <- types.IssuerCrlUrls{
				Issuer: types.NewIssuerFromString(issuer),
				Urls:   urls,
			}
			count++
		}
	}
	close(crlChan)

	// One slot per queued issuer, so a worker never blocks on its send even
	// though nothing reads the results until this function returns.
	resultChan := make(chan types.IssuerCrlUrlPaths, count)

	// Start the workers
	for t := 0; t < *ctconfig.NumThreads; t++ {
		wg.Add(1)
		go ae.crlFetchWorker(ctx, &wg, crlChan, resultChan)
	}

	wg.Wait()
	close(resultChan)

	return resultChan, count
}
// aggregateCRLs launches *ctconfig.NumThreads aggregateCRLWorker goroutines
// that all consume crlPaths, and blocks until every one of them has returned.
// The count argument is accepted but not used.
func (ae *AggregateEngine) aggregateCRLs(ctx context.Context, count int64, crlPaths <-chan types.IssuerCrlUrlPaths) {
	var workers sync.WaitGroup

	// Spin up the worker pool.
	for started := 0; started < *ctconfig.NumThreads; started++ {
		workers.Add(1)
		go ae.aggregateCRLWorker(ctx, &workers, crlPaths)
	}

	workers.Wait()
}
// StoreRevokedCertificateList writes a line delimited list of serial numbers
// and reason codes to the text file <rootPath>/<issuer ID>, creating rootPath
// if needed and truncating any existing file.
//
// Each line contains hex encoded binary data. The first (encoded) byte in each
// line is the reason code. The remaining bytes are the serial number.
//
// It returns the first error hit while creating the directory, opening,
// writing, flushing or closing the file. If ctx is cancelled part-way through
// it stops and returns ctx.Err(); the lines buffered so far are still flushed,
// so the file may then be incomplete.
func (ae *AggregateEngine) StoreRevokedCertificateList(ctx context.Context, issuer types.Issuer,
	serials []types.SerialAndReason) (err error) {
	// Ensure that the output directory exists
	if err = os.MkdirAll(ae.rootPath, permModeDir); err != nil {
		return err
	}

	outPath := filepath.Join(ae.rootPath, issuer.ID())
	fd, err := os.OpenFile(outPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, permMode)
	if err != nil {
		return err
	}
	// For a file opened for writing, Close can report a delayed write
	// failure, so its error must not be dropped.
	defer func() {
		if closeErr := fd.Close(); err == nil {
			err = closeErr
		}
	}()

	writer := bufio.NewWriter(fd)
	// Deferred calls run last-in first-out, so the flush happens before the
	// Close above. Most of the data only reaches the file here, which is why
	// the flush error has to be surfaced.
	defer func() {
		if flushErr := writer.Flush(); err == nil {
			err = flushErr
		}
	}()

	for _, s := range serials {
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}

		if _, err = writer.WriteString(hex.EncodeToString([]byte{s.Reason})); err != nil {
			return err
		}
		if _, err = writer.WriteString(s.Serial.HexString()); err != nil {
			return err
		}
		if err = writer.WriteByte('\n'); err != nil {
			return err
		}
	}

	return nil
}
// checkPathArg terminates the program with exit status 2 when a required path
// flag still holds its "<path>" placeholder default.
//
// strObj is the flag's current value and confOptionName the flag name used in
// the error message. Before exiting, the usage text is printed via
// ctconfig.Usage.
func checkPathArg(strObj string, confOptionName string, ctconfig *config.CTConfig) {
	if strObj != "<path>" {
		return
	}

	glog.Errorf("Flag %s is not set", confOptionName)
	ctconfig.Usage()
	// os.Exit does not run deferred functions, so main's deferred glog.Flush
	// is skipped. Flush here so the buffered message above is not lost.
	glog.Flush()
	os.Exit(2)
}
// main drives one aggregation run:
//
//  1. initialise configuration and check that the required path flags are set;
//  2. create the revoked-serial and CRL directories;
//  3. load the Mozilla issuer data (from the -ccadb file when given);
//  4. download every issuer's CRLs, then aggregate them into per-issuer
//     revoked-serial files;
//  5. save the issuer list to -enrolledpath and the audit report to
//     -auditpath.
//
// SIGTERM or an interrupt cancels the shared context; if that happens during
// the download phase the program returns without aggregating or writing any
// report.
func main() {
	ctconfig.Init()

	ctx, cancel := context.WithCancel(context.Background())
	// Release the context on the normal return path as well as on a signal.
	defer cancel()
	defer glog.Flush()

	checkPathArg(*revokedpath, "revokedpath", ctconfig)
	checkPathArg(*crlpath, "crlpath", ctconfig)
	checkPathArg(*enrolledpath, "enrolledpath", ctconfig)
	checkPathArg(*auditpath, "auditpath", ctconfig)

	if err := os.MkdirAll(*revokedpath, permModeDir); err != nil {
		glog.Fatalf("Unable to make the revokedpath directory: %s", err)
	}
	if err := os.MkdirAll(*crlpath, permModeDir); err != nil {
		glog.Fatalf("Unable to make the CRL directory: %s", err)
	}

	engine.PrepareTelemetry("aggregate-crls", ctconfig)

	mozIssuers := rootprogram.NewMozillaIssuers()
	if *inccadb != "<path>" {
		mozIssuers.DiskPath = *inccadb
	}

	// glog.Fatalf terminates the process, so no return is needed after it.
	if err := mozIssuers.Load(); err != nil {
		glog.Fatalf("Unable to load the Mozilla issuers: %s", err)
	}

	glog.Infof("Issuer file age: %s", mozIssuers.DatasetAge().Round(time.Second))

	// Exit signal, used by signals from the OS
	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGTERM, os.Interrupt)
	defer signal.Stop(sigChan)

	go func() {
		<-sigChan
		glog.Infof("Signal caught, stopping threads at next opportunity.")
		cancel()
		signal.Stop(sigChan)
	}()

	auditor := NewCrlAuditor(mozIssuers)

	ae := AggregateEngine{
		rootPath: *revokedpath,
		issuers:  mozIssuers,
		auditor:  auditor,
	}

	// Copy the issuer -> CRL URL set into the map type downloadCRLs expects.
	issuerCrlMap := make(types.IssuerCrlMap)
	for issuer, crls := range mozIssuers.CrlMap {
		issuerCrlMap[issuer] = make(map[string]bool)
		for crl := range crls {
			issuerCrlMap[issuer][crl] = true
		}
	}

	crlPaths, count := ae.downloadCRLs(ctx, issuerCrlMap)

	// Cancelled while downloading: skip aggregation and all reports.
	if ctx.Err() != nil {
		return
	}

	ae.aggregateCRLs(ctx, count, crlPaths)

	if err := mozIssuers.SaveIssuersList(*enrolledpath); err != nil {
		glog.Fatalf("Unable to save the crlite-informed intermediate issuers to %s: %s", *enrolledpath, err)
	}
	glog.Infof("Saved crlite-informed intermediate issuers to %s", *enrolledpath)

	// Failures writing the audit report are warnings only.
	fd, err := os.Create(*auditpath)
	if err != nil {
		glog.Warningf("Could not open audit report path %s: %v", *auditpath, err)
		return
	}

	if err = auditor.WriteReport(fd); err != nil {
		glog.Warningf("Could not write audit report %s: %v", *auditpath, err)
	}

	if err = fd.Close(); err != nil {
		glog.Warningf("Could not close audit report %s: %v", *auditpath, err)
	}
}