go/downloader/downloader.go (161 lines of code) (raw):

package downloader import ( "context" "fmt" "io" "net/http" "net/url" "os" "strconv" "time" "github.com/golang/glog" ) type DownloadAction int const ( Create DownloadAction = 0 Resume DownloadAction = 1 UpToDate DownloadAction = 2 ) func GetSizeAndDateOfFile(path string) (int64, time.Time, error) { curFile, err := os.Open(path) if err != nil { return 0, time.Time{}, err } stat, err := curFile.Stat() if err != nil { return 0, time.Time{}, err } curFile.Close() return stat.Size(), stat.ModTime(), nil } func determineAction(client *http.Client, crlUrl url.URL, path string) (DownloadAction, int64, int64) { szOnDisk, localDate, err := GetSizeAndDateOfFile(path) if err != nil { glog.V(1).Infof("[%s] CREATE: File not on disk: %s ", crlUrl.String(), err) return Create, 0, 0 } req, err := http.NewRequest("HEAD", crlUrl.String(), nil) if err != nil { return Create, szOnDisk, 0 } req.Header.Add("X-Automated-Tool", "https://github.com/mozilla/crlite") resp, err := client.Do(req) if err != nil { return Create, szOnDisk, 0 } eTag := resp.Header.Get("Etag") lastMod, err := http.ParseTime(resp.Header.Get("Last-Modified")) if err != nil { glog.V(1).Infof("[%s] CREATE: Invalid last-modified: %s [%s]", crlUrl.String(), err, resp.Header.Get("Last-Modified")) return Create, szOnDisk, 0 } szOnServer, err := strconv.ParseInt(resp.Header.Get("Content-Length"), 10, 64) if err != nil { glog.V(1).Infof("[%s] CREATE: No content length: %s [%s]", crlUrl.String(), err, resp.Header.Get("Content-Length")) return Create, szOnDisk, 0 } if localDate.Before(lastMod) { glog.V(1).Infof("[%s] CREATE: Local Date is before last modified header date, assuming out-of-date", crlUrl.String()) return Create, szOnDisk, szOnServer } if szOnServer == szOnDisk { glog.V(1).Infof("[%s] UP TO DATE", crlUrl.String()) return UpToDate, szOnDisk, szOnServer } if szOnServer > szOnDisk { if resp.Header.Get("Accept-Ranges") == "bytes" { glog.V(1).Infof("[%s] RESUME: { Already on disk: %d %s, Last-Modified: %s, Etag: %s, Length: %d }", crlUrl.String(), szOnDisk, localDate.String(), lastMod.String(), eTag, szOnServer) return Resume, szOnDisk, szOnServer } glog.V(1).Infof("[%s] Accept-Ranges not supported, unable to resume", crlUrl.String()) } glog.V(1).Infof("[%s] CREATE: Fallthrough", crlUrl.String()) return Create, szOnDisk, szOnServer } func download(ctx context.Context, crlUrl url.URL, path string, timeout time.Duration) error { client := &http.Client{Timeout: timeout} action, offset, size := determineAction(client, crlUrl, path) if action == UpToDate { return nil } req, err := http.NewRequestWithContext(ctx, "GET", crlUrl.String(), nil) if err != nil { return err } req.Header.Add("X-Automated-Tool", "https://github.com/mozilla/crlite") if action == Resume { req.Header.Add("Content-Range", fmt.Sprintf("bytes: %d-%d/%d", offset, size, offset-size)) } resp, err := client.Do(req) if err != nil { return err } defer resp.Body.Close() var outFileParams int switch resp.StatusCode { case http.StatusPartialContent: // Depending on what the server responds with, we may have to go back to Create outFileParams = os.O_APPEND | os.O_WRONLY action = Resume glog.V(1).Infof("[%s] Successfully resumed download at offset %d", crlUrl.String(), offset) case http.StatusOK: outFileParams = os.O_TRUNC | os.O_CREATE | os.O_WRONLY action = Create default: return fmt.Errorf("Non-OK status: %s", resp.Status) } outFile, err := os.OpenFile(path, outFileParams, 0644) if err != nil { return err } defer outFile.Close() if ctx.Err() != nil { return ctx.Err() } defer resp.Body.Close() // and copy from reader, propagating errors totalBytes, err := io.Copy(outFile, resp.Body) if err != nil { return err } if action == Create && size != 0 && totalBytes != size { glog.Warningf("[%s] Didn't seem to download the right number of bytes, expected=%d got %d", crlUrl.String(), size, totalBytes) } if action == Resume && size != 0 && totalBytes+offset != size { glog.Warningf("[%s] Didn't seem to download the right number of bytes, expected=%d got %d with %d already local", crlUrl.String(), size, totalBytes, offset) } lastModStr := resp.Header.Get("Last-Modified") // http.TimeFormat is 29 characters if len(lastModStr) < 16 { glog.Infof("[%s] No compliant reported last-modified time, file may expire early: [%s]", crlUrl.String(), lastModStr) return nil } lastMod, err := http.ParseTime(resp.Header.Get("Last-Modified")) if err != nil { glog.Warningf("[%s] Couldn't parse modified time: %s [%s]", crlUrl.String(), err, lastModStr) return nil } if err := os.Chtimes(path, lastMod, lastMod); err != nil { glog.Warningf("Couldn't set modified time: %s", err) } return nil } func DownloadFileSync(ctx context.Context, crlUrl url.URL, path string, maxRetries uint, timeout time.Duration) error { glog.V(1).Infof("Downloading %s from %s", path, crlUrl.String()) var err error var i uint for ; i <= maxRetries; i++ { select { case <-ctx.Done(): glog.Infof("Signal caught, stopping threads at next opportunity.") return nil default: err = download(ctx, crlUrl, path, timeout) if err == nil { return nil } } glog.Infof("Failed to download %s (%d/%d): %s", path, i, maxRetries, err) } return err }