[management] Auto update geolite (#2297)

Introduces helper functions that fetch and verify database versions, download new files when the local copies are outdated, and delete old ones. It also refactors filename handling for clarity and consistency, and adds a flag to disable auto-updating. The changes aim to simplify GeoLite database management for admins.
This commit is contained in:
benniekiss
2024-09-09 12:27:42 -04:00
committed by GitHub
parent c720d54de6
commit 12c36312b5
14 changed files with 199 additions and 334 deletions

View File

@ -1,29 +1,25 @@
package geolocation
import (
"bytes"
"context"
"fmt"
"net"
"os"
"path"
"path/filepath"
"strings"
"sync"
"time"
"github.com/oschwald/maxminddb-golang"
log "github.com/sirupsen/logrus"
)
const MMDBFileName = "GeoLite2-City.mmdb"
type Geolocation struct {
mmdbPath string
mux sync.RWMutex
sha256sum []byte
db *maxminddb.Reader
locationDB *SqliteStore
stopCh chan struct{}
reloadCheckInterval time.Duration
mmdbPath string
mux sync.RWMutex
db *maxminddb.Reader
locationDB *SqliteStore
stopCh chan struct{}
}
type Record struct {
@ -53,45 +49,56 @@ type Country struct {
CountryName string
}
func NewGeolocation(ctx context.Context, dataDir string) (*Geolocation, error) {
if err := loadGeolocationDatabases(dataDir); err != nil {
const (
mmdbPattern = "GeoLite2-City_*.mmdb"
geonamesdbPattern = "geonames_*.db"
)
func NewGeolocation(ctx context.Context, dataDir string, autoUpdate bool) (*Geolocation, error) {
mmdbGlobPattern := filepath.Join(dataDir, mmdbPattern)
mmdbFile, err := getDatabaseFilename(ctx, geoLiteCityTarGZURL, mmdbGlobPattern, autoUpdate)
if err != nil {
return nil, fmt.Errorf("failed to get database filename: %v", err)
}
geonamesDbGlobPattern := filepath.Join(dataDir, geonamesdbPattern)
geonamesDbFile, err := getDatabaseFilename(ctx, geoLiteCityZipURL, geonamesDbGlobPattern, autoUpdate)
if err != nil {
return nil, fmt.Errorf("failed to get database filename: %v", err)
}
if err := loadGeolocationDatabases(ctx, dataDir, mmdbFile, geonamesDbFile); err != nil {
return nil, fmt.Errorf("failed to load MaxMind databases: %v", err)
}
mmdbPath := path.Join(dataDir, MMDBFileName)
if err := cleanupMaxMindDatabases(ctx, dataDir, mmdbFile, geonamesDbFile); err != nil {
return nil, fmt.Errorf("failed to remove old MaxMind databases: %v", err)
}
mmdbPath := path.Join(dataDir, mmdbFile)
db, err := openDB(mmdbPath)
if err != nil {
return nil, err
}
sha256sum, err := calculateFileSHA256(mmdbPath)
if err != nil {
return nil, err
}
locationDB, err := NewSqliteStore(ctx, dataDir)
locationDB, err := NewSqliteStore(ctx, dataDir, geonamesDbFile)
if err != nil {
return nil, err
}
geo := &Geolocation{
mmdbPath: mmdbPath,
mux: sync.RWMutex{},
sha256sum: sha256sum,
db: db,
locationDB: locationDB,
reloadCheckInterval: 300 * time.Second, // TODO: make configurable
stopCh: make(chan struct{}),
mmdbPath: mmdbPath,
mux: sync.RWMutex{},
db: db,
locationDB: locationDB,
stopCh: make(chan struct{}),
}
go geo.reloader(ctx)
return geo, nil
}
func openDB(mmdbPath string) (*maxminddb.Reader, error) {
_, err := os.Stat(mmdbPath)
if os.IsNotExist(err) {
return nil, fmt.Errorf("%v does not exist", mmdbPath)
} else if err != nil {
@ -166,70 +173,6 @@ func (gl *Geolocation) Stop() error {
return nil
}
// reloader runs until gl.stopCh fires. Every gl.reloadCheckInterval it asks the
// geonames store to reload and then compares the SHA-256 of the mmdb file with
// the sum recorded in gl.sha256sum, calling gl.reload when the file changed.
// Errors are logged and the loop moves on to the next interval.
func (gl *Geolocation) reloader(ctx context.Context) {
	for {
		select {
		case <-gl.stopCh:
			return
		case <-time.After(gl.reloadCheckInterval):
			// interval elapsed: fall through to the checks below
		}

		if err := gl.locationDB.reload(ctx); err != nil {
			log.WithContext(ctx).Errorf("geonames db reload failed: %s", err)
		}

		firstSum, err := calculateFileSHA256(gl.mmdbPath)
		if err != nil {
			log.WithContext(ctx).Errorf("failed to calculate sha256 sum for '%s': %s", gl.mmdbPath, err)
			continue
		}

		if bytes.Equal(gl.sha256sum, firstSum) {
			log.WithContext(ctx).Tracef("No changes in '%s', no need to reload. Next check is in %.0f seconds.",
				gl.mmdbPath, gl.reloadCheckInterval.Seconds())
			continue
		}

		// The file differs from what is loaded. Hash it a second time after a
		// short pause so a file that is still being written is not picked up
		// half-way; two identical sums in a row are treated as "stable".
		time.Sleep(50 * time.Millisecond)
		secondSum, err := calculateFileSHA256(gl.mmdbPath)
		if err != nil {
			log.WithContext(ctx).Errorf("failed to calculate sha256 sum for '%s': %s", gl.mmdbPath, err)
			continue
		}
		if !bytes.Equal(firstSum, secondSum) {
			log.WithContext(ctx).Errorf("sha256 sum changed during reloading of '%s'", gl.mmdbPath)
			continue
		}

		if err := gl.reload(ctx, secondSum); err != nil {
			log.WithContext(ctx).Errorf("mmdb reload failed: %s", err)
		}
	}
}
// reload replaces the in-memory MaxMind reader with one freshly opened from
// gl.mmdbPath and records newSha256sum as the sum of the loaded file.
//
// The replacement is opened before the current reader is closed, so a failed
// open leaves the existing reader (and gl.sha256sum) untouched instead of
// leaving gl.db pointing at an already-closed reader. The write lock is held
// for the whole swap.
//
// It returns the error from opening the new file, or from closing the
// previous reader; in the latter case the new reader is already installed.
func (gl *Geolocation) reload(ctx context.Context, newSha256sum []byte) error {
	gl.mux.Lock()
	defer gl.mux.Unlock()

	log.WithContext(ctx).Infof("Reloading '%s'", gl.mmdbPath)

	db, err := openDB(gl.mmdbPath)
	if err != nil {
		return err
	}

	previous := gl.db
	gl.db = db
	gl.sha256sum = newSha256sum

	if err := previous.Close(); err != nil {
		return fmt.Errorf("closing previous mmdb reader for '%s': %w", gl.mmdbPath, err)
	}

	log.WithContext(ctx).Infof("Successfully reloaded '%s'", gl.mmdbPath)

	return nil
}
func fileExists(filePath string) (bool, error) {
_, err := os.Stat(filePath)
if err == nil {
@ -240,3 +183,79 @@ func fileExists(filePath string) (bool, error) {
}
return false, err
}
// getExistingDatabases returns the paths matching the glob pattern.
// A pattern error from filepath.Glob is deliberately dropped: the caller only
// gets a nil slice in that case, exactly as when nothing matches.
func getExistingDatabases(pattern string) []string {
	if matches, err := filepath.Glob(pattern); err == nil {
		return matches
	}
	return nil
}
func getDatabaseFilename(ctx context.Context, databaseURL string, filenamePattern string, autoUpdate bool) (string, error) {
var (
filename string
err error
)
if autoUpdate {
filename, err = getFilenameFromURL(databaseURL)
if err != nil {
log.WithContext(ctx).Debugf("Failed to update database from url: %s", databaseURL)
return "", err
}
} else {
files := getExistingDatabases(filenamePattern)
if len(files) < 1 {
filename, err = getFilenameFromURL(databaseURL)
if err != nil {
log.WithContext(ctx).Debugf("Failed to get database from url: %s", databaseURL)
return "", err
}
} else {
filename = filepath.Base(files[len(files)-1])
log.WithContext(ctx).Debugf("Using existing database, %s", filename)
return filename, nil
}
}
// strip suffixes that may be nested, such as .tar.gz
basename := strings.SplitN(filename, ".", 2)[0]
// get date version from basename
date := strings.SplitN(basename, "_", 2)[1]
// format db as "GeoLite2-Cities-{maxmind|geonames}_{DATE}.{mmdb|db}"
databaseFilename := filepath.Base(strings.Replace(filenamePattern, "*", date, 1))
return databaseFilename, nil
}
// cleanupOldDatabases deletes every file matching pattern whose base name is
// not currentFile. It stops at, and returns, the first removal error.
func cleanupOldDatabases(ctx context.Context, pattern string, currentFile string) error {
	for _, candidate := range getExistingDatabases(pattern) {
		if filepath.Base(candidate) != currentFile {
			log.WithContext(ctx).Debugf("Removing old database: %s", candidate)
			if err := os.Remove(candidate); err != nil {
				return err
			}
		}
	}
	return nil
}
// cleanupMaxMindDatabases removes outdated database files from dataDir.
// Files matching mmdbPattern are removed except mmdbFile, and files matching
// geonamesdbPattern are removed except geonamesdbFile. The first error from
// cleanupOldDatabases is returned as-is.
func cleanupMaxMindDatabases(ctx context.Context, dataDir string, mmdbFile string, geonamesdbFile string) error {
	// Pair each glob pattern with the file to keep explicitly, so both
	// patterns are always processed regardless of the file names passed in.
	targets := []struct {
		pattern string
		current string
	}{
		{pattern: mmdbPattern, current: mmdbFile},
		{pattern: geonamesdbPattern, current: geonamesdbFile},
	}

	for _, target := range targets {
		pattern := filepath.Join(dataDir, target.pattern)
		if err := cleanupOldDatabases(ctx, pattern, target.current); err != nil {
			return err
		}
	}
	return nil
}