ソースを参照

added nginx log parser

Tobias Begalke 6 年 前
コミット
596d3f3541
1 ファイル変更108 行追加4 行削除
  1. 108 4
      main.go

+ 108 - 4
main.go

@@ -12,6 +12,7 @@ import (
 	"net"
 	"net/http"
 	"os"
+	"regexp"
 	"strings"
 	"time"
 
@@ -23,6 +24,7 @@ import (
 	"github.com/hpcloud/tail"
 	"github.com/nats-io/nats"
 	"github.com/nats-io/nats/encoders/protobuf"
+	"github.com/satyrius/gonx"
 
 	"git.scraperwall.com/scw/ajp13"
 	"git.scraperwall.com/scw/data"
@@ -46,13 +48,15 @@ var (
 	useXForwardedAsSource = flag.Bool("use-x-forwarded", false, "Use the IP address in X-Forwarded-For as source")
 	trace                 = flag.Bool("trace", false, "Trace the packet capturing")
 	apacheLog             = flag.String("apache-log", "", "Parse an Apache Log file")
+	nginxLog              = flag.String("nginx-log", "", "Nginx log file to tail")
+	nginxFormat           = flag.String("nginx-format", "", "The nginx log file format")
 	configFile            = flag.String("config", "", "The location of the TOML config file")
 
 	beQuiet   = flag.Bool("quiet", true, "Be quiet")
 	doVersion = flag.Bool("version", false, "Show version information")
 
 	natsEC          *nats.EncodedConn
-	natsJsonEC      *nats.EncodedConn
+	natsJSONEC      *nats.EncodedConn
 	natsErrorChan   chan error
 	natsIsAvailable bool
 	count           uint64
@@ -86,6 +90,8 @@ type Config struct {
 	Protocol              string
 	Trace                 bool
 	ApacheLog             string
+	NginxLog              string
+	NginxLogFormat        string
 }
 
 type duration struct {
@@ -112,6 +118,8 @@ func (c Config) print() {
 	fmt.Printf("SleepFor:              %s\n", c.SleepFor.String())
 	fmt.Printf("RequestsFile:          %s\n", c.RequestsFile)
 	fmt.Printf("Apache Log:            %s\n", c.ApacheLog)
+	fmt.Printf("Nginx Log:             %s\n", c.NginxLog)
+	fmt.Printf("Nginx Log Format:      %s\n", c.NginxLogFormat)
 	fmt.Printf("UseXForwardedAsSource: %t\n", c.UseXForwardedAsSource)
 	fmt.Printf("Protocol:              %s\n", c.Protocol)
 	fmt.Printf("Quiet:                 %t\n", c.Quiet)
@@ -167,6 +175,8 @@ func main() {
 	} else if config.Live {
 		fmt.Printf("live capture (%s, %s) to %s\n", config.Interface, config.Filter, config.NatsURL)
 		liveCapture()
+	} else if config.NginxLog != "" && config.NginxLogFormat != "" {
+		nginxLogCapture(config.NginxLog, config.NginxLogFormat)
 	}
 }
 
@@ -217,7 +227,7 @@ func connectToNATS() error {
 		return fmt.Errorf("Encoded Connection: %v", err)
 	}
 
-	natsJsonEC, err = nats.NewEncodedConn(natsConn, nats.JSON_ENCODER)
+	natsJSONEC, err = nats.NewEncodedConn(natsConn, nats.JSON_ENCODER)
 	if err != nil {
 		return fmt.Errorf("Encoded Connection: %v", err)
 	}
@@ -226,6 +236,95 @@ func connectToNATS() error {
 	return nil
 }
 
+func nginxLogCapture(logfile, format string) {
+	if _, err := os.Stat(logfile); err != nil {
+		log.Fatalf("%s: %s", logfile, err)
+	}
+
+	t, err := tail.TailFile(logfile, tail.Config{
+		Follow:   true,                                 // follow the file
+		ReOpen:   true,                                 // reopen log file when it gets closed/rotated
+		Logger:   tail.DiscardingLogger,                // don't log anything
+		Location: &tail.SeekInfo{Offset: 0, Whence: 2}, // start at the end of the file
+	})
+	if err != nil {
+		log.Fatalf("%s: %s", logfile, err)
+	}
+
+	// `$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"`
+	p := gonx.NewParser(format)
+	reqRegexp := regexp.MustCompile(`^([A-Z]+)\s+(.+?)\s+(HTTP/\d+\.\d+)$`)
+
+	for line := range t.Lines {
+		l := line.Text
+
+		logEntry, err := p.ParseString(l)
+		if err != nil {
+			log.Println(err)
+			continue
+		}
+
+		remote, err := logEntry.Field("remote_addr")
+		if err != nil {
+			log.Println(err)
+			continue
+		}
+
+		// only use the first host in case there are multiple hosts in the log
+		if cidx := strings.Index(remote, ","); cidx >= 0 {
+			remote = remote[0:cidx]
+		}
+
+		timestampStr, err := logEntry.Field("time_local")
+		if err != nil {
+			log.Println(err)
+			continue
+		}
+		timeStamp, err := time.Parse("02/Jan/2006:15:04:05 -0700", timestampStr)
+		if err != nil {
+			log.Println(err)
+			continue
+		}
+
+		httpRequest, err := logEntry.Field("request")
+		if err != nil {
+			log.Println(err)
+			continue
+		}
+
+		reqData := reqRegexp.FindStringSubmatch(httpRequest)
+		if len(reqData) < 4 {
+			log.Printf("reqData is too short: %d instead of 4\n", len(reqData))
+			continue
+		}
+
+		request := data.Request{
+			IpSrc:     remote,
+			Origin:    remote,
+			Source:    remote,
+			IpDst:     "127.0.0.1",
+			PortSrc:   0,
+			PortDst:   0,
+			TcpSeq:    0,
+			CreatedAt: timeStamp.Unix(),
+			Url:       reqData[2],
+			Method:    reqData[1],
+			Host:      "[not available]",
+			Protocol:  reqData[3],
+		}
+
+		request.Referer, _ = logEntry.Field("http_referer")
+		request.UserAgent, _ = logEntry.Field("http_user_agent")
+
+		if config.Trace {
+			log.Printf("[%s] %s\n", request.Source, request.Url)
+		}
+
+		count++
+		publishRequest(config.NatsQueue, &request)
+	}
+}
+
 func apacheLogCapture(logfile string) {
 	if _, err := os.Stat(logfile); err != nil {
 		log.Fatalf("%s: %s", logfile, err)
@@ -278,6 +377,8 @@ func apacheLogCapture(logfile string) {
 		if vhost != "" {
 			vhostAndPort := strings.Split(logEntry.VirtualHost, ":")
 			virtualHost = vhostAndPort[0]
+		} else {
+			vhost = "[not available]"
 		}
 
 		request := data.Request{
@@ -302,6 +403,7 @@ func apacheLogCapture(logfile string) {
 			log.Printf("[%s] %s\n", request.Source, request.Url)
 		}
 
+		count++
 		publishRequest(config.NatsQueue, &request)
 	}
 }
@@ -617,9 +719,9 @@ func replayFile() {
 
 				if strings.HasPrefix(r[0], "50.31.") {
 					fp.Fingerprint = "a1f2c2ee560ce6580d66d451a9c8dfbf"
-					natsJsonEC.Publish("fingerprints_scw", fp)
+					natsJSONEC.Publish("fingerprints_scw", fp)
 				} else if rand.Intn(10) < 5 {
-					natsJsonEC.Publish("fingerprints_scw", fp)
+					natsJSONEC.Publish("fingerprints_scw", fp)
 				}
 
 			}
@@ -653,6 +755,8 @@ func loadConfig() {
 	config.UseXForwardedAsSource = *useXForwardedAsSource
 	config.Protocol = *protocol
 	config.ApacheLog = *apacheLog
+	config.NginxLog = *nginxLog
+	config.NginxLogFormat = *nginxFormat
 	config.Quiet = *beQuiet
 	config.Trace = *trace