123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259 |
- package main
- import (
- "bufio"
- "log"
- "os"
- "regexp"
- "strings"
- "time"
- "git.scraperwall.com/scw/data/v2"
- "github.com/hpcloud/tail"
- "github.com/kr/pretty"
- "github.com/satyrius/gonx"
- )
- func nginxLogCapture(logfile, format, serverHost string) {
- if _, err := os.Stat(logfile); err != nil {
- log.Fatalf("%s: %s", logfile, err)
- }
- t, err := tail.TailFile(logfile, tail.Config{
- Follow: true, // follow the file
- ReOpen: true, // reopen log file when it gets closed/rotated
- Poll: config.TailPoll, // use file polling to detect a file rollover
- Logger: tail.DiscardingLogger, // don't log anything
- Location: &tail.SeekInfo{Offset: 0, Whence: 2}, // start at the end of the file
- })
- if err != nil {
- log.Fatalf("%s: %s", logfile, err)
- }
- // `$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"`
- p := gonx.NewParser(format)
- reqRegexp := regexp.MustCompile(`^([A-Z]+)\s+(.+?)\s+(HTTP/\d+\.\d+)$`)
- for line := range t.Lines {
- var remote string
- var err error
- l := line.Text
- logEntry, err := p.ParseString(l)
- if err != nil {
- log.Println(err)
- continue
- }
- remote, err = logEntry.Field("remote_addr")
- if err != nil {
- log.Println(err)
- continue
- }
- xff, err := logEntry.Field("http_x_forwarded_for")
- if err != nil && xff != "" {
- if config.Trace {
- log.Printf("Using XFF: %s\n", xff)
- }
- remote = xff
- }
- if remote == "" {
- log.Println("remote is empty: ignoring request.")
- continue
- }
- // only use the first host in case there are multiple hosts in the log
- if cidx := strings.Index(remote, ","); cidx >= 0 {
- remote = remote[0:cidx]
- }
- timestampStr, err := logEntry.Field("time_local")
- if err != nil {
- log.Println(err)
- continue
- }
- timeStamp, err := time.Parse("02/Jan/2006:15:04:05 -0700", timestampStr)
- if err != nil {
- log.Println(err)
- continue
- }
- httpRequest, err := logEntry.Field("request")
- if err != nil {
- log.Println(err)
- continue
- }
- reqData := reqRegexp.FindStringSubmatch(httpRequest)
- if len(reqData) < 4 {
- log.Printf("reqData is too short: %d instead of 4\n", len(reqData))
- continue
- }
- host, err := logEntry.Field("host")
- if err != nil {
- host = serverHost
- if host == "" {
- host = "[not available]"
- }
- }
- customer, err := logEntry.Field("sc_customer")
- if err != nil {
- customer = "[not set]"
- }
- request := data.Request{
- IpSrc: remote,
- Origin: remote,
- Source: remote,
- IpDst: "127.0.0.1",
- PortSrc: 0,
- PortDst: 0,
- TcpSeq: 0,
- CreatedAt: timeStamp.Unix(),
- Url: reqData[2],
- Method: reqData[1],
- Host: host,
- Protocol: reqData[3],
- Customer: customer,
- }
- request.Referer, _ = logEntry.Field("http_referer")
- request.UserAgent, _ = logEntry.Field("http_user_agent")
- if config.Trace {
- log.Printf("[%s] %s\n", request.Source, request.Url)
- }
- count++
- publishRequest(config.NatsQueue, &request)
- }
- }
- func nginxLogReplay(logfile, format string) {
- file, err := os.Open(logfile)
- if err != nil {
- log.Fatalf("%s: %s", logfile, err)
- }
- defer file.Close()
- scanner := bufio.NewScanner(file)
- var tOffset time.Duration
- // `$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"`
- p := gonx.NewParser(format)
- reqRegexp := regexp.MustCompile(`^([A-Z]+)\s+(.+?)\s+(HTTP/\d+\.\d+)$`)
- for scanner.Scan() {
- l := scanner.Text()
- if err := scanner.Err(); err != nil {
- log.Fatal(err)
- }
- var remote string
- var err error
- logEntry, err := p.ParseString(l)
- if err != nil {
- log.Println(err)
- continue
- }
- if config.Trace {
- pretty.Println(logEntry)
- }
- remote, err = logEntry.Field("remote_addr")
- if err != nil {
- log.Println(err)
- continue
- }
- xff, err := logEntry.Field("http_x_forwarded_for")
- if err != nil && xff != "" {
- if config.Trace {
- log.Printf("Using XFF: %s\n", xff)
- }
- remote = xff
- }
- if remote == "" {
- log.Println("remote is empty: ignoring request.")
- continue
- }
- // only use the first host in case there are multiple hosts in the log
- if cidx := strings.Index(remote, ","); cidx >= 0 {
- remote = remote[0:cidx]
- }
- timestampStr, err := logEntry.Field("time_local")
- if err != nil {
- log.Println(err)
- continue
- }
- timeStamp, err := time.Parse("02/Jan/2006:15:04:05 -0700", timestampStr)
- if err != nil {
- log.Println(err)
- continue
- }
- if tOffset == 0 {
- tOffset = time.Now().Sub(timeStamp)
- }
- if timeStamp.Add(tOffset).After(time.Now()) {
- time.Sleep(timeStamp.Add(tOffset).Sub(time.Now()))
- }
- httpRequest, err := logEntry.Field("request")
- if err != nil {
- log.Println(err)
- continue
- }
- reqData := reqRegexp.FindStringSubmatch(httpRequest)
- if len(reqData) < 4 {
- log.Printf("reqData is too short: %d instead of 4\n", len(reqData))
- continue
- }
- host := config.HostName
- if host == "" {
- host = "[not available]"
- }
- request := data.Request{
- IpSrc: remote,
- Origin: remote,
- Source: remote,
- IpDst: "127.0.0.1",
- PortSrc: 0,
- PortDst: 0,
- TcpSeq: 0,
- CreatedAt: timeStamp.Add(tOffset).UnixNano(),
- Url: reqData[2],
- Method: reqData[1],
- Host: host,
- Protocol: reqData[3],
- }
- request.Referer, _ = logEntry.Field("http_referer")
- request.UserAgent, _ = logEntry.Field("http_user_agent")
- if config.Trace {
- log.Printf("[%s] %s\n", request.Source, request.Url)
- }
- count++
- publishRequest(config.NatsQueue, &request)
- }
- }
|