nginx.go 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396
  1. package main
  2. import (
  3. "bufio"
  4. "log"
  5. "os"
  6. "regexp"
  7. "strings"
  8. "time"
  9. "git.scraperwall.com/scw/data"
  10. "github.com/hpcloud/tail"
  11. "github.com/kr/pretty"
  12. "github.com/satyrius/gonx"
  13. )
  14. func nginxLogCapture(logfile, format string) {
  15. if _, err := os.Stat(logfile); err != nil {
  16. log.Fatalf("%s: %s", logfile, err)
  17. }
  18. t, err := tail.TailFile(logfile, tail.Config{
  19. Follow: true, // follow the file
  20. ReOpen: true, // reopen log file when it gets closed/rotated
  21. Logger: tail.DiscardingLogger, // don't log anything
  22. Location: &tail.SeekInfo{Offset: 0, Whence: 2}, // start at the end of the file
  23. })
  24. if err != nil {
  25. log.Fatalf("%s: %s", logfile, err)
  26. }
  27. // `$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"`
  28. p := gonx.NewParser(format)
  29. reqRegexp := regexp.MustCompile(`^([A-Z]+)\s+(.+?)\s+(HTTP/\d+\.\d+)$`)
  30. for line := range t.Lines {
  31. var remote string
  32. var err error
  33. l := line.Text
  34. logEntry, err := p.ParseString(l)
  35. if err != nil {
  36. log.Println(err)
  37. continue
  38. }
  39. if config.Trace {
  40. pretty.Println(logEntry)
  41. }
  42. remote, err = logEntry.Field("remote_addr")
  43. if err != nil {
  44. log.Println(err)
  45. continue
  46. }
  47. xff, err := logEntry.Field("http_x_forwarded_for")
  48. if err != nil && xff != "" {
  49. if config.Trace {
  50. log.Printf("Using XFF: %s\n", xff)
  51. }
  52. remote = xff
  53. }
  54. if remote == "" {
  55. log.Println("remote is empty: ignoring request.")
  56. continue
  57. }
  58. // only use the first host in case there are multiple hosts in the log
  59. if cidx := strings.Index(remote, ","); cidx >= 0 {
  60. remote = remote[0:cidx]
  61. }
  62. timestampStr, err := logEntry.Field("time_local")
  63. if err != nil {
  64. log.Println(err)
  65. continue
  66. }
  67. timeStamp, err := time.Parse("02/Jan/2006:15:04:05 -0700", timestampStr)
  68. if err != nil {
  69. log.Println(err)
  70. continue
  71. }
  72. httpRequest, err := logEntry.Field("request")
  73. if err != nil {
  74. log.Println(err)
  75. continue
  76. }
  77. reqData := reqRegexp.FindStringSubmatch(httpRequest)
  78. if len(reqData) < 4 {
  79. log.Printf("reqData is too short: %d instead of 4\n", len(reqData))
  80. continue
  81. }
  82. host, err := logEntry.Field("host")
  83. if err != nil {
  84. host = config.HostName
  85. if host == "" {
  86. host = "[not available]"
  87. }
  88. }
  89. request := data.Request{
  90. IpSrc: remote,
  91. Origin: remote,
  92. Source: remote,
  93. IpDst: "127.0.0.1",
  94. PortSrc: 0,
  95. PortDst: 0,
  96. TcpSeq: 0,
  97. CreatedAt: timeStamp.Unix(),
  98. Url: reqData[2],
  99. Method: reqData[1],
  100. Host: host,
  101. Protocol: reqData[3],
  102. }
  103. request.Referer, _ = logEntry.Field("http_referer")
  104. request.UserAgent, _ = logEntry.Field("http_user_agent")
  105. if config.Trace {
  106. log.Printf("[%s] %s\n", request.Source, request.Url)
  107. }
  108. count++
  109. publishRequest(config.NatsQueue, &request)
  110. }
  111. }
  112. func nginx2LogCapture(logfile, format string) {
  113. if _, err := os.Stat(logfile); err != nil {
  114. log.Fatalf("%s: %s", logfile, err)
  115. }
  116. t, err := tail.TailFile(logfile, tail.Config{
  117. Follow: true, // follow the file
  118. ReOpen: true, // reopen log file when it gets closed/rotated
  119. Logger: tail.DiscardingLogger, // don't log anything
  120. Location: &tail.SeekInfo{Offset: 0, Whence: 2}, // start at the end of the file
  121. })
  122. if err != nil {
  123. log.Fatalf("%s: %s", logfile, err)
  124. }
  125. // `$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"`
  126. p := gonx.NewParser(format)
  127. reqRegexp := regexp.MustCompile(`^([A-Z]+)\s+(.+?)\s+(HTTP/\d+\.\d+)$`)
  128. var tOffset time.Duration
  129. var ts time.Time
  130. for line := range t.Lines {
  131. var remote string
  132. var err error
  133. l := line.Text
  134. logEntry, err := p.ParseString(l)
  135. if err != nil {
  136. log.Println(err)
  137. continue
  138. }
  139. if tOffset == 0 {
  140. tsStr, err := logEntry.Field("time_local")
  141. if err != nil {
  142. log.Printf("%s: %s\n", tsStr, err)
  143. continue
  144. }
  145. ts, err = time.Parse("02/Jan/2006 15:04:05 -0700", tsStr)
  146. if err != nil {
  147. log.Printf("%s: %s\n", tsStr, err)
  148. continue
  149. }
  150. tOffset = time.Now().Sub(ts)
  151. }
  152. tsCheck := ts.Add(tOffset)
  153. if tsCheck.After(time.Now()) {
  154. time.Sleep(tsCheck.Sub(time.Now()))
  155. }
  156. if config.Trace {
  157. pretty.Println(logEntry)
  158. }
  159. remote, err = logEntry.Field("remote_addr")
  160. if err != nil {
  161. log.Println(err)
  162. continue
  163. }
  164. xff, err := logEntry.Field("http_x_forwarded_for")
  165. if err != nil && xff != "" {
  166. if config.Trace {
  167. log.Printf("Using XFF: %s\n", xff)
  168. }
  169. remote = xff
  170. }
  171. if remote == "" {
  172. log.Println("remote is empty: ignoring request.")
  173. continue
  174. }
  175. // only use the first host in case there are multiple hosts in the log
  176. if cidx := strings.Index(remote, ","); cidx >= 0 {
  177. remote = remote[0:cidx]
  178. }
  179. /*
  180. timestampStr, err := logEntry.Field("time_local")
  181. if err != nil {
  182. log.Println(err)
  183. continue
  184. }
  185. timeStamp, err := time.Parse("02/Jan/2006:15:04:05 -0700", timestampStr)
  186. if err != nil {
  187. log.Println(err)
  188. continue
  189. }
  190. */
  191. httpRequest, err := logEntry.Field("request")
  192. if err != nil {
  193. log.Println(err)
  194. continue
  195. }
  196. reqData := reqRegexp.FindStringSubmatch(httpRequest)
  197. if len(reqData) < 4 {
  198. log.Printf("reqData is too short: %d instead of 4\n", len(reqData))
  199. continue
  200. }
  201. host := config.HostName
  202. if host == "" {
  203. host = "[not available]"
  204. }
  205. request := data.Request{
  206. IpSrc: remote,
  207. Origin: remote,
  208. Source: remote,
  209. IpDst: "127.0.0.1",
  210. PortSrc: 0,
  211. PortDst: 0,
  212. TcpSeq: 0,
  213. CreatedAt: tsCheck.UnixNano(),
  214. Url: reqData[2],
  215. Method: reqData[1],
  216. Host: host,
  217. Protocol: reqData[3],
  218. }
  219. request.Referer, _ = logEntry.Field("http_referer")
  220. request.UserAgent, _ = logEntry.Field("http_user_agent")
  221. if config.Trace {
  222. log.Printf("[%s] %s\n", request.Source, request.Url)
  223. }
  224. count++
  225. publishRequest(config.NatsQueue, &request)
  226. }
  227. }
  228. func nginxLogReplay(logfile, format string) {
  229. file, err := os.Open(logfile)
  230. if err != nil {
  231. log.Fatalf("%s: %s", logfile, err)
  232. }
  233. defer file.Close()
  234. scanner := bufio.NewScanner(file)
  235. var tOffset time.Duration
  236. // `$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"`
  237. p := gonx.NewParser(format)
  238. reqRegexp := regexp.MustCompile(`^([A-Z]+)\s+(.+?)\s+(HTTP/\d+\.\d+)$`)
  239. for scanner.Scan() {
  240. l := scanner.Text()
  241. if err := scanner.Err(); err != nil {
  242. log.Fatal(err)
  243. }
  244. var remote string
  245. var err error
  246. logEntry, err := p.ParseString(l)
  247. if err != nil {
  248. log.Println(err)
  249. continue
  250. }
  251. if config.Trace {
  252. pretty.Println(logEntry)
  253. }
  254. remote, err = logEntry.Field("remote_addr")
  255. if err != nil {
  256. log.Println(err)
  257. continue
  258. }
  259. xff, err := logEntry.Field("http_x_forwarded_for")
  260. if err != nil && xff != "" {
  261. if config.Trace {
  262. log.Printf("Using XFF: %s\n", xff)
  263. }
  264. remote = xff
  265. }
  266. if remote == "" {
  267. log.Println("remote is empty: ignoring request.")
  268. continue
  269. }
  270. // only use the first host in case there are multiple hosts in the log
  271. if cidx := strings.Index(remote, ","); cidx >= 0 {
  272. remote = remote[0:cidx]
  273. }
  274. timestampStr, err := logEntry.Field("time_local")
  275. if err != nil {
  276. log.Println(err)
  277. continue
  278. }
  279. timeStamp, err := time.Parse("02/Jan/2006:15:04:05 -0700", timestampStr)
  280. if err != nil {
  281. log.Println(err)
  282. continue
  283. }
  284. if tOffset == 0 {
  285. tOffset = time.Now().Sub(timeStamp)
  286. }
  287. if timeStamp.Add(tOffset).After(time.Now()) {
  288. time.Sleep(timeStamp.Add(tOffset).Sub(time.Now()))
  289. }
  290. httpRequest, err := logEntry.Field("request")
  291. if err != nil {
  292. log.Println(err)
  293. continue
  294. }
  295. reqData := reqRegexp.FindStringSubmatch(httpRequest)
  296. if len(reqData) < 4 {
  297. log.Printf("reqData is too short: %d instead of 4\n", len(reqData))
  298. continue
  299. }
  300. host := config.HostName
  301. if host == "" {
  302. host = "[not available]"
  303. }
  304. request := data.Request{
  305. IpSrc: remote,
  306. Origin: remote,
  307. Source: remote,
  308. IpDst: "127.0.0.1",
  309. PortSrc: 0,
  310. PortDst: 0,
  311. TcpSeq: 0,
  312. CreatedAt: timeStamp.Add(tOffset).UnixNano(),
  313. Url: reqData[2],
  314. Method: reqData[1],
  315. Host: host,
  316. Protocol: reqData[3],
  317. }
  318. request.Referer, _ = logEntry.Field("http_referer")
  319. request.UserAgent, _ = logEntry.Field("http_user_agent")
  320. if config.Trace {
  321. log.Printf("[%s] %s\n", request.Source, request.Url)
  322. }
  323. count++
  324. publishRequest(config.NatsQueue, &request)
  325. }
  326. }