nginx.go 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393
  1. package main
  2. import (
  3. "bufio"
  4. "log"
  5. "os"
  6. "regexp"
  7. "strings"
  8. "time"
  9. "git.scraperwall.com/scw/data"
  10. "github.com/hpcloud/tail"
  11. "github.com/kr/pretty"
  12. "github.com/satyrius/gonx"
  13. )
  14. func nginxLogCapture(logfile, format string) {
  15. if _, err := os.Stat(logfile); err != nil {
  16. log.Fatalf("%s: %s", logfile, err)
  17. }
  18. t, err := tail.TailFile(logfile, tail.Config{
  19. Follow: true, // follow the file
  20. ReOpen: true, // reopen log file when it gets closed/rotated
  21. Logger: tail.DiscardingLogger, // don't log anything
  22. Location: &tail.SeekInfo{Offset: 0, Whence: 2}, // start at the end of the file
  23. })
  24. if err != nil {
  25. log.Fatalf("%s: %s", logfile, err)
  26. }
  27. // `$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"`
  28. p := gonx.NewParser(format)
  29. reqRegexp := regexp.MustCompile(`^([A-Z]+)\s+(.+?)\s+(HTTP/\d+\.\d+)$`)
  30. for line := range t.Lines {
  31. var remote string
  32. var err error
  33. l := line.Text
  34. logEntry, err := p.ParseString(l)
  35. if err != nil {
  36. log.Println(err)
  37. continue
  38. }
  39. if config.Trace {
  40. pretty.Println(logEntry)
  41. }
  42. remote, err = logEntry.Field("remote_addr")
  43. if err != nil {
  44. log.Println(err)
  45. continue
  46. }
  47. xff, err := logEntry.Field("http_x_forwarded_for")
  48. if err != nil && xff != "" {
  49. if config.Trace {
  50. log.Printf("Using XFF: %s\n", xff)
  51. }
  52. remote = xff
  53. }
  54. if remote == "" {
  55. log.Println("remote is empty: ignoring request.")
  56. continue
  57. }
  58. // only use the first host in case there are multiple hosts in the log
  59. if cidx := strings.Index(remote, ","); cidx >= 0 {
  60. remote = remote[0:cidx]
  61. }
  62. timestampStr, err := logEntry.Field("time_local")
  63. if err != nil {
  64. log.Println(err)
  65. continue
  66. }
  67. timeStamp, err := time.Parse("02/Jan/2006:15:04:05 -0700", timestampStr)
  68. if err != nil {
  69. log.Println(err)
  70. continue
  71. }
  72. httpRequest, err := logEntry.Field("request")
  73. if err != nil {
  74. log.Println(err)
  75. continue
  76. }
  77. reqData := reqRegexp.FindStringSubmatch(httpRequest)
  78. if len(reqData) < 4 {
  79. log.Printf("reqData is too short: %d instead of 4\n", len(reqData))
  80. continue
  81. }
  82. host := config.HostName
  83. if host == "" {
  84. host = "[not available]"
  85. }
  86. request := data.Request{
  87. IpSrc: remote,
  88. Origin: remote,
  89. Source: remote,
  90. IpDst: "127.0.0.1",
  91. PortSrc: 0,
  92. PortDst: 0,
  93. TcpSeq: 0,
  94. CreatedAt: timeStamp.Unix(),
  95. Url: reqData[2],
  96. Method: reqData[1],
  97. Host: host,
  98. Protocol: reqData[3],
  99. }
  100. request.Referer, _ = logEntry.Field("http_referer")
  101. request.UserAgent, _ = logEntry.Field("http_user_agent")
  102. if config.Trace {
  103. log.Printf("[%s] %s\n", request.Source, request.Url)
  104. }
  105. count++
  106. publishRequest(config.NatsQueue, &request)
  107. }
  108. }
  109. func nginx2LogCapture(logfile, format string) {
  110. if _, err := os.Stat(logfile); err != nil {
  111. log.Fatalf("%s: %s", logfile, err)
  112. }
  113. t, err := tail.TailFile(logfile, tail.Config{
  114. Follow: true, // follow the file
  115. ReOpen: true, // reopen log file when it gets closed/rotated
  116. Logger: tail.DiscardingLogger, // don't log anything
  117. Location: &tail.SeekInfo{Offset: 0, Whence: 2}, // start at the end of the file
  118. })
  119. if err != nil {
  120. log.Fatalf("%s: %s", logfile, err)
  121. }
  122. // `$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"`
  123. p := gonx.NewParser(format)
  124. reqRegexp := regexp.MustCompile(`^([A-Z]+)\s+(.+?)\s+(HTTP/\d+\.\d+)$`)
  125. var tOffset time.Duration
  126. var ts time.Time
  127. for line := range t.Lines {
  128. var remote string
  129. var err error
  130. l := line.Text
  131. logEntry, err := p.ParseString(l)
  132. if err != nil {
  133. log.Println(err)
  134. continue
  135. }
  136. if tOffset == 0 {
  137. tsStr, err := logEntry.Field("time_local")
  138. if err != nil {
  139. log.Printf("%s: %s\n", tsStr, err)
  140. continue
  141. }
  142. ts, err = time.Parse("02/Jan/2006 15:04:05 -0700", tsStr)
  143. if err != nil {
  144. log.Printf("%s: %s\n", tsStr, err)
  145. continue
  146. }
  147. tOffset = time.Now().Sub(ts)
  148. }
  149. tsCheck := ts.Add(tOffset)
  150. if tsCheck.After(time.Now()) {
  151. time.Sleep(tsCheck.Sub(time.Now()))
  152. }
  153. if config.Trace {
  154. pretty.Println(logEntry)
  155. }
  156. remote, err = logEntry.Field("remote_addr")
  157. if err != nil {
  158. log.Println(err)
  159. continue
  160. }
  161. xff, err := logEntry.Field("http_x_forwarded_for")
  162. if err != nil && xff != "" {
  163. if config.Trace {
  164. log.Printf("Using XFF: %s\n", xff)
  165. }
  166. remote = xff
  167. }
  168. if remote == "" {
  169. log.Println("remote is empty: ignoring request.")
  170. continue
  171. }
  172. // only use the first host in case there are multiple hosts in the log
  173. if cidx := strings.Index(remote, ","); cidx >= 0 {
  174. remote = remote[0:cidx]
  175. }
  176. /*
  177. timestampStr, err := logEntry.Field("time_local")
  178. if err != nil {
  179. log.Println(err)
  180. continue
  181. }
  182. timeStamp, err := time.Parse("02/Jan/2006:15:04:05 -0700", timestampStr)
  183. if err != nil {
  184. log.Println(err)
  185. continue
  186. }
  187. */
  188. httpRequest, err := logEntry.Field("request")
  189. if err != nil {
  190. log.Println(err)
  191. continue
  192. }
  193. reqData := reqRegexp.FindStringSubmatch(httpRequest)
  194. if len(reqData) < 4 {
  195. log.Printf("reqData is too short: %d instead of 4\n", len(reqData))
  196. continue
  197. }
  198. host := config.HostName
  199. if host == "" {
  200. host = "[not available]"
  201. }
  202. request := data.Request{
  203. IpSrc: remote,
  204. Origin: remote,
  205. Source: remote,
  206. IpDst: "127.0.0.1",
  207. PortSrc: 0,
  208. PortDst: 0,
  209. TcpSeq: 0,
  210. CreatedAt: tsCheck.UnixNano(),
  211. Url: reqData[2],
  212. Method: reqData[1],
  213. Host: host,
  214. Protocol: reqData[3],
  215. }
  216. request.Referer, _ = logEntry.Field("http_referer")
  217. request.UserAgent, _ = logEntry.Field("http_user_agent")
  218. if config.Trace {
  219. log.Printf("[%s] %s\n", request.Source, request.Url)
  220. }
  221. count++
  222. publishRequest(config.NatsQueue, &request)
  223. }
  224. }
  225. func nginxLogReplay(logfile, format string) {
  226. file, err := os.Open(logfile)
  227. if err != nil {
  228. log.Fatalf("%s: %s", logfile, err)
  229. }
  230. defer file.Close()
  231. scanner := bufio.NewScanner(file)
  232. var tOffset time.Duration
  233. // `$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"`
  234. p := gonx.NewParser(format)
  235. reqRegexp := regexp.MustCompile(`^([A-Z]+)\s+(.+?)\s+(HTTP/\d+\.\d+)$`)
  236. for scanner.Scan() {
  237. l := scanner.Text()
  238. if err := scanner.Err(); err != nil {
  239. log.Fatal(err)
  240. }
  241. var remote string
  242. var err error
  243. logEntry, err := p.ParseString(l)
  244. if err != nil {
  245. log.Println(err)
  246. continue
  247. }
  248. if config.Trace {
  249. pretty.Println(logEntry)
  250. }
  251. remote, err = logEntry.Field("remote_addr")
  252. if err != nil {
  253. log.Println(err)
  254. continue
  255. }
  256. xff, err := logEntry.Field("http_x_forwarded_for")
  257. if err != nil && xff != "" {
  258. if config.Trace {
  259. log.Printf("Using XFF: %s\n", xff)
  260. }
  261. remote = xff
  262. }
  263. if remote == "" {
  264. log.Println("remote is empty: ignoring request.")
  265. continue
  266. }
  267. // only use the first host in case there are multiple hosts in the log
  268. if cidx := strings.Index(remote, ","); cidx >= 0 {
  269. remote = remote[0:cidx]
  270. }
  271. timestampStr, err := logEntry.Field("time_local")
  272. if err != nil {
  273. log.Println(err)
  274. continue
  275. }
  276. timeStamp, err := time.Parse("02/Jan/2006:15:04:05 -0700", timestampStr)
  277. if err != nil {
  278. log.Println(err)
  279. continue
  280. }
  281. if tOffset == 0 {
  282. tOffset = time.Now().Sub(timeStamp)
  283. }
  284. if timeStamp.Add(tOffset).After(time.Now()) {
  285. time.Sleep(timeStamp.Add(tOffset).Sub(time.Now()))
  286. }
  287. httpRequest, err := logEntry.Field("request")
  288. if err != nil {
  289. log.Println(err)
  290. continue
  291. }
  292. reqData := reqRegexp.FindStringSubmatch(httpRequest)
  293. if len(reqData) < 4 {
  294. log.Printf("reqData is too short: %d instead of 4\n", len(reqData))
  295. continue
  296. }
  297. host := config.HostName
  298. if host == "" {
  299. host = "[not available]"
  300. }
  301. request := data.Request{
  302. IpSrc: remote,
  303. Origin: remote,
  304. Source: remote,
  305. IpDst: "127.0.0.1",
  306. PortSrc: 0,
  307. PortDst: 0,
  308. TcpSeq: 0,
  309. CreatedAt: timeStamp.Add(tOffset).UnixNano(),
  310. Url: reqData[2],
  311. Method: reqData[1],
  312. Host: host,
  313. Protocol: reqData[3],
  314. }
  315. request.Referer, _ = logEntry.Field("http_referer")
  316. request.UserAgent, _ = logEntry.Field("http_user_agent")
  317. if config.Trace {
  318. log.Printf("[%s] %s\n", request.Source, request.Url)
  319. }
  320. count++
  321. publishRequest(config.NatsQueue, &request)
  322. }
  323. }