httplex.go 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346
  1. // Copyright 2016 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package httpguts
  5. import (
  6. "net"
  7. "strings"
  8. "unicode/utf8"
  9. "golang.org/x/net/idna"
  10. )
  11. var isTokenTable = [127]bool{
  12. '!': true,
  13. '#': true,
  14. '$': true,
  15. '%': true,
  16. '&': true,
  17. '\'': true,
  18. '*': true,
  19. '+': true,
  20. '-': true,
  21. '.': true,
  22. '0': true,
  23. '1': true,
  24. '2': true,
  25. '3': true,
  26. '4': true,
  27. '5': true,
  28. '6': true,
  29. '7': true,
  30. '8': true,
  31. '9': true,
  32. 'A': true,
  33. 'B': true,
  34. 'C': true,
  35. 'D': true,
  36. 'E': true,
  37. 'F': true,
  38. 'G': true,
  39. 'H': true,
  40. 'I': true,
  41. 'J': true,
  42. 'K': true,
  43. 'L': true,
  44. 'M': true,
  45. 'N': true,
  46. 'O': true,
  47. 'P': true,
  48. 'Q': true,
  49. 'R': true,
  50. 'S': true,
  51. 'T': true,
  52. 'U': true,
  53. 'W': true,
  54. 'V': true,
  55. 'X': true,
  56. 'Y': true,
  57. 'Z': true,
  58. '^': true,
  59. '_': true,
  60. '`': true,
  61. 'a': true,
  62. 'b': true,
  63. 'c': true,
  64. 'd': true,
  65. 'e': true,
  66. 'f': true,
  67. 'g': true,
  68. 'h': true,
  69. 'i': true,
  70. 'j': true,
  71. 'k': true,
  72. 'l': true,
  73. 'm': true,
  74. 'n': true,
  75. 'o': true,
  76. 'p': true,
  77. 'q': true,
  78. 'r': true,
  79. 's': true,
  80. 't': true,
  81. 'u': true,
  82. 'v': true,
  83. 'w': true,
  84. 'x': true,
  85. 'y': true,
  86. 'z': true,
  87. '|': true,
  88. '~': true,
  89. }
  90. func IsTokenRune(r rune) bool {
  91. i := int(r)
  92. return i < len(isTokenTable) && isTokenTable[i]
  93. }
  94. func isNotToken(r rune) bool {
  95. return !IsTokenRune(r)
  96. }
  97. // HeaderValuesContainsToken reports whether any string in values
  98. // contains the provided token, ASCII case-insensitively.
  99. func HeaderValuesContainsToken(values []string, token string) bool {
  100. for _, v := range values {
  101. if headerValueContainsToken(v, token) {
  102. return true
  103. }
  104. }
  105. return false
  106. }
  107. // isOWS reports whether b is an optional whitespace byte, as defined
  108. // by RFC 7230 section 3.2.3.
  109. func isOWS(b byte) bool { return b == ' ' || b == '\t' }
  110. // trimOWS returns x with all optional whitespace removes from the
  111. // beginning and end.
  112. func trimOWS(x string) string {
  113. // TODO: consider using strings.Trim(x, " \t") instead,
  114. // if and when it's fast enough. See issue 10292.
  115. // But this ASCII-only code will probably always beat UTF-8
  116. // aware code.
  117. for len(x) > 0 && isOWS(x[0]) {
  118. x = x[1:]
  119. }
  120. for len(x) > 0 && isOWS(x[len(x)-1]) {
  121. x = x[:len(x)-1]
  122. }
  123. return x
  124. }
  125. // headerValueContainsToken reports whether v (assumed to be a
  126. // 0#element, in the ABNF extension described in RFC 7230 section 7)
  127. // contains token amongst its comma-separated tokens, ASCII
  128. // case-insensitively.
  129. func headerValueContainsToken(v string, token string) bool {
  130. v = trimOWS(v)
  131. if comma := strings.IndexByte(v, ','); comma != -1 {
  132. return tokenEqual(trimOWS(v[:comma]), token) || headerValueContainsToken(v[comma+1:], token)
  133. }
  134. return tokenEqual(v, token)
  135. }
  136. // lowerASCII returns the ASCII lowercase version of b.
  137. func lowerASCII(b byte) byte {
  138. if 'A' <= b && b <= 'Z' {
  139. return b + ('a' - 'A')
  140. }
  141. return b
  142. }
  143. // tokenEqual reports whether t1 and t2 are equal, ASCII case-insensitively.
  144. func tokenEqual(t1, t2 string) bool {
  145. if len(t1) != len(t2) {
  146. return false
  147. }
  148. for i, b := range t1 {
  149. if b >= utf8.RuneSelf {
  150. // No UTF-8 or non-ASCII allowed in tokens.
  151. return false
  152. }
  153. if lowerASCII(byte(b)) != lowerASCII(t2[i]) {
  154. return false
  155. }
  156. }
  157. return true
  158. }
  159. // isLWS reports whether b is linear white space, according
  160. // to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
  161. // LWS = [CRLF] 1*( SP | HT )
  162. func isLWS(b byte) bool { return b == ' ' || b == '\t' }
  163. // isCTL reports whether b is a control byte, according
  164. // to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
  165. // CTL = <any US-ASCII control character
  166. // (octets 0 - 31) and DEL (127)>
  167. func isCTL(b byte) bool {
  168. const del = 0x7f // a CTL
  169. return b < ' ' || b == del
  170. }
  171. // ValidHeaderFieldName reports whether v is a valid HTTP/1.x header name.
  172. // HTTP/2 imposes the additional restriction that uppercase ASCII
  173. // letters are not allowed.
  174. //
  175. // RFC 7230 says:
  176. // header-field = field-name ":" OWS field-value OWS
  177. // field-name = token
  178. // token = 1*tchar
  179. // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
  180. // "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
  181. func ValidHeaderFieldName(v string) bool {
  182. if len(v) == 0 {
  183. return false
  184. }
  185. for _, r := range v {
  186. if !IsTokenRune(r) {
  187. return false
  188. }
  189. }
  190. return true
  191. }
  192. // ValidHostHeader reports whether h is a valid host header.
  193. func ValidHostHeader(h string) bool {
  194. // The latest spec is actually this:
  195. //
  196. // http://tools.ietf.org/html/rfc7230#section-5.4
  197. // Host = uri-host [ ":" port ]
  198. //
  199. // Where uri-host is:
  200. // http://tools.ietf.org/html/rfc3986#section-3.2.2
  201. //
  202. // But we're going to be much more lenient for now and just
  203. // search for any byte that's not a valid byte in any of those
  204. // expressions.
  205. for i := 0; i < len(h); i++ {
  206. if !validHostByte[h[i]] {
  207. return false
  208. }
  209. }
  210. return true
  211. }
  212. // See the validHostHeader comment.
  213. var validHostByte = [256]bool{
  214. '0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6': true, '7': true,
  215. '8': true, '9': true,
  216. 'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true, 'h': true,
  217. 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true, 'o': true, 'p': true,
  218. 'q': true, 'r': true, 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
  219. 'y': true, 'z': true,
  220. 'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true, 'H': true,
  221. 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true, 'O': true, 'P': true,
  222. 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
  223. 'Y': true, 'Z': true,
  224. '!': true, // sub-delims
  225. '$': true, // sub-delims
  226. '%': true, // pct-encoded (and used in IPv6 zones)
  227. '&': true, // sub-delims
  228. '(': true, // sub-delims
  229. ')': true, // sub-delims
  230. '*': true, // sub-delims
  231. '+': true, // sub-delims
  232. ',': true, // sub-delims
  233. '-': true, // unreserved
  234. '.': true, // unreserved
  235. ':': true, // IPv6address + Host expression's optional port
  236. ';': true, // sub-delims
  237. '=': true, // sub-delims
  238. '[': true,
  239. '\'': true, // sub-delims
  240. ']': true,
  241. '_': true, // unreserved
  242. '~': true, // unreserved
  243. }
  244. // ValidHeaderFieldValue reports whether v is a valid "field-value" according to
  245. // http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 :
  246. //
  247. // message-header = field-name ":" [ field-value ]
  248. // field-value = *( field-content | LWS )
  249. // field-content = <the OCTETs making up the field-value
  250. // and consisting of either *TEXT or combinations
  251. // of token, separators, and quoted-string>
  252. //
  253. // http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 :
  254. //
  255. // TEXT = <any OCTET except CTLs,
  256. // but including LWS>
  257. // LWS = [CRLF] 1*( SP | HT )
  258. // CTL = <any US-ASCII control character
  259. // (octets 0 - 31) and DEL (127)>
  260. //
  261. // RFC 7230 says:
  262. // field-value = *( field-content / obs-fold )
  263. // obj-fold = N/A to http2, and deprecated
  264. // field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
  265. // field-vchar = VCHAR / obs-text
  266. // obs-text = %x80-FF
  267. // VCHAR = "any visible [USASCII] character"
  268. //
  269. // http2 further says: "Similarly, HTTP/2 allows header field values
  270. // that are not valid. While most of the values that can be encoded
  271. // will not alter header field parsing, carriage return (CR, ASCII
  272. // 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII
  273. // 0x0) might be exploited by an attacker if they are translated
  274. // verbatim. Any request or response that contains a character not
  275. // permitted in a header field value MUST be treated as malformed
  276. // (Section 8.1.2.6). Valid characters are defined by the
  277. // field-content ABNF rule in Section 3.2 of [RFC7230]."
  278. //
  279. // This function does not (yet?) properly handle the rejection of
  280. // strings that begin or end with SP or HTAB.
  281. func ValidHeaderFieldValue(v string) bool {
  282. for i := 0; i < len(v); i++ {
  283. b := v[i]
  284. if isCTL(b) && !isLWS(b) {
  285. return false
  286. }
  287. }
  288. return true
  289. }
  290. func isASCII(s string) bool {
  291. for i := 0; i < len(s); i++ {
  292. if s[i] >= utf8.RuneSelf {
  293. return false
  294. }
  295. }
  296. return true
  297. }
  298. // PunycodeHostPort returns the IDNA Punycode version
  299. // of the provided "host" or "host:port" string.
  300. func PunycodeHostPort(v string) (string, error) {
  301. if isASCII(v) {
  302. return v, nil
  303. }
  304. host, port, err := net.SplitHostPort(v)
  305. if err != nil {
  306. // The input 'v' argument was just a "host" argument,
  307. // without a port. This error should not be returned
  308. // to the caller.
  309. host = v
  310. port = ""
  311. }
  312. host, err = idna.ToASCII(host)
  313. if err != nil {
  314. // Non-UTF-8? Not representable in Punycode, in any
  315. // case.
  316. return "", err
  317. }
  318. if port == "" {
  319. return host, nil
  320. }
  321. return net.JoinHostPort(host, port), nil
  322. }