utils.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391
  1. /*
  2. * MinIO Go Library for Amazon S3 Compatible Cloud Storage
  3. * Copyright 2015-2020 MinIO, Inc.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. package s3utils
  18. import (
  19. "bytes"
  20. "encoding/hex"
  21. "errors"
  22. "net"
  23. "net/url"
  24. "regexp"
  25. "sort"
  26. "strings"
  27. "unicode/utf8"
  28. )
  29. // Sentinel URL is the default url value which is invalid.
  30. var sentinelURL = url.URL{}
  31. // IsValidDomain validates if input string is a valid domain name.
  32. func IsValidDomain(host string) bool {
  33. // See RFC 1035, RFC 3696.
  34. host = strings.TrimSpace(host)
  35. if len(host) == 0 || len(host) > 255 {
  36. return false
  37. }
  38. // host cannot start or end with "-"
  39. if host[len(host)-1:] == "-" || host[:1] == "-" {
  40. return false
  41. }
  42. // host cannot start or end with "_"
  43. if host[len(host)-1:] == "_" || host[:1] == "_" {
  44. return false
  45. }
  46. // host cannot start with a "."
  47. if host[:1] == "." {
  48. return false
  49. }
  50. // All non alphanumeric characters are invalid.
  51. if strings.ContainsAny(host, "`~!@#$%^&*()+={}[]|\\\"';:><?/") {
  52. return false
  53. }
  54. // No need to regexp match, since the list is non-exhaustive.
  55. // We let it valid and fail later.
  56. return true
  57. }
  58. // IsValidIP parses input string for ip address validity.
  59. func IsValidIP(ip string) bool {
  60. return net.ParseIP(ip) != nil
  61. }
  62. // IsVirtualHostSupported - verifies if bucketName can be part of
  63. // virtual host. Currently only Amazon S3 and Google Cloud Storage
  64. // would support this.
  65. func IsVirtualHostSupported(endpointURL url.URL, bucketName string) bool {
  66. if endpointURL == sentinelURL {
  67. return false
  68. }
  69. // bucketName can be valid but '.' in the hostname will fail SSL
  70. // certificate validation. So do not use host-style for such buckets.
  71. if endpointURL.Scheme == "https" && strings.Contains(bucketName, ".") {
  72. return false
  73. }
  74. // Return true for all other cases
  75. return IsAmazonEndpoint(endpointURL) || IsGoogleEndpoint(endpointURL) || IsAliyunOSSEndpoint(endpointURL)
  76. }
  77. // Refer for region styles - https://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region
  78. // amazonS3HostHyphen - regular expression used to determine if an arg is s3 host in hyphenated style.
  79. var amazonS3HostHyphen = regexp.MustCompile(`^s3-(.*?).amazonaws.com$`)
  80. // amazonS3HostDualStack - regular expression used to determine if an arg is s3 host dualstack.
  81. var amazonS3HostDualStack = regexp.MustCompile(`^s3.dualstack.(.*?).amazonaws.com$`)
  82. // amazonS3HostDot - regular expression used to determine if an arg is s3 host in . style.
  83. var amazonS3HostDot = regexp.MustCompile(`^s3.(.*?).amazonaws.com$`)
  84. // amazonS3ChinaHost - regular expression used to determine if the arg is s3 china host.
  85. var amazonS3ChinaHost = regexp.MustCompile(`^s3.(cn.*?).amazonaws.com.cn$`)
  86. // amazonS3ChinaHostDualStack - regular expression used to determine if the arg is s3 china host dualstack.
  87. var amazonS3ChinaHostDualStack = regexp.MustCompile(`^s3.dualstack.(cn.*?).amazonaws.com.cn$`)
  88. // Regular expression used to determine if the arg is elb host.
  89. var elbAmazonRegex = regexp.MustCompile(`elb(.*?).amazonaws.com$`)
  90. // Regular expression used to determine if the arg is elb host in china.
  91. var elbAmazonCnRegex = regexp.MustCompile(`elb(.*?).amazonaws.com.cn$`)
  92. // GetRegionFromURL - returns a region from url host.
  93. func GetRegionFromURL(endpointURL url.URL) string {
  94. if endpointURL == sentinelURL {
  95. return ""
  96. }
  97. if endpointURL.Host == "s3-external-1.amazonaws.com" {
  98. return ""
  99. }
  100. if IsAmazonGovCloudEndpoint(endpointURL) {
  101. return "us-gov-west-1"
  102. }
  103. // if elb's are used we cannot calculate which region it may be, just return empty.
  104. if elbAmazonRegex.MatchString(endpointURL.Host) || elbAmazonCnRegex.MatchString(endpointURL.Host) {
  105. return ""
  106. }
  107. parts := amazonS3HostDualStack.FindStringSubmatch(endpointURL.Host)
  108. if len(parts) > 1 {
  109. return parts[1]
  110. }
  111. parts = amazonS3HostHyphen.FindStringSubmatch(endpointURL.Host)
  112. if len(parts) > 1 {
  113. return parts[1]
  114. }
  115. parts = amazonS3ChinaHost.FindStringSubmatch(endpointURL.Host)
  116. if len(parts) > 1 {
  117. return parts[1]
  118. }
  119. parts = amazonS3ChinaHostDualStack.FindStringSubmatch(endpointURL.Host)
  120. if len(parts) > 1 {
  121. return parts[1]
  122. }
  123. parts = amazonS3HostDot.FindStringSubmatch(endpointURL.Host)
  124. if len(parts) > 1 {
  125. return parts[1]
  126. }
  127. return ""
  128. }
  129. // IsAliyunOSSEndpoint - Match if it is exactly Aliyun OSS endpoint.
  130. func IsAliyunOSSEndpoint(endpointURL url.URL) bool {
  131. return strings.HasSuffix(endpointURL.Host, "aliyuncs.com")
  132. }
  133. // IsAmazonEndpoint - Match if it is exactly Amazon S3 endpoint.
  134. func IsAmazonEndpoint(endpointURL url.URL) bool {
  135. if endpointURL.Host == "s3-external-1.amazonaws.com" || endpointURL.Host == "s3.amazonaws.com" {
  136. return true
  137. }
  138. return GetRegionFromURL(endpointURL) != ""
  139. }
  140. // IsAmazonGovCloudEndpoint - Match if it is exactly Amazon S3 GovCloud endpoint.
  141. func IsAmazonGovCloudEndpoint(endpointURL url.URL) bool {
  142. if endpointURL == sentinelURL {
  143. return false
  144. }
  145. return (endpointURL.Host == "s3-us-gov-west-1.amazonaws.com" ||
  146. IsAmazonFIPSGovCloudEndpoint(endpointURL))
  147. }
  148. // IsAmazonFIPSGovCloudEndpoint - Match if it is exactly Amazon S3 FIPS GovCloud endpoint.
  149. // See https://aws.amazon.com/compliance/fips.
  150. func IsAmazonFIPSGovCloudEndpoint(endpointURL url.URL) bool {
  151. if endpointURL == sentinelURL {
  152. return false
  153. }
  154. return endpointURL.Host == "s3-fips-us-gov-west-1.amazonaws.com" ||
  155. endpointURL.Host == "s3-fips.dualstack.us-gov-west-1.amazonaws.com"
  156. }
  157. // IsAmazonFIPSUSEastWestEndpoint - Match if it is exactly Amazon S3 FIPS US East/West endpoint.
  158. // See https://aws.amazon.com/compliance/fips.
  159. func IsAmazonFIPSUSEastWestEndpoint(endpointURL url.URL) bool {
  160. if endpointURL == sentinelURL {
  161. return false
  162. }
  163. switch endpointURL.Host {
  164. case "s3-fips.us-east-2.amazonaws.com":
  165. case "s3-fips.dualstack.us-west-1.amazonaws.com":
  166. case "s3-fips.dualstack.us-west-2.amazonaws.com":
  167. case "s3-fips.dualstack.us-east-2.amazonaws.com":
  168. case "s3-fips.dualstack.us-east-1.amazonaws.com":
  169. case "s3-fips.us-west-1.amazonaws.com":
  170. case "s3-fips.us-west-2.amazonaws.com":
  171. case "s3-fips.us-east-1.amazonaws.com":
  172. default:
  173. return false
  174. }
  175. return true
  176. }
  177. // IsAmazonFIPSEndpoint - Match if it is exactly Amazon S3 FIPS endpoint.
  178. // See https://aws.amazon.com/compliance/fips.
  179. func IsAmazonFIPSEndpoint(endpointURL url.URL) bool {
  180. return IsAmazonFIPSUSEastWestEndpoint(endpointURL) || IsAmazonFIPSGovCloudEndpoint(endpointURL)
  181. }
  182. // IsGoogleEndpoint - Match if it is exactly Google cloud storage endpoint.
  183. func IsGoogleEndpoint(endpointURL url.URL) bool {
  184. if endpointURL == sentinelURL {
  185. return false
  186. }
  187. return endpointURL.Host == "storage.googleapis.com"
  188. }
  189. // Expects ascii encoded strings - from output of urlEncodePath
  190. func percentEncodeSlash(s string) string {
  191. return strings.Replace(s, "/", "%2F", -1)
  192. }
  193. // QueryEncode - encodes query values in their URL encoded form. In
  194. // addition to the percent encoding performed by urlEncodePath() used
  195. // here, it also percent encodes '/' (forward slash)
  196. func QueryEncode(v url.Values) string {
  197. if v == nil {
  198. return ""
  199. }
  200. var buf bytes.Buffer
  201. keys := make([]string, 0, len(v))
  202. for k := range v {
  203. keys = append(keys, k)
  204. }
  205. sort.Strings(keys)
  206. for _, k := range keys {
  207. vs := v[k]
  208. prefix := percentEncodeSlash(EncodePath(k)) + "="
  209. for _, v := range vs {
  210. if buf.Len() > 0 {
  211. buf.WriteByte('&')
  212. }
  213. buf.WriteString(prefix)
  214. buf.WriteString(percentEncodeSlash(EncodePath(v)))
  215. }
  216. }
  217. return buf.String()
  218. }
  219. // TagDecode - decodes canonical tag into map of key and value.
  220. func TagDecode(ctag string) map[string]string {
  221. if ctag == "" {
  222. return map[string]string{}
  223. }
  224. tags := strings.Split(ctag, "&")
  225. tagMap := make(map[string]string, len(tags))
  226. var err error
  227. for _, tag := range tags {
  228. kvs := strings.SplitN(tag, "=", 2)
  229. if len(kvs) == 0 {
  230. return map[string]string{}
  231. }
  232. if len(kvs) == 1 {
  233. return map[string]string{}
  234. }
  235. tagMap[kvs[0]], err = url.PathUnescape(kvs[1])
  236. if err != nil {
  237. continue
  238. }
  239. }
  240. return tagMap
  241. }
  242. // TagEncode - encodes tag values in their URL encoded form. In
  243. // addition to the percent encoding performed by urlEncodePath() used
  244. // here, it also percent encodes '/' (forward slash)
  245. func TagEncode(tags map[string]string) string {
  246. if tags == nil {
  247. return ""
  248. }
  249. values := url.Values{}
  250. for k, v := range tags {
  251. values[k] = []string{v}
  252. }
  253. return QueryEncode(values)
  254. }
  255. // if object matches reserved string, no need to encode them
  256. var reservedObjectNames = regexp.MustCompile("^[a-zA-Z0-9-_.~/]+$")
  257. // EncodePath encode the strings from UTF-8 byte representations to HTML hex escape sequences
  258. //
  259. // This is necessary since regular url.Parse() and url.Encode() functions do not support UTF-8
  260. // non english characters cannot be parsed due to the nature in which url.Encode() is written
  261. //
  262. // This function on the other hand is a direct replacement for url.Encode() technique to support
  263. // pretty much every UTF-8 character.
  264. func EncodePath(pathName string) string {
  265. if reservedObjectNames.MatchString(pathName) {
  266. return pathName
  267. }
  268. var encodedPathname strings.Builder
  269. for _, s := range pathName {
  270. if 'A' <= s && s <= 'Z' || 'a' <= s && s <= 'z' || '0' <= s && s <= '9' { // §2.3 Unreserved characters (mark)
  271. encodedPathname.WriteRune(s)
  272. continue
  273. }
  274. switch s {
  275. case '-', '_', '.', '~', '/': // §2.3 Unreserved characters (mark)
  276. encodedPathname.WriteRune(s)
  277. continue
  278. default:
  279. len := utf8.RuneLen(s)
  280. if len < 0 {
  281. // if utf8 cannot convert return the same string as is
  282. return pathName
  283. }
  284. u := make([]byte, len)
  285. utf8.EncodeRune(u, s)
  286. for _, r := range u {
  287. hex := hex.EncodeToString([]byte{r})
  288. encodedPathname.WriteString("%" + strings.ToUpper(hex))
  289. }
  290. }
  291. }
  292. return encodedPathname.String()
  293. }
  294. // We support '.' with bucket names but we fallback to using path
  295. // style requests instead for such buckets.
  296. var (
  297. validBucketName = regexp.MustCompile(`^[A-Za-z0-9][A-Za-z0-9\.\-\_\:]{1,61}[A-Za-z0-9]$`)
  298. validBucketNameStrict = regexp.MustCompile(`^[a-z0-9][a-z0-9\.\-]{1,61}[a-z0-9]$`)
  299. ipAddress = regexp.MustCompile(`^(\d+\.){3}\d+$`)
  300. )
  301. // Common checker for both stricter and basic validation.
  302. func checkBucketNameCommon(bucketName string, strict bool) (err error) {
  303. if strings.TrimSpace(bucketName) == "" {
  304. return errors.New("Bucket name cannot be empty")
  305. }
  306. if len(bucketName) < 3 {
  307. return errors.New("Bucket name cannot be shorter than 3 characters")
  308. }
  309. if len(bucketName) > 63 {
  310. return errors.New("Bucket name cannot be longer than 63 characters")
  311. }
  312. if ipAddress.MatchString(bucketName) {
  313. return errors.New("Bucket name cannot be an ip address")
  314. }
  315. if strings.Contains(bucketName, "..") || strings.Contains(bucketName, ".-") || strings.Contains(bucketName, "-.") {
  316. return errors.New("Bucket name contains invalid characters")
  317. }
  318. if strict {
  319. if !validBucketNameStrict.MatchString(bucketName) {
  320. err = errors.New("Bucket name contains invalid characters")
  321. }
  322. return err
  323. }
  324. if !validBucketName.MatchString(bucketName) {
  325. err = errors.New("Bucket name contains invalid characters")
  326. }
  327. return err
  328. }
  329. // CheckValidBucketName - checks if we have a valid input bucket name.
  330. func CheckValidBucketName(bucketName string) (err error) {
  331. return checkBucketNameCommon(bucketName, false)
  332. }
  333. // CheckValidBucketNameStrict - checks if we have a valid input bucket name.
  334. // This is a stricter version.
  335. // - http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingBucket.html
  336. func CheckValidBucketNameStrict(bucketName string) (err error) {
  337. return checkBucketNameCommon(bucketName, true)
  338. }
  339. // CheckValidObjectNamePrefix - checks if we have a valid input object name prefix.
  340. // - http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingMetadata.html
  341. func CheckValidObjectNamePrefix(objectName string) error {
  342. if len(objectName) > 1024 {
  343. return errors.New("Object name cannot be longer than 1024 characters")
  344. }
  345. if !utf8.ValidString(objectName) {
  346. return errors.New("Object name with non UTF-8 strings are not supported")
  347. }
  348. return nil
  349. }
  350. // CheckValidObjectName - checks if we have a valid input object name.
  351. // - http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingMetadata.html
  352. func CheckValidObjectName(objectName string) error {
  353. if strings.TrimSpace(objectName) == "" {
  354. return errors.New("Object name cannot be empty")
  355. }
  356. return CheckValidObjectNamePrefix(objectName)
  357. }