123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391 |
- /*
- * MinIO Go Library for Amazon S3 Compatible Cloud Storage
- * Copyright 2015-2020 MinIO, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package s3utils
- import (
- "bytes"
- "encoding/hex"
- "errors"
- "net"
- "net/url"
- "regexp"
- "sort"
- "strings"
- "unicode/utf8"
- )
- // Sentinel URL is the default url value which is invalid.
- var sentinelURL = url.URL{}
- // IsValidDomain validates if input string is a valid domain name.
- func IsValidDomain(host string) bool {
- // See RFC 1035, RFC 3696.
- host = strings.TrimSpace(host)
- if len(host) == 0 || len(host) > 255 {
- return false
- }
- // host cannot start or end with "-"
- if host[len(host)-1:] == "-" || host[:1] == "-" {
- return false
- }
- // host cannot start or end with "_"
- if host[len(host)-1:] == "_" || host[:1] == "_" {
- return false
- }
- // host cannot start with a "."
- if host[:1] == "." {
- return false
- }
- // All non alphanumeric characters are invalid.
- if strings.ContainsAny(host, "`~!@#$%^&*()+={}[]|\\\"';:><?/") {
- return false
- }
- // No need to regexp match, since the list is non-exhaustive.
- // We let it valid and fail later.
- return true
- }
- // IsValidIP parses input string for ip address validity.
- func IsValidIP(ip string) bool {
- return net.ParseIP(ip) != nil
- }
- // IsVirtualHostSupported - verifies if bucketName can be part of
- // virtual host. Currently only Amazon S3 and Google Cloud Storage
- // would support this.
- func IsVirtualHostSupported(endpointURL url.URL, bucketName string) bool {
- if endpointURL == sentinelURL {
- return false
- }
- // bucketName can be valid but '.' in the hostname will fail SSL
- // certificate validation. So do not use host-style for such buckets.
- if endpointURL.Scheme == "https" && strings.Contains(bucketName, ".") {
- return false
- }
- // Return true for all other cases
- return IsAmazonEndpoint(endpointURL) || IsGoogleEndpoint(endpointURL) || IsAliyunOSSEndpoint(endpointURL)
- }
- // Refer for region styles - https://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region
- // amazonS3HostHyphen - regular expression used to determine if an arg is s3 host in hyphenated style.
- var amazonS3HostHyphen = regexp.MustCompile(`^s3-(.*?).amazonaws.com$`)
- // amazonS3HostDualStack - regular expression used to determine if an arg is s3 host dualstack.
- var amazonS3HostDualStack = regexp.MustCompile(`^s3.dualstack.(.*?).amazonaws.com$`)
- // amazonS3HostDot - regular expression used to determine if an arg is s3 host in . style.
- var amazonS3HostDot = regexp.MustCompile(`^s3.(.*?).amazonaws.com$`)
- // amazonS3ChinaHost - regular expression used to determine if the arg is s3 china host.
- var amazonS3ChinaHost = regexp.MustCompile(`^s3.(cn.*?).amazonaws.com.cn$`)
- // amazonS3ChinaHostDualStack - regular expression used to determine if the arg is s3 china host dualstack.
- var amazonS3ChinaHostDualStack = regexp.MustCompile(`^s3.dualstack.(cn.*?).amazonaws.com.cn$`)
- // Regular expression used to determine if the arg is elb host.
- var elbAmazonRegex = regexp.MustCompile(`elb(.*?).amazonaws.com$`)
- // Regular expression used to determine if the arg is elb host in china.
- var elbAmazonCnRegex = regexp.MustCompile(`elb(.*?).amazonaws.com.cn$`)
- // GetRegionFromURL - returns a region from url host.
- func GetRegionFromURL(endpointURL url.URL) string {
- if endpointURL == sentinelURL {
- return ""
- }
- if endpointURL.Host == "s3-external-1.amazonaws.com" {
- return ""
- }
- if IsAmazonGovCloudEndpoint(endpointURL) {
- return "us-gov-west-1"
- }
- // if elb's are used we cannot calculate which region it may be, just return empty.
- if elbAmazonRegex.MatchString(endpointURL.Host) || elbAmazonCnRegex.MatchString(endpointURL.Host) {
- return ""
- }
- parts := amazonS3HostDualStack.FindStringSubmatch(endpointURL.Host)
- if len(parts) > 1 {
- return parts[1]
- }
- parts = amazonS3HostHyphen.FindStringSubmatch(endpointURL.Host)
- if len(parts) > 1 {
- return parts[1]
- }
- parts = amazonS3ChinaHost.FindStringSubmatch(endpointURL.Host)
- if len(parts) > 1 {
- return parts[1]
- }
- parts = amazonS3ChinaHostDualStack.FindStringSubmatch(endpointURL.Host)
- if len(parts) > 1 {
- return parts[1]
- }
- parts = amazonS3HostDot.FindStringSubmatch(endpointURL.Host)
- if len(parts) > 1 {
- return parts[1]
- }
- return ""
- }
- // IsAliyunOSSEndpoint - Match if it is exactly Aliyun OSS endpoint.
- func IsAliyunOSSEndpoint(endpointURL url.URL) bool {
- return strings.HasSuffix(endpointURL.Host, "aliyuncs.com")
- }
- // IsAmazonEndpoint - Match if it is exactly Amazon S3 endpoint.
- func IsAmazonEndpoint(endpointURL url.URL) bool {
- if endpointURL.Host == "s3-external-1.amazonaws.com" || endpointURL.Host == "s3.amazonaws.com" {
- return true
- }
- return GetRegionFromURL(endpointURL) != ""
- }
- // IsAmazonGovCloudEndpoint - Match if it is exactly Amazon S3 GovCloud endpoint.
- func IsAmazonGovCloudEndpoint(endpointURL url.URL) bool {
- if endpointURL == sentinelURL {
- return false
- }
- return (endpointURL.Host == "s3-us-gov-west-1.amazonaws.com" ||
- IsAmazonFIPSGovCloudEndpoint(endpointURL))
- }
- // IsAmazonFIPSGovCloudEndpoint - Match if it is exactly Amazon S3 FIPS GovCloud endpoint.
- // See https://aws.amazon.com/compliance/fips.
- func IsAmazonFIPSGovCloudEndpoint(endpointURL url.URL) bool {
- if endpointURL == sentinelURL {
- return false
- }
- return endpointURL.Host == "s3-fips-us-gov-west-1.amazonaws.com" ||
- endpointURL.Host == "s3-fips.dualstack.us-gov-west-1.amazonaws.com"
- }
- // IsAmazonFIPSUSEastWestEndpoint - Match if it is exactly Amazon S3 FIPS US East/West endpoint.
- // See https://aws.amazon.com/compliance/fips.
- func IsAmazonFIPSUSEastWestEndpoint(endpointURL url.URL) bool {
- if endpointURL == sentinelURL {
- return false
- }
- switch endpointURL.Host {
- case "s3-fips.us-east-2.amazonaws.com":
- case "s3-fips.dualstack.us-west-1.amazonaws.com":
- case "s3-fips.dualstack.us-west-2.amazonaws.com":
- case "s3-fips.dualstack.us-east-2.amazonaws.com":
- case "s3-fips.dualstack.us-east-1.amazonaws.com":
- case "s3-fips.us-west-1.amazonaws.com":
- case "s3-fips.us-west-2.amazonaws.com":
- case "s3-fips.us-east-1.amazonaws.com":
- default:
- return false
- }
- return true
- }
- // IsAmazonFIPSEndpoint - Match if it is exactly Amazon S3 FIPS endpoint.
- // See https://aws.amazon.com/compliance/fips.
- func IsAmazonFIPSEndpoint(endpointURL url.URL) bool {
- return IsAmazonFIPSUSEastWestEndpoint(endpointURL) || IsAmazonFIPSGovCloudEndpoint(endpointURL)
- }
- // IsGoogleEndpoint - Match if it is exactly Google cloud storage endpoint.
- func IsGoogleEndpoint(endpointURL url.URL) bool {
- if endpointURL == sentinelURL {
- return false
- }
- return endpointURL.Host == "storage.googleapis.com"
- }
- // Expects ascii encoded strings - from output of urlEncodePath
- func percentEncodeSlash(s string) string {
- return strings.Replace(s, "/", "%2F", -1)
- }
- // QueryEncode - encodes query values in their URL encoded form. In
- // addition to the percent encoding performed by urlEncodePath() used
- // here, it also percent encodes '/' (forward slash)
- func QueryEncode(v url.Values) string {
- if v == nil {
- return ""
- }
- var buf bytes.Buffer
- keys := make([]string, 0, len(v))
- for k := range v {
- keys = append(keys, k)
- }
- sort.Strings(keys)
- for _, k := range keys {
- vs := v[k]
- prefix := percentEncodeSlash(EncodePath(k)) + "="
- for _, v := range vs {
- if buf.Len() > 0 {
- buf.WriteByte('&')
- }
- buf.WriteString(prefix)
- buf.WriteString(percentEncodeSlash(EncodePath(v)))
- }
- }
- return buf.String()
- }
- // TagDecode - decodes canonical tag into map of key and value.
- func TagDecode(ctag string) map[string]string {
- if ctag == "" {
- return map[string]string{}
- }
- tags := strings.Split(ctag, "&")
- tagMap := make(map[string]string, len(tags))
- var err error
- for _, tag := range tags {
- kvs := strings.SplitN(tag, "=", 2)
- if len(kvs) == 0 {
- return map[string]string{}
- }
- if len(kvs) == 1 {
- return map[string]string{}
- }
- tagMap[kvs[0]], err = url.PathUnescape(kvs[1])
- if err != nil {
- continue
- }
- }
- return tagMap
- }
- // TagEncode - encodes tag values in their URL encoded form. In
- // addition to the percent encoding performed by urlEncodePath() used
- // here, it also percent encodes '/' (forward slash)
- func TagEncode(tags map[string]string) string {
- if tags == nil {
- return ""
- }
- values := url.Values{}
- for k, v := range tags {
- values[k] = []string{v}
- }
- return QueryEncode(values)
- }
- // if object matches reserved string, no need to encode them
- var reservedObjectNames = regexp.MustCompile("^[a-zA-Z0-9-_.~/]+$")
- // EncodePath encode the strings from UTF-8 byte representations to HTML hex escape sequences
- //
- // This is necessary since regular url.Parse() and url.Encode() functions do not support UTF-8
- // non english characters cannot be parsed due to the nature in which url.Encode() is written
- //
- // This function on the other hand is a direct replacement for url.Encode() technique to support
- // pretty much every UTF-8 character.
- func EncodePath(pathName string) string {
- if reservedObjectNames.MatchString(pathName) {
- return pathName
- }
- var encodedPathname strings.Builder
- for _, s := range pathName {
- if 'A' <= s && s <= 'Z' || 'a' <= s && s <= 'z' || '0' <= s && s <= '9' { // §2.3 Unreserved characters (mark)
- encodedPathname.WriteRune(s)
- continue
- }
- switch s {
- case '-', '_', '.', '~', '/': // §2.3 Unreserved characters (mark)
- encodedPathname.WriteRune(s)
- continue
- default:
- len := utf8.RuneLen(s)
- if len < 0 {
- // if utf8 cannot convert return the same string as is
- return pathName
- }
- u := make([]byte, len)
- utf8.EncodeRune(u, s)
- for _, r := range u {
- hex := hex.EncodeToString([]byte{r})
- encodedPathname.WriteString("%" + strings.ToUpper(hex))
- }
- }
- }
- return encodedPathname.String()
- }
- // We support '.' with bucket names but we fallback to using path
- // style requests instead for such buckets.
- var (
- validBucketName = regexp.MustCompile(`^[A-Za-z0-9][A-Za-z0-9\.\-\_\:]{1,61}[A-Za-z0-9]$`)
- validBucketNameStrict = regexp.MustCompile(`^[a-z0-9][a-z0-9\.\-]{1,61}[a-z0-9]$`)
- ipAddress = regexp.MustCompile(`^(\d+\.){3}\d+$`)
- )
- // Common checker for both stricter and basic validation.
- func checkBucketNameCommon(bucketName string, strict bool) (err error) {
- if strings.TrimSpace(bucketName) == "" {
- return errors.New("Bucket name cannot be empty")
- }
- if len(bucketName) < 3 {
- return errors.New("Bucket name cannot be shorter than 3 characters")
- }
- if len(bucketName) > 63 {
- return errors.New("Bucket name cannot be longer than 63 characters")
- }
- if ipAddress.MatchString(bucketName) {
- return errors.New("Bucket name cannot be an ip address")
- }
- if strings.Contains(bucketName, "..") || strings.Contains(bucketName, ".-") || strings.Contains(bucketName, "-.") {
- return errors.New("Bucket name contains invalid characters")
- }
- if strict {
- if !validBucketNameStrict.MatchString(bucketName) {
- err = errors.New("Bucket name contains invalid characters")
- }
- return err
- }
- if !validBucketName.MatchString(bucketName) {
- err = errors.New("Bucket name contains invalid characters")
- }
- return err
- }
- // CheckValidBucketName - checks if we have a valid input bucket name.
- func CheckValidBucketName(bucketName string) (err error) {
- return checkBucketNameCommon(bucketName, false)
- }
- // CheckValidBucketNameStrict - checks if we have a valid input bucket name.
- // This is a stricter version.
- // - http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingBucket.html
- func CheckValidBucketNameStrict(bucketName string) (err error) {
- return checkBucketNameCommon(bucketName, true)
- }
- // CheckValidObjectNamePrefix - checks if we have a valid input object name prefix.
- // - http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingMetadata.html
- func CheckValidObjectNamePrefix(objectName string) error {
- if len(objectName) > 1024 {
- return errors.New("Object name cannot be longer than 1024 characters")
- }
- if !utf8.ValidString(objectName) {
- return errors.New("Object name with non UTF-8 strings are not supported")
- }
- return nil
- }
- // CheckValidObjectName - checks if we have a valid input object name.
- // - http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingMetadata.html
- func CheckValidObjectName(objectName string) error {
- if strings.TrimSpace(objectName) == "" {
- return errors.New("Object name cannot be empty")
- }
- return CheckValidObjectNamePrefix(objectName)
- }
|