encode.go 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270
  1. // Copyright 2018 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package text
  5. import (
  6. "math"
  7. "math/bits"
  8. "strconv"
  9. "strings"
  10. "unicode/utf8"
  11. "google.golang.org/protobuf/internal/detrand"
  12. "google.golang.org/protobuf/internal/errors"
  13. )
  14. // encType represents an encoding type.
  15. type encType uint8
  16. const (
  17. _ encType = (1 << iota) / 2
  18. name
  19. scalar
  20. messageOpen
  21. messageClose
  22. )
// Encoder provides methods to write out textproto constructs and values. The user is
// responsible for producing valid sequences of constructs and values.
type Encoder struct {
	// encoderState is the mutable part of the encoder (output buffer and
	// position tracking). It is embedded so that Snapshot and Reset can copy
	// it as a single value.
	encoderState

	// indent is the indentation added per nesting level. When it is empty
	// the output is written on a single line.
	indent string
	// delims holds the opening and closing message delimiter bytes, in
	// that order.
	delims [2]byte
	// outputASCII is forwarded to string escaping; when true, non-ASCII
	// runes in strings are written as escapes.
	outputASCII bool
}
// encoderState is the part of an Encoder that changes as output is written.
// It is the value returned by Snapshot and accepted by Reset.
type encoderState struct {
	// lastType is the kind of the construct written most recently;
	// prepareNext consults it to decide on spacing and indentation.
	lastType encType
	// indents is the indentation for the current nesting depth: one copy of
	// the Encoder's indent string per open message.
	indents []byte
	// out is the output accumulated so far.
	out []byte
}
  36. // NewEncoder returns an Encoder.
  37. //
  38. // If indent is a non-empty string, it causes every entry in a List or Message
  39. // to be preceded by the indent and trailed by a newline.
  40. //
  41. // If delims is not the zero value, it controls the delimiter characters used
  42. // for messages (e.g., "{}" vs "<>").
  43. //
  44. // If outputASCII is true, strings will be serialized in such a way that
  45. // multi-byte UTF-8 sequences are escaped. This property ensures that the
  46. // overall output is ASCII (as opposed to UTF-8).
  47. func NewEncoder(indent string, delims [2]byte, outputASCII bool) (*Encoder, error) {
  48. e := &Encoder{}
  49. if len(indent) > 0 {
  50. if strings.Trim(indent, " \t") != "" {
  51. return nil, errors.New("indent may only be composed of space and tab characters")
  52. }
  53. e.indent = indent
  54. }
  55. switch delims {
  56. case [2]byte{0, 0}:
  57. e.delims = [2]byte{'{', '}'}
  58. case [2]byte{'{', '}'}, [2]byte{'<', '>'}:
  59. e.delims = delims
  60. default:
  61. return nil, errors.New("delimiters may only be \"{}\" or \"<>\"")
  62. }
  63. e.outputASCII = outputASCII
  64. return e, nil
  65. }
  66. // Bytes returns the content of the written bytes.
  67. func (e *Encoder) Bytes() []byte {
  68. return e.out
  69. }
  70. // StartMessage writes out the '{' or '<' symbol.
  71. func (e *Encoder) StartMessage() {
  72. e.prepareNext(messageOpen)
  73. e.out = append(e.out, e.delims[0])
  74. }
  75. // EndMessage writes out the '}' or '>' symbol.
  76. func (e *Encoder) EndMessage() {
  77. e.prepareNext(messageClose)
  78. e.out = append(e.out, e.delims[1])
  79. }
  80. // WriteName writes out the field name and the separator ':'.
  81. func (e *Encoder) WriteName(s string) {
  82. e.prepareNext(name)
  83. e.out = append(e.out, s...)
  84. e.out = append(e.out, ':')
  85. }
  86. // WriteBool writes out the given boolean value.
  87. func (e *Encoder) WriteBool(b bool) {
  88. if b {
  89. e.WriteLiteral("true")
  90. } else {
  91. e.WriteLiteral("false")
  92. }
  93. }
  94. // WriteString writes out the given string value.
  95. func (e *Encoder) WriteString(s string) {
  96. e.prepareNext(scalar)
  97. e.out = appendString(e.out, s, e.outputASCII)
  98. }
  99. func appendString(out []byte, in string, outputASCII bool) []byte {
  100. out = append(out, '"')
  101. i := indexNeedEscapeInString(in)
  102. in, out = in[i:], append(out, in[:i]...)
  103. for len(in) > 0 {
  104. switch r, n := utf8.DecodeRuneInString(in); {
  105. case r == utf8.RuneError && n == 1:
  106. // We do not report invalid UTF-8 because strings in the text format
  107. // are used to represent both the proto string and bytes type.
  108. r = rune(in[0])
  109. fallthrough
  110. case r < ' ' || r == '"' || r == '\\' || r == 0x7f:
  111. out = append(out, '\\')
  112. switch r {
  113. case '"', '\\':
  114. out = append(out, byte(r))
  115. case '\n':
  116. out = append(out, 'n')
  117. case '\r':
  118. out = append(out, 'r')
  119. case '\t':
  120. out = append(out, 't')
  121. default:
  122. out = append(out, 'x')
  123. out = append(out, "00"[1+(bits.Len32(uint32(r))-1)/4:]...)
  124. out = strconv.AppendUint(out, uint64(r), 16)
  125. }
  126. in = in[n:]
  127. case r >= utf8.RuneSelf && (outputASCII || r <= 0x009f):
  128. out = append(out, '\\')
  129. if r <= math.MaxUint16 {
  130. out = append(out, 'u')
  131. out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
  132. out = strconv.AppendUint(out, uint64(r), 16)
  133. } else {
  134. out = append(out, 'U')
  135. out = append(out, "00000000"[1+(bits.Len32(uint32(r))-1)/4:]...)
  136. out = strconv.AppendUint(out, uint64(r), 16)
  137. }
  138. in = in[n:]
  139. default:
  140. i := indexNeedEscapeInString(in[n:])
  141. in, out = in[n+i:], append(out, in[:n+i]...)
  142. }
  143. }
  144. out = append(out, '"')
  145. return out
  146. }
  147. // indexNeedEscapeInString returns the index of the character that needs
  148. // escaping. If no characters need escaping, this returns the input length.
  149. func indexNeedEscapeInString(s string) int {
  150. for i := 0; i < len(s); i++ {
  151. if c := s[i]; c < ' ' || c == '"' || c == '\'' || c == '\\' || c >= 0x7f {
  152. return i
  153. }
  154. }
  155. return len(s)
  156. }
  157. // WriteFloat writes out the given float value for given bitSize.
  158. func (e *Encoder) WriteFloat(n float64, bitSize int) {
  159. e.prepareNext(scalar)
  160. e.out = appendFloat(e.out, n, bitSize)
  161. }
  162. func appendFloat(out []byte, n float64, bitSize int) []byte {
  163. switch {
  164. case math.IsNaN(n):
  165. return append(out, "nan"...)
  166. case math.IsInf(n, +1):
  167. return append(out, "inf"...)
  168. case math.IsInf(n, -1):
  169. return append(out, "-inf"...)
  170. default:
  171. return strconv.AppendFloat(out, n, 'g', -1, bitSize)
  172. }
  173. }
  174. // WriteInt writes out the given signed integer value.
  175. func (e *Encoder) WriteInt(n int64) {
  176. e.prepareNext(scalar)
  177. e.out = append(e.out, strconv.FormatInt(n, 10)...)
  178. }
  179. // WriteUint writes out the given unsigned integer value.
  180. func (e *Encoder) WriteUint(n uint64) {
  181. e.prepareNext(scalar)
  182. e.out = append(e.out, strconv.FormatUint(n, 10)...)
  183. }
  184. // WriteLiteral writes out the given string as a literal value without quotes.
  185. // This is used for writing enum literal strings.
  186. func (e *Encoder) WriteLiteral(s string) {
  187. e.prepareNext(scalar)
  188. e.out = append(e.out, s...)
  189. }
  190. // prepareNext adds possible space and indentation for the next value based
  191. // on last encType and indent option. It also updates e.lastType to next.
  192. func (e *Encoder) prepareNext(next encType) {
  193. defer func() {
  194. e.lastType = next
  195. }()
  196. // Single line.
  197. if len(e.indent) == 0 {
  198. // Add space after each field before the next one.
  199. if e.lastType&(scalar|messageClose) != 0 && next == name {
  200. e.out = append(e.out, ' ')
  201. // Add a random extra space to make output unstable.
  202. if detrand.Bool() {
  203. e.out = append(e.out, ' ')
  204. }
  205. }
  206. return
  207. }
  208. // Multi-line.
  209. switch {
  210. case e.lastType == name:
  211. e.out = append(e.out, ' ')
  212. // Add a random extra space after name: to make output unstable.
  213. if detrand.Bool() {
  214. e.out = append(e.out, ' ')
  215. }
  216. case e.lastType == messageOpen && next != messageClose:
  217. e.indents = append(e.indents, e.indent...)
  218. e.out = append(e.out, '\n')
  219. e.out = append(e.out, e.indents...)
  220. case e.lastType&(scalar|messageClose) != 0:
  221. if next == messageClose {
  222. e.indents = e.indents[:len(e.indents)-len(e.indent)]
  223. }
  224. e.out = append(e.out, '\n')
  225. e.out = append(e.out, e.indents...)
  226. }
  227. }
  228. // Snapshot returns the current snapshot for use in Reset.
  229. func (e *Encoder) Snapshot() encoderState {
  230. return e.encoderState
  231. }
  232. // Reset resets the Encoder to the given encoderState from a Snapshot.
  233. func (e *Encoder) Reset(es encoderState) {
  234. e.encoderState = es
  235. }
  236. // AppendString appends the escaped form of the input string to b.
  237. func AppendString(b []byte, s string) []byte {
  238. return appendString(b, s, false)
  239. }