123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270 |
- // Copyright 2018 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package text
- import (
- "math"
- "math/bits"
- "strconv"
- "strings"
- "unicode/utf8"
- "google.golang.org/protobuf/internal/detrand"
- "google.golang.org/protobuf/internal/errors"
- )
// encType identifies the kind of construct the Encoder wrote most recently.
// Every kind occupies its own bit so that several kinds can be matched with a
// single mask; the zero value matches none of them.
type encType uint8

const (
	name         encType = 1 << iota // field name followed by ':'
	scalar                           // scalar value or unquoted literal
	messageOpen                      // opening message delimiter
	messageClose                     // closing message delimiter
)
- // Encoder provides methods to write out textproto constructs and values. The user is
- // responsible for producing valid sequences of constructs and values.
- type Encoder struct {
- encoderState
- indent string
- delims [2]byte
- outputASCII bool
- }
- type encoderState struct {
- lastType encType
- indents []byte
- out []byte
- }
- // NewEncoder returns an Encoder.
- //
- // If indent is a non-empty string, it causes every entry in a List or Message
- // to be preceded by the indent and trailed by a newline.
- //
- // If delims is not the zero value, it controls the delimiter characters used
- // for messages (e.g., "{}" vs "<>").
- //
- // If outputASCII is true, strings will be serialized in such a way that
- // multi-byte UTF-8 sequences are escaped. This property ensures that the
- // overall output is ASCII (as opposed to UTF-8).
- func NewEncoder(indent string, delims [2]byte, outputASCII bool) (*Encoder, error) {
- e := &Encoder{}
- if len(indent) > 0 {
- if strings.Trim(indent, " \t") != "" {
- return nil, errors.New("indent may only be composed of space and tab characters")
- }
- e.indent = indent
- }
- switch delims {
- case [2]byte{0, 0}:
- e.delims = [2]byte{'{', '}'}
- case [2]byte{'{', '}'}, [2]byte{'<', '>'}:
- e.delims = delims
- default:
- return nil, errors.New("delimiters may only be \"{}\" or \"<>\"")
- }
- e.outputASCII = outputASCII
- return e, nil
- }
- // Bytes returns the content of the written bytes.
- func (e *Encoder) Bytes() []byte {
- return e.out
- }
- // StartMessage writes out the '{' or '<' symbol.
- func (e *Encoder) StartMessage() {
- e.prepareNext(messageOpen)
- e.out = append(e.out, e.delims[0])
- }
- // EndMessage writes out the '}' or '>' symbol.
- func (e *Encoder) EndMessage() {
- e.prepareNext(messageClose)
- e.out = append(e.out, e.delims[1])
- }
- // WriteName writes out the field name and the separator ':'.
- func (e *Encoder) WriteName(s string) {
- e.prepareNext(name)
- e.out = append(e.out, s...)
- e.out = append(e.out, ':')
- }
- // WriteBool writes out the given boolean value.
- func (e *Encoder) WriteBool(b bool) {
- if b {
- e.WriteLiteral("true")
- } else {
- e.WriteLiteral("false")
- }
- }
- // WriteString writes out the given string value.
- func (e *Encoder) WriteString(s string) {
- e.prepareNext(scalar)
- e.out = appendString(e.out, s, e.outputASCII)
- }
- func appendString(out []byte, in string, outputASCII bool) []byte {
- out = append(out, '"')
- i := indexNeedEscapeInString(in)
- in, out = in[i:], append(out, in[:i]...)
- for len(in) > 0 {
- switch r, n := utf8.DecodeRuneInString(in); {
- case r == utf8.RuneError && n == 1:
- // We do not report invalid UTF-8 because strings in the text format
- // are used to represent both the proto string and bytes type.
- r = rune(in[0])
- fallthrough
- case r < ' ' || r == '"' || r == '\\' || r == 0x7f:
- out = append(out, '\\')
- switch r {
- case '"', '\\':
- out = append(out, byte(r))
- case '\n':
- out = append(out, 'n')
- case '\r':
- out = append(out, 'r')
- case '\t':
- out = append(out, 't')
- default:
- out = append(out, 'x')
- out = append(out, "00"[1+(bits.Len32(uint32(r))-1)/4:]...)
- out = strconv.AppendUint(out, uint64(r), 16)
- }
- in = in[n:]
- case r >= utf8.RuneSelf && (outputASCII || r <= 0x009f):
- out = append(out, '\\')
- if r <= math.MaxUint16 {
- out = append(out, 'u')
- out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
- out = strconv.AppendUint(out, uint64(r), 16)
- } else {
- out = append(out, 'U')
- out = append(out, "00000000"[1+(bits.Len32(uint32(r))-1)/4:]...)
- out = strconv.AppendUint(out, uint64(r), 16)
- }
- in = in[n:]
- default:
- i := indexNeedEscapeInString(in[n:])
- in, out = in[n+i:], append(out, in[:n+i]...)
- }
- }
- out = append(out, '"')
- return out
- }
// indexNeedEscapeInString returns the index of the first byte in s that
// cannot be copied through without inspection: a control character, '"',
// '\'', '\\', DEL, or any byte outside the ASCII range. If s contains no such
// byte, it returns len(s).
func indexNeedEscapeInString(s string) int {
	for i, c := range []byte(s) {
		switch {
		case c < ' ', c >= 0x7f:
			return i
		case c == '"', c == '\'', c == '\\':
			return i
		}
	}
	return len(s)
}
- // WriteFloat writes out the given float value for given bitSize.
- func (e *Encoder) WriteFloat(n float64, bitSize int) {
- e.prepareNext(scalar)
- e.out = appendFloat(e.out, n, bitSize)
- }
// appendFloat appends the text form of n to out and returns the extended
// slice. NaN and the infinities are written as "nan", "inf" and "-inf"; every
// other value uses the shortest 'g' representation that round-trips for a
// float of bitSize bits.
func appendFloat(out []byte, n float64, bitSize int) []byte {
	if math.IsNaN(n) {
		return append(out, "nan"...)
	}
	if math.IsInf(n, 0) {
		if n > 0 {
			return append(out, "inf"...)
		}
		return append(out, "-inf"...)
	}
	return strconv.AppendFloat(out, n, 'g', -1, bitSize)
}
- // WriteInt writes out the given signed integer value.
- func (e *Encoder) WriteInt(n int64) {
- e.prepareNext(scalar)
- e.out = append(e.out, strconv.FormatInt(n, 10)...)
- }
- // WriteUint writes out the given unsigned integer value.
- func (e *Encoder) WriteUint(n uint64) {
- e.prepareNext(scalar)
- e.out = append(e.out, strconv.FormatUint(n, 10)...)
- }
- // WriteLiteral writes out the given string as a literal value without quotes.
- // This is used for writing enum literal strings.
- func (e *Encoder) WriteLiteral(s string) {
- e.prepareNext(scalar)
- e.out = append(e.out, s...)
- }
- // prepareNext adds possible space and indentation for the next value based
- // on last encType and indent option. It also updates e.lastType to next.
- func (e *Encoder) prepareNext(next encType) {
- defer func() {
- e.lastType = next
- }()
- // Single line.
- if len(e.indent) == 0 {
- // Add space after each field before the next one.
- if e.lastType&(scalar|messageClose) != 0 && next == name {
- e.out = append(e.out, ' ')
- // Add a random extra space to make output unstable.
- if detrand.Bool() {
- e.out = append(e.out, ' ')
- }
- }
- return
- }
- // Multi-line.
- switch {
- case e.lastType == name:
- e.out = append(e.out, ' ')
- // Add a random extra space after name: to make output unstable.
- if detrand.Bool() {
- e.out = append(e.out, ' ')
- }
- case e.lastType == messageOpen && next != messageClose:
- e.indents = append(e.indents, e.indent...)
- e.out = append(e.out, '\n')
- e.out = append(e.out, e.indents...)
- case e.lastType&(scalar|messageClose) != 0:
- if next == messageClose {
- e.indents = e.indents[:len(e.indents)-len(e.indent)]
- }
- e.out = append(e.out, '\n')
- e.out = append(e.out, e.indents...)
- }
- }
- // Snapshot returns the current snapshot for use in Reset.
- func (e *Encoder) Snapshot() encoderState {
- return e.encoderState
- }
- // Reset resets the Encoder to the given encoderState from a Snapshot.
- func (e *Encoder) Reset(es encoderState) {
- e.encoderState = es
- }
- // AppendString appends the escaped form of the input string to b.
- func AppendString(b []byte, s string) []byte {
- return appendString(b, s, false)
- }
|