text_decode.go 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801
  1. // Copyright 2010 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package proto
  5. import (
  6. "encoding"
  7. "errors"
  8. "fmt"
  9. "reflect"
  10. "strconv"
  11. "strings"
  12. "unicode/utf8"
  13. "google.golang.org/protobuf/encoding/prototext"
  14. protoV2 "google.golang.org/protobuf/proto"
  15. "google.golang.org/protobuf/reflect/protoreflect"
  16. "google.golang.org/protobuf/reflect/protoregistry"
  17. )
  // wrapTextUnmarshalV2 selects the UnmarshalText implementation. When true,
  // parsing is delegated to prototext and ParseError.Error returns only the
  // message; when false, the textParser defined in this file is used and
  // ParseError.Error prefixes the message with position information.
  const wrapTextUnmarshalV2 = false
  // ParseError is returned by UnmarshalText.
  type ParseError struct {
  	// Message describes what went wrong.
  	Message string

  	// Line and Offset record the position of the token that was current
  	// when the error was raised; Error includes them in its output unless
  	// wrapTextUnmarshalV2 is set.
  	//
  	// Deprecated: Do not use.
  	Line, Offset int
  }
  25. func (e *ParseError) Error() string {
  26. if wrapTextUnmarshalV2 {
  27. return e.Message
  28. }
  29. if e.Line == 1 {
  30. return fmt.Sprintf("line 1.%d: %v", e.Offset, e.Message)
  31. }
  32. return fmt.Sprintf("line %d: %v", e.Line, e.Message)
  33. }
  34. // UnmarshalText parses a proto text formatted string into m.
  35. func UnmarshalText(s string, m Message) error {
  36. if u, ok := m.(encoding.TextUnmarshaler); ok {
  37. return u.UnmarshalText([]byte(s))
  38. }
  39. m.Reset()
  40. mi := MessageV2(m)
  41. if wrapTextUnmarshalV2 {
  42. err := prototext.UnmarshalOptions{
  43. AllowPartial: true,
  44. }.Unmarshal([]byte(s), mi)
  45. if err != nil {
  46. return &ParseError{Message: err.Error()}
  47. }
  48. return checkRequiredNotSet(mi)
  49. } else {
  50. if err := newTextParser(s).unmarshalMessage(mi.ProtoReflect(), ""); err != nil {
  51. return err
  52. }
  53. return checkRequiredNotSet(mi)
  54. }
  55. }
  // textParser is a tokenizer and recursive-descent parser for the proto text
  // format. It holds the unread input together with a one-token lookahead
  // (cur), which back() can cause next() to return again.
  type textParser struct {
  	s            string // remaining input
  	done         bool   // whether the parsing is finished (success or error)
  	backed       bool   // whether back() was called
  	offset, line int    // position of s within the original input; line starts at 1
  	cur          token  // the most recently scanned token
  }
  // token is a single lexical element produced by textParser.advance, or the
  // concatenation of adjacent quoted strings produced by textParser.next.
  type token struct {
  	value    string      // raw text of the token; "" signals end of input
  	err      *ParseError // set by errorf when scanning or parsing failed
  	line     int         // line number
  	offset   int         // byte number from start of input, not start of line
  	unquoted string      // the unquoted version of value, if it was a quoted string
  }
  70. func newTextParser(s string) *textParser {
  71. p := new(textParser)
  72. p.s = s
  73. p.line = 1
  74. p.cur.line = 1
  75. return p
  76. }
  77. func (p *textParser) unmarshalMessage(m protoreflect.Message, terminator string) (err error) {
  78. md := m.Descriptor()
  79. fds := md.Fields()
  80. // A struct is a sequence of "name: value", terminated by one of
  81. // '>' or '}', or the end of the input. A name may also be
  82. // "[extension]" or "[type/url]".
  83. //
  84. // The whole struct can also be an expanded Any message, like:
  85. // [type/url] < ... struct contents ... >
  86. seen := make(map[protoreflect.FieldNumber]bool)
  87. for {
  88. tok := p.next()
  89. if tok.err != nil {
  90. return tok.err
  91. }
  92. if tok.value == terminator {
  93. break
  94. }
  95. if tok.value == "[" {
  96. if err := p.unmarshalExtensionOrAny(m, seen); err != nil {
  97. return err
  98. }
  99. continue
  100. }
  101. // This is a normal, non-extension field.
  102. name := protoreflect.Name(tok.value)
  103. fd := fds.ByName(name)
  104. switch {
  105. case fd == nil:
  106. gd := fds.ByName(protoreflect.Name(strings.ToLower(string(name))))
  107. if gd != nil && gd.Kind() == protoreflect.GroupKind && gd.Message().Name() == name {
  108. fd = gd
  109. }
  110. case fd.Kind() == protoreflect.GroupKind && fd.Message().Name() != name:
  111. fd = nil
  112. case fd.IsWeak() && fd.Message().IsPlaceholder():
  113. fd = nil
  114. }
  115. if fd == nil {
  116. typeName := string(md.FullName())
  117. if m, ok := m.Interface().(Message); ok {
  118. t := reflect.TypeOf(m)
  119. if t.Kind() == reflect.Ptr {
  120. typeName = t.Elem().String()
  121. }
  122. }
  123. return p.errorf("unknown field name %q in %v", name, typeName)
  124. }
  125. if od := fd.ContainingOneof(); od != nil && m.WhichOneof(od) != nil {
  126. return p.errorf("field '%s' would overwrite already parsed oneof '%s'", name, od.Name())
  127. }
  128. if fd.Cardinality() != protoreflect.Repeated && seen[fd.Number()] {
  129. return p.errorf("non-repeated field %q was repeated", fd.Name())
  130. }
  131. seen[fd.Number()] = true
  132. // Consume any colon.
  133. if err := p.checkForColon(fd); err != nil {
  134. return err
  135. }
  136. // Parse into the field.
  137. v := m.Get(fd)
  138. if !m.Has(fd) && (fd.IsList() || fd.IsMap() || fd.Message() != nil) {
  139. v = m.Mutable(fd)
  140. }
  141. if v, err = p.unmarshalValue(v, fd); err != nil {
  142. return err
  143. }
  144. m.Set(fd, v)
  145. if err := p.consumeOptionalSeparator(); err != nil {
  146. return err
  147. }
  148. }
  149. return nil
  150. }
  151. func (p *textParser) unmarshalExtensionOrAny(m protoreflect.Message, seen map[protoreflect.FieldNumber]bool) error {
  152. name, err := p.consumeExtensionOrAnyName()
  153. if err != nil {
  154. return err
  155. }
  156. // If it contains a slash, it's an Any type URL.
  157. if slashIdx := strings.LastIndex(name, "/"); slashIdx >= 0 {
  158. tok := p.next()
  159. if tok.err != nil {
  160. return tok.err
  161. }
  162. // consume an optional colon
  163. if tok.value == ":" {
  164. tok = p.next()
  165. if tok.err != nil {
  166. return tok.err
  167. }
  168. }
  169. var terminator string
  170. switch tok.value {
  171. case "<":
  172. terminator = ">"
  173. case "{":
  174. terminator = "}"
  175. default:
  176. return p.errorf("expected '{' or '<', found %q", tok.value)
  177. }
  178. mt, err := protoregistry.GlobalTypes.FindMessageByURL(name)
  179. if err != nil {
  180. return p.errorf("unrecognized message %q in google.protobuf.Any", name[slashIdx+len("/"):])
  181. }
  182. m2 := mt.New()
  183. if err := p.unmarshalMessage(m2, terminator); err != nil {
  184. return err
  185. }
  186. b, err := protoV2.Marshal(m2.Interface())
  187. if err != nil {
  188. return p.errorf("failed to marshal message of type %q: %v", name[slashIdx+len("/"):], err)
  189. }
  190. urlFD := m.Descriptor().Fields().ByName("type_url")
  191. valFD := m.Descriptor().Fields().ByName("value")
  192. if seen[urlFD.Number()] {
  193. return p.errorf("Any message unpacked multiple times, or %q already set", urlFD.Name())
  194. }
  195. if seen[valFD.Number()] {
  196. return p.errorf("Any message unpacked multiple times, or %q already set", valFD.Name())
  197. }
  198. m.Set(urlFD, protoreflect.ValueOfString(name))
  199. m.Set(valFD, protoreflect.ValueOfBytes(b))
  200. seen[urlFD.Number()] = true
  201. seen[valFD.Number()] = true
  202. return nil
  203. }
  204. xname := protoreflect.FullName(name)
  205. xt, _ := protoregistry.GlobalTypes.FindExtensionByName(xname)
  206. if xt == nil && isMessageSet(m.Descriptor()) {
  207. xt, _ = protoregistry.GlobalTypes.FindExtensionByName(xname.Append("message_set_extension"))
  208. }
  209. if xt == nil {
  210. return p.errorf("unrecognized extension %q", name)
  211. }
  212. fd := xt.TypeDescriptor()
  213. if fd.ContainingMessage().FullName() != m.Descriptor().FullName() {
  214. return p.errorf("extension field %q does not extend message %q", name, m.Descriptor().FullName())
  215. }
  216. if err := p.checkForColon(fd); err != nil {
  217. return err
  218. }
  219. v := m.Get(fd)
  220. if !m.Has(fd) && (fd.IsList() || fd.IsMap() || fd.Message() != nil) {
  221. v = m.Mutable(fd)
  222. }
  223. v, err = p.unmarshalValue(v, fd)
  224. if err != nil {
  225. return err
  226. }
  227. m.Set(fd, v)
  228. return p.consumeOptionalSeparator()
  229. }
  230. func (p *textParser) unmarshalValue(v protoreflect.Value, fd protoreflect.FieldDescriptor) (protoreflect.Value, error) {
  231. tok := p.next()
  232. if tok.err != nil {
  233. return v, tok.err
  234. }
  235. if tok.value == "" {
  236. return v, p.errorf("unexpected EOF")
  237. }
  238. switch {
  239. case fd.IsList():
  240. lv := v.List()
  241. var err error
  242. if tok.value == "[" {
  243. // Repeated field with list notation, like [1,2,3].
  244. for {
  245. vv := lv.NewElement()
  246. vv, err = p.unmarshalSingularValue(vv, fd)
  247. if err != nil {
  248. return v, err
  249. }
  250. lv.Append(vv)
  251. tok := p.next()
  252. if tok.err != nil {
  253. return v, tok.err
  254. }
  255. if tok.value == "]" {
  256. break
  257. }
  258. if tok.value != "," {
  259. return v, p.errorf("Expected ']' or ',' found %q", tok.value)
  260. }
  261. }
  262. return v, nil
  263. }
  264. // One value of the repeated field.
  265. p.back()
  266. vv := lv.NewElement()
  267. vv, err = p.unmarshalSingularValue(vv, fd)
  268. if err != nil {
  269. return v, err
  270. }
  271. lv.Append(vv)
  272. return v, nil
  273. case fd.IsMap():
  274. // The map entry should be this sequence of tokens:
  275. // < key : KEY value : VALUE >
  276. // However, implementations may omit key or value, and technically
  277. // we should support them in any order.
  278. var terminator string
  279. switch tok.value {
  280. case "<":
  281. terminator = ">"
  282. case "{":
  283. terminator = "}"
  284. default:
  285. return v, p.errorf("expected '{' or '<', found %q", tok.value)
  286. }
  287. keyFD := fd.MapKey()
  288. valFD := fd.MapValue()
  289. mv := v.Map()
  290. kv := keyFD.Default()
  291. vv := mv.NewValue()
  292. for {
  293. tok := p.next()
  294. if tok.err != nil {
  295. return v, tok.err
  296. }
  297. if tok.value == terminator {
  298. break
  299. }
  300. var err error
  301. switch tok.value {
  302. case "key":
  303. if err := p.consumeToken(":"); err != nil {
  304. return v, err
  305. }
  306. if kv, err = p.unmarshalSingularValue(kv, keyFD); err != nil {
  307. return v, err
  308. }
  309. if err := p.consumeOptionalSeparator(); err != nil {
  310. return v, err
  311. }
  312. case "value":
  313. if err := p.checkForColon(valFD); err != nil {
  314. return v, err
  315. }
  316. if vv, err = p.unmarshalSingularValue(vv, valFD); err != nil {
  317. return v, err
  318. }
  319. if err := p.consumeOptionalSeparator(); err != nil {
  320. return v, err
  321. }
  322. default:
  323. p.back()
  324. return v, p.errorf(`expected "key", "value", or %q, found %q`, terminator, tok.value)
  325. }
  326. }
  327. mv.Set(kv.MapKey(), vv)
  328. return v, nil
  329. default:
  330. p.back()
  331. return p.unmarshalSingularValue(v, fd)
  332. }
  333. }
  334. func (p *textParser) unmarshalSingularValue(v protoreflect.Value, fd protoreflect.FieldDescriptor) (protoreflect.Value, error) {
  335. tok := p.next()
  336. if tok.err != nil {
  337. return v, tok.err
  338. }
  339. if tok.value == "" {
  340. return v, p.errorf("unexpected EOF")
  341. }
  342. switch fd.Kind() {
  343. case protoreflect.BoolKind:
  344. switch tok.value {
  345. case "true", "1", "t", "True":
  346. return protoreflect.ValueOfBool(true), nil
  347. case "false", "0", "f", "False":
  348. return protoreflect.ValueOfBool(false), nil
  349. }
  350. case protoreflect.Int32Kind, protoreflect.Sint32Kind, protoreflect.Sfixed32Kind:
  351. if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil {
  352. return protoreflect.ValueOfInt32(int32(x)), nil
  353. }
  354. // The C++ parser accepts large positive hex numbers that uses
  355. // two's complement arithmetic to represent negative numbers.
  356. // This feature is here for backwards compatibility with C++.
  357. if strings.HasPrefix(tok.value, "0x") {
  358. if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil {
  359. return protoreflect.ValueOfInt32(int32(-(int64(^x) + 1))), nil
  360. }
  361. }
  362. case protoreflect.Int64Kind, protoreflect.Sint64Kind, protoreflect.Sfixed64Kind:
  363. if x, err := strconv.ParseInt(tok.value, 0, 64); err == nil {
  364. return protoreflect.ValueOfInt64(int64(x)), nil
  365. }
  366. // The C++ parser accepts large positive hex numbers that uses
  367. // two's complement arithmetic to represent negative numbers.
  368. // This feature is here for backwards compatibility with C++.
  369. if strings.HasPrefix(tok.value, "0x") {
  370. if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil {
  371. return protoreflect.ValueOfInt64(int64(-(int64(^x) + 1))), nil
  372. }
  373. }
  374. case protoreflect.Uint32Kind, protoreflect.Fixed32Kind:
  375. if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil {
  376. return protoreflect.ValueOfUint32(uint32(x)), nil
  377. }
  378. case protoreflect.Uint64Kind, protoreflect.Fixed64Kind:
  379. if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil {
  380. return protoreflect.ValueOfUint64(uint64(x)), nil
  381. }
  382. case protoreflect.FloatKind:
  383. // Ignore 'f' for compatibility with output generated by C++,
  384. // but don't remove 'f' when the value is "-inf" or "inf".
  385. v := tok.value
  386. if strings.HasSuffix(v, "f") && v != "-inf" && v != "inf" {
  387. v = v[:len(v)-len("f")]
  388. }
  389. if x, err := strconv.ParseFloat(v, 32); err == nil {
  390. return protoreflect.ValueOfFloat32(float32(x)), nil
  391. }
  392. case protoreflect.DoubleKind:
  393. // Ignore 'f' for compatibility with output generated by C++,
  394. // but don't remove 'f' when the value is "-inf" or "inf".
  395. v := tok.value
  396. if strings.HasSuffix(v, "f") && v != "-inf" && v != "inf" {
  397. v = v[:len(v)-len("f")]
  398. }
  399. if x, err := strconv.ParseFloat(v, 64); err == nil {
  400. return protoreflect.ValueOfFloat64(float64(x)), nil
  401. }
  402. case protoreflect.StringKind:
  403. if isQuote(tok.value[0]) {
  404. return protoreflect.ValueOfString(tok.unquoted), nil
  405. }
  406. case protoreflect.BytesKind:
  407. if isQuote(tok.value[0]) {
  408. return protoreflect.ValueOfBytes([]byte(tok.unquoted)), nil
  409. }
  410. case protoreflect.EnumKind:
  411. if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil {
  412. return protoreflect.ValueOfEnum(protoreflect.EnumNumber(x)), nil
  413. }
  414. vd := fd.Enum().Values().ByName(protoreflect.Name(tok.value))
  415. if vd != nil {
  416. return protoreflect.ValueOfEnum(vd.Number()), nil
  417. }
  418. case protoreflect.MessageKind, protoreflect.GroupKind:
  419. var terminator string
  420. switch tok.value {
  421. case "{":
  422. terminator = "}"
  423. case "<":
  424. terminator = ">"
  425. default:
  426. return v, p.errorf("expected '{' or '<', found %q", tok.value)
  427. }
  428. err := p.unmarshalMessage(v.Message(), terminator)
  429. return v, err
  430. default:
  431. panic(fmt.Sprintf("invalid kind %v", fd.Kind()))
  432. }
  433. return v, p.errorf("invalid %v: %v", fd.Kind(), tok.value)
  434. }
  435. // Consume a ':' from the input stream (if the next token is a colon),
  436. // returning an error if a colon is needed but not present.
  437. func (p *textParser) checkForColon(fd protoreflect.FieldDescriptor) *ParseError {
  438. tok := p.next()
  439. if tok.err != nil {
  440. return tok.err
  441. }
  442. if tok.value != ":" {
  443. if fd.Message() == nil {
  444. return p.errorf("expected ':', found %q", tok.value)
  445. }
  446. p.back()
  447. }
  448. return nil
  449. }
  450. // consumeExtensionOrAnyName consumes an extension name or an Any type URL and
  451. // the following ']'. It returns the name or URL consumed.
  452. func (p *textParser) consumeExtensionOrAnyName() (string, error) {
  453. tok := p.next()
  454. if tok.err != nil {
  455. return "", tok.err
  456. }
  457. // If extension name or type url is quoted, it's a single token.
  458. if len(tok.value) > 2 && isQuote(tok.value[0]) && tok.value[len(tok.value)-1] == tok.value[0] {
  459. name, err := unquoteC(tok.value[1:len(tok.value)-1], rune(tok.value[0]))
  460. if err != nil {
  461. return "", err
  462. }
  463. return name, p.consumeToken("]")
  464. }
  465. // Consume everything up to "]"
  466. var parts []string
  467. for tok.value != "]" {
  468. parts = append(parts, tok.value)
  469. tok = p.next()
  470. if tok.err != nil {
  471. return "", p.errorf("unrecognized type_url or extension name: %s", tok.err)
  472. }
  473. if p.done && tok.value != "]" {
  474. return "", p.errorf("unclosed type_url or extension name")
  475. }
  476. }
  477. return strings.Join(parts, ""), nil
  478. }
  479. // consumeOptionalSeparator consumes an optional semicolon or comma.
  480. // It is used in unmarshalMessage to provide backward compatibility.
  481. func (p *textParser) consumeOptionalSeparator() error {
  482. tok := p.next()
  483. if tok.err != nil {
  484. return tok.err
  485. }
  486. if tok.value != ";" && tok.value != "," {
  487. p.back()
  488. }
  489. return nil
  490. }
  491. func (p *textParser) errorf(format string, a ...interface{}) *ParseError {
  492. pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset}
  493. p.cur.err = pe
  494. p.done = true
  495. return pe
  496. }
  497. func (p *textParser) skipWhitespace() {
  498. i := 0
  499. for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') {
  500. if p.s[i] == '#' {
  501. // comment; skip to end of line or input
  502. for i < len(p.s) && p.s[i] != '\n' {
  503. i++
  504. }
  505. if i == len(p.s) {
  506. break
  507. }
  508. }
  509. if p.s[i] == '\n' {
  510. p.line++
  511. }
  512. i++
  513. }
  514. p.offset += i
  515. p.s = p.s[i:len(p.s)]
  516. if len(p.s) == 0 {
  517. p.done = true
  518. }
  519. }
  520. func (p *textParser) advance() {
  521. // Skip whitespace
  522. p.skipWhitespace()
  523. if p.done {
  524. return
  525. }
  526. // Start of non-whitespace
  527. p.cur.err = nil
  528. p.cur.offset, p.cur.line = p.offset, p.line
  529. p.cur.unquoted = ""
  530. switch p.s[0] {
  531. case '<', '>', '{', '}', ':', '[', ']', ';', ',', '/':
  532. // Single symbol
  533. p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)]
  534. case '"', '\'':
  535. // Quoted string
  536. i := 1
  537. for i < len(p.s) && p.s[i] != p.s[0] && p.s[i] != '\n' {
  538. if p.s[i] == '\\' && i+1 < len(p.s) {
  539. // skip escaped char
  540. i++
  541. }
  542. i++
  543. }
  544. if i >= len(p.s) || p.s[i] != p.s[0] {
  545. p.errorf("unmatched quote")
  546. return
  547. }
  548. unq, err := unquoteC(p.s[1:i], rune(p.s[0]))
  549. if err != nil {
  550. p.errorf("invalid quoted string %s: %v", p.s[0:i+1], err)
  551. return
  552. }
  553. p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)]
  554. p.cur.unquoted = unq
  555. default:
  556. i := 0
  557. for i < len(p.s) && isIdentOrNumberChar(p.s[i]) {
  558. i++
  559. }
  560. if i == 0 {
  561. p.errorf("unexpected byte %#x", p.s[0])
  562. return
  563. }
  564. p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)]
  565. }
  566. p.offset += len(p.cur.value)
  567. }
  568. // Back off the parser by one token. Can only be done between calls to next().
  569. // It makes the next advance() a no-op.
  570. func (p *textParser) back() { p.backed = true }
  571. // Advances the parser and returns the new current token.
  572. func (p *textParser) next() *token {
  573. if p.backed || p.done {
  574. p.backed = false
  575. return &p.cur
  576. }
  577. p.advance()
  578. if p.done {
  579. p.cur.value = ""
  580. } else if len(p.cur.value) > 0 && isQuote(p.cur.value[0]) {
  581. // Look for multiple quoted strings separated by whitespace,
  582. // and concatenate them.
  583. cat := p.cur
  584. for {
  585. p.skipWhitespace()
  586. if p.done || !isQuote(p.s[0]) {
  587. break
  588. }
  589. p.advance()
  590. if p.cur.err != nil {
  591. return &p.cur
  592. }
  593. cat.value += " " + p.cur.value
  594. cat.unquoted += p.cur.unquoted
  595. }
  596. p.done = false // parser may have seen EOF, but we want to return cat
  597. p.cur = cat
  598. }
  599. return &p.cur
  600. }
  601. func (p *textParser) consumeToken(s string) error {
  602. tok := p.next()
  603. if tok.err != nil {
  604. return tok.err
  605. }
  606. if tok.value != s {
  607. p.back()
  608. return p.errorf("expected %q, found %q", s, tok.value)
  609. }
  610. return nil
  611. }
  // errBadUTF8 is returned when a quoted string contains invalid UTF-8.
  var errBadUTF8 = errors.New("proto: bad UTF-8")

  // unquoteC decodes the backslash escapes in s, the body of a quoted string
  // with the surrounding quotes already removed; quote is the quote character
  // that delimited it.
  //
  // This is based on C++'s tokenizer.cc. Despite its name, this is *not*
  // parsing C syntax. For instance, "\0" is an invalid quoted string.
  //
  // It returns errBadUTF8 for malformed UTF-8 encountered while decoding, or
  // the error from unescape for a malformed escape sequence.
  func unquoteC(s string, quote rune) (string, error) {
  	// Avoid allocation in trivial cases: with no backslash and no quote
  	// character there is nothing to rewrite.
  	simple := true
  	for _, r := range s {
  		if r == '\\' || r == quote {
  			simple = false
  			break
  		}
  	}
  	if simple {
  		return s, nil
  	}

  	buf := make([]byte, 0, 3*len(s)/2)
  	for len(s) > 0 {
  		r, n := utf8.DecodeRuneInString(s)
  		if r == utf8.RuneError && n == 1 {
  			return "", errBadUTF8
  		}
  		s = s[n:]
  		if r != '\\' {
  			if r < utf8.RuneSelf {
  				buf = append(buf, byte(r))
  			} else {
  				buf = append(buf, string(r)...)
  			}
  			continue
  		}

  		// s now starts just after the backslash.
  		ch, tail, err := unescape(s)
  		if err != nil {
  			return "", err
  		}
  		buf = append(buf, ch...)
  		s = tail
  	}
  	return string(buf), nil
  }

  // unescape decodes one escape sequence. s is the input immediately after the
  // backslash. It returns the decoded text in ch and the unconsumed remainder
  // of s in tail.
  //
  // Supported forms are the single-character escapes (\a \b \f \n \r \t \v \?
  // \' \" \\), exactly three octal digits, \x or \X followed by two hex digits,
  // \u followed by four hex digits, and \U followed by eight hex digits. Octal
  // and \x escapes produce a single raw byte; \u and \U produce the UTF-8
  // encoding of the code point.
  func unescape(s string) (ch string, tail string, err error) {
  	r, n := utf8.DecodeRuneInString(s)
  	if r == utf8.RuneError && n == 1 {
  		return "", "", errBadUTF8
  	}
  	s = s[n:]
  	switch r {
  	case 'a':
  		return "\a", s, nil
  	case 'b':
  		return "\b", s, nil
  	case 'f':
  		return "\f", s, nil
  	case 'n':
  		return "\n", s, nil
  	case 'r':
  		return "\r", s, nil
  	case 't':
  		return "\t", s, nil
  	case 'v':
  		return "\v", s, nil
  	case '?':
  		return "?", s, nil // trigraph workaround
  	case '\'', '"', '\\':
  		return string(r), s, nil
  	case '0', '1', '2', '3', '4', '5', '6', '7':
  		if len(s) < 2 {
  			return "", "", fmt.Errorf(`\%c requires 2 following digits`, r)
  		}
  		ss := string(r) + s[:2]
  		s = s[2:]
  		i, err := strconv.ParseUint(ss, 8, 8)
  		if err != nil {
  			// Three valid octal digits can still exceed a byte (\400..\777);
  			// report that as a range problem rather than as bad digits.
  			if errors.Is(err, strconv.ErrRange) {
  				return "", "", fmt.Errorf(`\%s is out of range for a byte`, ss)
  			}
  			return "", "", fmt.Errorf(`\%s contains non-octal digits`, ss)
  		}
  		return string([]byte{byte(i)}), s, nil
  	case 'x', 'X', 'u', 'U':
  		// Number of hex digits the escape requires.
  		var n int
  		switch r {
  		case 'x', 'X':
  			n = 2
  		case 'u':
  			n = 4
  		case 'U':
  			n = 8
  		}
  		if len(s) < n {
  			return "", "", fmt.Errorf(`\%c requires %d following digits`, r, n)
  		}
  		ss := s[:n]
  		s = s[n:]
  		i, err := strconv.ParseUint(ss, 16, 64)
  		if err != nil {
  			return "", "", fmt.Errorf(`\%c%s contains non-hexadecimal digits`, r, ss)
  		}
  		if r == 'x' || r == 'X' {
  			return string([]byte{byte(i)}), s, nil
  		}
  		if i > utf8.MaxRune {
  			return "", "", fmt.Errorf(`\%c%s is not a valid Unicode code point`, r, ss)
  		}
  		return string(rune(i)), s, nil
  	}
  	return "", "", fmt.Errorf(`unknown escape \%c`, r)
  }
  // isIdentOrNumberChar reports whether c may appear in an identifier or number
  // token: an ASCII letter, an ASCII digit, or one of '-', '+', '.', '_'.
  func isIdentOrNumberChar(c byte) bool {
  	isUpper := 'A' <= c && c <= 'Z'
  	isLower := 'a' <= c && c <= 'z'
  	isDigit := '0' <= c && c <= '9'
  	if isUpper || isLower || isDigit {
  		return true
  	}
  	return c == '-' || c == '+' || c == '.' || c == '_'
  }
  // isWhitespace reports whether c is a space, tab, newline, or carriage
  // return.
  func isWhitespace(c byte) bool {
  	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
  }
  // isQuote reports whether c is a double or single quote character.
  func isQuote(c byte) bool {
  	return c == '"' || c == '\''
  }