decoder.go 17 KB


  1. package maxminddb
  2. import (
  3. "encoding/binary"
  4. "math"
  5. "math/big"
  6. "reflect"
  7. "sync"
  8. )
  // decoder decodes values out of an in-memory byte buffer. Every offset
  // handled by its methods is an index into buffer.
  type decoder struct {
  	// buffer holds the raw bytes that are decoded.
  	buffer []byte
  }
  // dataType is the type number attached to every encoded value.
  type dataType int

  // Type numbers, in declaration order starting at zero. decodeCtrlData takes
  // the number from the top three bits of the control byte; when those bits are
  // zero (_Extended) the number is taken from the following byte plus 7.
  const (
  	_Extended dataType = iota
  	_Pointer
  	_String
  	_Float64
  	_Bytes
  	_Uint16
  	_Uint32
  	_Map
  	_Int32
  	_Uint64
  	_Uint128
  	_Slice

  	// The next two are never used by this decoder. They are placeholders;
  	// see the spec for more details.
  	_Container // nolint: deadcode, varcheck
  	_Marker    // nolint: deadcode, varcheck

  	_Bool
  	_Float32
  )
  // maximumDataStructureDepth is the nesting depth beyond which decode gives
  // up and reports an invalid database. This is the value used in
  // libmaxminddb.
  const maximumDataStructureDepth = 512
  37. func (d *decoder) decode(offset uint, result reflect.Value, depth int) (uint, error) {
  38. if depth > maximumDataStructureDepth {
  39. return 0, newInvalidDatabaseError("exceeded maximum data structure depth; database is likely corrupt")
  40. }
  41. typeNum, size, newOffset, err := d.decodeCtrlData(offset)
  42. if err != nil {
  43. return 0, err
  44. }
  45. if typeNum != _Pointer && result.Kind() == reflect.Uintptr {
  46. result.Set(reflect.ValueOf(uintptr(offset)))
  47. return d.nextValueOffset(offset, 1)
  48. }
  49. return d.decodeFromType(typeNum, size, newOffset, result, depth+1)
  50. }
  51. func (d *decoder) decodeCtrlData(offset uint) (dataType, uint, uint, error) {
  52. newOffset := offset + 1
  53. if offset >= uint(len(d.buffer)) {
  54. return 0, 0, 0, newOffsetError()
  55. }
  56. ctrlByte := d.buffer[offset]
  57. typeNum := dataType(ctrlByte >> 5)
  58. if typeNum == _Extended {
  59. if newOffset >= uint(len(d.buffer)) {
  60. return 0, 0, 0, newOffsetError()
  61. }
  62. typeNum = dataType(d.buffer[newOffset] + 7)
  63. newOffset++
  64. }
  65. var size uint
  66. size, newOffset, err := d.sizeFromCtrlByte(ctrlByte, newOffset, typeNum)
  67. return typeNum, size, newOffset, err
  68. }
  69. func (d *decoder) sizeFromCtrlByte(ctrlByte byte, offset uint, typeNum dataType) (uint, uint, error) {
  70. size := uint(ctrlByte & 0x1f)
  71. if typeNum == _Extended {
  72. return size, offset, nil
  73. }
  74. var bytesToRead uint
  75. if size < 29 {
  76. return size, offset, nil
  77. }
  78. bytesToRead = size - 28
  79. newOffset := offset + bytesToRead
  80. if newOffset > uint(len(d.buffer)) {
  81. return 0, 0, newOffsetError()
  82. }
  83. if size == 29 {
  84. return 29 + uint(d.buffer[offset]), offset + 1, nil
  85. }
  86. sizeBytes := d.buffer[offset:newOffset]
  87. switch {
  88. case size == 30:
  89. size = 285 + uintFromBytes(0, sizeBytes)
  90. case size > 30:
  91. size = uintFromBytes(0, sizeBytes) + 65821
  92. }
  93. return size, newOffset, nil
  94. }
  95. func (d *decoder) decodeFromType(
  96. dtype dataType,
  97. size uint,
  98. offset uint,
  99. result reflect.Value,
  100. depth int,
  101. ) (uint, error) {
  102. result = d.indirect(result)
  103. // For these types, size has a special meaning
  104. switch dtype {
  105. case _Bool:
  106. return d.unmarshalBool(size, offset, result)
  107. case _Map:
  108. return d.unmarshalMap(size, offset, result, depth)
  109. case _Pointer:
  110. return d.unmarshalPointer(size, offset, result, depth)
  111. case _Slice:
  112. return d.unmarshalSlice(size, offset, result, depth)
  113. }
  114. // For the remaining types, size is the byte size
  115. if offset+size > uint(len(d.buffer)) {
  116. return 0, newOffsetError()
  117. }
  118. switch dtype {
  119. case _Bytes:
  120. return d.unmarshalBytes(size, offset, result)
  121. case _Float32:
  122. return d.unmarshalFloat32(size, offset, result)
  123. case _Float64:
  124. return d.unmarshalFloat64(size, offset, result)
  125. case _Int32:
  126. return d.unmarshalInt32(size, offset, result)
  127. case _String:
  128. return d.unmarshalString(size, offset, result)
  129. case _Uint16:
  130. return d.unmarshalUint(size, offset, result, 16)
  131. case _Uint32:
  132. return d.unmarshalUint(size, offset, result, 32)
  133. case _Uint64:
  134. return d.unmarshalUint(size, offset, result, 64)
  135. case _Uint128:
  136. return d.unmarshalUint128(size, offset, result)
  137. default:
  138. return 0, newInvalidDatabaseError("unknown type: %d", dtype)
  139. }
  140. }
  141. func (d *decoder) unmarshalBool(size uint, offset uint, result reflect.Value) (uint, error) {
  142. if size > 1 {
  143. return 0, newInvalidDatabaseError("the MaxMind DB file's data section contains bad data (bool size of %v)", size)
  144. }
  145. value, newOffset := d.decodeBool(size, offset)
  146. switch result.Kind() {
  147. case reflect.Bool:
  148. result.SetBool(value)
  149. return newOffset, nil
  150. case reflect.Interface:
  151. if result.NumMethod() == 0 {
  152. result.Set(reflect.ValueOf(value))
  153. return newOffset, nil
  154. }
  155. }
  156. return newOffset, newUnmarshalTypeError(value, result.Type())
  157. }
  158. // indirect follows pointers and create values as necessary. This is
  159. // heavily based on encoding/json as my original version had a subtle
  160. // bug. This method should be considered to be licensed under
  161. // https://golang.org/LICENSE
  162. func (d *decoder) indirect(result reflect.Value) reflect.Value {
  163. for {
  164. // Load value from interface, but only if the result will be
  165. // usefully addressable.
  166. if result.Kind() == reflect.Interface && !result.IsNil() {
  167. e := result.Elem()
  168. if e.Kind() == reflect.Ptr && !e.IsNil() {
  169. result = e
  170. continue
  171. }
  172. }
  173. if result.Kind() != reflect.Ptr {
  174. break
  175. }
  176. if result.IsNil() {
  177. result.Set(reflect.New(result.Type().Elem()))
  178. }
  179. result = result.Elem()
  180. }
  181. return result
  182. }
  183. var sliceType = reflect.TypeOf([]byte{})
  184. func (d *decoder) unmarshalBytes(size uint, offset uint, result reflect.Value) (uint, error) {
  185. value, newOffset := d.decodeBytes(size, offset)
  186. switch result.Kind() {
  187. case reflect.Slice:
  188. if result.Type() == sliceType {
  189. result.SetBytes(value)
  190. return newOffset, nil
  191. }
  192. case reflect.Interface:
  193. if result.NumMethod() == 0 {
  194. result.Set(reflect.ValueOf(value))
  195. return newOffset, nil
  196. }
  197. }
  198. return newOffset, newUnmarshalTypeError(value, result.Type())
  199. }
  200. func (d *decoder) unmarshalFloat32(size uint, offset uint, result reflect.Value) (uint, error) {
  201. if size != 4 {
  202. return 0, newInvalidDatabaseError("the MaxMind DB file's data section contains bad data (float32 size of %v)", size)
  203. }
  204. value, newOffset := d.decodeFloat32(size, offset)
  205. switch result.Kind() {
  206. case reflect.Float32, reflect.Float64:
  207. result.SetFloat(float64(value))
  208. return newOffset, nil
  209. case reflect.Interface:
  210. if result.NumMethod() == 0 {
  211. result.Set(reflect.ValueOf(value))
  212. return newOffset, nil
  213. }
  214. }
  215. return newOffset, newUnmarshalTypeError(value, result.Type())
  216. }
  217. func (d *decoder) unmarshalFloat64(size uint, offset uint, result reflect.Value) (uint, error) {
  218. if size != 8 {
  219. return 0, newInvalidDatabaseError("the MaxMind DB file's data section contains bad data (float 64 size of %v)", size)
  220. }
  221. value, newOffset := d.decodeFloat64(size, offset)
  222. switch result.Kind() {
  223. case reflect.Float32, reflect.Float64:
  224. if result.OverflowFloat(value) {
  225. return 0, newUnmarshalTypeError(value, result.Type())
  226. }
  227. result.SetFloat(value)
  228. return newOffset, nil
  229. case reflect.Interface:
  230. if result.NumMethod() == 0 {
  231. result.Set(reflect.ValueOf(value))
  232. return newOffset, nil
  233. }
  234. }
  235. return newOffset, newUnmarshalTypeError(value, result.Type())
  236. }
  237. func (d *decoder) unmarshalInt32(size uint, offset uint, result reflect.Value) (uint, error) {
  238. if size > 4 {
  239. return 0, newInvalidDatabaseError("the MaxMind DB file's data section contains bad data (int32 size of %v)", size)
  240. }
  241. value, newOffset := d.decodeInt(size, offset)
  242. switch result.Kind() {
  243. case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
  244. n := int64(value)
  245. if !result.OverflowInt(n) {
  246. result.SetInt(n)
  247. return newOffset, nil
  248. }
  249. case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
  250. n := uint64(value)
  251. if !result.OverflowUint(n) {
  252. result.SetUint(n)
  253. return newOffset, nil
  254. }
  255. case reflect.Interface:
  256. if result.NumMethod() == 0 {
  257. result.Set(reflect.ValueOf(value))
  258. return newOffset, nil
  259. }
  260. }
  261. return newOffset, newUnmarshalTypeError(value, result.Type())
  262. }
  263. func (d *decoder) unmarshalMap(
  264. size uint,
  265. offset uint,
  266. result reflect.Value,
  267. depth int,
  268. ) (uint, error) {
  269. result = d.indirect(result)
  270. switch result.Kind() {
  271. default:
  272. return 0, newUnmarshalTypeError("map", result.Type())
  273. case reflect.Struct:
  274. return d.decodeStruct(size, offset, result, depth)
  275. case reflect.Map:
  276. return d.decodeMap(size, offset, result, depth)
  277. case reflect.Interface:
  278. if result.NumMethod() == 0 {
  279. rv := reflect.ValueOf(make(map[string]interface{}, size))
  280. newOffset, err := d.decodeMap(size, offset, rv, depth)
  281. result.Set(rv)
  282. return newOffset, err
  283. }
  284. return 0, newUnmarshalTypeError("map", result.Type())
  285. }
  286. }
  287. func (d *decoder) unmarshalPointer(size uint, offset uint, result reflect.Value, depth int) (uint, error) {
  288. pointer, newOffset, err := d.decodePointer(size, offset)
  289. if err != nil {
  290. return 0, err
  291. }
  292. _, err = d.decode(pointer, result, depth)
  293. return newOffset, err
  294. }
  295. func (d *decoder) unmarshalSlice(
  296. size uint,
  297. offset uint,
  298. result reflect.Value,
  299. depth int,
  300. ) (uint, error) {
  301. switch result.Kind() {
  302. case reflect.Slice:
  303. return d.decodeSlice(size, offset, result, depth)
  304. case reflect.Interface:
  305. if result.NumMethod() == 0 {
  306. a := []interface{}{}
  307. rv := reflect.ValueOf(&a).Elem()
  308. newOffset, err := d.decodeSlice(size, offset, rv, depth)
  309. result.Set(rv)
  310. return newOffset, err
  311. }
  312. }
  313. return 0, newUnmarshalTypeError("array", result.Type())
  314. }
  315. func (d *decoder) unmarshalString(size uint, offset uint, result reflect.Value) (uint, error) {
  316. value, newOffset := d.decodeString(size, offset)
  317. switch result.Kind() {
  318. case reflect.String:
  319. result.SetString(value)
  320. return newOffset, nil
  321. case reflect.Interface:
  322. if result.NumMethod() == 0 {
  323. result.Set(reflect.ValueOf(value))
  324. return newOffset, nil
  325. }
  326. }
  327. return newOffset, newUnmarshalTypeError(value, result.Type())
  328. }
  329. func (d *decoder) unmarshalUint(size uint, offset uint, result reflect.Value, uintType uint) (uint, error) {
  330. if size > uintType/8 {
  331. return 0, newInvalidDatabaseError("the MaxMind DB file's data section contains bad data (uint%v size of %v)", uintType, size)
  332. }
  333. value, newOffset := d.decodeUint(size, offset)
  334. switch result.Kind() {
  335. case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
  336. n := int64(value)
  337. if !result.OverflowInt(n) {
  338. result.SetInt(n)
  339. return newOffset, nil
  340. }
  341. case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
  342. if !result.OverflowUint(value) {
  343. result.SetUint(value)
  344. return newOffset, nil
  345. }
  346. case reflect.Interface:
  347. if result.NumMethod() == 0 {
  348. result.Set(reflect.ValueOf(value))
  349. return newOffset, nil
  350. }
  351. }
  352. return newOffset, newUnmarshalTypeError(value, result.Type())
  353. }
  354. var bigIntType = reflect.TypeOf(big.Int{})
  355. func (d *decoder) unmarshalUint128(size uint, offset uint, result reflect.Value) (uint, error) {
  356. if size > 16 {
  357. return 0, newInvalidDatabaseError("the MaxMind DB file's data section contains bad data (uint128 size of %v)", size)
  358. }
  359. value, newOffset := d.decodeUint128(size, offset)
  360. switch result.Kind() {
  361. case reflect.Struct:
  362. if result.Type() == bigIntType {
  363. result.Set(reflect.ValueOf(*value))
  364. return newOffset, nil
  365. }
  366. case reflect.Interface:
  367. if result.NumMethod() == 0 {
  368. result.Set(reflect.ValueOf(value))
  369. return newOffset, nil
  370. }
  371. }
  372. return newOffset, newUnmarshalTypeError(value, result.Type())
  373. }
  374. func (d *decoder) decodeBool(size uint, offset uint) (bool, uint) {
  375. return size != 0, offset
  376. }
  377. func (d *decoder) decodeBytes(size uint, offset uint) ([]byte, uint) {
  378. newOffset := offset + size
  379. bytes := make([]byte, size)
  380. copy(bytes, d.buffer[offset:newOffset])
  381. return bytes, newOffset
  382. }
  383. func (d *decoder) decodeFloat64(size uint, offset uint) (float64, uint) {
  384. newOffset := offset + size
  385. bits := binary.BigEndian.Uint64(d.buffer[offset:newOffset])
  386. return math.Float64frombits(bits), newOffset
  387. }
  388. func (d *decoder) decodeFloat32(size uint, offset uint) (float32, uint) {
  389. newOffset := offset + size
  390. bits := binary.BigEndian.Uint32(d.buffer[offset:newOffset])
  391. return math.Float32frombits(bits), newOffset
  392. }
  393. func (d *decoder) decodeInt(size uint, offset uint) (int, uint) {
  394. newOffset := offset + size
  395. var val int32
  396. for _, b := range d.buffer[offset:newOffset] {
  397. val = (val << 8) | int32(b)
  398. }
  399. return int(val), newOffset
  400. }
  401. func (d *decoder) decodeMap(
  402. size uint,
  403. offset uint,
  404. result reflect.Value,
  405. depth int,
  406. ) (uint, error) {
  407. if result.IsNil() {
  408. result.Set(reflect.MakeMapWithSize(result.Type(), int(size)))
  409. }
  410. mapType := result.Type()
  411. keyValue := reflect.New(mapType.Key()).Elem()
  412. elemType := mapType.Elem()
  413. elemKind := elemType.Kind()
  414. var elemValue reflect.Value
  415. for i := uint(0); i < size; i++ {
  416. var key []byte
  417. var err error
  418. key, offset, err = d.decodeKey(offset)
  419. if err != nil {
  420. return 0, err
  421. }
  422. if !elemValue.IsValid() || elemKind == reflect.Interface {
  423. elemValue = reflect.New(elemType).Elem()
  424. }
  425. offset, err = d.decode(offset, elemValue, depth)
  426. if err != nil {
  427. return 0, err
  428. }
  429. keyValue.SetString(string(key))
  430. result.SetMapIndex(keyValue, elemValue)
  431. }
  432. return offset, nil
  433. }
  434. func (d *decoder) decodePointer(
  435. size uint,
  436. offset uint,
  437. ) (uint, uint, error) {
  438. pointerSize := ((size >> 3) & 0x3) + 1
  439. newOffset := offset + pointerSize
  440. if newOffset > uint(len(d.buffer)) {
  441. return 0, 0, newOffsetError()
  442. }
  443. pointerBytes := d.buffer[offset:newOffset]
  444. var prefix uint
  445. if pointerSize == 4 {
  446. prefix = 0
  447. } else {
  448. prefix = size & 0x7
  449. }
  450. unpacked := uintFromBytes(prefix, pointerBytes)
  451. var pointerValueOffset uint
  452. switch pointerSize {
  453. case 1:
  454. pointerValueOffset = 0
  455. case 2:
  456. pointerValueOffset = 2048
  457. case 3:
  458. pointerValueOffset = 526336
  459. case 4:
  460. pointerValueOffset = 0
  461. }
  462. pointer := unpacked + pointerValueOffset
  463. return pointer, newOffset, nil
  464. }
  465. func (d *decoder) decodeSlice(
  466. size uint,
  467. offset uint,
  468. result reflect.Value,
  469. depth int,
  470. ) (uint, error) {
  471. result.Set(reflect.MakeSlice(result.Type(), int(size), int(size)))
  472. for i := 0; i < int(size); i++ {
  473. var err error
  474. offset, err = d.decode(offset, result.Index(i), depth)
  475. if err != nil {
  476. return 0, err
  477. }
  478. }
  479. return offset, nil
  480. }
  481. func (d *decoder) decodeString(size uint, offset uint) (string, uint) {
  482. newOffset := offset + size
  483. return string(d.buffer[offset:newOffset]), newOffset
  484. }
  485. func (d *decoder) decodeStruct(
  486. size uint,
  487. offset uint,
  488. result reflect.Value,
  489. depth int,
  490. ) (uint, error) {
  491. fields := cachedFields(result)
  492. // This fills in embedded structs
  493. for _, i := range fields.anonymousFields {
  494. _, err := d.unmarshalMap(size, offset, result.Field(i), depth)
  495. if err != nil {
  496. return 0, err
  497. }
  498. }
  499. // This handles named fields
  500. for i := uint(0); i < size; i++ {
  501. var (
  502. err error
  503. key []byte
  504. )
  505. key, offset, err = d.decodeKey(offset)
  506. if err != nil {
  507. return 0, err
  508. }
  509. // The string() does not create a copy due to this compiler
  510. // optimization: https://github.com/golang/go/issues/3512
  511. j, ok := fields.namedFields[string(key)]
  512. if !ok {
  513. offset, err = d.nextValueOffset(offset, 1)
  514. if err != nil {
  515. return 0, err
  516. }
  517. continue
  518. }
  519. offset, err = d.decode(offset, result.Field(j), depth)
  520. if err != nil {
  521. return 0, err
  522. }
  523. }
  524. return offset, nil
  525. }
  // fieldsType describes how the fields of one struct type are decoded.
  type fieldsType struct {
  	// namedFields maps a key name to the index of the field that receives
  	// its value.
  	namedFields map[string]int
  	// anonymousFields lists the indexes of the embedded fields.
  	anonymousFields []int
  }

  // fieldsMap caches one *fieldsType per struct reflect.Type.
  var fieldsMap sync.Map

  // cachedFields returns the field layout for the struct type of result,
  // computing and caching it on first use.
  //
  // A field is keyed by its "maxminddb" tag when it has one and by its Go name
  // otherwise. Fields tagged "-" are left out altogether. Embedded fields are
  // recorded by index only, never by name.
  func cachedFields(result reflect.Value) *fieldsType {
  	structType := result.Type()
  	if cached, found := fieldsMap.Load(structType); found {
  		return cached.(*fieldsType)
  	}

  	count := structType.NumField()
  	layout := &fieldsType{namedFields: make(map[string]int, count)}
  	for i := 0; i < count; i++ {
  		field := structType.Field(i)

  		name := field.Name
  		tag := field.Tag.Get("maxminddb")
  		if tag == "-" {
  			continue
  		}
  		if tag != "" {
  			name = tag
  		}

  		if field.Anonymous {
  			layout.anonymousFields = append(layout.anonymousFields, i)
  			continue
  		}
  		layout.namedFields[name] = i
  	}

  	fieldsMap.Store(structType, layout)
  	return layout
  }
  558. func (d *decoder) decodeUint(size uint, offset uint) (uint64, uint) {
  559. newOffset := offset + size
  560. bytes := d.buffer[offset:newOffset]
  561. var val uint64
  562. for _, b := range bytes {
  563. val = (val << 8) | uint64(b)
  564. }
  565. return val, newOffset
  566. }
  567. func (d *decoder) decodeUint128(size uint, offset uint) (*big.Int, uint) {
  568. newOffset := offset + size
  569. val := new(big.Int)
  570. val.SetBytes(d.buffer[offset:newOffset])
  571. return val, newOffset
  572. }
  // uintFromBytes appends uintBytes, most significant byte first, below
  // prefix: the result starts as prefix and is shifted left by eight bits for
  // every byte that is added. With no bytes the prefix is returned as is.
  func uintFromBytes(prefix uint, uintBytes []byte) uint {
  	acc := prefix
  	for i := 0; i < len(uintBytes); i++ {
  		acc = acc<<8 | uint(uintBytes[i])
  	}
  	return acc
  }
  580. // decodeKey decodes a map key into []byte slice. We use a []byte so that we
  581. // can take advantage of https://github.com/golang/go/issues/3512 to avoid
  582. // copying the bytes when decoding a struct. Previously, we achieved this by
  583. // using unsafe.
  584. func (d *decoder) decodeKey(offset uint) ([]byte, uint, error) {
  585. typeNum, size, dataOffset, err := d.decodeCtrlData(offset)
  586. if err != nil {
  587. return nil, 0, err
  588. }
  589. if typeNum == _Pointer {
  590. pointer, ptrOffset, err := d.decodePointer(size, dataOffset)
  591. if err != nil {
  592. return nil, 0, err
  593. }
  594. key, _, err := d.decodeKey(pointer)
  595. return key, ptrOffset, err
  596. }
  597. if typeNum != _String {
  598. return nil, 0, newInvalidDatabaseError("unexpected type when decoding string: %v", typeNum)
  599. }
  600. newOffset := dataOffset + size
  601. if newOffset > uint(len(d.buffer)) {
  602. return nil, 0, newOffsetError()
  603. }
  604. return d.buffer[dataOffset:newOffset], newOffset, nil
  605. }
  606. // This function is used to skip ahead to the next value without decoding
  607. // the one at the offset passed in. The size bits have different meanings for
  608. // different data types
  609. func (d *decoder) nextValueOffset(offset uint, numberToSkip uint) (uint, error) {
  610. if numberToSkip == 0 {
  611. return offset, nil
  612. }
  613. typeNum, size, offset, err := d.decodeCtrlData(offset)
  614. if err != nil {
  615. return 0, err
  616. }
  617. switch typeNum {
  618. case _Pointer:
  619. _, offset, err = d.decodePointer(size, offset)
  620. if err != nil {
  621. return 0, err
  622. }
  623. case _Map:
  624. numberToSkip += 2 * size
  625. case _Slice:
  626. numberToSkip += size
  627. case _Bool:
  628. default:
  629. offset += size
  630. }
  631. return d.nextValueOffset(offset, numberToSkip-1)
  632. }