# Focused unit tests for the canonicaliser's divergence-prone rules.
# No non-ASCII literals appear in this source file; Unicode inputs are built
# from code points so the file's own encoding cannot perturb them.

test_that("keys are sorted by Unicode code point", {
  expect_equal(rawToChar(atx_canonicalize('{"b":1,"a":2,"c":3}')),
               '{"a":2,"b":1,"c":3}')
})

test_that("no whitespace and compact separators", {
  expect_equal(rawToChar(atx_canonicalize('{ "a" : [1, 2 , 3] }')),
               '{"a":[1,2,3]}')
})

test_that("whole-number floats collapse to integers", {
  out <- rawToChar(atx_canonicalize('{"x":1.0,"y":2.0,"z":1.5}'))
  expect_equal(out, '{"x":1,"y":2,"z":1.5}')
})

test_that("large integers are preserved exactly (no float coercion)", {
  out <- rawToChar(atx_canonicalize('{"big":9007199254740993}'))
  expect_equal(out, '{"big":9007199254740993}')
})

test_that("negative integers and nesting", {
  out <- rawToChar(atx_canonicalize('{"n":-42,"d":{"y":[3,2,1],"x":"z"}}'))
  expect_equal(out, '{"d":{"x":"z","y":[3,2,1]},"n":-42}')
})

test_that("empty object and empty array are distinguished", {
  out <- rawToChar(atx_canonicalize('{"o":{},"a":[]}'))
  expect_equal(out, '{"a":[],"o":{}}')
})

test_that("non-ASCII is emitted as raw UTF-8, not unicode escapes", {
  # value "cafe" + U+00E9 (precomposed) -> bytes ... 63 61 66 c3 a9
  e_acute <- intToUtf8(0x00E9L)
  Encoding(e_acute) <- "UTF-8"
  txt <- paste0('{"k":"caf', e_acute, '"}')
  bytes <- atx_canonicalize(txt)
  expect_equal(atx_raw_to_hex(bytes), "7b226b223a22636166c3a9227d")
})

test_that("NFC normalization composes decomposed sequences", {
  # "cafe" + U+0301 (combining acute) must normalize to "caf" + U+00E9.
  dec <- intToUtf8(c(utf8ToInt("cafe"), 0x0301L))
  Encoding(dec) <- "UTF-8"
  decomposed <- paste0('{"k":"', dec, '"}')
  com <- intToUtf8(c(utf8ToInt("caf"), 0x00E9L))
  Encoding(com) <- "UTF-8"
  composed <- paste0('{"k":"', com, '"}')
  expect_equal(atx_canonicalize(decomposed), atx_canonicalize(composed))
  expect_equal(atx_raw_to_hex(atx_canonicalize(decomposed)),
               "7b226b223a22636166c3a9227d")
})

test_that("4-byte emoji survives as raw UTF-8", {
  # U+1F600 grinning face -> f0 9f 98 80
  emoji <- intToUtf8(0x1F600L)
  Encoding(emoji) <- "UTF-8"
  txt <- paste0('{"k":"', emoji, '"}')
  expect_equal(atx_raw_to_hex(atx_canonicalize(txt)),
               "7b226b223a22f09f9880227d")
})

test_that("base58 round-trips arbitrary bytes", {
  for (h in c("00", "0001", "ed018022fe847be6", "ffffffff")) {
    r <- atx_hex_to_raw(h)
    expect_equal(atx_base58_decode(atx_base58_encode(r)), r, info = h)
  }
})

test_that("booleans and null literals", {
  expect_equal(rawToChar(atx_canonicalize('{"t":true,"f":false,"n":null}')),
               '{"f":false,"n":null,"t":true}')
})