#! /usr/bin/env Rscript

library(testthat)
library(rly)

context("Parsing of Fortran H Edit descriptions")

#' These tokens can't be easily tokenized because they are of the following
#' form:
#'
#'   nHc1...cn
#'
#' where n is a positive integer and c1 ... cn are characters.
#'
#' This example shows how to modify the state of the lexer to parse
#' such tokens

Lexer <- R6::R6Class("Lexer",
  public = list(
    tokens = c('H_EDIT_DESCRIPTOR'),
    t_ignore = " \t\n",
    t_H_EDIT_DESCRIPTOR = function(re="\\d+H.*", t) {
      # This grabs all of the remaining text
      i <- gregexpr(pattern ='H', t$value)[[1]][1]
      n <- strtoi(substring(t$value, 1, i-1))
      # Adjust the tokenizing position
      t$lexer$lexpos <- t$lexer$lexpos - nchar(t$value) + (i+1+n)
      t$value <- substring(t$value, i+1, i+n)
      return(t)
    },
    t_error = function(t) {
      cat(sprintf("Illegal character '%s'", t$value[0]))
      t$lexer$skip(1)
    }
  )
)

test_that("H Edit", {
  lexer <- rly::lex(Lexer)
  lexer$input("3Habc 10Habcdefghij 2Hxy")
  expect_equal(lexer$token()$value, "abc")
  expect_equal(lexer$token()$value, "abcdefghij")
  expect_equal(lexer$token()$value, "xy")
})