Skip to content

Commit

Permalink
Fix dstr unparsing
Browse files Browse the repository at this point in the history
* This is an entirely new approach.
* Instead to find the "correct" dstr segments we simply try all and unparse the first one
  that round trips.
* This so far guarantees we always get good concrete syntax, but it can be time intensive as
  the combinatoric space of possible dynamic string sequence is quadratic with the dstr children size.
* For this reason we try above (currently) dstr children to unparse as heredoc first.
* Passes the entire corpus and fixes bugs.

[fix #249]
  • Loading branch information
mbj committed Sep 16, 2024
1 parent 2163503 commit 7ac3ca1
Show file tree
Hide file tree
Showing 56 changed files with 1,284 additions and 850 deletions.
2 changes: 2 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,6 @@

source 'https://rubygems.org'

gem 'mutant', path: '../mutant'

gemspec
22 changes: 13 additions & 9 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
PATH
remote: ../mutant
specs:
mutant (0.12.4)
diff-lcs (~> 1.3)
parser (~> 3.3.0)
regexp_parser (~> 2.9.0)
sorbet-runtime (~> 0.5.0)
unparser (~> 0.6.14)

PATH
remote: .
specs:
Expand All @@ -12,14 +22,8 @@ GEM
diff-lcs (1.5.1)
json (2.7.2)
language_server-protocol (3.17.0.3)
mutant (0.12.3)
diff-lcs (~> 1.3)
parser (~> 3.3.0)
regexp_parser (~> 2.9.0)
sorbet-runtime (~> 0.5.0)
unparser (~> 0.6.14)
mutant-rspec (0.12.3)
mutant (= 0.12.3)
mutant-rspec (0.12.4)
mutant (= 0.12.4)
rspec-core (>= 3.8.0, < 4.0.0)
parallel (1.25.1)
parser (3.3.2.0)
Expand Down Expand Up @@ -70,7 +74,7 @@ PLATFORMS
ruby

DEPENDENCIES
mutant (~> 0.12.2)
mutant!
mutant-rspec (~> 0.12.2)
rspec (~> 3.9)
rspec-core (~> 3.9)
Expand Down
97 changes: 93 additions & 4 deletions bin/corpus
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

require 'etc'
require 'mutant'
require 'optparse'
require 'pathname'
require 'unparser'

Thread.abort_on_exception = true
Thread.report_on_exception = true

module Unparser
module Corpus
ROOT = Pathname.new(__dir__).parent
Expand All @@ -17,16 +22,85 @@ module Unparser
#
# @return [Boolean]
def verify
puts("Verifiying: #{name}")
checkout
command = %W[unparser #{repo_path}]
exclude.each do |name|
command.push('--ignore', repo_path.join(name).to_s)

paths = Pathname.glob(Pathname.new(repo_path).join('**/*.rb'))

driver = Mutant::Parallel.async(
config: Mutant::Parallel::Config.new(
block: method(:verify_path),
jobs: Etc.nprocessors,
on_process_start: ->(*) {},
process_name: 'unparser-corpus-test',
sink: Sink.new,
source: Mutant::Parallel::Source::Array.new(jobs: paths),
thread_name: 'unparser-corpus-test',
timeout: nil
),
world: Mutant::WORLD
)

loop do
status = driver.wait_timeout(1)

puts("Processed: #{status.payload.total}")

status.payload.errors.each do |report|
puts report
fail
end

break if status.done?
end
Kernel.system(*command)

true
end

private

class Sink
include Mutant::Parallel::Sink

attr_reader :errors, :total

def initialize
@errors = []
@total = 0
end

def stop?
!@errors.empty?
end

def status
self
end

def response(response)
if response.error
Mutant::WORLD.stderr.puts(response.log)
fail response.error
end

@total += 1

if response.result
@errors << response.result
end
end
end

def verify_path(path)
validation = Validation.from_path(path)

if original_syntax_error?(validation) || generated_encoding_error?(validation) || validation.success?
return
end

validation.report
end

def checkout
TMP.mkdir unless TMP.directory?

Expand All @@ -50,6 +124,21 @@ module Unparser
TMP.join(name)
end

private

# This happens if the original source contained a non UTF charset meta comment.
# These are not exposed to the AST in a way unparser could know about to generate a non UTF-8
# target and emit that meta comment itself.
# For the purpose of corpus testing these cases are ignored.
def generated_encoding_error?(validation)
exception = validation.generated_node.from_left { return false }
exception.instance_of?(Parser::SyntaxError) && exception.message.eql?('literal contains escape sequences incompatible with UTF-8')
end

def original_syntax_error?(validation)
validation.original_node.from_left { return false }.instance_of?(Parser::SyntaxError)
end

def system(arguments)
return if Kernel.system(*arguments)

Expand Down
9 changes: 5 additions & 4 deletions bin/parser-round-trip-test
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ class Test
:rubies
)

EXPECT_FAILURE = {}.freeze
STATIC_LOCAL_VARIABLES = %i[foo bar baz].freeze
EXPECT_FAILURE = {}.freeze

def legacy_attributes
default_builder_attributes.reject do |attribute_name, value|
Expand Down Expand Up @@ -77,9 +78,9 @@ class Test

# rubocop:disable Metrics/AbcSize
def validation
identification = name.to_s
identification = name.to_s

generated_source = Unparser.unparse_either(node)
generated_source = Unparser.unparse_either(node, static_local_variables: STATIC_LOCAL_VARIABLES)
.fmap { |string| string.dup.force_encoding(parser_source.encoding).freeze }

generated_node = generated_source.bind do |source|
Expand All @@ -99,7 +100,7 @@ class Test

def parser
Unparser.parser.tap do |parser|
%w[foo bar baz].each(&parser.static_env.method(:declare))
STATIC_LOCAL_VARIABLES.each(&parser.static_env.method(:declare))
end
end

Expand Down
117 changes: 86 additions & 31 deletions lib/unparser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ def initialize
end
end

# Parsed AST with additional details
class AST
include Anima.new(:comments, :explicit_encoding, :node, :static_local_variables)
end

EMPTY_STRING = ''.freeze
EMPTY_ARRAY = [].freeze

Expand All @@ -44,39 +49,90 @@ def initialize(message, node)
@node = node
freeze
end
end
end # InvalidNodeError

# Error raised when unparser encounders AST it cannot generate source for that would parse to the same AST.
class UnsupportedNodeError < RuntimeError
end # UnsupportedNodeError

# Unparse an AST (and, optionally, comments) into a string
#
# @param [Parser::AST::Node, nil] node
# @param [Array] comment_array
# @param [Array] comments
# @param [Encoding, nil] explicit_encoding
# @param [Set<Symbol>] static_local_variables
#
# @return [String]
#
# @raise InvalidNodeError
# if the node passed is invalid
#
# @api public
def self.unparse(node, comment_array = [])
return '' if node.nil?
def self.unparse(
node,
comments: EMPTY_ARRAY,
explicit_encoding: nil,
static_local_variables: Set.new
)
unparse_ast(
AST.new(
comments: comments,
explicit_encoding: explicit_encoding,
node: node,
static_local_variables: static_local_variables
)
)
end

# Unparse an AST
#
# @param [AST] ast
#
# @return [String]
#
# @raise InvalidNodeError
# if the node passed is invalid
#
# @raise UnsupportedNodeError
# if the node passed is valid but unparser cannot unparse it
#
# @api public
def self.unparse_ast(ast)
return EMPTY_STRING if ast.node.nil?

local_variable_scope = AST::LocalVariableScope.new(
node: ast.node,
static_local_variables: ast.static_local_variables
)

Buffer.new.tap do |buffer|
Emitter::Root.new(
buffer,
node,
Comments.new(comment_array)
buffer: buffer,
comments: Comments.new(ast.comments),
explicit_encoding: ast.explicit_encoding,
local_variable_scope: local_variable_scope,
node: ast.node
).write_to_buffer
end.content
end

# Unparse AST either
#
# @param [AST] ast
#
# @return [Either<Exception,String>]
def self.unparse_ast_either(ast)
Either.wrap_error(Exception) { unparse_ast(ast) }
end

# Unparse with validation
#
# @param [Parser::AST::Node, nil] node
# @param [Array] comment_array
# @param [Array] comments
#
# @return [Either<Validation,String>]
def self.unparse_validate(node, comment_array = [])
generated = unparse(node, comment_array)
def self.unparse_validate(node, comments: EMPTY_ARRAY)
generated = unparse(node, comments:)
validation = Validation.from_string(generated)

if validation.success?
Expand All @@ -86,44 +142,41 @@ def self.unparse_validate(node, comment_array = [])
end
end

# Unparse capturing errors
#
# This is mostly useful for writing testing tools against unparser.
#
# @param [Parser::AST::Node, nil] node
#
# @return [Either<Exception, String>]
def self.unparse_either(node)
Either.wrap_error(Exception) { unparse(node) }
end

# Parse string into AST
#
# @param [String] source
#
# @return [Parser::AST::Node, nil]
def self.parse(source)
parser.parse(buffer(source))
def self.parse(source, identification: '(string)')
parse_ast(source, identification: identification).node
end

# Parse string into either syntax error or AST
#
# @param [String] source
#
# @return [Either<Parser::SyntaxError, (Parser::ASTNode, nil)>]
def self.parse_either(source)
Either.wrap_error(Parser::SyntaxError) do
parser.parse(buffer(source))
# @return [Either<Exception, (Parser::ASTNode, nil)>]
def self.parse_ast_either(source, identification: '(source)')
Either.wrap_error(Exception) do
parse_ast(source, identification: identification)
end
end

# Parse string into AST, with comments
# Parse source with ast details
#
# @param [String] source
#
# @return [Parser::AST::Node]
def self.parse_with_comments(source)
parser.parse_with_comments(buffer(source))
# @return [AST]
def self.parse_ast(source, identification: '(source)', static_local_variables: Set.new)
explicit_encoding = Parser::Source::Buffer.recognize_encoding(source.dup.force_encoding(Encoding::BINARY))
node, comments = parser.parse_with_comments(buffer(source, identification: identification))

AST.new(
comments: comments,
explicit_encoding: explicit_encoding,
node: node,
static_local_variables: static_local_variables
)
end

# Parser instance that produces AST unparser understands
Expand Down Expand Up @@ -210,6 +263,7 @@ def self.buffer(source, identification = '(string)')
require 'unparser/emitter/root'
require 'unparser/emitter/send'
require 'unparser/emitter/simple'
require 'unparser/emitter/string'
require 'unparser/emitter/splat'
require 'unparser/emitter/super'
require 'unparser/emitter/undef'
Expand All @@ -224,6 +278,7 @@ def self.buffer(source, identification = '(string)')
require 'unparser/writer'
require 'unparser/writer/binary'
require 'unparser/writer/dynamic_string'
require 'unparser/writer/regexp'
require 'unparser/writer/resbody'
require 'unparser/writer/rescue'
require 'unparser/writer/send'
Expand Down
Loading

0 comments on commit 7ac3ca1

Please sign in to comment.