Class: Embulk::TextGuessPlugin

Inherits:
GuessPlugin
Defined in:
embulk-core/src/main/ruby/embulk/guess_plugin.rb

Direct Known Subclasses

Guess::NewlineGuessPlugin

Instance Method Summary

Methods inherited from GuessPlugin

from_java, new_java

Instance Method Details

#guess(config, sample) ⇒ Object



47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'embulk-core/src/main/ruby/embulk/guess_plugin.rb', line 47

# Decodes a raw sample into text and hands it to #guess_text.
#
# When the config has no 'charset' entry under 'parser', the call is
# delegated to Guess::CharsetGuessPlugin and its result is returned as-is.
# Otherwise the 'parser' section is loaded into a LineDecoder task and the
# sample is read line by line through Java::LineDecoder.
#
# @param config [Object] must respond to +fetch+ and +param+
#   (presumably an Embulk DataSource -- confirm against callers)
# @param sample [Object] must respond to +to_java+
# @return [Object] the charset plugin's result, a new DataSource when the
#   parser config cannot be loaded, or whatever #guess_text returns
def guess(config, sample)
  charset = config.fetch('parser', {}).fetch('charset', nil)
  if charset.nil?
    # Loaded lazily: only needed while the charset is still unknown.
    require 'embulk/guess/charset'
    return Guess::CharsetGuessPlugin.new.guess(config, sample)
  end

  # TODO pure-ruby LineDecoder implementation?
  begin
    parser_config = config.param("parser", :hash, default: {})
    parser_task = parser_config.load_config(Java::LineDecoder::DecoderTask)
  rescue => error
    # TODO log?
    # Best effort: dump the error and its backtrace, then give up on guessing.
    p error
    p error.backtrace
    return DataSource.new
  end

  file_input = Java::ListFileInput.new([[sample.to_java]])
  decoder = Java::LineDecoder.new(file_input, parser_task)
  decoded = ''
  while decoder.nextFile
    # The separator goes *between* lines of one file; the flag resets per
    # file, so the first line of a following file is appended directly.
    needs_separator = false
    while (line = decoder.poll)
      decoded << parser_task.getNewline.getString if needs_separator
      decoded << line
      needs_separator = true
    end
  end

  guess_text(config, decoded)
end

#guess_text(config, sample_text) ⇒ Object

Raises:

  • (NotImplementedError)


81
82
83
# File 'embulk-core/src/main/ruby/embulk/guess_plugin.rb', line 81

# Hook to be overridden by subclasses; this base version always raises.
#
# @param config [Object] unused here
# @param sample_text [Object] unused here
# @raise [NotImplementedError] always
def guess_text(config, sample_text)
  message = "TextGuessPlugin#guess_text(config, sample_text) must be implemented"
  raise NotImplementedError, message
end