Class: Embulk::LineGuessPlugin

Inherits:
GuessPlugin
Defined in:
embulk-core/src/main/ruby/embulk/guess_plugin.rb

Instance Method Summary

Methods inherited from GuessPlugin

from_java, new_java

Instance Method Details

#guess(config, sample) ⇒ Object



87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'embulk-core/src/main/ruby/embulk/guess_plugin.rb', line 87

# Guesses one step of parser configuration from a sample.
#
# The steps are tried in order, and the first one that applies returns:
# 1. no 'charset' under 'parser'  -> delegate to Guess::CharsetGuessPlugin
# 2. no 'newline' under 'parser'  -> delegate to Guess::NewlineGuessPlugin
# 3. otherwise decode the sample into lines and call #guess_lines
#
# @param config the configuration; read through +fetch+ and +param+
# @param sample the sampled input; must respond to +to_java+ and +end_with?+
# @return the result of the delegated guesser or of #guess_lines, or a new
#   DataSource when the decoder task cannot be loaded from the config
def guess(config, sample)
  # True when the given key is absent (or nil) in the 'parser' section.
  parser_option_unset = ->(key) { config.fetch('parser', {}).fetch(key, nil).nil? }

  if parser_option_unset.call('charset')
    require 'embulk/guess/charset'
    return Guess::CharsetGuessPlugin.new.guess(config, sample)
  end

  if parser_option_unset.call('newline')
    require 'embulk/guess/newline'
    return Guess::NewlineGuessPlugin.new.guess(config, sample)
  end

  # TODO pure-ruby LineDecoder implementation?
  parser_task =
    begin
      config.param("parser", :hash, default: {}).load_config(Java::LineDecoder::DecoderTask)
    rescue => e
      # TODO log?
      # Best effort: dump the error and give back an empty guess.
      p e
      p e.backtrace
      return DataSource.new
    end

  file_input = Java::ListFileInput.new([[sample.to_java]])
  decoder = Java::LineDecoder.new(file_input, parser_task)

  sample_lines = []
  while decoder.nextFile
    while (line = decoder.poll)
      sample_lines.push(line)
    end

    # A sample that does not end with the configured newline was cut
    # mid-line, so the last decoded line is partial and is discarded.
    terminated = sample.end_with?(parser_task.getNewline.getString)
    sample_lines.pop if !terminated && !sample_lines.empty?
  end

  guess_lines(config, sample_lines)
end

#guess_lines(config, sample_lines) ⇒ Object

Raises:

  • (NotImplementedError)


124
125
126
# File 'embulk-core/src/main/ruby/embulk/guess_plugin.rb', line 124

# Hook that receives the decoded sample lines from #guess.
#
# This implementation always raises; it does not use its arguments.
#
# @param config the configuration passed through from #guess
# @param sample_lines the lines collected from the sample by #guess
# @raise [NotImplementedError] always
def guess_lines(config, sample_lines)
  message = "LineGuessPlugin#guess_lines(config, sample_lines) must be implemented"
  raise NotImplementedError, message
end