Class: Embulk::Guess::NewlineGuessPlugin

Inherits:
TextGuessPlugin show all
Defined in:
embulk-core/src/main/ruby/embulk/guess/newline.rb

Instance Method Summary collapse

Methods inherited from TextGuessPlugin

#guess_text

Methods inherited from Embulk::GuessPlugin

from_java, new_java

Instance Method Details

#guess(config, sample) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'embulk-core/src/main/ruby/embulk/guess/newline.rb', line 7

def guess(config, sample)
  if config.fetch('parser', {}).fetch('charset', nil).nil?
    require 'embulk/guess/charset'
    charset_guess = Guess::CharsetGuessPlugin.new
    return charset_guess.guess(config, sample)
  end

  cr_count = sample.count("\r")
  lf_count = sample.count("\n")
  crlf_count = sample.scan(/\r\n/).length
  if crlf_count > cr_count / 2 && crlf_count > lf_count / 2
    return {"parser" => {"newline" => "CRLF"}}
  elsif cr_count > lf_count / 2
    return {"parser" => {"newline" => "CR"}}
  else
    return {"parser" => {"newline" => "LF"}}
  end
end