File tree Expand file tree Collapse file tree 4 files changed +34
-5
lines changed Expand file tree Collapse file tree 4 files changed +34
-5
lines changed Original file line number Diff line number Diff line change 1
1
require "r_nlp/version"
2
2
require "r_nlp/tf"
3
+ require 'r_nlp/idf'
3
4
4
5
module RNlp
5
6
# Your code goes here...
Original file line number Diff line number Diff line change
1
+ # -*- coding: utf-8 -*-
2
+
3
module RNlp
  # Computes the inverse document frequency (IDF) of a word over a
  # collection of documents.
  class Idf
    # Language code given at construction time ('ja' or 'en').
    attr_reader :lang

    # @param lang [String] language code; only 'ja' and 'en' are accepted
    #
    # NOTE(review): an unsupported language prints a message to stdout and
    # terminates the process via `exit`. This is kept as-is for backward
    # compatibility; raising ArgumentError would be friendlier for library
    # users and should be considered in a breaking release.
    def initialize(lang)
      @lang = lang
      return if %w[ja en].include?(lang)

      puts "#{@lang} is not compatible language\n lang should be 'ja' or 'en'"
      exit
    end

    # Calculates log2(n / df) + 1, where n is the number of documents and
    # df is the number of documents that contain +word+.
    #
    # The word is matched as a literal substring, so regular-expression
    # metacharacters in it (".", "+", "(", ...) carry no special meaning
    # and can never raise RegexpError.
    #
    # @param word [String] the term to look for
    # @param documents [Array<String>] the corpus, one string per document
    # @return [Float] the IDF value; Float::INFINITY when no document
    #   contains the word, NaN when +documents+ is empty
    def calc_idf(word, documents)
      n = documents.size
      # Float conversion keeps the division below from truncating.
      df = documents.count { |document| document.include?(word) }.to_f
      Math.log2(n / df) + 1
    end
  end
end
Original file line number Diff line number Diff line change @@ -7,6 +7,10 @@ class Tf
7
7
attr_reader :lang
8
8
def initialize ( lang )
9
9
@lang = lang
10
+ unless lang == 'ja' || lang == 'en'
11
+ puts "lang #{ @lang } is not compatible."
12
+ exit
13
+ end
10
14
end
11
15
def count ( text )
12
16
tf = Hash . new
@@ -23,7 +27,7 @@ def count(text)
23
27
end
24
28
end
25
29
elsif @lang == 'en'
26
- text . split ( " " ) . each do |line |
30
+ text . split ( "\n " ) . each do |line |
27
31
line . split ( " " ) . each do |word |
28
32
if tf [ word ] == nil
29
33
tf [ word ] = 1
@@ -32,9 +36,6 @@ def count(text)
32
36
end
33
37
end
34
38
end
35
- else
36
- puts "lang #{ @lang } is not compatible."
37
- exit
38
39
end
39
40
return tf
40
41
end
Original file line number Diff line number Diff line change 1
1
module RNlp
2
- VERSION = "0.1.4 "
2
+ VERSION = "0.1.5 "
3
3
end
You can’t perform that action at this time.
0 commit comments