Skip to content

Commit 8ae8ee4

Browse files
author
Makoto Hiramatsu
committed
implement idf
1 parent 9b94475 commit 8ae8ee4

File tree

4 files changed

+34
-5
lines changed

4 files changed

+34
-5
lines changed

lib/r_nlp.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
require "r_nlp/version"
22
require "r_nlp/tf"
3+
require 'r_nlp/idf'
34

45
module RNlp
56
# Your code goes here...

lib/r_nlp/idf.rb

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# -*- coding: utf-8 -*-
2+
3+
module RNlp
  # Computes the inverse document frequency (IDF) of a word over a
  # collection of documents.
  class Idf
    # Language codes accepted by the constructor.
    SUPPORTED_LANGS = %w[ja en].freeze

    # @return [String] the language code given at construction ('ja' or 'en')
    attr_reader :lang

    # @param lang [String] language code; must be 'ja' or 'en'
    # @raise [ArgumentError] if +lang+ is not a supported language code
    def initialize(lang)
      unless SUPPORTED_LANGS.include?(lang)
        # Raise instead of printing and calling `exit`: a library must not
        # terminate the host process, and callers can rescue this error.
        raise ArgumentError, "#{lang} is not compatible language\nlang should be 'ja' or 'en'"
      end

      @lang = lang
    end

    # Calculates log2(N / df) + 1, where N is the number of documents and
    # df is the number of documents that contain +word+ as a substring.
    #
    # @param word [String] the term to look up; matched literally
    # @param documents [Array<String>] the documents to search
    # @return [Float] the IDF value, or 0.0 when no document contains +word+
    #   (which includes an empty +documents+ array)
    def calc_idf(word, documents)
      # Literal substring test: interpolating the word into a regexp would
      # treat characters such as '+', '.', or '(' as metacharacters, which
      # either raises RegexpError or counts documents that do not contain it.
      needle = word.to_s
      df = documents.count { |document| document.include?(needle) }

      # Avoid dividing by zero, which would yield Infinity (or NaN for an
      # empty corpus) and poison any tf * idf product computed downstream.
      return 0.0 if df.zero?

      Math.log2(documents.size.to_f / df) + 1
    end
  end
end

lib/r_nlp/tf.rb

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ class Tf
77
attr_reader :lang
88
# Stores the language code that #count later branches on.
#
# @param lang [String] language code; must be 'ja' or 'en'
# @raise [ArgumentError] if +lang+ is neither 'ja' nor 'en'
def initialize(lang)
  unless %w[ja en].include?(lang)
    # Raise instead of printing and calling `exit`: a library must not
    # terminate the host process, and callers can rescue this error.
    raise ArgumentError, "lang #{lang} is not compatible."
  end

  @lang = lang
end
1115
def count(text)
1216
tf = Hash.new
@@ -23,7 +27,7 @@ def count(text)
2327
end
2428
end
2529
elsif @lang == 'en'
26-
text.split(" ").each do |line|
30+
text.split("\n").each do |line|
2731
line.split(" ").each do |word|
2832
if tf[word] == nil
2933
tf[word] = 1
@@ -32,9 +36,6 @@ def count(text)
3236
end
3337
end
3438
end
35-
else
36-
puts "lang #{@lang} is not compatible."
37-
exit
3839
end
3940
return tf
4041
end

lib/r_nlp/version.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
module RNlp
2-
VERSION = "0.1.4"
2+
VERSION = "0.1.5"
33
end

0 commit comments

Comments
 (0)