diff --git a/lib/core_extensions/array.rb b/lib/core_extensions/array.rb
new file mode 100644
index 0000000..a6efeee
--- /dev/null
+++ b/lib/core_extensions/array.rb
@@ -0,0 +1,25 @@
+class Array
+  # Returns the last element of every item in the array.
+  # NOTE(review): presumably each item is a row whose final entry is its class
+  # label -- confirm against callers.
+  def classification
+    map(&:last)
+  end
+
+  # Calculates the information (Shannon) entropy of the elements, in bits.
+  #
+  # Returns 0 for an empty array, otherwise a Float.
+  def entropy
+    return 0 if empty?
+
+    counts = Hash.new(0)
+    each { |item| counts[item] += 1 }
+
+    total = size.to_f
+    counts.values.inject(0) do |result, count|
+      probability = count / total
+      # Math.log2 avoids the extra rounding of Math.log(x) / Math.log(2.0).
+      result - probability * Math.log2(probability)
+    end
+  end
+end
diff --git a/lib/core_extensions/object.rb b/lib/core_extensions/object.rb
new file mode 100644
index 0000000..0b79fd9
--- /dev/null
+++ b/lib/core_extensions/object.rb
@@ -0,0 +1,17 @@
+class Object
+  # Serializes the receiver with Marshal and writes it to +filename+,
+  # truncating any existing file.
+  def save_to_file(filename)
+    # Marshal data is binary: open in binary mode so no newline translation
+    # can corrupt the dump.
+    File.open(filename, 'wb') { |f| f << Marshal.dump(self) }
+  end
+
+  # Reads +filename+ in binary mode and returns the object Marshal rebuilds.
+  #
+  # SECURITY: Marshal.load can instantiate arbitrary objects; only load files
+  # from trusted sources.
+  def self.load_from_file(filename)
+    File.open(filename, 'rb') { |f| Marshal.load(f.read) }
+  end
+end
diff --git a/lib/decisiontree.rb b/lib/decisiontree.rb
index 99f893f..3da0b47 100644
--- a/lib/decisiontree.rb
+++ b/lib/decisiontree.rb
@@ -1,2 +1,3 @@
 require File.dirname(__FILE__) + '/decisiontree/id3_tree.rb'
-require 'core_extension/array.rb'
+require 'core_extensions/object'
+require 'core_extensions/array'
diff --git a/lib/decisiontree/id3_tree.rb b/lib/decisiontree/id3_tree.rb
index 187b97e..4e4c340 100755
--- a/lib/decisiontree/id3_tree.rb
+++ b/lib/decisiontree/id3_tree.rb
@@ -3,16 +3,6 @@
 ### Copyright (c) 2007 Ilya Grigorik
 ### Modifed at 2007 by José Ignacio Fernández
 
-class Object
-  def save_to_file(filename)
-    File.open(filename, 'w+') { |f| f << Marshal.dump(self) }
-  end
-
-  def self.load_from_file(filename)
-    Marshal.load(File.read(filename))
-  end
-end
-
 module DecisionTree
   Node = Struct.new(:attribute, :threshold, :gain)
 