#!/usr/bin/ruby
# Benchmark driver: times each parser class listed in Entries on the files
# named on the command line (or, with `-f LIST`, on the files named one per
# line in LIST). Every document is fed once as a single string and once as an
# array of lines, and one tab-separated row of results is printed per file.

$:.push './html-parser'
require 'htmlscan'
require 'sgml-parser'

# Give both parser classes the two entry points the benchmark calls:
# `parse` (whole document) and `parse_ary` (array of lines).
class LooseHTMLScanner
  # Hands the array +a+ to #parse unchanged.
  def parse_ary(a)
    parse a
  end
end

class SGMLParser
  alias parse feed

  # Feeds the elements of +a+ to the parser one at a time.
  def parse_ary(a)
    a.each { |line| feed line }
  end
end

Entries = [SGMLParser, LooseHTMLScanner]

require 'nkf'
#$KCODE = 'E'

# Runs +method+ with +arg+ on a fresh instance of +klass+ and returns the user
# CPU time it consumed, formatted with two decimals.
#
# If instantiation or the call raises, the exception's class name wrapped in
# "=" signs is returned instead, so a failing parser shows up in its column
# without aborting the run. Exception (not just StandardError) is rescued on
# purpose so that every kind of parser failure is reported; Interrupt is
# re-raised so the run can still be aborted by the user.
def benchmark(klass, method, arg)
  scanner = klass.new
  started = Process.times.utime
  scanner.send method, arg
  finished = Process.times.utime
  sprintf "%.2f", finished - started
rescue Interrupt
  raise
rescue Exception => e
  sprintf "=%s=", e.class
end

# Collect the list of input files: either read from the list file given after
# `-f`, or taken directly from the command-line arguments.
if ARGV[0] == '-f'
  files = File.readlines(ARGV[1]).map { |line| line.chomp }
else
  files = ARGV
end

# Unbuffered output so partial rows are visible while a benchmark is running.
STDOUT.sync = true
print Entries.map { |klass| klass.name }.join("\t"), "\n"
print "at once (read)\tby line (gets)\n"

files.each do |path|
  print path
  src = File.open(path.chomp) do |f|
    head = f.gets("\r\n\r\n") # skip http header
    body = f.read
    # When no header separator was found, gets has already consumed the whole
    # file and read yields nothing ("" or nil depending on the Ruby version);
    # fall back to what gets returned. An empty file yields "".
    body.nil? || body.empty? ? head.to_s : body
  end
  src = NKF.nkf('-dexm0', src)
  print "\t", src.size
  src_a = src.each_line.to_a
  print "\t", src_a.size
  Entries.each do |klass|
    print "\t", benchmark(klass, :parse, src)
    print "\t", benchmark(klass, :parse_ary, src_a)
  end
  print "\n"
end