require "amrita/node"
require "amrita/tag"
module Amrita
if RUBY_PLATFORM =~ /win/
NL = "\r\n"
else
NL = "\n"
end
# This module provide methods for avoid XSS vulnerability
# taken from IPA home page(Japanese)
# http://www.ipa.go.jp/security/awareness/vendor/programming/a01_02.html
module Sanitizer
NAMECHAR = '[-\w\d\.:]'
NAME = "([\\w:]#{NAMECHAR}*)"
NOT_REFERENCE = "(?!#{NAME};|\\d+;|[0-9a-fA-F]+;)" # borrowed from rexml
AMP_WITHOUT_REFRENCE = /{NOT_REFERENCE}/
# escape &<>
def sanitize_text(text)
s = text.dup
s.gsub!(AMP_WITHOUT_REFRENCE, '&')
s.gsub!("<", '<')
s.gsub!(">", '>')
s
end
# escape &<>"'
def sanitize_attribute_value(text)
s = text.dup
s.gsub!(AMP_WITHOUT_REFRENCE, '&')
s.gsub!("<", '<')
s.gsub!(">", '>')
s.gsub!('"', '"')
s.gsub!("'", ''')
s
end
DefaultAllowedScheme = {
'http' => true,
'https' => true,
'ftp' => true,
'mailto' => true,
}
#UrlInvalidChar = Regexp.new(%q|[^;/?:@&=+$,A-Za-z0-9\-_.!~*'()%]|)
UrlInvalidChar = Regexp.new(%q|[^;/?:@&=+$,A-Za-z0-9\-_.!~*'()%#]|) #'
# +sanitize_url+ accepts only these characters
# --- http://www.ietf.org/rfc/rfc2396.txt ---
# uric = reserved | unreserved | escaped
# reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
# unreserved = alphanum | mark
# mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
# escaped = "%" hex hex
#
# +sanitize_url+ accepts only schems specified by +allowd_scheme+
#
# The default is http: https: ftp: mailt:
def sanitize_url(text, allowd_scheme = DefaultAllowedScheme)
# return nil if text has characters not allowd for URL
return nil if text =~ UrlInvalidChar
# return '' if text has an unknown scheme
# --- http://www.ietf.org/rfc/rfc2396.txt ---
# scheme = alpha *( alpha | digit | "+" | "-" | "." )
if text =~ %r|^([A-Za-z][A-Za-z0-9+\-.]*):|
return nil unless allowd_scheme[$1]
end
# escape HTML
# special = "&" | "<" | ">" | '"' | "'"
# But I checked "<" | ">" | '"' before.
s = text.dup
#s.gsub!("&", '&')
s.gsub!("'", ''')
s
end
module_function :sanitize_text, :sanitize_attribute_value, :sanitize_url
end
end
class String
def amrita_sanitize
Amrita::Sanitizer::sanitize_text(self)
end
def amrita_sanitize_as_attribute
Amrita::Sanitizer::sanitize_attribute_value(self)
end
def amrita_sanitize_as_url
Amrita::Sanitizer::sanitize_url(self)
end
end
module Amrita
# this is a String that amrita dosen't sanitize.
# If you don't sanitize or sanitize yourself,
# pass SanitizedString.new(x) as model data.
class SanitizedString < String
def SanitizedString::[](s)
new(s)
end
def amrita_sanitize
self
end
def amrita_sanitize_as_attribute
self
end
def amrita_sanitize_as_url
self
end
def to_s
self
end
end
# A Formatter object prints a Node object as HTML text.
class Formatter
include Amrita
attr_reader :stream #:nodoc:
# If set, text will be escaped by Sanitizer#sanitize_text
# Default is true.
attr_accessor :escape
# If set, single tag like
will be printed as
# Default is false
attr_accessor :asxml
# If set, with no attribute will be deleted.
# Default is true
attr_accessor :delete_span
# If set, the spaces and new-lines in text will be compacted.
# Default is false
attr_accessor :compact_space
# stream is any object that has << method.
# All output is done by << method.
def initialize(stream="", tagdict=DefaultHtmlTagInfo, &element_filter)
@stream = stream
@tagdict = tagdict
@escape = true
@asxml = false
@delete_span = true
@sanitized = false
@compact_space = false
@element_filter = element_filter
end
# Format a Node object.
#
# If +stream+ is given, the output is changed to it in this method.
def format(node, stream=nil)
node = Node::to_node(node)
with_stream(stream) do
node.format(self)
end
end
def with_stream(stream, &block) #:nodoc:
if stream
save = @stream
@stream = stream
begin
yield
ensure
@stream = save
end
stream
else
yield
@stream
end
end
def put(t) #:nodoc:
@stream << t
end
def <<(x) #:nodoc:
case x
when Node
format(x)
else
@stream << x.to_s
end
self
end
def format_attrs(attrarray, taginfo=nil) #:nodoc:
return nil if attrarray.size == 0
array = attrarray.collect do |a|
flag = taginfo ? taginfo.url_attr?(a.key) : false
format_attr(a, flag)
end
array.join(" ")
end
def format_attr_default(a, flag) #:nodoc:
attrval = a.value
if attrval then
if flag
attrval = attrval.to_s.amrita_sanitize_as_url
else
attrval = attrval.to_s.amrita_sanitize_as_attribute
end
%Q'#{a.key}="#{attrval}"'
else
%Q'#{a.key}'
end
end
alias format_attr format_attr_default
# set replacing attribute.
#
# f.set_attr_filter(:__id=>:id)
#
#
is printed as
def set_attr_filter(hash) src = [ "def self.format_attr(a, flag)" ] src << "case a.key" hash.each do |key, val| src << %Q[ when "#{key}" ; format_attr_default(Attr.new(:#{val}, a.value), flag)] end src << "else; format_attr_default(a, flag)" src << "end" src << "end" src << "" #puts src eval src.join("\n") rescue SctiptError, NameError puts src end def format_start_tag(e) #:nodoc: e = @element_filter.call(e) if @element_filter taginfo = @tagdict.get_tag_info(e.tagname_symbol) a = format_attrs(e.attrs, taginfo) if a %Q[<#{e.tagname} #{a}>] else %Q[<#{e.tagname}>] end end def format_end_tag(e) #:nodoc: %Q[#{e.tagname}>] end def format_single_tag(e) #:nodoc: e = @element_filter.call(e) if @element_filter a = format_element_attrs(e) if asxml if a %Q[<#{e.tagname} #{a} />] else %Q[<#{e.tagname} />] end else if a %Q[<#{e.tagname} #{a}>] else %Q[<#{e.tagname}>] end end end def format_text(text) #:nodoc: text = text.to_s.amrita_sanitize if @escape text = text.gsub(%r[\s+]m, ' ') if @compact_space @stream << text text end def can_be_single?(element) #:nodoc: taginfo_of_element(element).can_be_empty end def format_attr_of_element(attr, element) taginfo = taginfo_of_element(element) format_attr(attr, taginfo && taginfo.url_attr?(attr.key)) end def format_element_attrs(element) format_attrs(element.attrs, taginfo_of_element(element)) end def taginfo_of_element(element) @tagdict.get_tag_info(element.tagname_symbol) end end # This Formatter print out template with no change. class AsIsFormatter < Formatter def format_element(element, stream=nil, &block) #:nodoc: with_stream(stream) do if element.no_child? and can_be_single?(element) @stream << format_single_tag(element) else @stream << format_start_tag(element) if block_given? yield else element.body.format(self) end @stream << format_end_tag(element) end end end def new_line #:nodoc: @stream << NL end end # This Formatter compact spaces and delete new line. # class SingleLineFormatter < Formatter def initialize(stream="", tagdict=DefaultHtmlTagInfo) #:nodoc: super(stream) @compact_space = true end def format_element(element, stream=nil, &block) #:nodoc: with_stream(stream) do if element.no_child? and can_be_single?(element) @stream << format_single_tag(element) else @stream << format_start_tag(element) if block_given? yield else element.body.format(self) end @stream << format_end_tag(element) end end end end class PrettyPrintFormatter < Formatter attr_accessor :offset def initialize(stream="", tagdict=DefaultHtmlTagInfo) #:nodoc: super(stream) @tagdict = tagdict @level = 0 @offset = 2 @compact_space = true @first_of_line = true end def format(node, stream=nil) #:nodoc: with_stream(stream) do super @stream << NL end end def format_element(element, stream=nil, &block) #:nodoc: with_stream(stream) do body = element.body pptype = @tagdict.get_tag_info(element.tagname_symbol).pptype unless element.no_child? and can_be_single?(element) first = format_start_tag(element) last = format_end_tag(element) prettyprint(pptype, first, last) do if block_given? yield else body.format(self) end end else s = format_single_tag(element) put(s) new_line unless pptype == 3 end end end def prettyprint(tagtype, first, last, &block) #:nodoc: case tagtype when 1 prettyprint1(first, last, &block) when 2 prettyprint2(first, last, &block) when 3 prettyprint3(first, last, &block) else prettyprint3(first, last, &block) end end def prettyprint1(first, last, &block) #:nodoc: new_line put(first) level_up new_line yield new_line level_down put(last) new_line end def prettyprint2(first, last, &block) #:nodoc: new_line put(first) yield put(last) new_line end def prettyprint3(first, last, &block) #:nodoc: put(first) yield put(last) end def level_up #:nodoc: @level = @level + 1 end def level_down #:nodoc: @level = @level - 1 end def put(t) #:nodoc: return unless t.size > 0 t = t.gsub(/\s+/m, " ") if @compact_space if @first_of_line @stream << NL @stream << " " * (@offset * @level) if @offset @first_of_line = false end @stream << t end def new_line #:nodoc: @first_of_line = true end end class PreFormatter attr_reader :formatter, :expand_attr def initialize(formatter, expand_attr=false) @formatter = formatter @expand_attr = expand_attr @result_str = "" @result_array = [] end def pre_format(node) @formatter.with_stream(self) do node.pre_format1(self) end end def <<(x) case x when Element @result_array << SanitizedString[@result_str] unless @result_str == "" @result_array << x @result_str = "" when String @result_str << x when NodeArray x.array.each do |n| n.pre_format1(self) end when Node @formatter.format(x) else @result_str << x.to_s end self end def result @result_array << SanitizedString[@result_str] unless @result_str == "" case @result_array.size when 0 Null when 1 @result_array[0] else @result_array end end def result_as_top Node::to_node(result) end end module Node def to_s ret = "" SingleLineFormatter.new(ret).format(self) ret end # converts an Element without +id+ to TextElement to make # tree low for performance. # # A pre-formatted Node tree will be expanded faster than # original. But, it produces the same output . def pre_format(formatter, expand_attr=false) raise "pre_format dose not suport pretty-print" if formatter.kind_of?(PrettyPrintFormatter) prf = PreFormatter.new(formatter, expand_attr) prf.pre_format(self) prf end def pre_format1(prf) prf << self end end class NullNode def format(f) end def pre_format1(prf) end end class Element def format(f) if f.delete_span and tagname_symbol == :span and attrs.size == 0 return body.format(f) else f.format_element(self) end end def pre_format1(prf) f = prf.formatter if hid or (prf.expand_attr and has_expandable_attr?) prf << clone do body.pre_format(f).result end else f.format_element(self) do body.pre_format1(prf) end end end end class TextElement def format(f) f.format_text(@text) end end class NodeArray def format(f) @array.each { |n| n.format(f) } end def pre_format1(prf) @array.each do |n| n.pre_format1(prf) end end end class SpecialElement def format(f) f.put("<#{@tag}") f.put(@body) f.put("#{etag}>") end end class FormatterNode #:nodoc: include Node def initialize(&block) init_body(&block) end def change_formatter(formatter, &block) # subclass responsibility end def format(formatter) change_formatter(formatter) do body.format(formatter) end end def pre_format1(pre_formatter) change_formatter(pre_formatter.formatter) do body.pre_format1(pre_formatter) end end def to_ruby "#{type}.new { #{body_to_ruby} }" end def body_to_ruby if @body @body.to_ruby else "" end end end class Escape < FormatterNode #:nodoc: def initialize(escape=true, &block) @escape = escape super(&block) end def clone(&block) if block_given? Escape.new(@escape, &block) else Escape.new(@escape) { @body.clone } end end def change_formatter(f) save = f.escape f.escape = @escape yield ensure f.escape = save end def to_ruby %Q[#{type}.new(#{@sanitize_text})] + body_to_ruby end end class CompactSpace < FormatterNode #:nodoc: def initialize(compact_space=true, &block) @compact_space = compact_space super(&block) end def clone(&block) if block_given? CompactSpace.new(@compact_space, &block) else CompactSpace.new(@compact_space) { @body.clone } end end def change_formatter(f) save = f.compact_space f.compact_space= @compact_space yield ensure f.compact_space = save end end # format a Node object given as +block+ in # single line format. # # If +data+ was given, +expand+ will be called before # formatting def format_inline(data=true, &block) node = Node::to_node(yield) f = SingleLineFormatter.new f.format(node.expand1(data)) end module_function :format_inline # pretty print a Node object given as +block+ # # If +data+ was given, +expand+ will be called before # formatting # def format_pretty(data=true, &block) node = Node::to_node(yield) f = PrettyPrintFormatter.new f.format(node.expand1(data)) end module_function :format_pretty # Usually the <> character in text will be escaped. # # tmpl = TemplateText.new "
" # tmpl.expand(STDOUT, {:x => "<tag>
# # If the text was wrapped by this method, # it will no be escaped. # # tmpl.expand(STDOUT, {:x => noescape {"