(* $Id: validate.ml 712 2005-05-04 13:09:58Z gerd $ * ---------------------------------------------------------------------- * *) open Pxp_document;; open Pxp_yacc;; open Pxp_types;; let error_happened = ref false;; let print_error e = print_endline (string_of_exn e) ;; class warner = object method warn w = print_endline ("WARNING: " ^ w) end ;; let resolve_by_helper scheme program sends_mime_header = let url_syntax = { Neturl.ip_url_syntax with Neturl.url_accepts_8bits = true } in let get_url id = (* Only accept SYSTEM Ids with the right scheme: *) match id with System sysid -> ( try let sysid_scheme = try Neturl.extract_url_scheme sysid with Neturl.Malformed_URL -> scheme (* If no scheme found: assume our own scheme *) in if sysid_scheme = scheme then Neturl.url_of_string url_syntax sysid (* or Malformed_URL *) else raise Pxp_reader.Not_competent with (* If the URL is syntactically wrong, do not accept it: *) Neturl.Malformed_URL -> raise Pxp_reader.Not_competent ) | _ -> raise Pxp_reader.Not_competent in let read_mime_header ch = let empty_re = Str.regexp "^[ \t\r\n]*$" in let is_empty s = Str.string_match empty_re s 0 in let buffer = Buffer.create 1024 in let line = ref(input_line ch) in if String.length !line >= 6 && String.sub !line 0 5 = "HTTP/" then line := input_line ch; while not (is_empty !line) do Buffer.add_string buffer !line; Buffer.add_string buffer "\n"; line := input_line ch done; Buffer.add_string buffer "\n"; Buffer.contents buffer in let open_channel id url = let url_string = Neturl.string_of_url url in let command = program ^ " " ^ Filename.quote url_string in let ch = Unix.open_process_in command in if sends_mime_header then let header_string = read_mime_header ch in let header_alist,_ = Mimestring.scan_header header_string 0 (String.length header_string) in let content_type = try List.assoc "content-type" header_alist with Not_found -> "application/octet-stream" in let mime_type, mime_type_params = Mimestring.scan_mime_type content_type [] in let encoding = try Some(Netconversion.encoding_of_string (List.assoc "charset" mime_type_params)) with Not_found -> None in ch, encoding else ch, None in let close_channel ch = match Unix.close_process_in ch with Unix.WEXITED 0 -> () | Unix.WEXITED n -> failwith("Command terminated with exit code " ^ string_of_int n) | Unix.WSIGNALED n -> failwith("Command terminated by signal " ^ string_of_int n) | _ -> assert false in new Pxp_reader.resolve_read_url_channel ~close:close_channel ~url_of_id: get_url ~channel_of_url: open_channel () ;; let parse debug wf namespaces iso88591 helpers filename = try (* Parse the document: *) let parse_fn = if wf then parse_wfdocument_entity ?transform_dtd:None else let index = new hash_index in parse_document_entity ?transform_dtd:None ~id_index:(index :> 'ext index) in let mng = if namespaces then Some (new Pxp_dtd.namespace_manager) else None in let resolver = let file_resolver = new Pxp_reader.resolve_as_file() in new Pxp_reader.combine (helpers @ [file_resolver]) in let start_id = System filename in let spec = if namespaces then default_namespace_spec else default_spec in let doc = parse_fn { default_config with debugging_mode = debug; encoding = if iso88591 then `Enc_iso88591 else `Enc_utf8; idref_pass = true; enable_namespace_processing = mng; warner = new warner } (ExtID(start_id, resolver)) spec in () with e -> (* Print error; remember that there was an error *) error_happened := true; print_error e; (* raise e *) ;; let main() = let debug = ref false in let wf = ref false in let namespaces = ref false in let iso88591 = ref false in let helpers = ref [] in let files = ref [] in let eq_split s = let eq = try String.index s '=' with Not_found -> raise(Arg.Bad "Syntax error") in let before_eq = String.sub s 0 eq in let after_eq = String.sub s (eq+1) (String.length s - eq - 1) in (before_eq, after_eq) in let add_helper sends_mime_header s = let scheme,cmd = eq_split s in let h = resolve_by_helper scheme cmd sends_mime_header in helpers := !helpers @ [h] in let add_pubid s = let pubid,filename = eq_split s in let h = Pxp_reader.lookup_public_id_as_file [pubid,filename] in helpers := !helpers @ [h] in let add_sysid s = let sysid,filename = eq_split s in let h = Pxp_reader.lookup_system_id_as_file [sysid,filename] in helpers := !helpers @ [h] in Arg.parse [ "-d", Arg.Set debug, " turn debugging mode on"; "-wf", Arg.Set wf, " check only for well-formedness"; "-namespaces", Arg.Set namespaces, " enable namespace support"; "-iso-8859-1", Arg.Set iso88591, " use ISO-8859-1 as internal encoding instead of UTF-8"; "-helper", Arg.String (add_helper false), "scheme=cmd add this helper command"; "-helper-mh", Arg.String (add_helper true), "scheme=cmd add this helper command (which sends mime headers)"; "-pubid", Arg.String add_pubid, "id=file map this PUBLIC id to this file"; "-sysid", Arg.String add_sysid, "id=file map this SYSTEM id to this file"; ] (fun x -> files := x :: !files) " usage: pxpvalidate [options] URL ... - checks the validity of XML documents. See below for list of options.