;;; batch-psgml-validate.el --- Batch-validation of HTML with PSGML for XEmacs
;;;
;;; $Id: batch-psgml-validate.el,v 1.23 2009-12-06 13:54:35 apa-guest Exp $
;;;
;;; Copyright (C) 2000 Adrian Aichner

;; Author: Adrian Aichner <adrian@xemacs.org>
;; Maintainer: XEmacs Development Team <xemacs-beta@xemacs.org>
;; Date: $Date: 2009-12-06 13:54:35 $
;; Version: $Revision: 1.23 $
;; Keywords: xemacsweb

;; This file is part of XEmacs.

;; XEmacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; XEmacs is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with XEmacs; see the file COPYING.  If not, write to the Free
;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
;; 02111-1307, USA.

;;; Synched up with: Not in FSF.

(require 'psgml)
(require 'psgml-parse)                  ;;; for (defstruct (sgml-dtd ...))
;;; APA: 'psgml-html does not provide itself.  Why not?
(load "psgml-html")                   ;;; for html-quote-region

(defconst psgml-validate-conform
  "    <p><small>Conform with "
  "Phrase to insert for conforming SGML document.")

(defconst psgml-validate-non-conform
  "    <p><small><strong>Not</strong> conform with "
  "Phrase to insert for non-conforming SGML document.")

(defgroup batch-psgml-validate nil
  "SGML document validation using PSGML."
  :group 'emacs)

(defcustom psgml-validate-ignore-directories
  (quote ("CVS" "genpage" "html" "RCS" "SCCS" "linklint"))
  "*List of directory names to be ignored by `batch-psgml-validate'."
  :type '(repeat
          :custom-show t
          :documentation-shown t
          string)
  :group 'batch-psgml-validate)

(defvar batch-psgml-last-warning nil
  "Save last warning.")

(defun psgml-find-file-hook ()
  (condition-case nil
      (save-excursion
        (let (mdo)
          (goto-char (point-min))
          (setq mdo
                (sgml-with-parser-syntax
                 (let (start)
                   (sgml-skip-upto "MDO")
                   (setq start (point))
                   (sgml-skip-upto-mdc)
                   (forward-char 1)
                   (buffer-substring start (point)))))
          (string-match "\\bDTD\\s-+\\(\\w+\\)\\b" mdo)
          (cond
           ((string= (match-string 1 mdo) "XHTML")
            (xml-mode))
           ((string= (match-string 1 mdo) "XML")
            (xml-mode))
           ((string= (match-string 1 mdo) "HTML")
            (html-mode))
           (t
            nil))))
    (t nil)))

(defun batch-psgml-validate (&optional file-or-dir)
  "Uses `sgml-next-trouble-spot' from the PSGML package to validate
conformance of files in FILE-OR-DIR with the specified DTD.  See
`batch-psgml-validate-buffer'.  If FILE-OR-DIR is missing,
`batch-psgml-validate' is performed for each `command-line-args-left'."
  (interactive "DHTML directory to validate: ")
  (if (null file-or-dir)
      (progn
        (let (file-or-dir)
          (while command-line-args-left
            (setq file-or-dir (expand-file-name (car command-line-args-left)))
            (batch-psgml-validate file-or-dir)
            (setq command-line-args-left (cdr command-line-args-left)))))
    (if (file-directory-p file-or-dir)
        (dolist (file (directory-files file-or-dir t nil nil nil))
          (cond
           ((member (file-name-nondirectory file) (list "." ".."))
            nil)
           ((file-directory-p file)
            (batch-psgml-validate file))
           ((and
             (member (file-name-extension file) (list "htm" "html"))
             (null
              (backup-file-name-p file)))
            ;; drop indent which takes all the time
            (batch-psgml-validate-file file t))))
      ;; drop indent which takes all the time
      (batch-psgml-validate-file file-or-dir t))))

(defun batch-psgml-validate-file (file &optional insert-result indent)
  "Uses `sgml-next-trouble-spot' from the PSGML package to validate
conformance of FILE with the specified DTD.  See
`batch-psgml-validate-buffer'."
  (interactive
   (list
    (read-file-name "HTML file to validate: ")
    (yes-or-no-p "insert compliance text ")
    (yes-or-no-p "indent buffer ")))
  (when
      (catch 'file-should-be-validated
        (mapc
         (function
          (lambda (c)
            (when (member c psgml-validate-ignore-directories)
              (message "ignoring directory named %s" c)
              ;; APA: throw a nil value, causing catch to return nil
              (throw 'file-should-be-validated nil))))
         (split-string-by-char
          file
          directory-sep-char)))
    (with-current-buffer
        (find-file-noselect file)
      (batch-psgml-validate-buffer insert-result indent))
    (message "batch-psgml-validate of %s is done" file)))

(defun batch-psgml-validate-buffer (&optional insert-result indent)
  "Uses `sgml-next-trouble-spot' from the PSGML package to validate
conformance of buffer with the specified DTD.  INSERT-RESULT inserts a
\(non-\)compliance messsage before \"</body>\".  INDENT will
`indent-according-to-mode' as well.  The buffer is saved in the end if
not called interactively."
  (interactive
   (list
    (yes-or-no-p "insert compliance text ")
    (yes-or-no-p "indent buffer ")))
  (set (make-local-variable 'backup-inhibited)
       t)
  (let (old-result-begin
        old-result-end
        new-result-begin
        result
        file
        line)
    (psgml-find-file-hook)
    (goto-char (point-min))
    (condition-case err
        (progn
          (flet ((append-message
                  (&rest args) ())
                 (clear-message
                  (&optional label frame stdout-p no-restore)
                  ())
                 (sgml-log-warning
                  (format &rest things)
                  (setq batch-psgml-last-warning (apply 'format format things))
                  (when sgml-throw-on-warning
                    (throw sgml-throw-on-warning t))))
            (setq result (sgml-next-trouble-spot)))
          (unless (string= result "Ok")
            (setf file (buffer-file-name))
            (setq line (count-lines (point-min) (point)))
            (if batch-psgml-last-warning
                (display-message 'error
                  (format "%s:%d:%s" 
                          file line batch-psgml-last-warning)))))
      (error (message ">> Error: %s" (prin1-to-string err)))
      (t (message "trouble: %s" (prin1-to-string err))))
    (when insert-result
      (goto-char (point-max))
      (setq old-result-end
            (re-search-backward "^\\s-*</body>" (point-min) t))
      (while
          (re-search-backward
           (format "^\\s-*\\(%s\\|%s\\)"
                   psgml-validate-conform
                   psgml-validate-non-conform)
           (point-min) t)
        (setq old-result-begin (point)))
      (and
       old-result-begin
       old-result-end
       (delete-region
        old-result-begin
        old-result-end))
      (insert
       (format "%s"
               (if (string= result "Ok")
                   psgml-validate-conform
                 psgml-validate-non-conform)))
      (setq new-result-begin (point))
      (flet ((append-message (&rest args) ())
             (clear-message (&optional label frame stdout-p no-restore) ()))
        (insert
         (format
          "%s\n"
          ;; As suggested in
          ;; http://www.xemacs.org/list-archives/xemacs-beta/200008/msg00251.html
          (save-excursion
            (goto-char (point-min))
            (sgml-with-parser-syntax
             (let (start)
               (sgml-skip-upto "MDO")
               (setq start (point))
               (sgml-skip-upto-mdc)
               (forward-char 1)
               (buffer-substring start (point)))))))
        (save-restriction
          (narrow-to-region new-result-begin (point))
          (goto-char (point-min))
          (while (re-search-forward "\\s-+" (point-max) t)
            (replace-match " " nil nil))
          (goto-char (point-max))
          (widen))
        (html-quote-region new-result-begin (point)))
      (insert
       "\n      <br />Automatically validated by <a href=\"http://sourceforge.net/projects/psgml/\">PSGML</a></small></p>\n"))
    (flet ((append-message (&rest args) ())
           (clear-message (&optional label frame stdout-p no-restore) ()))
      (when indent
        (indent-region (point-min) (point-max) nil))
      (if (noninteractive)
          (if (buffer-modified-p)
              (save-buffer))))))

;;; batch-psgml-validate.el ends here