View Javadoc

1   /********************************************************************************
2    * Copyright 2004, The Board of Regents of the University of Wisconsin System.
3    * All rights reserved.
4    * 
5    * A non-exclusive worldwide royalty-free license is granted for this Software.
6    * Permission to use, copy, modify, and distribute this Software and its
7    * documentation, with or without modification, for any purpose is granted
8    * provided that such redistribution and use in source and binary forms, with or
9    * without modification meets the following conditions:
10   * 
11   * 1. Redistributions of source code must retain the above copyright notice,
12   * this list of conditions and the following disclaimer.
13   * 
14   * 2. Redistributions in binary form must reproduce the above copyright notice,
15   * this list of conditions and the following disclaimer in the documentation
16   * and/or other materials provided with the distribution.
17   * 
18   * 3. Redistributions of any form whatsoever must retain the following
19   * acknowledgement:
20   * 
21   * "This product includes software developed by The Board of Regents of the
22   * University of Wisconsin System."
23   * 
24   * THIS SOFTWARE IS PROVIDED BY THE BOARD OF REGENTS OF THE UNIVERSITY OF
25   * WISCONSIN SYSTEM "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING,
26   * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27   * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE BOARD OF REGENTS
28   * OF THE UNIVERSITY OF WISCONSIN SYSTEM BE LIABLE FOR ANY DIRECT, INDIRECT,
29   * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
30   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
31   * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
32   * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
33   * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
34   * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35   ******************************************************************************/
36  package edu.wisc.my.webproxy.beans.filtering;
37  
38  import javax.portlet.ActionRequest;
39  import javax.portlet.ActionResponse;
40  import javax.portlet.PortletPreferences;
41  import javax.portlet.RenderRequest;
42  import javax.portlet.RenderResponse;
43  
44  import org.apache.commons.logging.Log;
45  import org.apache.commons.logging.LogFactory;
46  import org.cyberneko.html.parsers.SAXParser;
47  import org.xml.sax.SAXNotRecognizedException;
48  import org.xml.sax.SAXNotSupportedException;
49  import org.xml.sax.XMLReader;
50  import org.xml.sax.ext.LexicalHandler;
51  
52  import edu.wisc.my.webproxy.beans.config.ProxyComponent;
53  
54  /***
55   * This class implements the HtmlParser Interface by using NekoHtml
56   * 
57   * @author dgrimwood
58   * 
59   * @version $Id: NekoHtmlParser.java,v 1.1 2006/11/15 22:55:30 edalquist Exp $
60   *  
61   */
62  public class NekoHtmlParser implements HtmlParser, ProxyComponent {
63  
64      private static final Log log = LogFactory.getLog(NekoHtmlParser.class);
65  
66      private boolean insertDoctype = true;
67  
68      private boolean balanceTags = false;
69  
70      private boolean scriptStripComment = true;
71  
72      private boolean stripComments = true;
73  
74      private boolean reportErrors = false;
75  
76      public NekoHtmlParser() {
77  
78      }
79  
80      public XMLReader getReader(LexicalHandler myHandler) {
81  
82          SAXParser defaultParser = new SAXParser();
83  
84          try {
85              defaultParser.setProperty("http://xml.org/sax/properties/lexical-handler", myHandler);
86              defaultParser.setProperty("http://cyberneko.org/html/properties/default-encoding", "ASCII");
87              defaultParser.setProperty("http://cyberneko.org/html/properties/names/elems", "match");
88              defaultParser.setProperty("http://cyberneko.org/html/properties/names/attrs", "no-change");
89              
90              defaultParser.setFeature("http://cyberneko.org/html/features/report-errors", reportErrors);
91              defaultParser.setFeature("http://cyberneko.org/html/features/insert-doctype", insertDoctype);
92              defaultParser.setFeature("http://cyberneko.org/html/features/balance-tags", balanceTags);
93              defaultParser.setFeature("http://cyberneko.org/html/features/scanner/script/strip-comment-delims", scriptStripComment);
94              defaultParser.setFeature("http://cyberneko.org/html/features/scanner/style/strip-comment-delims", stripComments);
95  
96              defaultParser.setFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs", true);
97              defaultParser.setFeature("http://apache.org/xml/features/scanner/notify-char-refs", true);
98          }
99          catch (SAXNotRecognizedException e) {
100             log.debug("SaxParser not recognized:  ", e);
101         }
102         catch (SAXNotSupportedException e) {
103             log.debug("SaxParser not supported:  ", e);
104         }
105         return defaultParser;
106     }
107 
108     public String getName() {
109         return "NekoHtml Filter";
110     }
111 
112     public void setRenderData(RenderRequest request, RenderResponse response) {
113         PortletPreferences pp = request.getPreferences();
114         this.reportErrors = new Boolean(pp.getValue("reportErrors", null)).booleanValue();
115         this.balanceTags = new Boolean(pp.getValue("balanceTags", null)).booleanValue();
116         this.insertDoctype = new Boolean(pp.getValue("insertDoctype", null)).booleanValue();
117         this.scriptStripComment = new Boolean(pp.getValue("scriptStripComment", null)).booleanValue();
118         this.stripComments = new Boolean(pp.getValue("stripComments", null)).booleanValue();
119     }
120 
121     public void setActionData(ActionRequest request, ActionResponse response) {
122 
123     }
124 
125     public void clearData() {
126 
127     }
128 }
129