1 /********************************************************************************
2 * Copyright 2004, The Board of Regents of the University of Wisconsin System.
3 * All rights reserved.
4 *
5 * A non-exclusive worldwide royalty-free license is granted for this Software.
6 * Permission to use, copy, modify, and distribute this Software and its
7 * documentation, with or without modification, for any purpose is granted
8 * provided that such redistribution and use in source and binary forms, with or
9 * without modification meets the following conditions:
10 *
11 * 1. Redistributions of source code must retain the above copyright notice,
12 * this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright notice,
15 * this list of conditions and the following disclaimer in the documentation
16 * and/or other materials provided with the distribution.
17 *
18 * 3. Redistributions of any form whatsoever must retain the following
19 * acknowledgement:
20 *
21 * "This product includes software developed by The Board of Regents of the
22 * University of Wisconsin System."
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE BOARD OF REGENTS OF THE UNIVERSITY OF
25 * WISCONSIN SYSTEM "AS IS" AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING,
26 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
27 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE BOARD OF REGENTS
28 * OF THE UNIVERSITY OF WISCONSIN SYSTEM BE LIABLE FOR ANY DIRECT, INDIRECT,
29 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
30 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
31 * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
32 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
33 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
34 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 ******************************************************************************/
36 package edu.wisc.my.webproxy.beans.filtering;
37
38 import javax.portlet.ActionRequest;
39 import javax.portlet.ActionResponse;
40 import javax.portlet.PortletPreferences;
41 import javax.portlet.RenderRequest;
42 import javax.portlet.RenderResponse;
43
44 import org.apache.commons.logging.Log;
45 import org.apache.commons.logging.LogFactory;
46 import org.cyberneko.html.parsers.SAXParser;
47 import org.xml.sax.SAXNotRecognizedException;
48 import org.xml.sax.SAXNotSupportedException;
49 import org.xml.sax.XMLReader;
50 import org.xml.sax.ext.LexicalHandler;
51
52 import edu.wisc.my.webproxy.beans.config.ProxyComponent;
53
/**
 * Implements the HtmlParser interface using the NekoHTML SAX parser.
 *
 * @author dgrimwood
 *
 * @version $Id: NekoHtmlParser.java,v 1.1 2006/11/15 22:55:30 edalquist Exp $
 */
62 public class NekoHtmlParser implements HtmlParser, ProxyComponent {
63
64 private static final Log log = LogFactory.getLog(NekoHtmlParser.class);
65
66 private boolean insertDoctype = true;
67
68 private boolean balanceTags = false;
69
70 private boolean scriptStripComment = true;
71
72 private boolean stripComments = true;
73
74 private boolean reportErrors = false;
75
76 public NekoHtmlParser() {
77
78 }
79
80 public XMLReader getReader(LexicalHandler myHandler) {
81
82 SAXParser defaultParser = new SAXParser();
83
84 try {
85 defaultParser.setProperty("http://xml.org/sax/properties/lexical-handler", myHandler);
86 defaultParser.setProperty("http://cyberneko.org/html/properties/default-encoding", "ASCII");
87 defaultParser.setProperty("http://cyberneko.org/html/properties/names/elems", "match");
88 defaultParser.setProperty("http://cyberneko.org/html/properties/names/attrs", "no-change");
89
90 defaultParser.setFeature("http://cyberneko.org/html/features/report-errors", reportErrors);
91 defaultParser.setFeature("http://cyberneko.org/html/features/insert-doctype", insertDoctype);
92 defaultParser.setFeature("http://cyberneko.org/html/features/balance-tags", balanceTags);
93 defaultParser.setFeature("http://cyberneko.org/html/features/scanner/script/strip-comment-delims", scriptStripComment);
94 defaultParser.setFeature("http://cyberneko.org/html/features/scanner/style/strip-comment-delims", stripComments);
95
96 defaultParser.setFeature("http://cyberneko.org/html/features/scanner/notify-builtin-refs", true);
97 defaultParser.setFeature("http://apache.org/xml/features/scanner/notify-char-refs", true);
98 }
99 catch (SAXNotRecognizedException e) {
100 log.debug("SaxParser not recognized: ", e);
101 }
102 catch (SAXNotSupportedException e) {
103 log.debug("SaxParser not supported: ", e);
104 }
105 return defaultParser;
106 }
107
108 public String getName() {
109 return "NekoHtml Filter";
110 }
111
112 public void setRenderData(RenderRequest request, RenderResponse response) {
113 PortletPreferences pp = request.getPreferences();
114 this.reportErrors = new Boolean(pp.getValue("reportErrors", null)).booleanValue();
115 this.balanceTags = new Boolean(pp.getValue("balanceTags", null)).booleanValue();
116 this.insertDoctype = new Boolean(pp.getValue("insertDoctype", null)).booleanValue();
117 this.scriptStripComment = new Boolean(pp.getValue("scriptStripComment", null)).booleanValue();
118 this.stripComments = new Boolean(pp.getValue("stripComments", null)).booleanValue();
119 }
120
121 public void setActionData(ActionRequest request, ActionResponse response) {
122
123 }
124
125 public void clearData() {
126
127 }
128 }
129