Parser
Identifying the implementation of JAXP in use
System.out.printf("%1$s = %2$s\n",
"javax.xml.parsers.SAXParserFactory", System.getProperty("javax.xml.parsers.SAXParserFactory"));
System.out.printf("%1$s = %2$s\n",
"javax.xml.parsers.DocumentBuilderFactory", System.getProperty("javax.xml.parsers.DocumentBuilderFactory"));
System.out.printf("%1$s = %2$s\n",
"javax.xml.transform.TransformerFactory", System.getProperty("javax.xml.transform.TransformerFactory"));
System.out.printf("%1$s = %2$s\n",
"javax.xml.xpath.XPathFactory", System.getProperty("javax.xml.xpath.XPathFactory"));
System.out.printf("%1$s = %2$s\n",
"javax.xml.validation.SchemaFactory", System.getProperty("javax.xml.validation.SchemaFactory"));
System.out.printf("SaxParserFactory implementation = %1$s\n",
SAXParserFactory.newInstance().getClass().getName());
System.out.printf("DocumentBuilderFactory implementation = %1$s\n",
DocumentBuilderFactory.newInstance().getClass().getName());
System.out.printf("TransformerFactory implementation = %1$s\n",
TransformerFactory.newInstance().getClass().getName());
System.out.printf("XPathFactory implementation = %1$s\n",
XPathFactory.newInstance().getClass().getName());
System.out.printf("SchemaFactory implementation = %1$s\n",
SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI).getClass().getName());
Schema
XQuery
Applying aggregation functions to XQuery queries
When using an aggregation function such as count with a for clause, apply the function to the entire for expression, not to the expression in the return clause.
The following query returns a sequence of ten 1s, because count is evaluated once for each single item bound to $x.
for $x in (1 to 10) return count($x)
If you want the result 10, the following query is the right one.
count(for $x in (1 to 10) return ($x))
Other sequence functions, such as index-of or distinct-values, can be used in the same way.
Useful or intelligent expressions of XQuery
Consider the following expressions. What do they do? They are taken from the book "XSLT Cookbook" by Sal Mangano.
(50,45,40,34,32,29,-1)[(index-of((('XXL', 'XL', 'L', 'M', 'S', 'XS')), size), 7)[1]]
The following iterates just n times, not n² times.
(: Emits each item followed by its 1-based position. The parentheses are required: without them the comma ends the for expression and $pos is referenced outside its scope. :)
for $pos in 1 to count($sequence), $item in $sequence[$pos] return ($item, $pos)
Applying XQuery on HTML
Saxon cannot parse HTML files directly. However, by using TagSoup together with Saxon, you can parse HTML and apply XPath or XQuery to access its data. The sample below shows the overall approach.
package thirdstage.exercise.xml.saxon;
import java.io.File;
import java.io.FileInputStream;
import java.net.URL;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.transform.Source;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.sax.SAXSource;
import net.sf.saxon.Configuration;
import net.sf.saxon.dom.DocumentBuilderImpl;
import net.sf.saxon.lib.ParseOptions;
import net.sf.saxon.lib.Validation;
import net.sf.saxon.s9api.Processor;
import net.sf.saxon.s9api.XQueryCompiler;
import net.sf.saxon.s9api.XQueryEvaluator;
import net.sf.saxon.s9api.XQueryExecutable;
import net.sf.saxon.s9api.XdmNode;
import net.sf.saxon.s9api.XdmValue;
import org.ccil.cowan.tagsoup.Parser;
import org.testng.annotations.Test;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;
...
/**
 * Runs an XQuery against an HTML page, using TagSoup as the {@code XMLReader} that feeds
 * the HTML to Saxon.
 *
 * <p>Loads the sample page from the classpath, builds a Saxon document from it, evaluates a
 * query selecting the text nodes of the first {@code td} of each {@code tr} directly under the
 * first table of the body, then prints the number of items followed by the result itself.
 *
 * @throws Exception if the sample page is missing or unreadable, or if building the document
 *         or compiling/evaluating the query fails
 */
@Test
public void testXqueryOnHtmlAlongWithTagSoup() throws Exception{
    Configuration cfg = new Configuration();
    cfg.setSchemaValidationMode(Validation.LAX);
    cfg.setValidation(false);
    cfg.setValidationWarnings(true);
    Processor proc = new Processor(cfg);

    String resource = "thirdstage/exercise/xml/saxon/krx-stock-code-only-10.html";
    URL url = ClassLoader.getSystemResource(resource);
    if(url == null){
        // Fail with a readable message instead of a bare NullPointerException on url.toURI().
        throw new IllegalStateException("Test resource not found on the classpath: " + resource);
    }

    XMLReader xr = new org.ccil.cowan.tagsoup.Parser();
    // NOTE(review): the query below declares the XHTML namespace as the default element
    // namespace, presumably because TagSoup reports elements in that namespace while its
    // namespaces feature stays enabled - confirm before re-enabling the following line.
    //xr.setFeature(Parser.namespacesFeature, false);

    net.sf.saxon.s9api.DocumentBuilder db = proc.newDocumentBuilder();
    XdmNode input;
    // try/finally (not try-with-resources) so the sample stays free of Java 7+ syntax;
    // the stream is closed once the document has been built, even if build() throws.
    FileInputStream in = new FileInputStream(new File(url.toURI()));
    try{
        Source src = new SAXSource(xr, new InputSource(in));
        input = db.build(src);
    }finally{
        in.close();
    }

    String qr = "declare default element namespace \"http://www.w3.org/1999/xhtml\";\n"
        + "for $x in /html/body[1]/table[1]/tr/td[1]/child::text() return $x";

    XQueryCompiler xqc = proc.newXQueryCompiler();
    XQueryExecutable xqe = xqc.compile(qr);
    XQueryEvaluator xqev = xqe.load();
    xqev.setSource(input.asSource());
    XdmValue result = xqev.evaluate();

    System.out.printf("The result of query contains %1$d items.\n", result.size());
    // Print the result as data, never as a format string: page text containing '%' would
    // otherwise be interpreted as format specifiers by printf.
    System.out.print(result.toString());
}
...