XSLT Insert html content - html

I'm trying to insert some HTML at a given point. The XML file has a content node, which contains actual HTML. For example, here is the content section of the XML:
-----------------
<content>
<h2>Header</h2>
<p>some link</p>
<p>some link1</p>
<p>some link2</p>
</content>
-----------------
I need to insert a link after the header but before the first link, inside its own p tag. A little rusty with XSLT, any help is appreciated!

Given this source:
<html>
<head/>
<body>
<content>
<h2>Header</h2>
<p>some link</p>
<p>some link1</p>
<p>some link2</p>
</content>
</body>
</html>
This stylesheet will do what you want to do:
<?xml version="1.0" encoding="utf-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <!-- Identity template: copies every attribute and node unchanged unless a
       more specific template below matches. -->
  <xsl:template match="@*|node()">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>
  <!-- Header inside content: copy it (attributes included), then emit the new
       paragraph immediately after it, i.e. before the first existing p. -->
  <xsl:template match="/html/body/content/h2">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
    <p>your new link</p>
  </xsl:template>
</xsl:stylesheet>

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<!-- Applies only when content is the document element. Emits a copy of its h2
children, then the literal text inside the template, then a copy of its p
children; the content element itself is not copied.
NOTE(review): "foo" looks like a placeholder for the markup to insert; confirm. -->
<xsl:template match="/content">
<xsl:copy-of select="h2"/>
foo
<xsl:copy-of select="p"/>
</xsl:template>
</xsl:stylesheet>

Related

Create nested XML from flat HTML using XSLT

I have this specific flat input HTML structure:
<!DOCTYPE html>
<html>
<head>
<title>Article <b>bold</b> title</title>
</head>
<body>
<article>
<h1 class="h-title"><span class="span-title">1 </span> Title 1 with some <sup>sup</sup> elements.</h1>
<p>Some <b>bold</b> text for 1.</p>
<p>Some more <b>bold</b> text for 1.</p>
<h1 class="h-title"><span class="span-title">2 </span> Title 2 with some <sup>sup</sup> elements.</h1>
<ul>
<li>The first list item.</li>
<li>The second list item with <i>italic</i> text.</li>
</ul>
<p>Some <b>bold</b> text for 2.</p>
<h2 class="h-title"><span class="span-title">2.1</span> Title 2.1 with some <sup>sup</sup> elements.</h2>
<p>Some <b>bold</b> text for 2.1.</p>
<h2 class="h-title"><span class="span-title">2.2</span> Title 2.2 with some <sup>sup</sup> elements.</h2>
<p>Some <b>bold</b> text for 2.2.</p>
<h3 class="h-title"><span class="span-title">2.2.1</span> Title 2.2.1 with some <sup>sup</sup> elements.</h3>
<p>Some <b>bold</b> text for 2.2.1.</p>
<h3 class="h-title"><span class="span-title">2.2.2</span> Title 2.2.2 with some <sup>sup</sup> elements.</h3>
<p>Some <b>bold</b> text for 2.2.2.</p>
<h2 class="h-title"><span class="span-title">2.3</span> Title 2.3 with some <sup>sup</sup> elements.</h2>
<p>Some <b>bold</b> text for 2.3.</p>
<h1 class="h-title"><span class="span-title">3</span> Title 3 with some <sup>sup</sup> elements.</h1>
<p>Some <b>bold</b> text for 3.</p>
</article>
</body>
</html>
I would need to create a nested output XML structure as below:
<?xml version="1.0" encoding="UTF-8"?>
<xml>
<front type="head">
<title>Article <b>bold</b> title</title>
</front>
<body>
<sec id="s1" sec-type="Title 1 with some sup elements.">
<label>1</label>
<title>Title 1 with some <sup>sup</sup> elements.</title>
<p>Some <b>bold</b> text for 1.</p>
<p>Some more <b>bold</b> text for 1.</p>
</sec>
<sec id="s2" sec-type="Title 2 with some sup elements.">
<label>2</label>
<title>Title 2 with some <sup>sup</sup> elements.</title>
<list list-type="bullet">
<list-item>The first list item.</list-item>
<list-item>The second list item with <i>italic</i> text.</list-item>
</list>
<p>Some <b>bold</b> text for 2.</p>
<sec id="s2.1" sec-type="Title 2.1 with some sup elements.">
<label>2.1</label>
<title>Title 2.1 with some <sup>sup</sup> elements.</title>
<p>Some <b>bold</b> text for 2.1.</p>
</sec>
<sec id="s2.2" sec-type="Title 2.2 with some sup elements.">
<label>2.2</label>
<title>Title 2.2 with some <sup>sup</sup> elements.</title>
<p>Some <b>bold</b> text for 2.2.</p>
<sec id="s2.2.1" sec-type="Title 2.2.1 with some sup elements.">
<label>2.2.1</label>
<title>Title 2.2.1 with some <sup>sup</sup> elements.</title>
<p>Some <b>bold</b> text for 2.2.1.</p>
</sec>
<sec id="s2.2.2" sec-type="Title 2.2.2 with some sup elements.">
<label>2.2.2</label>
<title>Title 2.2.2 with some <sup>sup</sup> elements.</title>
<p>Some <b>bold</b> text for 2.2.2.</p>
</sec>
</sec>
<sec id="s2.3" sec-type="Title 2.3 with some sup elements.">
<label>2.3</label>
<title>Title 2.3 with some <sup>sup</sup> elements.</title>
<p>Some <b>bold</b> text for 2.3.</p>
</sec>
</sec>
<sec id="s3" sec-type="Title 3 with some sup elements.">
<label>3</label>
<title>Title 3 with some <sup>sup</sup> elements.</title>
<p>Some <b>bold</b> text for 3.</p>
</sec>
</body>
</xml>
So far, I have produced this XSLT transformation below (h1-h6 section needs to be improved I believe):
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                exclude-result-prefixes="xs"
                version="2.0">
  <xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>
  <!-- all: identity copy for anything without a more specific template -->
  <xsl:template match="node()|@*">
    <xsl:copy>
      <xsl:apply-templates select="node()|@*"/>
    </xsl:copy>
  </xsl:template>
  <!-- html / xml: rename the root element -->
  <xsl:template match="html">
    <xml>
      <xsl:apply-templates select="node()|@*"/>
    </xml>
  </xsl:template>
  <!-- head / front -->
  <xsl:template match="head">
    <front type="head">
      <xsl:apply-templates select="node()|@*"/>
    </front>
  </xsl:template>
  <!-- article / (unwrapped: only its content is processed) -->
  <xsl:template match="article">
    <xsl:apply-templates select="node()|@*"/>
  </xsl:template>
  <!-- h1-h6 / sec: only h1 is handled so far, and the sec that is produced
       holds just the label and title; following content is not nested. -->
  <xsl:template match="h1[@class='h-title']">
    <!-- Section number taken from the leading span. -->
    <xsl:variable name="secId" select="normalize-space(span)"/>
    <!-- Heading text after the first space. -->
    <xsl:variable name="secType" select="substring-after(.,' ')"/>
    <sec>
      <xsl:attribute name="id" select="normalize-space(concat('s', $secId))"/>
      <xsl:attribute name="sec-type" select="$secType"/>
      <label>
        <xsl:value-of select="$secId"/>
      </label>
      <title>
        <xsl:apply-templates select="node() except span" />
      </title>
    </sec>
  </xsl:template>
  <!-- ul / list -->
  <xsl:template match="ul">
    <list list-type="bullet">
      <xsl:apply-templates select="node()|@*"/>
    </list>
  </xsl:template>
  <!-- li / list-item -->
  <xsl:template match="li">
    <list-item>
      <xsl:apply-templates select="node()|@*"/>
    </list-item>
  </xsl:template>
</xsl:stylesheet>
Short description:
I have this flat HTML structure which needs to be transformed to nested XML structure. The original HTML structure may use h1 to h6 headings and they should be transformed into nested output XML sections accordingly. Each heading (h1...h6) has its own class (h1-title...h6-title). The HTML is always "well-structured", meaning h1 can be followed only by h2 or h3, etc. The wrong format (i.e. h1->h3->h2) may never occur.
I have two issues:
I believe the transformation needs to be done with recursion, but I am unable to figure it out with XSLT. I managed to create the right XML output structure and re-tag everything accordingly, but I'm unable to set nested structure.
The second (small) issue is that I don't know how to strip leading/trailing spaces from XML output tag and at the same time use "node() except span"? Function normalize-space() in this case returns an error.
I will be eternally grateful (and I mean it!) to someone who can solve this recursive mystery above for me.
Three days ago I touched XSLT code for the first time. Today I'm posting my first "achievement", which is based on Martin Honnen's golden function (html(h)->xml(sec)). I believe the code is ugly and I don't know whether it follows all the standards it should, but the end result is correct, so I'll post it as an answer to my question for now. If there are still some anomalies/issues present, I'll be glad to fix them if someone can comment.
It looks like this:
<?xml version="1.0" encoding="UTF-8"?>
<xsl:transform
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:fn="http://www.w3.org/2005/xpath-functions"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:mf="http://example.com/mf"
    exclude-result-prefixes="fn xs mf"
    version="2.0">
  <xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>
  <!-- all: identity copy for anything without a more specific template -->
  <xsl:template match="node()|@*">
    <xsl:copy>
      <xsl:apply-templates select="node()|@*"/>
    </xsl:copy>
  </xsl:template>
  <!-- html / xml -->
  <xsl:template match="html">
    <xml>
      <xsl:apply-templates select="node()|@*"/>
    </xml>
  </xsl:template>
  <!-- head / front -->
  <xsl:template match="head">
    <front type="head">
      <xsl:apply-templates select="node()|@*"/>
    </front>
  </xsl:template>
  <!-- flat html (h1-h6) to nested xml (sec) transformation.
       mf:group($nodes, $level) splits $nodes into groups that each start at an
       element whose local name starts with 'h' followed by $level.
       - A group that starts with such a heading becomes a sec: the heading is
         processed first, then the rest of the group is grouped again at
         $level + 1.
       - A group without a leading heading (content before the first heading of
         this level) is retried at the next level while $level is below 6,
         otherwise its nodes are processed by the ordinary templates. -->
  <xsl:function name="mf:group" as="node()*">
    <xsl:param name="nodes" as="node()*"/>
    <xsl:param name="level" as="xs:integer"/>
    <xsl:for-each-group select="$nodes" group-starting-with="*[starts-with(local-name(), concat('h', $level))]">
      <xsl:choose>
        <xsl:when test="self::*[starts-with(local-name(), concat('h', $level))]">
          <sec>
            <!-- The heading template emits attributes, so it must run before
                 any child element is added to sec. -->
            <xsl:apply-templates select="."/>
            <xsl:sequence select="mf:group(current-group() except ., $level+1)"/>
          </sec>
        </xsl:when>
        <xsl:when test="$level lt 6">
          <xsl:sequence select="mf:group(current-group(), $level+1)"/>
        </xsl:when>
        <xsl:otherwise>
          <xsl:apply-templates select="current-group()"/>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:for-each-group>
  </xsl:function>
  <!-- article / : its children are regrouped into nested sec elements -->
  <xsl:template match="article">
    <xsl:sequence select="mf:group(node(), 1)"/>
  </xsl:template>
  <!-- h1-h6 / sec: produces the id and sec-type attributes plus the label and
       title children of the sec element opened by mf:group.
       The pattern is written as a predicate on * because a parenthesised
       union such as (h1|h2)[...] is not part of the XSLT 2.0 pattern grammar. -->
  <xsl:template match="*[self::h1 or self::h2 or self::h3 or self::h4 or self::h5 or self::h6][@class='h-title']">
    <xsl:variable name="secId" select="normalize-space(span)"/>
    <xsl:variable name="secType" select="fn:substring-after(normalize-space(.), ' ')"/>
    <xsl:attribute name="id" select="normalize-space(concat('s', $secId))"/>
    <xsl:attribute name="sec-type" select="$secType"/>
    <label>
      <xsl:value-of select="$secId"/>
    </label>
    <title>
      <xsl:apply-templates select="node() except span" />
    </title>
  </xsl:template>
  <!-- ul / list -->
  <xsl:template match="ul">
    <list list-type="bullet">
      <xsl:apply-templates select="node()|@*"/>
    </list>
  </xsl:template>
  <!-- li / list-item -->
  <xsl:template match="li">
    <list-item>
      <xsl:apply-templates select="node()|@*"/>
    </list-item>
  </xsl:template>
</xsl:transform>

XSLT apply-templates in for-each

I'm trying to write a simple XHTML to Simple Docbook translator (the input XHTML is a limited subset so it should be doable).
I have this:
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output method="xml" encoding="UTF-8" indent="yes" omit-xml-declaration="yes" standalone="no"/>
  <!--
  <xsl:strip-space elements="*"/>
  -->
  <!-- identity copy for anything without a more specific template -->
  <xsl:template match="@*|node()">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()" />
    </xsl:copy>
  </xsl:template>
  <!-- skip implicit tags-->
  <xsl:template match="/html/body"><xsl:apply-templates/></xsl:template>
  <xsl:template match="/html"><xsl:apply-templates/></xsl:template>
  <!-- paragraphs to sections converter -->
  <xsl:template match="h2">
    <!-- Identity of this heading, used to pick the siblings that belong to it. -->
    <xsl:variable name="title" select="generate-id(.)"/>
    <section>
      <title><xsl:apply-templates select="text()"/></title>
      <!-- Following siblings whose nearest preceding h2 is this heading. -->
      <xsl:for-each select="following-sibling::*[generate-id(preceding-sibling::h2[1]) = $title and not(self::h2)]">
        <!-- Without a select attribute this processes the children of each
             sibling, not the sibling element itself. -->
        <xsl:apply-templates/>
      </xsl:for-each>
    </section>
  </xsl:template>
  <xsl:template match="p">
    <para><xsl:apply-templates select="*|text()"/></para>
  </xsl:template>
  <!-- Suppress elements that follow a heading so they are not also emitted
       outside the section. -->
  <xsl:template match="p[preceding-sibling::h2]"/>
  <xsl:template match="ul">
    <itemizedlist><xsl:apply-templates select="li"/></itemizedlist>
  </xsl:template>
  <xsl:template match="ul[preceding-sibling::h2]"/>
  <xsl:template match="ol">
    <orderedlist><xsl:apply-templates select="li"/></orderedlist>
  </xsl:template>
  <xsl:template match="ol[preceding-sibling::h2]"/>
  <xsl:template match="li">
    <listitem><para><xsl:apply-templates select="*|text()"/></para></listitem>
  </xsl:template>
</xsl:stylesheet>
For this input
<html>
<body>
<p>First paragraph</p>
<p>Second paragraph</p>
<h2>First title</h2>
<p>First paragraph</p>
<p>Second paragraph</p>
<p>Third paragraph</p>
<h2>Second title</h2>
<p>First paragraph</p>
<ul>
<li>A list item</li>
<li>Another list item</li>
</ul>
<p>Second paragraph</p>
</body>
</html>
I expect this output
<para>First paragraph</para>
<para>Second paragraph</para>
<section>
<title>First title</title>
<para>First paragraph</para>
<para>Second paragraph</para>
<para>Third paragraph</para>
</section>
<section>
<title>Second title</title>
<para>First paragraph</para>
<itemizedlist>
<listitem>A list item</listitem>
<listitem>Another list item</listitem>
</itemizedlist>
<para>Second paragraph</para>
</section>
But I get
<para>First paragraph</para>
<para>Second paragraph</para>
<section><title>First title</title>First paragraphSecond paragraphThird paragraph</section>
<section><title>Second title</title>First paragraph
<listitem><para>A list item</para></listitem>
<listitem><para>Another list item</para></listitem>
Second paragraph</section>
For some reason, the template for my paragraphs and lists is not being applied. I'm guessing because the templates matching are the empty ones, but I need those to prevent duplicate tags outside section.
How can I make this work? TIA.
Use
<!-- select="." applies templates to each selected sibling element itself. -->
<xsl:for-each select="following-sibling::*[generate-id(preceding-sibling::h2[1]) = $title and not(self::h2)]">
<xsl:apply-templates select="."/>
</xsl:for-each>
or simply
<!-- Applies templates directly to the selected siblings, without a for-each wrapper. -->
<xsl:apply-templates select="following-sibling::*[generate-id(preceding-sibling::h2[1]) = $title and not(self::h2)]"/>
to process those elements you want to wrap into a section. But there will be a collision with your other templates so perhaps using a mode helps for the processing:
<!-- Templates in mode "wrapped" build the DocBook output for elements that sit
     inside a section; they only fire when the calling xsl:apply-templates
     passes mode="wrapped". The empty default-mode templates keep the same
     elements from being emitted a second time outside the section. -->
<xsl:template match="p" mode="wrapped">
  <para><xsl:apply-templates select="*|text()"/></para>
</xsl:template>
<xsl:template match="p[preceding-sibling::h2]"/>
<xsl:template match="ul" mode="wrapped">
  <!-- The li template lives in mode "wrapped", so the mode must be passed on. -->
  <itemizedlist><xsl:apply-templates select="li" mode="wrapped"/></itemizedlist>
</xsl:template>
<xsl:template match="ul[preceding-sibling::h2]"/>
<xsl:template match="ol" mode="wrapped">
  <orderedlist><xsl:apply-templates select="li" mode="wrapped"/></orderedlist>
</xsl:template>
<xsl:template match="ol[preceding-sibling::h2]"/>
<xsl:template match="li" mode="wrapped">
  <listitem><para><xsl:apply-templates select="*|text()"/></para></listitem>
</xsl:template>

Remove certain nodes in copy of node

How do I copy an entire element but remove only some of the children?
I want to copy the div#about but I want to remove the table elements from it.
Input HTML:
<html>
<body>
<div class="content-header">
<h1>Title</h1>
</div>
<div id="about">
<h1>About</h1>
<table>...</table>
<p>Bla bla bla</p>
<table>...</table>
<p>The end</p>
</div>
</body>
</html>
XSLT:
<xsl:transform version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <!-- Builds the article wrapper from the document root: the page title comes
       from the content-header div, the body from the div with id "about". -->
  <xsl:template match="/">
    <div class="article">
      <h1>
        <xsl:value-of select="//div[@class='content-header']/h1/text()"/>
      </h1>
      <div>
        <!-- copy-of duplicates the whole subtree verbatim, tables included. -->
        <xsl:copy-of select="//div[@id='about']"/>
        <!-- Here should render the entire div#about without the tables -->
      </div>
    </div>
  </xsl:template>
  <xsl:output method="html" indent="yes" omit-xml-declaration="yes"/>
</xsl:transform>
First add the identity template to your XSLT
<!-- Identity template: copies each attribute and node, recursing through
     apply-templates so that more specific templates can override the copy. -->
<xsl:template match="@*|node()">
  <xsl:copy>
    <xsl:apply-templates select="@*|node()" />
  </xsl:copy>
</xsl:template>
(Or, if you were using XSLT 3.0, you could use <xsl:mode on-no-match="shallow-copy"/> instead)
Then add another template to ignore the table elements
<!-- Empty template: table children of the div with id "about" produce no output. -->
<xsl:template match="div[@id='about']/table" />
And finally, replace your xsl:copy-of with xsl:apply-templates to allow these templates to be matched, thus ensuring the table elements do not get copied.
Try this XSLT
<xsl:transform version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output method="html" indent="yes" omit-xml-declaration="yes"/>
  <!-- Identity template: copies everything that no other template handles. -->
  <xsl:template match="@*|node()">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()" />
    </xsl:copy>
  </xsl:template>
  <!-- Drop the tables that are direct children of the div with id "about". -->
  <xsl:template match="div[@id='about']/table" />
  <!-- Entry point: build the article wrapper and push the "about" div through
       the templates above so the tables are filtered out while copying. -->
  <xsl:template match="/">
    <div class="article">
      <h1>
        <xsl:value-of select="//div[@class='content-header']/h1/text()"/>
      </h1>
      <div>
        <xsl:apply-templates select="//div[@id='about']"/>
      </div>
    </div>
  </xsl:template>
</xsl:transform>

How to make a different font in a tag inside another tag in XSL style sheet?

How can I apply a different font in an XSL for the XML looking like this
<text> sometext <citation> somecitation </citation> sometext </text>
so the citation content should be in a different font as just text.
so this is the part of my XML
<text>She flipped her bed over and found invisible alligators all over her room. <citation> What's going on here? </citation> she demanded. </text>
I wrote a code <xsl:template match="text">
<p>
<xsl:value-of select="text()"/>
<q>
<xsl:value-of select="citation/text()"/>
</q>
</p>
(q stands for italic in CSS)
What I want to get :
She flipped her bed over and found invisible alligators all over her room.
"What's going on here?" she demanded.
What I get for now : She flipped her bed over and found invisible alligators all over her room.she demanded. "What's going on here?"
how can I proceed to get correct result?
Thank you!
First, the default styling applied by the browser should be quite sufficient - provided you use the correct HTML tags.
Given the following input:
XML
<text>She flipped her bed over and found invisible alligators all over her room. <citation> What's going on here? </citation> she demanded. </text>
the following stylesheet:
XSLT 1.0
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                version="1.0">
  <xsl:strip-space elements="*"/>
  <xsl:output encoding="UTF-8" method="html"/>

  <!-- A citation becomes an HTML cite element; its content is processed in
       place so it stays at its original position inside the paragraph. -->
  <xsl:template match="citation">
    <cite>
      <xsl:apply-templates/>
    </cite>
  </xsl:template>

  <!-- Each text element becomes a paragraph; child nodes are processed in
       document order, so text and citations keep their relative order. -->
  <xsl:template match="text">
    <p>
      <xsl:apply-templates/>
    </p>
  </xsl:template>

  <!-- Document root: wrap the whole result in an html/body skeleton. -->
  <xsl:template match="/">
    <html>
      <body>
        <xsl:apply-templates/>
      </body>
    </html>
  </xsl:template>
</xsl:stylesheet>
will return:
<html>
<body>
<p>She flipped her bed over and found invisible alligators all over her room. <cite> What's going on here? </cite> she demanded.
</p>
</body>
</html>
which most any browser will render as:
If you don't like or don't trust the browser defaults, you can specify your own style, for example:
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                version="1.0">
  <xsl:strip-space elements="*"/>
  <xsl:output encoding="UTF-8" method="html"/>

  <!-- A citation becomes a span carrying an explicit inline italic style. -->
  <xsl:template match="citation">
    <span style="font-style: italic;">
      <xsl:apply-templates/>
    </span>
  </xsl:template>

  <!-- Each text element becomes a paragraph; child nodes are processed in
       document order. -->
  <xsl:template match="text">
    <p>
      <xsl:apply-templates/>
    </p>
  </xsl:template>

  <!-- Document root: wrap the whole result in an html/body skeleton. -->
  <xsl:template match="/">
    <html>
      <body>
        <xsl:apply-templates/>
      </body>
    </html>
  </xsl:template>
</xsl:stylesheet>
Or:
<xsl:stylesheet version="1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output method="html" encoding="UTF-8"/>
  <xsl:strip-space elements="*"/>
  <!-- Document root: emit the html skeleton with an embedded style sheet that
       renders q elements in italics. -->
  <xsl:template match="/">
    <html>
      <head>
        <style>
          q {
            font-style: italic;
          }
        </style>
      </head>
      <body>
        <xsl:apply-templates/>
      </body>
    </html>
  </xsl:template>
  <!-- Each text element becomes a paragraph; child nodes are processed in
       document order. -->
  <xsl:template match="text">
    <p>
      <xsl:apply-templates/>
    </p>
  </xsl:template>
  <!-- A citation becomes a q element, styled by the rule in the head. -->
  <xsl:template match="citation">
    <q>
      <xsl:apply-templates/>
    </q>
  </xsl:template>
</xsl:stylesheet>

How to wrap <h2> and <p> tags inside a <section> tag in xslt?

I want to wrap the headers and paragraph inside the section tags. Section tag ends when the next header arises.
Input:
<body>
<h2>text text</h2>
<p> some text </p>
<p> some text </p>
<h2> text text </h2>
<p> some text </p>
<p> some text </p>
<p> some text </p>
</body>
Output:
<body>
<section>
<h2>text text</h2>
<p> some text </p>
<p> some text </p>
</section>
<section>
<h2> text text </h2>
<p> some text </p>
<p> some text </p>
<p> some text </p>
</section>
</body>
Like mentioned in the comments, this is a grouping question.
If you're using XSLT 2.0, you can use xsl:for-each-group/@group-starting-with...
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output indent="yes"/>
  <xsl:strip-space elements="*"/>
  <!-- Copy body with its attributes and wrap its child elements in section
       elements; a new group (and therefore a new section) starts at every h2. -->
  <xsl:template match="/body">
    <xsl:copy>
      <xsl:copy-of select="@*"/>
      <xsl:for-each-group select="*" group-starting-with="h2">
        <section>
          <xsl:copy-of select="current-group()"/>
        </section>
      </xsl:for-each-group>
    </xsl:copy>
  </xsl:template>
</xsl:stylesheet>
If you're stuck with XSLT 1.0, you can use an xsl:key based on a generated id of the h2...
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
  <xsl:output indent="yes"/>
  <xsl:strip-space elements="*"/>
  <!-- Indexes every non-h2 child of body by the generated id of the nearest
       preceding h2 sibling, i.e. by the heading it belongs to. -->
  <xsl:key name="sectElems" match="/body/*[not(self::h2)]"
           use="generate-id(preceding-sibling::h2[1])"/>
  <!-- Copy body with its attributes; only the h2 children are processed, each
       one pulling in its own followers through the key. -->
  <xsl:template match="/body">
    <xsl:copy>
      <xsl:copy-of select="@*"/>
      <xsl:apply-templates select="h2"/>
    </xsl:copy>
  </xsl:template>
  <!-- One section per heading: the heading itself plus the elements keyed to it. -->
  <xsl:template match="h2">
    <xsl:variable name="id" select="generate-id()"/>
    <section>
      <xsl:copy-of select=".|key('sectElems',$id)"/>
    </section>
  </xsl:template>
</xsl:stylesheet>
Both of these stylesheets produce the same output.