Creating HTML

You can create and output HTML like this:

from ll.xist import xsc
from ll.xist.ns import html, xml, meta

# The whole document is one nested expression: positional arguments become
# child nodes, keyword arguments (like href below) become attributes.
node = xsc.Frag(
·  xml.XML(),
·  html.DocTypeXHTML10transitional(),
·  html.html(
·  ·  html.head(
·  ·  ·  meta.contenttype(),
·  ·  ·  html.title("Example page")
·  ·  ),
·  ·  html.body(
·  ·  ·  html.h1("Welcome to the example page"),
·  ·  ·  html.p(
·  ·  ·  ·  # Plain strings and elements can be mixed as content.
·  ·  ·  ·  "This example page has a link to the ",
·  ·  ·  ·  html.a("Python home page", href="http://www.python.org/"),
·  ·  ·  ·  "."
·  ·  ·  )
·  ·  )
·  )
)

You can also use with blocks (and the unary + operator) to generate the same HTML:

from ll.xist import xsc
from ll.xist.ns import html, xml, meta

with xsc.build():
·  with xsc.Frag() as node:
·  ·  +xml.XML()
·  ·  +html.DocTypeXHTML10transitional()
·  ·  with html.html():
·  ·  ·  with html.head():
·  ·  ·  ·  +meta.contenttype()
·  ·  ·  ·  +html.title("Example page")
·  ·  ·  with html.body():
·  ·  ·  ·  +html.h1("Welcome to the example page")
·  ·  ·  ·  with html.p():
·  ·  ·  ·  ·  +xsc.Text("This example page has a link to the ")
·  ·  ·  ·  ·  with html.a():
·  ·  ·  ·  ·  ·  with xsc.addattr("href"):
·  ·  ·  ·  ·  ·  ·  +xsc.Text(""http://www.python.org/"")
·  ·  ·  ·  ·  ·  +xsc.Text("Python home page")
·  ·  ·  ·  ·  +xsc.Text(".")

Printing HTML

When you have an XIST tree you can print it with the string method like this:

from ll.xist import xsc
from ll.xist.ns import html, xml, meta

# Build the example document as one nested expression.
node = xsc.Frag(
·  xml.XML(),
·  html.DocTypeXHTML10transitional(),
·  html.html(
·  ·  html.head(
·  ·  ·  meta.contenttype(),
·  ·  ·  html.title("Example page")
·  ·  ),
·  ·  html.body(
·  ·  ·  html.h1("Welcome to the example page"),
·  ·  ·  html.p(
·  ·  ·  ·  "This example page has a link to the ",
·  ·  ·  ·  html.a("Python home page", href="http://www.python.org/"),
·  ·  ·  ·  "."
·  ·  ·  )
·  ·  )
·  )
)

# string() returns the serialized tree; us-ascii is the requested output encoding.
print(node.string(encoding="us-ascii"))

When you want to save this into a file, use the bytes method instead of string:

# The result of bytes() is written as-is, so the file is opened in binary mode.
with open("example.xml", "wb") as f:
·  f.write(node.bytes(encoding="us-ascii"))

Defining new elements

You can define new elements and how they should be converted to HTML (or other XML vocabularies) like this:

from ll.xist import xsc
from ll.xist.ns import html, xml, meta

class cheeseshoplink(xsc.Element):
·  class Attrs(xsc.Element.Attrs):
·  ·  class name(xsc.TextAttr): pass

·  def convert(self, converter):
·  ·  e = html.a(
·  ·  ·  self.attrs.name,
·  ·  ·  href=("http://cheeseshop.python.org/pypi/", self.attrs.name)
·  ·  )
·  ·  return e.convert(converter)

# Package names to generate links for.
names = ["ll-xist", "cx_Oracle", "PIL"]

node = xsc.Frag(
·  xml.XML(),
·  html.DocTypeXHTML10transitional(),
·  html.html(
·  ·  html.head(
·  ·  ·  meta.contenttype(),
·  ·  ·  html.title("Cheeseshop links")
·  ·  ),
·  ·  html.body(
·  ·  ·  html.h1("Cheeseshop links"),
·  ·  ·  # One list item per package; the generator expression is passed as content.
·  ·  ·  html.ul(html.li(cheeseshoplink(name=name)) for name in names)
·  ·  )
·  )
)

# conv() converts the tree before it is serialized (presumably this is where
# cheeseshoplink.convert() gets called for each link -- see the XIST docs).
print(node.conv().string(encoding="us-ascii"))

Parsing HTML

Parsing HTML is done like this:

from ll.xist import parse
from ll.xist.ns import html

# parse.tree() is given the source followed by the processing steps, in order.
# NOTE(review): judging by the names, Tidy cleans up the HTML, NS(html) assigns
# the html namespace and Node builds the XIST nodes -- confirm against the
# ll.xist.parse documentation.
node = parse.tree(
·  parse.URL("http://www.python.org/"),
·  parse.Tidy(),
·  parse.NS(html),
·  parse.Node()
)

Finding and counting nodes

The following example shows you how to output the URLs of all images inside links on Python's homepage:

>>> from ll.xist import parse
>>> from ll.xist.ns import html
>>> node = parse.tree(
... 	parse.URL("http://www.python.org/"),
... 	parse.Expat(ns=True),
... 	parse.Node()
... )
>>> for img in node.walknodes(html.a/html.img):
...    print(img.attrs.src)
... 
http://www.python.org/images/python-logo.gif
http://www.python.org/images/trans.gif
http://www.python.org/images/trans.gif
http://www.python.org/images/success/nasa.jpg

If you want to output both the links and the image URLs, do the following:

>>> from ll.xist import parse, xfind
>>> from ll.xist.ns import html
>>> node = parse.tree(
... 	parse.URL("http://www.python.org/"),
... 	parse.Expat(ns=True),
... 	parse.Node()
... )
>>> for path in node.walkpaths(html.a/html.img):
...    print(path[-2].attrs.href, path[-1].attrs.src)
http://www.python.org/ http://www.python.org/images/python-logo.gif
http://www.python.org/#left%2dhand%2dnavigation http://www.python.org/images/trans.gif
http://www.python.org/#content%2dbody http://www.python.org/images/trans.gif
http://www.python.org/about/success/usa http://www.python.org/images/success/nasa.jpg

If you want to count the number of links on the page you can do the following:

>>> from ll import misc
>>> from ll.xist import parse
>>> from ll.xist.ns import html
>>> node = parse.tree(
... 	parse.URL("http://www.python.org/"),
... 	parse.Expat(ns=True),
... 	parse.Node()
... )
>>> misc.count(node.walk(html.a))
83

Replacing text

This example demonstrates how to make a copy of an XML tree with some text replacements:

from ll.xist import xsc, parse

def p2p(node, converter):
·  if isinstance(node, xsc.Text):
·  ·  node = node.replace("Python", "Parrot")
·  ·  node = node.replace("python", "parrot")
·  return node

node = parse.tree(
·  parse.URL("http://www.python.org/"),
·  parse.Expat(ns=True),
·  parse.Node()
)

node = node.mapped(p2p)
node.write(open("parrot_index.html", "wb"))

Converting HTML to XIST code

The class ll.xist.present.CodePresenter makes it possible to output an XIST tree as usable Python source code:

>>> from ll.xist import parse, present
>>> node = parse.tree(
... 	parse.URL("http://www.python.org/"),
... 	parse.Expat(ns=True),
... 	parse.Node()
... )
>>> print(present.CodePresenter(node))
ll.xist.xsc.Frag(
·  ll.xist.ns.html.html(
·  ·  ll.xist.ns.html.head(
·  ·  ·  ll.xist.ns.html.meta(
·  ·  ·  ·  http_equiv='content-type',
·  ·  ·  ·  content='text/html; charset=utf-8'
·  ·  ·  ),
·  ·  ·  ll.xist.ns.html.title(
·  ·  ·  ·  'Python Programming Language -- Official Website'
·  ·  ·  ),
·  ·  ·  ll.xist.ns.html.meta(
·  ·  ·  ·  name='keywords',
·  ·  ·  ·  content='python programming language object oriented web free source'
·  ·  ·  ),
·  ·  ·  [... Many lines deleted ...]
·  ·  ·  ·  ·  ·  u'\n\tCopyright \xa9 1990-2007, ',
·  ·  ·  ·  ·  ·  ll.xist.ns.html.a(
·  ·  ·  ·  ·  ·  ·  'Python Software Foundation',
·  ·  ·  ·  ·  ·  ·  href='http://www.python.org/psf'
·  ·  ·  ·  ·  ·  ),
·  ·  ·  ·  ·  ·  ll.xist.ns.html.br(),
·  ·  ·  ·  ·  ·  ll.xist.ns.html.a(
·  ·  ·  ·  ·  ·  ·  'Legal Statements',
·  ·  ·  ·  ·  ·  ·  href='http://www.python.org/about/legal'
·  ·  ·  ·  ·  ·  ),
·  ·  ·  ·  ·  ·  '\n      ',
·  ·  ·  ·  ·  ·  id='footer'
·  ·  ·  ·  ·  ),
·  ·  ·  ·  ·  '\n\n\n    ',
·  ·  ·  ·  ·  id='body-main'
·  ·  ·  ·  ),
·  ·  ·  ·  '\n  ',
·  ·  ·  ·  id='content-body'
·  ·  ·  ),
·  ·  ·  '\n'
·  ·  ),
·  ·  lang='en'
·  )
)

Using converter contexts to pass information between elements

Converter contexts can be used to pass information between elements. The following example will generate HTML <h1>, ..., <h6> elements according to the nesting depth of a <section> element.

from ll.xist import xsc

class section(xsc.Element):
·  class Attrs(xsc.Element.Attrs):
·  ·  class title(xsc.TextAttr): pass

·  class Context(xsc.Element.Context):
·  ·  def __init__(self):
·  ·  ·  xsc.Element.Context.__init__(self)
·  ·  ·  self.level = 1

·  def convert(self, converter):
·  ·  context = converter[self]
·  ·  elementname = "h{}".format(min(context.level, 6))
·  ·  node = xsc.Frag(
·  ·  ·  getattr(converter.target, elementname)(self.attrs.title),
·  ·  ·  self.content
·  ·  )
·  ·  context.level += 1
·  ·  node = node.convert(converter)
·  ·  context.level -= 1
·  ·  return node

# Build an outline nested four levels deep; "document" is the outermost section.
with xsc.build():
·  with section(title="Python Tutorial") as document:
·  ·  with section(title="Using the Python Interpreter"):
·  ·  ·  with section(title="Invoking the Interpreter"):
·  ·  ·  ·  +section(title="Argument Passing")
·  ·  ·  ·  +section(title="Interactive Mode")
·  ·  ·  with section(title="The Interpreter and Its Environment"):
·  ·  ·  ·  +section(title="Error Handling")
·  ·  ·  ·  +section(title="Executable Python Scripts")
·  ·  ·  ·  +section(title="Source Code Encoding")
·  ·  ·  ·  +section(title="The Interactive Startup File")

# Convert the sections and print the resulting markup.
print(document.conv().string())

The output of this script will be:

<h1>Python Tutorial</h1>
<h2>Using the Python Interpreter</h2>
<h3>Invoking the Interpreter</h3>
<h4>Argument Passing</h4>
<h4>Interactive Mode</h4>
<h3>The Interpreter and Its Environment</h3>
<h4>Error Handling</h4>
<h4>Executable Python Scripts</h4>
<h4>Source Code Encoding</h4>
<h4>The Interactive Startup File</h4>

Formatting HTML as plain text

The function html.astext can be used to format HTML into plain text:

from ll.xist.ns import html

# A heading followed by a bullet list with one item per line of the Zen of Python.
e = html.div(
·  html.h1("The Zen of Python, by Tim Peters"),
·  html.ul(
·  ·  html.li("Beautiful is better than ugly."),
·  ·  html.li("Explicit is better than implicit."),
·  ·  html.li("Simple is better than complex."),
·  ·  html.li("Complex is better than complicated."),
·  ·  html.li("Flat is better than nested."),
·  ·  html.li("Sparse is better than dense."),
·  ·  html.li("Readability counts."),
·  ·  html.li("Special cases aren't special enough to break the rules."),
·  ·  html.li("Although practicality beats purity."),
·  ·  html.li("Errors should never pass silently."),
·  ·  html.li("Unless explicitly silenced."),
·  ·  html.li("In the face of ambiguity, refuse the temptation to guess."),
·  ·  html.li("There should be one-- and preferably only one --obvious way to do it."),
·  ·  html.li("Although that way may not be obvious at first unless you're Dutch."),
·  ·  html.li("Now is better than never."),
·  ·  html.li("Although never is often better than *right* now."),
·  ·  html.li("If the implementation is hard to explain, it's a bad idea."),
·  ·  html.li("If the implementation is easy to explain, it may be a good idea."),
·  ·  html.li("Namespaces are one honking great idea -- let's do more of those!"),
·  )
)

# width=40 limits the text to 40 columns; longer items are wrapped.
print(html.astext(e, width=40))

This will output:

The Zen of Python, by Tim Peters
================================

*  Beautiful is better than ugly.

*  Explicit is better than implicit.

*  Simple is better than complex.

*  Complex is better than complicated.

*  Flat is better than nested.

*  Sparse is better than dense.

*  Readability counts.

*  Special cases aren't special enough
   to break the rules.

*  Although practicality beats purity.

*  Errors should never pass silently.

*  Unless explicitly silenced.

*  In the face of ambiguity, refuse the
   temptation to guess.

*  There should be one-- and preferably
   only one --obvious way to do it.

*  Although that way may not be obvious
   at first unless you're Dutch.

*  Now is better than never.

*  Although never is often better than
   *right* now.

*  If the implementation is hard to
   explain, it's a bad idea.

*  If the implementation is easy to
   explain, it may be a good idea.

*  Namespaces are one honking great idea
   -- let's do more of those!

XIST examples

An introduction to XIST by examples