I'm implementing a Cursor class now which keeps track of the current
parent Element, text node and character position so that I can easily (I
hope ;-) work out where the splitting and inserting needs to occur. Wish
me luck!!
Sorry to revive this thread, but there's something else thats causing me
confusion now!
My cursor class is going quite well and I can insert text and element
nodes. It also has methods to 'move' the 'cursor' forward and backward
by a node at a time. It keeps the current_node in an instance variable
which is initially assigned an element from a DOM tree instance created
elsewhere.
The problem I've come up against is when I use the next_node() method,
and the current_node is a (leaf) Text node, the nextSibling property of
current_node is None, where I know (from the document structure) that it
shouldn't be. To make matters more confusing, if I manually create an
instance of my DOM tree (interactively) and check the nextSibling of the
same Text node, it is the correct value (another Element node) while the
nextSibling property of the SectionCursor instance's current_node
property (referring to the same node) is None. I *think* it only applies
to leaf Text nodes.
Here is the *complete* code for my SectionCursor class:
(note that 'sections' are large(ish) document fragments from the main
document)
==========================================
class SectionCursor:
def __init__(self, section_element):
"""Create a SectionCursor instance using the 'section_element' as
the parent element."""
self.section_element = section_element
self.current_node = self.section_element.firstChild
self.char_pos = 0
def forward(self, skip=1):
"""Move the cursor forward 'skip' character positions."""
if self.current_node.nodeType == Node.TEXT_NODE:
self.char_pos += skip
if self.char_pos > len(self.current_node.data):
self.next_node()
else: self.next_node()
def backward(self, skip=1):
"""Move the cursor backward 'skip' character positions."""
if self.current_node.nodeType == Node.TEXT_NODE:
self.char_pos -= skip
if self.char_pos < 0:
self.previous_node()
else: self.previous_node()
def next_node(self):
"""Move the cursor to the next node; either the first child or next
sibling."""
if self.current_node.hasChildNodes():
self.current_node = self.current_node.firstChild
elif self.current_node.nextSibling is not None:
self.current_node = self.current_node.nextSibling
else: return False
self.char_pos = 0
return True
def previous_node(self):
"""Move the cursor to the previous node; either the previous sibling
or the parent."""
if self.current_node.previousSibling is not None:
self.current_node = self.current_node.previousSibling
elif self.current_node.parentNode != self.section_element:
self.current_node = self.current_node.parentNode
else: return False
if self.current_node.nodeType == Node.TEXT_NODE:
self.char_pos = len(self.current_node.data) - 1
else:
self.char_pos = 0
return True
def jump_to(self, node, char_pos=0):
"""Jump to a node and character position."""
self.current_node = node
self.char_pos = char_pos
def insert_node(self, ref_doc, new_node):
"""Insert a node (new_node); ref_doc is an instance of the Document
class."""
if self.current_node.nodeType == Node.TEXT_NODE:
parent_node = self.current_node.parentNode
text_node = self.current_node
next_node = text_node.nextSibling
preceeding_portion =
ref_doc.createTextNode(text_node.data[:self.char_pos])
proceeding_portion =
ref_doc.createTextNode(text_node.data[self.char_pos:])
parent_node.replaceChild(preceeding_portion, text_node)
parent_node.insertBefore(new_node, next_node)
parent_node.insertBefore(proceeding_portion, next_node)
# where is the cursor?
else:
parent_node = self.current_node.parent_element
parent_node.insertBefore(new_node, self.current_node)
# where is the cursor?
def append_child_node(self, ref_doc, new_node):
pass
def insert_element(self, ref_doc, tag_name, attrs=None):
"""Insert an element called tag_name and with the attributes in the
attrs dictionary; ref_doc is an instance of the Document class."""
new_element = ref_doc.createElement(tag_name)
if attrs is not None:
for name, value in attrs.items():
new_element.setAttribute(name, value)
self.insert_node(ref_doc, new_element)
def insert_text(self, ref_doc, text):
"""Insert the text in 'text'; ref_doc is an instance of the Document
class."""
new_text = ref_doc.createTextNode(text)
self.insert_node(ref_doc, new_text)
def remove_node(self):
"""Remove the current node."""
condemned_node = self.current_node
if not self.next_node():
self.previous_node()
parent_node = condemned_node.parentNode
old_child = parent_node.removeChild(condemned_node)
old_child.unlink()
def remove_text(self, ref_doc, count=None):
"""Remove count (or all) characters from the current cursor
position."""
if self.current_node.nodeType != Node.TEXT_NODE:
return False
text = self.current_node.data
new_text = text[:self.char_pos]
if count is not None:
new_text += text[self.char_pos + count:]
new_text_node = ref_doc.createTextNode(new_text)
parent_node = self.current_node.parentNode
self.current_node = parent_node.replaceChild(new_text_node,
self.current_node)
#self.char_pos = 0
==========================================
I've noticed that when you print any minidom node (except a Text node)
it shows the node's memory address. But it doesn't do this with Text
nodes. Does anyone know why this is? If I assign a Text node from one
DOM tree to a variable, I don't get a copy do I? I hope I just get
another reference to the original node.
Cheers,
Richard