# Plugin for TWiki Collaboration Platform, http://TWiki.org/
#
# Copyright (C) 2005-2018 TWiki Contributor.
# Copyright (C) 2005 ILOG http://www.ilog.fr
# Copyright (C) 2008-2010 Foswiki Contributors.
# All Rights Reserved. TWiki Contributors are listed in the
# AUTHORS file in the root of this distribution.
# NOTE: Please extend that file, not this notice.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version. For
# more details read LICENSE in the root of the TWiki distribution.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# As per the GPL, removal of this notice is prohibited.
# The generator works by expanding an HTML parse tree to "decorated"
# text, where the decorators are non-printable characters. These characters
# act to express format requirements - for example, the need to have a
# newline before some text, or the need for a space. Whitespace is then
# collapsed down to the minimum that satisfies the format requirements.
=pod
---+ package TWiki::Plugins::WysiwygPlugin::HTML2TML::Node;
Object for storing a parsed HTML tag, and processing it
to generate TML from the parse tree.
See also TWiki::Plugins::WysiwygPlugin::HTML2TML::Leaf
=cut
package TWiki::Plugins::WysiwygPlugin::HTML2TML::Node;
use TWiki::Plugins::WysiwygPlugin::HTML2TML::Base;
our @ISA = qw( TWiki::Plugins::WysiwygPlugin::HTML2TML::Base );
use strict;
use warnings;
use TWiki::Func; # needed for regular expressions
use Assert;
use vars qw( $reww );
use TWiki::Plugins::WysiwygPlugin::Constants;
use TWiki::Plugins::WysiwygPlugin::HTML2TML::WC;
use HTML::Entities ();
my %jqueryChiliClass = map { $_ => 1 }
qw( cplusplus csharp css bash delphi html java js
lotusscript php-f php sql tml );
my %tml2htmlClass = map { $_ => 1 }
qw( WYSIWYG_PROTECTED WYSIWYG_STICKY TMLverbatim WYSIWYG_LINK
TMLhtml WYSIWYG_HIDDENWHITESPACE );
=pod
---++ ObjectMethod new( $context, $tag, \%attrs )
Construct a new HTML tag node using the given tag name
and attribute hash.
=cut
sub new {
my ( $class, $context, $tag, $attrs ) = @_;
my $this = {};
$this->{context} = $context;
$this->{tag} = $tag;
$this->{nodeType} = 2;
$this->{attrs} = {};
if ($attrs) {
foreach my $attr ( keys %$attrs ) {
$this->{attrs}->{ lc($attr) } = $attrs->{$attr};
}
}
$this->{head} = $this->{tail} = undef;
return bless( $this, $class );
}
# debug
sub stringify {
my ( $this, $shallow ) = @_;
my $r = '';
if ( $this->{tag} ) {
$r .= '<' . $this->{tag};
foreach my $attr ( sort keys %{ $this->{attrs} } ) {
$r .= " " . $attr . "='" . $this->{attrs}->{$attr} . "'";
}
$r .= ' /' if $WC::SELFCLOSING{ lc( $this->{tag} ) };
$r .= '>';
}
if ($shallow) {
$r .= '...';
}
else {
my $kid = $this->{head};
while ($kid) {
$r .= $kid->stringify();
$kid = $kid->{next};
}
}
if ( $this->{tag} and not $WC::SELFCLOSING{ lc( $this->{tag} ) } ) {
$r .= '' . $this->{tag} . '>';
}
return $r;
}
=pod
---++ ObjectMethod addChild( $node )
Add a child node to the ordered list of children of this node
=cut
sub addChild {
my ( $this, $node ) = @_;
ASSERT( $node != $this ) if DEBUG;
$node->{next} = undef;
$node->{parent} = $this;
my $kid = $this->{tail};
if ($kid) {
$kid->{next} = $node;
$node->{prev} = $kid;
}
else {
$node->{prev} = undef;
$this->{head} = $node;
}
$this->{tail} = $node;
}
# top and tail a string
sub _trim {
my $s = shift;
# Item5076: removed CHECKn from the following exprs, because loss of it
# breaks line-sensitive TML content inside flattened content.
$s =~ s/^[ \t\n$WC::CHECKw$WC::CHECKs]+/$WC::CHECKw/o;
$s =~ s/[ \t\n$WC::CHECKw]+$/$WC::CHECKw/o;
return $s;
}
# Both object method and static method
sub hasClass {
my ( $this, $class ) = @_;
return 0 unless $this;
if (
UNIVERSAL::isa(
$this, 'TWiki::Plugins::WysiwygPlugin::HTML2TML::Node'
)
)
{
return hasClass( $this->{attrs}, $class );
}
return 0 unless defined $this->{class};
return $this->{class} =~ /\b$class\b/ ? 1 : 0;
}
# Both object method and static method
sub _removeClass {
my ( $this, $class ) = @_;
return 0 unless $this;
if (
UNIVERSAL::isa(
$this, 'TWiki::Plugins::WysiwygPlugin::HTML2TML::Node'
)
)
{
return _removeClass( $this->{attrs}, $class );
}
return 0 unless hasClass( $this, $class );
$this->{class} =~ s/\b$class\b//;
$this->{class} =~ s/\s+/ /g;
$this->{class} =~ s/^\s+//;
$this->{class} =~ s/\s+$//;
if ( !$this->{class} ) {
delete $this->{class};
}
return 1;
}
# Both object method and static method
sub _addClass {
my ( $this, $class ) = @_;
if (
UNIVERSAL::isa(
$this, 'TWiki::Plugins::WysiwygPlugin::HTML2TML::Node'
)
)
{
_addClass( $this->{attrs}, $class );
return;
}
_removeClass( $this, $class ); # avoid duplication
if ( $this->{class} ) {
$this->{class} .= ' ' . $class;
}
else {
$this->{class} = $class;
}
}
# Move the content of $node into $this
sub _eat {
my ( $this, $node ) = @_;
my $kid = $this->{tail};
if ($kid) {
$kid->{next} = $node->{head};
if ( $node->{head} ) {
$node->{head}->{prev} = $kid;
}
}
else {
$this->{head} = $node->{head};
}
$this->{tail} = $node->{tail};
$kid = $node->{head};
while ($kid) {
$kid->{parent} = $this;
$kid = $kid->{next};
}
$node->{head} = $node->{tail} = undef;
}
=pod
---++ ObjectMethod rootGenerate($opts) -> $text
Generates TML from this HTML node. The generation is done
top down and bottom up, so that higher level nodes can make
decisions on whether to allow TML conversion in lower nodes,
and lower level nodes can constrain conversion in higher level
nodes.
$opts is a bitset. $WC::VERY_CLEAN will cause the generator
to drop unrecognised HTML (e.g. divs and spans that don't
generate TML)
=cut
sub rootGenerate {
my ( $this, $opts ) = @_;
#print STDERR "Raw [", WC::debugEncode($this->stringify()), "]\n\n";
$this->cleanParseTree();
#print STDERR "Cleaned [", WC::debugEncode($this->stringify()), "]\n\n";
# Perform some transformations on the parse tree
$this->_collapse();
#print STDERR "Collapsed [", WC::debugEncode($this->stringify()), "]\n\n";
my ( $f, $text ) = $this->generate($opts);
# Debug support
#print STDERR "Converted [",WC::debugEncode($text),"]\n";
# Move leading \n out of protected region. Delicate hack fix required to
# maintain TWiki variables at the start of lines.
$text =~ s/$WC::PON$WC::NBBR/$WC::CHECKn$WC::PON/g;
# isolate whitespace checks and convert to $NBSP
$text =~ s/$WC::CHECKw$WC::CHECKw+/$WC::CHECKw/go;
$text =~
s/([$WC::CHECKn$WC::CHECKs$WC::NBSP $WC::TAB$WC::NBBR]($WC::PON|$WC::POFF)?)$WC::CHECKw/$1/go;
$text =~
s/$WC::CHECKw(($WC::PON|$WC::POFF)?[$WC::CHECKn$WC::CHECKs$WC::NBSP $WC::NBBR])/$1/go;
$text =~ s/^($WC::CHECKw)+//gos;
$text =~ s/($WC::CHECKw)+$//gos;
$text =~ s/($WC::CHECKw)+/$WC::NBSP/go;
# isolate $CHECKs and convert to $NBSP
$text =~ s/$WC::CHECKs$WC::CHECKs+/$WC::CHECKs/go;
$text =~ s/([ $WC::NBSP$WC::TAB])$WC::CHECKs/$1/go;
$text =~ s/$WC::CHECKs( |$WC::NBSP)/$1/go;
$text =~ s/($WC::CHECKs)+/$WC::NBSP/go;
# SMELL: Removed per Item11859. This was done because TMCE used to
# insert a
before
... It doesn't do that in 3.4.9
#$text =~ s/
$WC::NBBR/$WC::NBBR/g; # Remove BR before P
#die "Converted ",WC::debugEncode($text),"\n";
#print STDERR "Conv2 [",WC::debugEncode($text),"]\n";
my @regions = split( /([$WC::PON$WC::POFF])/o, $text );
my $protect = 0;
$text = '';
foreach my $tml (@regions) {
if ( $tml eq $WC::PON ) {
$protect++;
next;
}
elsif ( $tml eq $WC::POFF ) {
$protect--;
next;
}
# isolate $NBBR and convert to \n.
unless ($protect) {
$tml =~ s/\n$WC::NBBR/$WC::NBBR$WC::NBBR/go;
$tml =~ s/$WC::NBBR\n/$WC::NBBR$WC::NBBR/go;
$tml =~ s/$WC::NBBR( |$WC::NBSP)+$WC::NBBR/$WC::NBBR$WC::NBBR/go;
$tml =~ s/ +$WC::NBBR/$WC::NBBR/go;
$tml =~ s/$WC::NBBR +/$WC::NBBR/go;
$tml =~ s/$WC::NBBR$WC::NBBR+/$WC::NBBR$WC::NBBR/go;
# Now convert adjacent NBBRs to recreate empty lines
# 1 NBBR -> 1 newline
# 2 NBBRs ->
... It doesn't do that in 3.4.9
# Item5127: Remove BR just before EOLs
#unless ($protect) {
# $tml =~ s/
\n/\n/g;
#}
#print STDERR " -> [",WC::debugEncode($tml),"]\n";
$text .= $tml;
}
# Collapse adjacent tags
# SMELL: Can't collapse verbatim based upon simple close/open compare
# because the previous opening verbatim tag might have different
# class from the next one.
foreach my $tag (qw(noautolink literal)) {
$text =~ s#$tag>(\s*)<$tag>#$1#gs;
}
# Top and tail, and terminate with a single newline
$text =~ s/^\n*//s;
$text =~ s/\s*$/\n/s;
#print STDERR "TML [",WC::debugEncode($text),"]\n";
return $text;
}
sub _compareClass {
my ( $node1, $node2 ) = @_;
my $n1Class = $node1->{attrs}->{class} || '';
my $n1Sort = join( ' ', sort( split( / /, $n1Class ) ) );
my $n2Class = $node2->{attrs}->{class} || '';
my $n2Sort = join( ' ', sort( split( / /, $n2Class ) ) );
return ( $n1Sort eq $n2Sort );
}
# collapse adjacent nodes together, if they share the same class
sub _collapseOneClass {
my $node = shift;
my $class = shift;
if ( defined( $node->{tag} ) && $node->hasClass($class) ) {
my $next = $node->{next};
my @edible;
my $collapsible;
while (
$next
&& (
( !$next->{tag} && $next->{text} =~ /^\s*$/ )
|| ( $node->{tag} eq $next->{tag}
&& $next->hasClass($class)
&& ( _compareClass( $node, $next ) ) )
)
)
{
push( @edible, $next );
$collapsible ||= $next->hasClass($class);
$next = $next->{next};
}
if ($collapsible) {
foreach my $meal (@edible) {
$meal->_remove();
if ( $meal->{tag} ) {
require TWiki::Plugins::WysiwygPlugin::HTML2TML::Leaf;
$node->addChild(
new TWiki::Plugins::WysiwygPlugin::HTML2TML::Leaf(
$WC::NBBR)
);
$node->_eat($meal);
}
}
}
}
}
# Collapse adjacent VERBATIM nodes together
# Collapse adjacent STICKY nodes together
# Collapse a
that contains only a protected span into a protected P
# Collapse em in em
# Collapse adjacent text nodes
sub _collapse {
my $this = shift;
my @jobs = ($this);
while ( scalar(@jobs) ) {
my $node = shift(@jobs);
# SMELL: Not sure if we really still have to collapse consecutive verbatim.
# Extra whitespace to separate verbatim blocks is removed, and they will
# still eventually be merged.
_collapseOneClass( $node, 'TMLverbatim' );
_collapseOneClass( $node, 'WYSIWYG_STICKY' );
if ( $node->{tag} eq 'p'
&& $node->{head}
&& $node->{head} == $node->{tail} )
{
my $kid = $node->{head};
if ( uc( $kid->{tag} ) eq 'SPAN'
&& $kid->hasClass('WYSIWYG_PROTECTED') )
{
$kid->_remove();
$node->_eat($kid);
$node->_addClass('WYSIWYG_PROTECTED');
}
}
# Pressing return in a "twikiDeleteMe" paragraph will cause the paragraph
# to be split into a 2nd paragraph with the same class. We only want to clean
# the first one in the blockquote, and preserve the rest without the class.
if ( $node->{tag} eq 'p'
&& $node->hasClass('twikiDeleteMe')
&& $node->{parent}
&& $node->{parent}->{tag} eq 'blockquote' )
{
my $next = $node->{next};
while ($next) {
if ( $next
&& $next->{tag} eq 'p'
&& $next->hasClass('twikiDeleteMe') )
{
$next->_removeClass('twikiDeleteMe');
}
$next = $next->{next};
}
$node->_inline();
}
# If this is an emphasis (b, i, code, tt, strong) then
# flatten out any child nodes that express the same emphasis.
# This has to be done because TWiki emphases are single level.
if ( $WC::EMPHTAG{ $node->{tag} } ) {
my $kid = $node->{head};
while ($kid) {
if ( $WC::EMPHTAG{ $kid->{tag} }
&& $WC::EMPHTAG{ $kid->{tag} } eq
$WC::EMPHTAG{ $node->{tag} } )
{
$kid = $kid->_inline();
}
else {
$kid = $kid->{next};
}
}
}
$node->_combineLeaves();
my $kid = $node->{head};
while ($kid) {
push( @jobs, $kid );
$kid = $kid->{next};
}
}
}
# If this node has the specified class, insert a new "span" node with that
# class between this node and all of this node's children.
sub _moveClassToSpan {
my $this = shift;
my $class = shift;
if ( $this->{tag}
and lc( $this->{tag} ) ne 'span'
and $this->_removeClass($class) )
{
my %new_attrs = ( class => $class );
$new_attrs{style} = $this->{attrs}->{style}
if exists $this->{attrs}->{style};
my $newspan =
new TWiki::Plugins::WysiwygPlugin::HTML2TML::Node( $this->{context},
'span', \%new_attrs );
my $kid = $this->{head};
while ($kid) {
$newspan->addChild($kid);
$kid = $kid->{next};
}
$this->{head} = $this->{tail} = $newspan;
}
}
# the actual generate function. rootGenerate is only applied to the root node.
sub generate {
my ( $this, $options ) = @_;
my $fn;
my $flags;
my $text;
if ( $this->_isProtectedByAttrs() ) {
return $this->_defaultTag($options);
}
if ( $this->hasClass('TMLhtml') ) {
return $this->_defaultTag( $options & ~$WC::VERY_CLEAN );
}
my $tag = $this->{tag};
if ( $this->hasClass('WYSIWYG_LITERAL') ) {
if ( $tag eq 'div' || $tag eq 'p' || $tag eq 'span' ) {
$text = '';
my $kid = $this->{head};
while ($kid) {
$text .= $kid->stringify();
$kid = $kid->{next};
}
}
else {
$this->_removeClass('WYSIWYG_LITERAL');
$text = $this->stringify();
}
return ( 0, '
|
|$WC::NBSP|$WC::NBBR|$WC::CHECKn|$WC::CHECKs|$WC::CHECKw|$WC::CHECK1|$WC::CHECK2|$WC::TAB|\s)+$//so;
return $td;
}
# probe down into a table row to determine if the
# containing table can be converted to TML.
sub _isConvertableTableRow {
my ( $this, $options, $rowspan ) = @_;
return 0 if ( $this->_isProtectedByAttrs() );
my ( $flags, $text );
my @row;
my $ignoreCols = 0;
my $kid = $this->{head};
my $colIdx = 0;
while ( $rowspan and $rowspan->[$colIdx] ) {
push @row, $WC::NBSP . '^' . $WC::NBSP;
$rowspan->[$colIdx]--;
$colIdx++;
}
while ($kid) {
if ( $kid->{tag} eq 'th' ) {
$kid->_removePWrapper();
$kid->_moveClassToSpan('WYSIWYG_TT');
$kid->_moveClassToSpan('WYSIWYG_COLOR');
( $flags, $text ) = $kid->_flatten( $options | $WC::IN_TABLE );
$text = _TDtrim($text);
$text = "*$text*" if length($text);
}
elsif ( $kid->{tag} eq 'td' ) {
$kid->_removePWrapper();
$kid->_moveClassToSpan('WYSIWYG_TT');
$kid->_moveClassToSpan('WYSIWYG_COLOR');
( $flags, $text ) = $kid->_flatten( $options | $WC::IN_TABLE );
$text = _TDtrim($text);
}
elsif ( !$kid->{tag} ) {
$kid = $kid->{next};
next;
}
else {
# some other sort of (unexpected) tag
return 0;
}
return 0 if ( $flags & $WC::BLOCK_TML );
if ( $kid->{attrs} ) {
my $a = _deduceAlignment($kid);
if ( $text && $a eq 'right' ) {
$text = $WC::NBSP . $text;
}
elsif ( $text && $a eq 'center' ) {
$text = $WC::NBSP . $text . $WC::NBSP;
}
elsif ( $text && $a eq 'left' ) {
$text .= $WC::NBSP;
}
if ( $kid->{attrs}->{rowspan} && $kid->{attrs}->{rowspan} > 1 ) {
return 0 unless $rowspan;
$rowspan->[$colIdx] = $kid->{attrs}->{rowspan} - 1;
}
}
$text =~ s/ /$WC::NBSP/g;
$text =~ s/ /$WC::NBSP/g;
#if (--$ignoreCols > 0) {
# # colspanned
# $text = '';
#} els
if ( $text =~ /^$WC::NBSP*$/ ) {
$text = $WC::NBSP;
}
else {
$text = $WC::NBSP . $text . $WC::NBSP;
}
if ( $kid->{attrs}
&& $kid->{attrs}->{colspan}
&& $kid->{attrs}->{colspan} > 1 )
{
$ignoreCols = $kid->{attrs}->{colspan};
}
# Pad to allow wikiwords to work
push( @row, $text );
$colIdx++;
while ( $ignoreCols > 1 ) {
if ( $rowspan and $rowspan->[$colIdx] ) {
# rowspan and colspan into the same cell
return 0;
}
push( @row, '' );
$ignoreCols--;
$colIdx++;
}
while ( $rowspan and $rowspan->[$colIdx] ) {
push @row, $WC::NBSP . '^' . $WC::NBSP;
$rowspan->[$colIdx]--;
$colIdx++;
}
$kid = $kid->{next};
}
return \@row;
}
# Remove the P tag from a table cell when it surrounds the whole content
# These "wrapper P tags" come from TMCE, when you press Enter
# in a table cell. They are impossible to remove in TMCE itself
# and they mess up the vertical alignment of table text.
sub _removePWrapper {
my $this = shift;
# Find the first kid that is a tag,
# keeping track of any content before it
my $kid = $this->{head};
my $leadingContent = '';
while ( $kid->{next} and not $kid->{tag} ) {
$leadingContent .= $kid->{text};
$kid = $kid->{next};
}
# If there are no enclosed tags, then there is nothing further to do
return unless $kid;
return unless $kid->{tag};
# If there is something (non-whitespace) before the first tag,
# then there is nothing further to do
return if $leadingContent =~ /\S/;
# This is the first node (tag)
my $firstNodeKid = $kid;
# Find the last kid that is a tag,
# keeping track of any content after it
$kid = $this->{tail};
my $trailingContent = '';
while ( $kid->{prev} and not $kid->{tag} ) {
$trailingContent .= $kid->{text};
$kid = $kid->{prev};
}
# Note that there is at least one kid that is a node (tag)
# so the checks here are for safety's sake
ASSERT($kid) if DEBUG;
ASSERT( $kid->{tag} ) if DEBUG;
return unless $kid;
return unless $kid->{tag};
# If there is something (non-whitespace) after the last tag,
# then there is nothing further to do
return if $trailingContent =~ /\S/;
# This is the last node (tag)
my $lastNodeKid = $kid;
# If there are multiple kids that are nodes (tags)
# then there is no "wrapper" tag to be removed
return unless $firstNodeKid eq $lastNodeKid;
# There is only a problem if the surrounding tag is a
tag
return unless uc( $firstNodeKid->{tag} ) eq 'P';
$firstNodeKid->_remove();
# Check if the tag has attributes
if ( keys %{ $firstNodeKid->{attrs} } ) {
# Replace the wrapper P tag with a span
my $newspan =
new TWiki::Plugins::WysiwygPlugin::HTML2TML::Node( $this->{context},
'span', $firstNodeKid->{attrs} );
$newspan->_eat($firstNodeKid);
$this->addChild($newspan);
}
else {
# Remove the wrapper P tag
$this->_eat($firstNodeKid);
}
}
# Work out the alignment of a table cell from the style and/or class
sub _deduceAlignment {
my $td = shift;
if ( $td->{attrs}->{align} ) {
return lc( $td->{attrs}->{align} );
}
else {
if ( $td->{attrs}->{style}
&& $td->{attrs}->{style} =~ /text-align\s*:\s*(left|right|center)/ )
{
return $1;
}
if ( $td->hasClass(qr/align-(left|right|center)/) ) {
return $1;
}
}
return '';
}
# convert a heading tag
sub _H {
my ( $this, $options, $depth ) = @_;
my ( $flags, $contents ) = $this->_flatten($options);
return ( 0, undef )
if ( ( $flags & $WC::BLOCK_TML )
|| ( $flags & $WC::IN_TABLE ) );
my $notoc = '';
if ( $this->hasClass('notoc') ) {
$notoc = '!!';
}
my $indicator = '+';
if ( $this->hasClass('numbered') ) {
$indicator = '#';
}
$contents =~ s/^\s+/ /;
$contents =~ s/\s+$//;
my $res =
$WC::CHECKn . '---'
. ( $indicator x $depth )
. $notoc
. $WC::CHECKs
. $contents
. $WC::CHECKn;
return ( $flags | $WC::BLOCK_TML, $res );
}
# generate an emphasis
sub _emphasis {
my ( $this, $options, $ch ) = @_;
my ( $flags, $contents ) = $this->_flatten( $options | $WC::NO_BLOCK_TML );
return ( 0, undef )
if ( !defined($contents) || ( $flags & $WC::BLOCK_TML ) );
# Remove whitespace from either side of the contents, retaining the
# whitespace
$contents =~ s/ /$WC::NBSP/go;
$contents =~ s/ /$WC::NBSP/go;
$contents =~ /^($WC::WS)(.*?)($WC::WS)$/s;
my ( $pre, $post ) = ( $1, $3 );
$contents = $2;
return ( 0, undef ) if ( $contents =~ /^ || $contents =~ />$/ );
return ( 0, '' ) unless ( $contents =~ /\S/ );
# Now see if we can collapse the emphases
if ( $ch eq '_' && $contents =~ s/^\*(.*)\*$/$1/
|| $ch eq '*' && $contents =~ s/^_(?!_)(.*)(?_checkBeforeEmphasis();
my $ae = $this->_checkAfterEmphasis();
return ( 0, undef ) unless $ae && $be;
return ( $flags,
$pre . $WC::CHECK1 . $ch . $contents . $ch . $WC::CHECK2 . $post );
}
sub isBlockNode {
my $node = shift;
return ( $node->{tag}
&& $node->{tag} =~
/^(address|blockquote|center|dir|div|dl|fieldset|form|h\d|hr|isindex|menu|noframes|noscript|ol|p|pre|table|ul)$/
);
}
sub previousLeaf {
my $node = shift;
if ( !$node ) {
return;
}
do {
while ( !$node->{prev} ) {
if ( !$node->{parent} ) {
return; # can't go any further back
}
$node = $node->{parent};
}
$node = $node->{prev};
while ( !$node->isTextNode() ) {
$node = $node->{tail};
}
} while ( !$node->isTextNode() );
return $node;
}
# Test for /^|(?<=[\s\(])/ at the end of the leaf node before.
sub _checkBeforeEmphasis {
my ($this) = @_;
my $tb = $this->previousLeaf();
return 1 unless $tb;
return 1 if ( $tb->isBlockNode() );
return 1 if ( $tb->{nodeType} == 3 && $tb->{text} =~ /[\s(*_=]$/ );
return 0;
}
sub nextLeaf {
my $node = shift;
if ( !$node ) {
return;
}
do {
while ( !$node->{next} ) {
if ( !$node->{parent} ) {
return; # end of the road
}
$node = $node->{parent};
if ( $node->isBlockNode() ) {
# leaving this $node
return $node;
}
}
$node = $node->{next};
while ( !$node->isTextNode() ) {
$node = $node->{head};
}
} while ( !$node->isTextNode() );
return $node;
}
# Test for /$|(?=[\s,.;:!?)])/ at the start of the leaf node after.
sub _checkAfterEmphasis {
my ($this) = @_;
my $tb = $this->nextLeaf();
return 1 unless $tb;
return 1 if ( $tb->isBlockNode() );
return 1 if ( $tb->{nodeType} == 3 && $tb->{text} =~ /^[\s,.;:!?)*_=]/ );
return 0;
}
# generate verbatim for P, SPAN or PRE
sub _verbatim {
my ( $this, $tag, $options ) = @_;
$options |= $WC::PROTECTED | $WC::KEEP_ENTITIES | $WC::BR2NL | $WC::KEEP_WS;
my ( $flags, $text ) = $this->_flatten($options);
# decode once, and once only. This will decode only those
# entities than have a representation in the site charset.
WC::decodeRepresentableEntities($text);
# decodes to \240, which we want to make a space.
_encodeNbsp($text);
my $p = _htmlParams( $this->{attrs}, $options );
return ( $flags, "<$tag$p>$text$tag>" );
}
# pseudo-tags that may leak through in Macros
# We have to handle this to avoid a matching close tag
sub _handleNOP {
my ( $this, $options ) = @_;
my ( $flags, $text ) = $this->_flatten($options);
return ( $flags, ' ' . $kids . '
. These are:
# 1. We haven't explicitly been told to convert to \n (by BR2NL)
# 2. We have been told that block TML is illegal
# 3. The previous node is an inline element node or text node
# 4. The next node is an inline element or text node
my $sep = "\n";
if ( $options & $WC::BR2NL ) {
}
elsif ( $options & $WC::NO_BLOCK_TML ) {
$sep = '
';
}
elsif ( $this->prevIsInline() ) {
if ( $this->isInline() ) {
# Both
and cause a NL
# if this is empty, look at next
if ( $kids !~ /^[\000-\037]*$/ && $kids !~ /^[\000-\037]*$WC::NBBR/
|| $this->nextIsInline() )
{
$sep = '
';
}
}
}
return ( $f, $sep . $kids );
}
sub _handleCAPTION { return ( 0, '' ); }
# CENTER
# CITE
sub _handleCODE { return _emphasis( @_, '=' ); }
sub _handleCOL { return _flatten(@_); }
sub _handleCOLGROUP { return _flatten(@_); }
sub _handleDD { return _flatten(@_); }
sub _handleDFN { return _flatten(@_); }
# DIR
sub _handleDIV {
my ( $this, $options ) = @_;
if ( ( $options & $WC::NO_BLOCK_TML )
|| !$this->_isConvertableIndent( $options | $WC::NO_BLOCK_TML ) )
{
return $this->_handleP($options);
}
return ( $WC::BLOCK_TML, $this->_convertIndent($options) );
}
sub _handleDL { return _LIST(@_); }
sub _handleDT { return _flatten(@_); }
sub _handleEM { return _emphasis( @_, '_' ); }
sub _handleFIELDSET { return _flatten(@_); }
sub _handleFONT {
my ( $this, $options ) = @_;
my %atts = %{ $this->{attrs} };
# Try to convert font tags into %COLOUR%..%ENDCOLOR%
# First extract the colour from a style= param, if we can.
my $colour;
if ( defined $atts{style}
&& $atts{style} =~ s/(^|\s|;)color\s*:\s*(#?\w+)\s*(;|$)// )
{
$colour = $2;
}
# override it with a color= param, if there is one.
if ( defined $atts{color} ) {
$colour = $atts{color};
}
# The presence of the WYSIWYG_COLOR class _forces_ the tag to be
# converted to a TWiki colour macro, as long as the colour is
# recognised.
if ( hasClass( \%atts, 'WYSIWYG_COLOR' ) ) {
my $percentColour = $WC::HTML2TML_COLOURMAP{ uc($colour) };
if ( defined $percentColour ) {
# All other font information will be lost.
my ( $f, $kids ) = $this->_flatten($options);
return ( $f, '%' . $percentColour . '%' . $kids . '%ENDCOLOR%' );
}
}
# May still be able to convert if there is no other font information.
delete $atts{class} if defined $atts{class} && $atts{class} =~ /^\s*$/;
delete $atts{style} if defined $atts{style} && $atts{style} =~ /^[\s;]*$/;
delete $atts{color} if defined $atts{color};
if ( defined $colour && !scalar keys %atts ) {
my $percentColour = $WC::HTML2TML_COLOURMAP{ uc($colour) };
if ( defined $percentColour ) {
my ( $f, $kids ) = $this->_flatten($options);
return ( $f, '%' . $percentColour . '%' . $kids . '%ENDCOLOR%' );
}
}
# Either the colour can't be mapped, or we can't do the conversion
# without loss of information
return ( 0, undef );
}
# FORM
sub _handleFRAME { return _flatten(@_); }
sub _handleFRAMESET { return _flatten(@_); }
sub _handleHEAD { return ( 0, '' ); }
sub _handleHR {
my ( $this, $options ) = @_;
my ( $f, $kids ) = $this->_flatten($options);
return ( $f, '
' . $kids ) if ( $options & $WC::NO_BLOCK_TML );
my $dashes = 3;
if ( $this->{attrs}->{style}
and $this->{attrs}->{style} =~ s/\bnumdashes\s*:\s*(\d+)\b// )
{
$dashes = $1;
$dashes = 3 if $dashes < 3;
$dashes = 160 if $dashes > 160; # Filter out probably-bad data
}
return ( $f | $WC::BLOCK_TML,
$WC::CHECKn . ( '-' x $dashes ) . $WC::CHECKn . $kids );
}
sub _handleHTML { return _flatten(@_); }
sub _handleH1 { return _H( @_, 1 ); }
sub _handleH2 { return _H( @_, 2 ); }
sub _handleH3 { return _H( @_, 3 ); }
sub _handleH4 { return _H( @_, 4 ); }
sub _handleH5 { return _H( @_, 5 ); }
sub _handleH6 { return _H( @_, 6 ); }
sub _handleI { return _emphasis( @_, '_' ); }
sub _handleIMG {
my ( $this, $options ) = @_;
# Hack out mce_src, which is TinyMCE-specific and causes indigestion
# when the topic is reloaded
delete $this->{attrs}->{mce_src} if defined $this->{attrs}->{mce_src};
if ( $this->{context} && $this->{context}->{rewriteURL} ) {
my $href = $this->{attrs}->{src};
# decode URL params in the href
$href =~ s/%([0-9A-F]{2})/chr(hex($1))/gei;
$href = &{ $this->{context}->{rewriteURL} }( $href, $this->{context} );
$this->{attrs}->{src} = $href;
}
return ( 0, undef )
unless $this->{context}
&& defined $this->{context}->{convertImage};
my $alt =
&{ $this->{context}->{convertImage} }
( $this->{attrs}->{src}, $this->{context} );
if ($alt) {
return ( 0, $alt );
}
return ( 0, undef );
}
# INPUT
# INS
# ISINDEX
sub _handleKBD { return _handleTT(@_); }
# LABEL
# LI
sub _handleLINK { return ( 0, '' ); }
# MAP
# MENU
sub _handleMETA { return ( 0, '' ); }
sub _handleNOFRAMES { return ( 0, '' ); }
sub _handleNOSCRIPT { return ( 0, '' ); }
sub _handleOL { return _LIST(@_); }
# OPTGROUP
# OPTION
sub _handleP {
my ( $this, $options ) = @_;
my $nbnl = $this->hasClass('WYSIWYG_NBNL');
if ( $this->hasClass('WYSIWYG_WARNING') ) {
return ( 0, '' );
}
if ( $this->hasClass('TMLverbatim') ) {
return $this->_verbatim( 'verbatim', $options );
}
if ( $this->hasClass('WYSIWYG_STICKY') ) {
return $this->_verbatim( 'sticky', $options );
}
my ( $f, $kids ) = $this->_flatten($options);
return ( $f, '