Add Spaces Revisited


This page revisits the patterns used by the add_nbsps() and add_spaces() functions. The feature is activated by the show_spaces option, which can be set in the config as well as in the user settings.


What should it do, and what should it not do? Now that we have Unicode, some patterns may be useful only in a certain context; for example, we can set different patterns for Chinese if we have to.
I built a test action and feed it different test data to show what each pattern iteration does.


Additionally, we should define a set of tags to determine in which contexts the result should differ.


  • Latin
  • Cyrillic
  • Chinese
  • Farsi
  • Hindi

Example tags:

日本語123
ABCD123
Abcd123
AbCd123
abcd123
123日本語
123ABCD
123Abcd
123AbCd
123abcd

Test Action


action/test_addspaces.php

<?php

// bail out unless the IN_WACKO constant is defined
// (presumably set by the engine's entry point, so the file cannot be requested directly)
if (!defined('IN_WACKO'))
{
    exit;
}

// help text for this action; handed to $this->help() when the help parameter is set
$info = <<<EOD
Description:
    Shows tags with applied show_spaces setting.

Usage:
    {{test_addspaces}}

Options:
    [set="tag1, tag2, ..."]
        a comma-delimited list of tag names
    [debug=0|1]
        shows result for each processed pattern
EOD;

// set defaults for parameters that were not passed to the action
$help   ??= 0;
$set    ??= '';
$debug  ??= 0;

if ($help)
{
    // The second argument was 'blog', which looks like a leftover from the action this
    // file was copied from. Presumably it names the action the help text belongs to,
    // so it is set to this action's own name - verify against the definition of help().
    $tpl->help = $this->help($info, 'test_addspaces');
    return;
}

// Build the list of tags to test.
// Start from an empty list so the fallback check below never reads an undefined
// variable when no "set" parameter was passed.
$tags = [];

if ($set)
{
    // split the comma-delimited list, trim each entry and drop empty ones
    // (as produced by "a,,b" or a trailing comma)
    $tags = array_values(array_filter(
        array_map('trim', explode(',', $set)),
        static fn ($tag) => $tag !== ''
    ));

    // The original additionally called array_map($this->sanitize_page_tag, $tags).
    // That expression reads a property instead of referencing the method, so it never
    // produced a valid callback. Each tag is already passed through sanitize_page_tag()
    // in the processing loop at the end of this script, so the call is dropped here.
}

// no usable tags given: fall back to the built-in sample set
if (!$tags)
{
    $tags = [
        '日本語123',
        'ABCD123',
        'Abcd123',
        'AbCd123',
        'abcd123',
        '123日本語',
        '123ABCD',
        '123Abcd',
        '123AbCd',
        '123abcd',
    ];
}

# Ut::debug_print_r($tags);

/**
 * Applies the show_spaces replacement patterns to one tag and writes the outcome
 * to the template.
 *
 * The patterns are applied strictly in order; later patterns match on the NBSP
 * inserted by earlier ones, so the sequence must not be reordered.
 *
 * Template output: enters the 'debug_' or 'result_' block (depending on $debug),
 * sets 'text' to the unprocessed tag, enters 'l_', and - in debug mode only -
 * sets n / pattern0 / pattern1 / text after every pattern. After leaving 'l_'
 * the final string is assigned to 'result'.
 *
 * @param string $text  tag to process
 * @param mixed  $debug truthy to emit the intermediate result of every pattern
 */
$addspaces = function ($text, $debug) use ($tpl)
{
    $patterns = [
        // insert NBSP between an ALPHANUM match and a directly following UPPERNUM match
        ['(' . self::PATTERN['ALPHANUM'] . ')(' . self::PATTERN['UPPERNUM'] . ')', '\\1' . NBSP . '\\2'],
        // insert NBSP between two adjacent UPPERNUM matches
        ['(' . self::PATTERN['UPPERNUM'] . ')(' . self::PATTERN['UPPERNUM'] . ')', '\\1' . NBSP . '\\2'],
        // insert NBSP before a slash that follows an ALPHANUM match
        ['(' . self::PATTERN['ALPHANUM'] . ')\/', '\\1' . NBSP . '/'],
        // drop the NBSP after an UPPER match when UPPER, NBSP, UPPERNUM follows (lookahead, not consumed)
        ['(' . self::PATTERN['UPPER'] . ')' . NBSP . '(?=' . self::PATTERN['UPPER'] . NBSP . self::PATTERN['UPPERNUM'] . ')', '\\1'],
        // drop the NBSP after an UPPER match when UPPER, NBSP, slash follows
        ['(' . self::PATTERN['UPPER'] . ')' . NBSP . '(?=' . self::PATTERN['UPPER'] . NBSP . '\/)', '\\1'],
        // insert NBSP after a slash that is followed by an ALPHANUM match
        ['\/(' . self::PATTERN['ALPHANUM'] . ')', '/' . NBSP . '\\1'],
        // drop the NBSP between two UPPERNUM matches at the end of the string or before a word boundary
        ['(' . self::PATTERN['UPPERNUM'] . ')' . NBSP . '(' . self::PATTERN['UPPERNUM'] . ')($|\b)', '\\1\\2'],
        // disabled digit/letter rules, kept for comparison with the older pattern set
        # ['(\d)(' . self::PATTERN['ALPHA'] . ')',                                                                                '\\1' . NBSP . '\\2'],
        # ['(' . self::PATTERN['ALPHA'] . ')(\d)',                                                                                '\\1' . NBSP . '\\2'],
        # ['(\d)' . NBSP . '(?=\d)',                                                                                            '\\1'],
        // drop the NBSP after a digit unless an ALPHA match follows (negative lookahead)
        ['(\d)' . NBSP . '(?!' . self::PATTERN['ALPHA'] . ')', '\\1'],
    ];

    $tpl->enter($debug ? 'debug_' : 'result_');
    $tpl->text = $text;
    $tpl->enter('l_');

    // running number of the pattern, shown in the debug output
    $i = 1;

    foreach ($patterns as [$regex, $replacement])
    {
        // preg_replace() returns null on failure (e.g. malformed UTF-8 under the u modifier);
        // keep the last good text instead of feeding null into the next pattern
        $text = preg_replace('/' . $regex . '/u', $replacement, $text) ?? $text;

        if ($debug)
        {
            $tpl->n         = $i;
            $tpl->pattern0  = $regex;
            $tpl->pattern1  = $replacement;
            $tpl->text      = $text;

            $i++;
        }
    }

    $tpl->leave();
    $tpl->result = $text;
    $tpl->leave();
};

// run every tag through the show_spaces patterns; each $addspaces call writes one block to the template
foreach ($tags as $tag)
{
    // the return value is not used here - presumably sanitize_page_tag() modifies $tag
    // by reference; verify against its definition
    $this->sanitize_page_tag($tag);
    $addspaces($tag, $debug);
}

action/template/test_addspaces.tpl

[ === main === ]
	[ ' help ' ]
	<table class="hl-line">
		[= debug _ =
			<tr>
				<th>Pattern</th>
				<td></td>
				<td></td>
				<td><strong>[ ' text | e ' ]:</strong></td>
			</tr>
			[= l _ =
				<tr>
					<td>[ ' n ' ]. <code>[ ' pattern0 | e ' ]</code></td>
					<td>⇨ <code>[ ' pattern1 | e ' ]</code></td>
					<td>&nbsp;&nbsp;↳&nbsp;&nbsp;</td>
					<td>[ ' text | e ' ]</td>
				</tr>
			=]
			<tr>
				<td colspan=4><hr><br></td>
			</tr>
		=]
		[= result _ =
			<tr>
				<td>[ ' text | e ' ]</td>
				<td>&nbsp;&nbsp;→&nbsp;&nbsp;</td>
				<td>[ ' result | e ' ]</td>
			</tr>
		=]
	</table>	

4.2

<?php

// Replacement sequence as used in 4.2. The character classes come from $this->language
// and the non-breaking space is the literal entity "&nbsp;". The statements depend on
// each other: later ones match the "&nbsp;" inserted by earlier ones.

// insert &nbsp; between an ALPHANUM match and a directly following UPPERNUM match
$text = preg_replace("/(".$this->language["ALPHANUM"].")(".$this->language["UPPERNUM"].")/","\\1&nbsp;\\2",$text);
// insert &nbsp; between two adjacent UPPERNUM matches
$text = preg_replace("/(".$this->language["UPPERNUM"].")(".$this->language["UPPERNUM"].")/","\\1&nbsp;\\2",$text);
// insert &nbsp; before a slash that follows an ALPHANUM match
$text = preg_replace("/(".$this->language["ALPHANUM"].")\//","\\1&nbsp;/",$text);
// drop the &nbsp; after an UPPER match when UPPER, &nbsp;, UPPERNUM follows (lookahead, not consumed)
$text = preg_replace("/(".$this->language["UPPER"].")&nbsp;(?=".$this->language["UPPER"]."&nbsp;".$this->language["UPPERNUM"].")/","\\1",$text);
// drop the &nbsp; after an UPPER match when UPPER, &nbsp;, slash follows
$text = preg_replace("/(".$this->language["UPPER"].")&nbsp;(?=".$this->language["UPPER"]."&nbsp;\/)/","\\1",$text);
// insert &nbsp; after a slash that is followed by an ALPHANUM match
$text = preg_replace("/\/(".$this->language["ALPHANUM"].")/","/&nbsp;\\1",$text);
// drop the &nbsp; between two UPPERNUM matches at the end of the string or before a word boundary
$text = preg_replace("/(".$this->language["UPPERNUM"].")&nbsp;(".$this->language["UPPERNUM"].")($|\b)/","\\1\\2",$text);
// insert &nbsp; between a digit and a directly following ALPHA match
$text = preg_replace("/([0-9])(".$this->language["ALPHA"].")/","\\1&nbsp;\\2",$text);
// insert &nbsp; between an ALPHA match and a directly following digit
$text = preg_replace("/(".$this->language["ALPHA"].")([0-9])/","\\1&nbsp;\\2",$text);
// drop the &nbsp; between two digits
$text = preg_replace("/([0-9])&nbsp;(?=[0-9])/","\\1",$text);

6.1

<?php

// Pattern list as used in 6.1: each entry is [regex body, replacement]. The character
// classes come from self::PATTERN and the non-breaking space from the NBSP constant.
// The entries are order-dependent: later ones match the NBSP inserted by earlier ones.
$patterns =[
    // insert NBSP between an ALPHANUM match and a directly following UPPERNUM match
    ['(' . self::PATTERN['ALPHANUM'] . ')(' . self::PATTERN['UPPERNUM'] . ')',                                                 '\\1' . NBSP . '\\2'],
    // insert NBSP between two adjacent UPPERNUM matches
    ['(' . self::PATTERN['UPPERNUM'] . ')(' . self::PATTERN['UPPERNUM'] . ')',                                                '\\1' . NBSP . '\\2'],
    // insert NBSP before a slash that follows an ALPHANUM match
    ['(' . self::PATTERN['ALPHANUM'] . ')\/',                                                                                '\\1' . NBSP . '/'],
    // drop the NBSP after an UPPER match when UPPER, NBSP, UPPERNUM follows (lookahead, not consumed)
    ['(' . self::PATTERN['UPPER'] . ')' . NBSP . '(?=' . self::PATTERN['UPPER'] . NBSP . self::PATTERN['UPPERNUM'] . ')',    '\\1'],
    // drop the NBSP after an UPPER match when UPPER, NBSP, slash follows
    ['(' . self::PATTERN['UPPER'] . ')' . NBSP . '(?=' . self::PATTERN['UPPER'] . NBSP . '\/)',                                '\\1'],
    // insert NBSP after a slash that is followed by an ALPHANUM match
    ['\/(' . self::PATTERN['ALPHANUM'] . ')',                                                                                '/' . NBSP . '\\1'],
    // drop the NBSP between two UPPERNUM matches at the end of the string or before a word boundary
    ['(' . self::PATTERN['UPPERNUM'] . ')' . NBSP . '(' . self::PATTERN['UPPERNUM'] . ')($|\b)',                            '\\1\\2'],
    // drop the NBSP after a digit unless an ALPHA match follows (negative lookahead)
    ['(\d)' . NBSP . '(?!' . self::PATTERN['ALPHA'] . ')',                                                                    '\\1'],
];