View source for Add Spaces Revisited

{{toc}}

This page revisits the patterns used by the ##add_nbsps()## and ##add_spaces()## functions. They are activated by the ##show_spaces## option in the config as well as in the user settings.

What should it do, and what should it not do? Now that we have Unicode, some patterns may be useful only in a certain context; here we can set different patterns, e.g. for Chinese, if we have to.
I built a test action and feed it with different test data to show what each pattern iteration does.

Additionally, we should define a set of tags to determine in which context the result should differ.

  * Latin
  * Cyrillic
  * Chinese
  * Farsi
  * Hindi

Example tags:
	日本語123
	ABCD123
	Abcd123
	AbCd123
	abcd123
	123日本語
	123ABCD
	123Abcd
	123AbCd
	123abcd

===Test Action===

action/test_addspaces.php
%%(php)
<?php

// action files must only be executed from inside the engine
if (!defined('IN_WACKO'))
{
	exit;
}

$info = <<<EOD
Description:
	Shows tags with applied show_spaces setting.

Usage:
	{{test_addspaces}}

Options:
	[set="tag1, tag2, ..."]
		a comma-delimited list of tag names
	[debug=0|1]
		shows result for each processed pattern
EOD;

// set defaults
$help		??= 0;
$set		??= '';
$debug		??= 0;

if ($help)
{
	// NOTE(review): the second argument was 'blog', which looks like a copy-paste
	// leftover; it is assumed to be the action name - confirm against help()
	$tpl->help	= $this->help($info, 'test_addspaces');
	return;
}

// always start with an empty list, so the fallback check below
// never reads an undefined variable when no set was passed
$tags = [];

if ($set)
{
	// split the comma-delimited list, trim every entry and drop empty ones
	// (e.g. from 'a,,b' or a trailing comma); the tags are passed through
	// sanitize_page_tag() right before they are processed
	$tags = array_values(array_filter(
		array_map('trim', explode(',', $set)),
		fn ($tag) => $tag !== ''
	));
}

// fall back to the built-in sample tags
if (!$tags)
{
	$tags = [
		'日本語123',
		'ABCD123',
		'Abcd123',
		'AbCd123',
		'abcd123',
		'123日本語',
		'123ABCD',
		'123Abcd',
		'123AbCd',
		'123abcd',
	];
}

# Ut::debug_print_r($tags);

// Runs all spacing rules over $text in a fixed order and writes the outcome
// to the template; with $debug set, every intermediate step is written as well.
$addspaces = function ($text, $debug) use ($tpl)
{
	// character class fragments the rules are assembled from
	$alpha		= self::PATTERN['ALPHA'];
	$alphanum	= self::PATTERN['ALPHANUM'];
	$upper		= self::PATTERN['UPPER'];
	$uppernum	= self::PATTERN['UPPERNUM'];

	// [regex body, replacement] pairs - later rules rely on the separators
	// inserted by earlier ones, so the order must not change
	$rules = [
		['(' . $alphanum . ')(' . $uppernum . ')',										'\\1' . NBSP . '\\2'],
		['(' . $uppernum . ')(' . $uppernum . ')',										'\\1' . NBSP . '\\2'],
		['(' . $alphanum . ')\/',														'\\1' . NBSP . '/'],
		['(' . $upper . ')' . NBSP . '(?=' . $upper . NBSP . $uppernum . ')',			'\\1'],
		['(' . $upper . ')' . NBSP . '(?=' . $upper . NBSP . '\/)',						'\\1'],
		['\/(' . $alphanum . ')',														'/' . NBSP . '\\1'],
		['(' . $uppernum . ')' . NBSP . '(' . $uppernum . ')($|\b)',					'\\1\\2'],
		// disabled rules, kept for reference:
		# ['(\d)(' . $alpha . ')',														'\\1' . NBSP . '\\2'],
		# ['(' . $alpha . ')(\d)',														'\\1' . NBSP . '\\2'],
		# ['(\d)' . NBSP . '(?=\d)',													'\\1'],
		['(\d)' . NBSP . '(?!' . $alpha . ')',											'\\1'],
	];

	// pick the output section, show the untouched input and open the row list
	$tpl->enter($debug ? 'debug_' : 'result_');
	$tpl->text = $text;
	$tpl->enter('l_');

	$step = 0;

	foreach ($rules as [$regex, $replacement])
	{
		$text = preg_replace('/' . $regex . '/u', $replacement, $text);

		if (!$debug)
		{
			continue;
		}

		// one row per applied rule: number, rule and the text after this step
		$tpl->n			= ++$step;
		$tpl->pattern0	= $regex;
		$tpl->pattern1	= $replacement;
		$tpl->text		= $text;
	}

	$tpl->leave();
	$tpl->result = $text;
	$tpl->leave();
};

// process every tag on its own
foreach ($tags as $page_tag)
{
	// return value is not used - presumably the tag is adjusted in place (verify against sanitize_page_tag())
	$this->sanitize_page_tag($page_tag);
	$addspaces($page_tag, $debug);
}
%%
action/template/test_addspaces.tpl
%%
[ === main === ]
	[ ' help ' ]
	<table class="hl-line">
		[= debug _ =
			<tr>
				<th>Pattern</th>
				<td></td>
				<td></td>
				<td><strong>[ ' text | e ' ]:</strong></td>
			</tr>
			[= l _ =
				<tr>
					<td>[ ' n ' ]. <code>[ ' pattern0 | e ' ]</code></td>
					<td>⇨ <code>[ ' pattern1 | e ' ]</code></td>
					<td>  ↳  </td>
					<td>[ ' text | e ' ]</td>
				</tr>
			=]
			<tr>
				<td colspan=4><hr><br></td>
			</tr>
		=]
		[= result _ =
			<tr>
				<td>[ ' text | e ' ]</td>
				<td>  →  </td>
				<td>[ ' result | e ' ]</td>
			</tr>
		=]
	</table>
%%

===4.2===
%%(php)
<?php

// Historical reference: the replacement chain as it was in 4.2.
// The statements run top to bottom on the same $text, so later ones see the
// separators inserted by earlier ones. The character classes come from
// $this->language[...] and are not shown here. None of the patterns uses the
// /u modifier.

// insert a separator between an ALPHANUM match and a directly following UPPERNUM match
$text = preg_replace("/(".$this->language["ALPHANUM"].")(".$this->language["UPPERNUM"].")/","\\1 \\2",$text);
// same for two adjacent UPPERNUM matches
$text = preg_replace("/(".$this->language["UPPERNUM"].")(".$this->language["UPPERNUM"].")/","\\1 \\2",$text);
// insert a separator before a slash that follows an ALPHANUM match
$text = preg_replace("/(".$this->language["ALPHANUM"].")\//","\\1 /",$text);
// remove the separator after an UPPER match when UPPER, separator, UPPERNUM follow
$text = preg_replace("/(".$this->language["UPPER"].") (?=".$this->language["UPPER"]." ".$this->language["UPPERNUM"].")/","\\1",$text);
// remove the separator after an UPPER match when UPPER, separator, slash follow
$text = preg_replace("/(".$this->language["UPPER"].") (?=".$this->language["UPPER"]." \/)/","\\1",$text);
// insert a separator after a slash that precedes an ALPHANUM match
$text = preg_replace("/\/(".$this->language["ALPHANUM"].")/","/ \\1",$text);
// rejoin two UPPERNUM matches at the end of the text or at a word boundary
$text = preg_replace("/(".$this->language["UPPERNUM"].") (".$this->language["UPPERNUM"].")($|\b)/","\\1\\2",$text);
// insert a separator between a digit and a following ALPHA match
$text = preg_replace("/([0-9])(".$this->language["ALPHA"].")/","\\1 \\2",$text);
// insert a separator between an ALPHA match and a following digit
$text = preg_replace("/(".$this->language["ALPHA"].")([0-9])/","\\1 \\2",$text);
// remove the separator between two digits
$text = preg_replace("/([0-9]) (?=[0-9])/","\\1",$text);
%%

===6.1===
%%(php)
<?php

// Pattern table as used in 6.1: each entry is [regex body, replacement].
// The character classes are taken from self::PATTERN and NBSP is the
// separator constant; both are defined outside this snippet.
$patterns =[
	// insert NBSP between an ALPHANUM match and a directly following UPPERNUM match
	['(' . self::PATTERN['ALPHANUM'] . ')(' . self::PATTERN['UPPERNUM'] . ')', 												'\\1' . NBSP . '\\2'],
	// same for two adjacent UPPERNUM matches
	['(' . self::PATTERN['UPPERNUM'] . ')(' . self::PATTERN['UPPERNUM'] . ')',												'\\1' . NBSP . '\\2'],
	// insert NBSP before a slash that follows an ALPHANUM match
	['(' . self::PATTERN['ALPHANUM'] . ')\/',																				'\\1' . NBSP . '/'],
	// remove the NBSP after an UPPER match when UPPER, NBSP, UPPERNUM follow
	['(' . self::PATTERN['UPPER'] . ')' . NBSP . '(?=' . self::PATTERN['UPPER'] . NBSP . self::PATTERN['UPPERNUM'] . ')',	'\\1'],
	// remove the NBSP after an UPPER match when UPPER, NBSP, slash follow
	['(' . self::PATTERN['UPPER'] . ')' . NBSP . '(?=' . self::PATTERN['UPPER'] . NBSP . '\/)',								'\\1'],
	// insert NBSP after a slash that precedes an ALPHANUM match
	['\/(' . self::PATTERN['ALPHANUM'] . ')',																				'/' . NBSP . '\\1'],
	// rejoin two UPPERNUM matches at the end of the text or at a word boundary
	['(' . self::PATTERN['UPPERNUM'] . ')' . NBSP . '(' . self::PATTERN['UPPERNUM'] . ')($|\b)',							'\\1\\2'],
	// remove the NBSP after a digit unless an ALPHA match follows
	['(\d)' . NBSP . '(?!' . self::PATTERN['ALPHA'] . ')',																	'\\1'],
];
%%