Add Spaces Revisited


Revisiting the patterns used by the add_nbsps() and add_spaces() functions. They are activated by the show_spaces option in the config as well as in the user settings.


What should it do, and what should it not do? Now that we have Unicode, some patterns may only be useful in a certain context; here we can set different patterns, e.g. for Chinese, if we have to.
I built a test action and feed it with different test data to show what each pattern iteration does.


Additionally, we should define a set of tags to determine in which contexts the result should differ.


  • Latin
  • Cyrillic
  • Chinese
  • Farsi
  • Hindi

Example tags:

日本語123
ABCD123
Abcd123
AbCd123
abcd123
123日本語
123ABCD
123Abcd
123AbCd
123abcd

1. Test Action


action/test_addspaces.php


<?php

// Abort unless the IN_WACKO constant has been defined by the including code.
if (!defined('IN_WACKO'))
{
    exit;
}

// Usage text handed to $this->help() when the help option is set.
$info = <<<EOD
Description:
    Shows tags with applied show_spaces setting.

Usage:
    {{test_addspaces}}

Options:
    [set="tag1, tag2, ..."]
        a comma-delimited list of tag names
    [debug=0|1]
        shows result for each processed pattern
EOD;

// set defaults
$help   ??= 0;
$set    ??= '';
$debug  ??= 0;

// Help requested: put the help output into the template and skip the test run.
if ($help)
{
    // NOTE(review): the second argument is 'blog' although this action is
    // test_addspaces - looks like a copy/paste leftover; confirm what help() expects.
    $tpl->help = $this->help($info, 'blog');
    return;
}

// Tags to run through the patterns. Initialised up front so the fallback
// check below never reads an undefined variable when "set" is empty.
$tags = [];

if ($set)
{
    // split the comma-delimited list, trim every entry and drop empty ones
    $tags = array_values(array_filter(
        array_map('trim', explode(',', $set)),
        static fn ($tag) => $tag !== ''
    ));

    // NOTE(review): sanitize_page_tag() is called elsewhere in this script as a
    // bare statement with its result discarded, so it presumably modifies its
    // argument in place (by reference) - confirm. array_map() would not carry
    // such an in-place change back into the list, hence the reference loop.
    foreach ($tags as &$tag)
    {
        $this->sanitize_page_tag($tag);
    }

    unset($tag); // break the dangling reference left by the loop
}

// Nothing usable supplied: fall back to the built-in sample tags, which mix
// CJK, upper/lower/camel-case Latin and digits in both orders.
if (!$tags)
{
    $tags = [
        '日本語123',
        'ABCD123',
        'Abcd123',
        'AbCd123',
        'abcd123',
        '123日本語',
        '123ABCD',
        '123Abcd',
        '123AbCd',
        '123abcd',
    ];
}

# Ut::debug_print_r($tags);

/**
 * Runs $text through the show_spaces pattern chain and renders the outcome
 * into the template.
 *
 * Each pattern is applied in order with preg_replace() using the /u modifier;
 * the output of one pattern is the input of the next.
 *
 * @param string $text  tag to process
 * @param mixed  $debug truthy: enter the 'debug_' template pattern and emit one
 *                      row per applied pattern; falsy: enter 'result_' and only
 *                      set the input text and the final result
 */
$addspaces = function ($text, $debug) use ($tpl)
{
    $i = 1;

    // [regex body (wrapped in /.../u below), replacement]
    $patterns = [
        ['(' . self::PATTERN['ALPHANUM'] . ')(' . self::PATTERN['UPPERNUM'] . ')',                                                      '\\1' . NBSP . '\\2'],
        ['(' . self::PATTERN['UPPERNUM'] . ')(' . self::PATTERN['UPPERNUM'] . ')',                                                      '\\1' . NBSP . '\\2'],
        ['(' . self::PATTERN['ALPHANUM'] . ')\/',                                                                                       '\\1' . NBSP . '/'],
        ['(' . self::PATTERN['UPPER'] . ')' . NBSP . '(?=' . self::PATTERN['UPPER'] . NBSP . self::PATTERN['UPPERNUM'] . ')',           '\\1'],
        ['(' . self::PATTERN['UPPER'] . ')' . NBSP . '(?=' . self::PATTERN['UPPER'] . NBSP . '\/)',                                     '\\1'],
        ['\/(' . self::PATTERN['ALPHANUM'] . ')',                                                                                       '/' . NBSP . '\\1'],
        ['(' . self::PATTERN['UPPERNUM'] . ')' . NBSP . '(' . self::PATTERN['UPPERNUM'] . ')($|\b)',                                    '\\1\\2'],
        # ['(\d)(' . self::PATTERN['ALPHA'] . ')',                                                                                '\\1' . NBSP . '\\2'],
        # ['(' . self::PATTERN['ALPHA'] . ')(\d)',                                                                                '\\1' . NBSP . '\\2'],
        # ['(\d)' . NBSP . '(?=\d)',                                                                                            '\\1'],
        ['(\d)' . NBSP . '(?!' . self::PATTERN['ALPHA'] . ')',                                                                          '\\1'],
    ];

    // pick the output pattern depending on the debug flag and show the input
    $tpl->enter($debug ? 'debug_' : 'result_');
    $tpl->text = $text;
    $tpl->enter('l_');

    foreach ($patterns as $pattern)
    {
        // preg_replace() returns null on failure (e.g. malformed UTF-8 with /u);
        // keep the last good text instead of feeding null into the next pattern
        $text = preg_replace('/' . $pattern[0] . '/u', $pattern[1], $text) ?? $text;

        if ($debug)
        {
            // one row per pattern: running number, regex, replacement, text so far
            $tpl->n         = $i;
            $tpl->pattern0  = $pattern[0];
            $tpl->pattern1  = $pattern[1];
            $tpl->text      = $text;

            $i++;
        }
    }

    $tpl->leave();
    $tpl->result = $text;
    $tpl->leave();
};

// Run every tag through the pattern chain.
foreach ($tags as $tag)
{
    // result is not assigned - presumably sanitize_page_tag() adjusts $tag in place; confirm
    $this->sanitize_page_tag($tag);
    $addspaces($tag, $debug);
}

action/template/test_addspaces.tpl

[ === main === ]
	[ ' help ' ]
	<table class="hl-line">
		[= debug _ =
			<tr>
				<th>Pattern</th>
				<td></td>
				<td></td>
				<td><strong>[ ' text | e ' ]:</strong></td>
			</tr>
			[= l _ =
				<tr>
					<td>[ ' n ' ]. <code>[ ' pattern0 | e ' ]</code></td>
					<td>⇨ <code>[ ' pattern1 | e ' ]</code></td>
					<td>&nbsp;&nbsp;↳&nbsp;&nbsp;</td>
					<td>[ ' text | e ' ]</td>
				</tr>
			=]
			<tr>
				<td colspan=4><hr><br></td>
			</tr>
		=]
		[= result _ =
			<tr>
				<td>[ ' text | e ' ]</td>
				<td>&nbsp;&nbsp;→&nbsp;&nbsp;</td>
				<td>[ ' result | e ' ]</td>
			</tr>
		=]
	</table>	

2. 4.2


<?php

// Pattern chain quoted under the "4.2" heading: ten successive preg_replace()
// passes over $text, inserting or removing the literal entity "&nbsp;".
// Character classes come from $this->language[...]; no /u modifier is used here.
$text = preg_replace("/(".$this->language["ALPHANUM"].")(".$this->language["UPPERNUM"].")/","\\1&nbsp;\\2",$text);
$text = preg_replace("/(".$this->language["UPPERNUM"].")(".$this->language["UPPERNUM"].")/","\\1&nbsp;\\2",$text);
$text = preg_replace("/(".$this->language["ALPHANUM"].")\//","\\1&nbsp;/",$text);
$text = preg_replace("/(".$this->language["UPPER"].")&nbsp;(?=".$this->language["UPPER"]."&nbsp;".$this->language["UPPERNUM"].")/","\\1",$text);
$text = preg_replace("/(".$this->language["UPPER"].")&nbsp;(?=".$this->language["UPPER"]."&nbsp;\/)/","\\1",$text);
$text = preg_replace("/\/(".$this->language["ALPHANUM"].")/","/&nbsp;\\1",$text);
$text = preg_replace("/(".$this->language["UPPERNUM"].")&nbsp;(".$this->language["UPPERNUM"].")($|\b)/","\\1\\2",$text);
// digit/letter boundaries: separate them, then re-join digits split from digits
$text = preg_replace("/([0-9])(".$this->language["ALPHA"].")/","\\1&nbsp;\\2",$text);
$text = preg_replace("/(".$this->language["ALPHA"].")([0-9])/","\\1&nbsp;\\2",$text);
$text = preg_replace("/([0-9])&nbsp;(?=[0-9])/","\\1",$text);

3. 6.1


<?php

// Pattern list quoted under the "6.1" heading.
// Each entry is [regex body, replacement]; character classes come from
// self::PATTERN[...] and the separator is the NBSP constant.
$patterns = [
    ['(' . self::PATTERN['ALPHANUM'] . ')(' . self::PATTERN['UPPERNUM'] . ')',                                                      '\\1' . NBSP . '\\2'],
    ['(' . self::PATTERN['UPPERNUM'] . ')(' . self::PATTERN['UPPERNUM'] . ')',                                                      '\\1' . NBSP . '\\2'],
    ['(' . self::PATTERN['ALPHANUM'] . ')\/',                                                                                       '\\1' . NBSP . '/'],
    ['(' . self::PATTERN['UPPER'] . ')' . NBSP . '(?=' . self::PATTERN['UPPER'] . NBSP . self::PATTERN['UPPERNUM'] . ')',           '\\1'],
    ['(' . self::PATTERN['UPPER'] . ')' . NBSP . '(?=' . self::PATTERN['UPPER'] . NBSP . '\/)',                                     '\\1'],
    ['\/(' . self::PATTERN['ALPHANUM'] . ')',                                                                                       '/' . NBSP . '\\1'],
    ['(' . self::PATTERN['UPPERNUM'] . ')' . NBSP . '(' . self::PATTERN['UPPERNUM'] . ')($|\b)',                                    '\\1\\2'],
    # ['(\d)(' . self::PATTERN['ALPHA'] . ')',                                                                                '\\1' . NBSP . '\\2'],
    # ['(' . self::PATTERN['ALPHA'] . ')(\d)',                                                                                '\\1' . NBSP . '\\2'],
    # ['(\d)' . NBSP . '(?=\d)',                                                                                            '\\1'],
    ['(\d)' . NBSP . '(?!' . self::PATTERN['ALPHA'] . ')',                                                                          '\\1'],
];