Add Spaces Revisited
Revisiting the patterns of the add_nbsps() and add_spaces() functions.
They are activated by the show_spaces option in the config as well as in the user settings.
What should it do, and what should it not do? Now that we have Unicode, some patterns may be useful only in a certain context; for example, we can set different patterns for Chinese if we have to.
I built a test action and fed it different test data to show what the text looks like after each pattern iteration.
Additionally, we should define a set of tags to determine in which context the result should differ.
- Latin
- Cyrillic
- Chinese
- Farsi
- Hindi
Example tags:
日本語123
ABCD123
Abcd123
AbCd123
abcd123
123日本語
123ABCD
123Abcd
123AbCd
123abcd
ABCD123
Abcd123
AbCd123
abcd123
123日本語
123ABCD
123Abcd
123AbCd
123abcd
Test Action
action/test_addspaces.php
<?php

if (!defined('IN_WACKO'))
{
	exit;
}

/*
 * Test action: runs a list of tags through the show_spaces replacement
 * patterns and hands the input, the final result and (with debug=1) the
 * intermediate text after every single pattern to the template.
 */

$info = <<<EOD
Description:
	Shows tags with applied show_spaces setting.

Usage:
	{{test_addspaces}}

Options:
	[set="tag1, tag2, ..."]
		a comma-delimited list of tag names
	[debug=0|1]
		shows result for each processed pattern
EOD;

// set defaults
$help	??= 0;
$set	??= '';
$debug	??= 0;

if ($help)
{
	// NOTE(review): this used to pass 'blog', which looks like a copy-paste
	// leftover; assumes help() expects this action's own name -- confirm.
	$tpl->help = $this->help($info, 'test_addspaces');
	return;
}

// always defined, so the fallback check below never reads an undefined variable
$tags = [];

if ($set)
{
	// split the comma-delimited list, trim every entry and drop empty ones
	// (e.g. from "a,,b" or a trailing comma); sanitizing is done per tag in
	// the final loop
	$tags = array_values(array_filter(
		array_map('trim', explode(',', $set)),
		static fn ($tag) => $tag !== ''
	));
}

if (!$tags)
{
	// built-in sample set: letters followed by digits and digits followed by letters
	$tags = [
		'日本語123',
		'ABCD123',
		'Abcd123',
		'AbCd123',
		'abcd123',
		'123日本語',
		'123ABCD',
		'123Abcd',
		'123AbCd',
		'123abcd',
	];
}

# Ut::debug_print_r($tags);

/*
 * Applies every [search, replacement] pair to $text in order and feeds the template.
 *
 * $text   tag to process
 * $debug  truthy: fill the 'debug_' block with one row per pattern,
 *         otherwise fill the 'result_' block
 */
$addspaces = function ($text, $debug) use ($tpl)
{
	// order matters: later patterns work on the NBSPs inserted by earlier ones
	$patterns = [
		['(' . self::PATTERN['ALPHANUM'] . ')(' . self::PATTERN['UPPERNUM'] . ')',										'\\1' . NBSP . '\\2'],
		['(' . self::PATTERN['UPPERNUM'] . ')(' . self::PATTERN['UPPERNUM'] . ')',										'\\1' . NBSP . '\\2'],
		['(' . self::PATTERN['ALPHANUM'] . ')\/',																		'\\1' . NBSP . '/'],
		['(' . self::PATTERN['UPPER'] . ')' . NBSP . '(?=' . self::PATTERN['UPPER'] . NBSP . self::PATTERN['UPPERNUM'] . ')',	'\\1'],
		['(' . self::PATTERN['UPPER'] . ')' . NBSP . '(?=' . self::PATTERN['UPPER'] . NBSP . '\/)',						'\\1'],
		['\/(' . self::PATTERN['ALPHANUM'] . ')',																		'/' . NBSP . '\\1'],
		['(' . self::PATTERN['UPPERNUM'] . ')' . NBSP . '(' . self::PATTERN['UPPERNUM'] . ')($|\b)',					'\\1\\2'],
		// disabled variants, kept for experiments
		# ['(\d)(' . self::PATTERN['ALPHA'] . ')',																		'\\1' . NBSP . '\\2'],
		# ['(' . self::PATTERN['ALPHA'] . ')(\d)',																		'\\1' . NBSP . '\\2'],
		# ['(\d)' . NBSP . '(?=\d)',																					'\\1'],
		['(\d)' . NBSP . '(?!' . self::PATTERN['ALPHA'] . ')',															'\\1'],
	];

	$tpl->enter($debug ? 'debug_' : 'result_');
	$tpl->text = $text;

	$tpl->enter('l_');

	foreach ($patterns as $n => [$search, $replace])
	{
		$replaced = preg_replace('/' . $search . '/u', $replace, $text);

		// preg_replace() returns null on a PCRE error (e.g. invalid UTF-8 with
		// the u modifier); keep the previous text instead of wiping it
		if ($replaced !== null)
		{
			$text = $replaced;
		}

		if ($debug)
		{
			$tpl->n			= $n + 1;
			$tpl->pattern0	= $search;
			$tpl->pattern1	= $replace;
			$tpl->text		= $text;
		}
	}

	$tpl->leave();

	$tpl->result = $text;
	$tpl->leave();
};

foreach ($tags as $tag)
{
	// NOTE(review): the return value is ignored, so this presumably sanitizes
	// $tag by reference -- confirm against the method's signature
	$this->sanitize_page_tag($tag);
	$addspaces($tag, $debug);
}
action/template/test_addspaces.tpl
[ === main === ]
	[ ' help ' ]
	<table class="hl-line">
	[= debug _ =
		<tr>
			<th>Pattern</th>
			<td></td>
			<td></td>
			<td><strong>[ ' text | e ' ]:</strong></td>
		</tr>
		[= l _ =
			<tr>
				<td>[ ' n ' ]. <code>[ ' pattern0 | e ' ]</code></td>
				<td>⇨ <code>[ ' pattern1 | e ' ]</code></td>
				<td> ↳ </td>
				<td>[ ' text | e ' ]</td>
			</tr>
		=]
		<tr>
			<td colspan=4><hr><br></td>
		</tr>
	=]
	[= result _ =
		<tr>
			<td>[ ' text | e ' ]</td>
			<td> → </td>
			<td>[ ' result | e ' ]</td>
		</tr>
	=]
	</table>
4.2
<?php
// Spacing rules as quoted for version 4.2. The calls run strictly in this
// order and each one rewrites $text, so later rules see the spaces inserted
// (or removed) by earlier ones. The character classes come from
// $this->language[...]; their contents are not shown here.

// 1. insert a space between an ALPHANUM match and a directly following UPPERNUM match
$text = preg_replace("/(".$this->language["ALPHANUM"].")(".$this->language["UPPERNUM"].")/","\\1 \\2",$text);
// 2. insert a space between two adjacent UPPERNUM matches
$text = preg_replace("/(".$this->language["UPPERNUM"].")(".$this->language["UPPERNUM"].")/","\\1 \\2",$text);
// 3. insert a space between an ALPHANUM match and a following slash
$text = preg_replace("/(".$this->language["ALPHANUM"].")\//","\\1 /",$text);
// 4. drop the space after an UPPER match when the lookahead finds: UPPER, space, UPPERNUM
$text = preg_replace("/(".$this->language["UPPER"].") (?=".$this->language["UPPER"]." ".$this->language["UPPERNUM"].")/","\\1",$text);
// 5. drop the space after an UPPER match when the lookahead finds: UPPER, space, slash
$text = preg_replace("/(".$this->language["UPPER"].") (?=".$this->language["UPPER"]." \/)/","\\1",$text);
// 6. insert a space between a slash and a following ALPHANUM match
$text = preg_replace("/\/(".$this->language["ALPHANUM"].")/","/ \\1",$text);
// 7. remove the space between two UPPERNUM matches when the second one is followed by end of string or a word boundary
$text = preg_replace("/(".$this->language["UPPERNUM"].") (".$this->language["UPPERNUM"].")($|\b)/","\\1\\2",$text);
// 8. insert a space between an ASCII digit and a following ALPHA match
$text = preg_replace("/([0-9])(".$this->language["ALPHA"].")/","\\1 \\2",$text);
// 9. insert a space between an ALPHA match and a following ASCII digit
$text = preg_replace("/(".$this->language["ALPHA"].")([0-9])/","\\1 \\2",$text);
// 10. remove a space that sits between two ASCII digits
$text = preg_replace("/([0-9]) (?=[0-9])/","\\1",$text);
6.1
<?php
// Spacing rules as quoted for version 6.1: a list of [search, replacement]
// pairs. The search parts are regex fragments without delimiters or
// modifiers. NBSP is a constant defined elsewhere (presumably the
// non-breaking space -- confirm); self::PATTERN[...] holds the character
// classes, whose contents are not shown here.
$patterns =[
	// insert NBSP between an ALPHANUM match and a directly following UPPERNUM match
	['(' . self::PATTERN['ALPHANUM'] . ')(' . self::PATTERN['UPPERNUM'] . ')', '\\1' . NBSP . '\\2'],
	// insert NBSP between two adjacent UPPERNUM matches
	['(' . self::PATTERN['UPPERNUM'] . ')(' . self::PATTERN['UPPERNUM'] . ')', '\\1' . NBSP . '\\2'],
	// insert NBSP between an ALPHANUM match and a following slash
	['(' . self::PATTERN['ALPHANUM'] . ')\/', '\\1' . NBSP . '/'],
	// drop the NBSP after an UPPER match when the lookahead finds: UPPER, NBSP, UPPERNUM
	['(' . self::PATTERN['UPPER'] . ')' . NBSP . '(?=' . self::PATTERN['UPPER'] . NBSP . self::PATTERN['UPPERNUM'] . ')', '\\1'],
	// drop the NBSP after an UPPER match when the lookahead finds: UPPER, NBSP, slash
	['(' . self::PATTERN['UPPER'] . ')' . NBSP . '(?=' . self::PATTERN['UPPER'] . NBSP . '\/)', '\\1'],
	// insert NBSP between a slash and a following ALPHANUM match
	['\/(' . self::PATTERN['ALPHANUM'] . ')', '/' . NBSP . '\\1'],
	// remove the NBSP between two UPPERNUM matches when the second one is followed by end of string or a word boundary
	['(' . self::PATTERN['UPPERNUM'] . ')' . NBSP . '(' . self::PATTERN['UPPERNUM'] . ')($|\b)', '\\1\\2'],
	// remove the NBSP after a digit unless an ALPHA match follows (negative lookahead)
	['(\d)' . NBSP . '(?!' . self::PATTERN['ALPHA'] . ')', '\\1'],
];