View source for Chat highlighter

{{toc numerate=1}}
Supports various date-username-message chat log formats.

##""%%(chat type="[div|table]" user=[0|1]) 
chat log
%%""##

%%(chat type="table")
[12:04] User One: Message text by User.
[12:05] User Two: Message text by Another User. Link in text https://example.com/
[12:10] User Three: Message text by Yet Another User.
With
several lines
of text!
%%

source:master/src/formatter/highlight/chat.php

===CSS===
wacko.css
%%(css)
/** Chat-related *************************************************************/

/* SYNTAX % %(chat)% % */

/* Cells of the table layout: user name, time stamp and message text */
.chat-user { padding: 2px; vertical-align: top; white-space: nowrap; font-weight: bold  }
.chat-time { padding: 2px; vertical-align: top; white-space: nowrap; font: 12px tahoma, arial }
.chat-text { padding: 2px; vertical-align: top; width: 100% }

/* Per-participant row backgrounds: the highlighter numbers the participants in order
   of first appearance and puts the class chat-uN on every row of participant N */
.chat-u1 { background: #EEFFEE; }	/* green */
.chat-u2 { background: #EEEEFF; }	/* lilac */
.chat-u3 { background: #F8F6C9; }	/* yellow */
.chat-u4 { background: #D6EEFA; }	/* light blue */
.chat-u5 { background: #FFEEEE; }	/* pink */
.chat-u6 { background: #B3B8F4; }	/* blue */
.chat-u7 { background: #F9E8CD; }	/* orange */
.chat-u8 { background: #ECB3D2; }	/* violet */
%%

formatter/highlight/chat.php
%%(php)
<?php
/*
	Chat highlighter
	Supports various date-username-message chat log formats (see regexp description below for details)
	Installation: copy to /src/formatter/highlight/chat.php

	Usage:

	% %(chat)
	[12:04:34 01.05.2010] User: Message text by User.
	[13.01.2001 00:00:00] Another User: Message text by Another User. Link in text https://example.com/
	(03:01 AM 10 Jan 1980) Yet Another User: Message text by Yet Another User.
	With
	several lines
	of text!
	(00.00.0000 00:00) <Fourth User> : Message text by Fourth User.
	[00.00.0000 00:00] <Fifth User>: Message text by Fifth User.
	[00:00 00.00.0000] <Sixth User> Message text by Sixth User.
	(00:00 00.00.0000) <Seventh User> Message text by Seventh User.
	and many others including multiline ones
	(00:00:00 13.01.2001) EighthUser: Message text by EighthUser.
	% %
 */

// Defaults for options omitted in the highlighter call:
//   type - output layout: 'div' renders <div> rows, any other value renders a table
//   user - when truthy, the list of chat participants is printed above the log
if (!isset($options['type']))		$options['type']	= 'default';
if (!isset($options['user']))		$options['user']	= 0;

// sanitize $text: everything the user typed as <, > or & is an HTML entity from here on
$text		= htmlspecialchars($text);
$output		= '';

// replace text links with HTML anchors; rel="noopener noreferrer" keeps a page opened
// through target="_blank" from reaching back to this one via window.opener
$text		= preg_replace('/\b(https?|ftp|file|nntp|telnet):\/\/\S+/u', '<a href="\\0" target="_blank" rel="noopener noreferrer">\\0</a>', $text);

// The optional angle brackets around a username are matched in their escaped form
// (&lt; and &gt;): $text has been through htmlspecialchars() above, so a typed
// "<User>" arrives here as "&lt;User&gt;" and a literal < or > would never match.
// Excluding & from the username group is what ends the name right before "&gt;".
// The explanation that follows spells the pattern with plain < and > for readability.
$pattern	= '/^[\[\(]([^\r\n\]\)]*)[\]\)]\s*(&lt;)?([^:\&\r\n]*)(&gt;)?\s*:?((?:(?!^[\[\(]).*(?:\r?\n)*)*)/um';
/*
	Explanation by https://regex101.com/

	Main string parsing regexp is:
	/^[\[\(]([^\r\n\]\)]*)[\]\)]\s*(<)?([^:\&\r\n]*)(>)?\s*:?((?:(?!^[\[\(]).*(?:\r?\n)*)*)/m
		^ asserts position at start of a line
		Match a single character present in the list below [\[\(]
		\[ matches the character [ literally (case sensitive)
		\( matches the character ( literally (case sensitive)
	1st Capturing Group ([^\r\n\]\)]*) -- DATE
		Match a single character not present in the list below [^\r\n\]\)]*
		* Quantifier — Matches between zero and unlimited times, as many times as possible, giving back as needed (greedy)
		\r matches a carriage return (ASCII 13)
		\n matches a line-feed (newline) character (ASCII 10)
		\] matches the character ] literally (case sensitive)
		\) matches the character ) literally (case sensitive)
		Match a single character present in the list below [\]\)]
		\] matches the character ] literally (case sensitive)
		\) matches the character ) literally (case sensitive)
		\s* matches any whitespace character (equal to [\r\n\t\f\v ])
		* Quantifier — Matches between zero and unlimited times, as many times as possible, giving back as needed (greedy)
	2nd Capturing Group (<)?
		? Quantifier — Matches between zero and one times, as many times as possible, giving back as needed (greedy)
		< matches the characters < literally (case sensitive)
	3rd Capturing Group ([^:\&\r\n]*) -- USERNAME
		Match a single character not present in the list below [^:\&\r\n]*
		* Quantifier — Matches between zero and unlimited times, as many times as possible, giving back as needed (greedy)
		: matches the character : literally (case sensitive)
		\& matches the character & literally (case sensitive)
		\r matches a carriage return (ASCII 13)
		\n matches a line-feed (newline) character (ASCII 10)
	4th Capturing Group (>)?
		? Quantifier — Matches between zero and one times, as many times as possible, giving back as needed (greedy)
		> matches the characters > literally (case sensitive)
		\s* matches any whitespace character (equal to [\r\n\t\f\v ])
		* Quantifier — Matches between zero and unlimited times, as many times as possible, giving back as needed (greedy)
		:? matches the character : literally (case sensitive)
	5th Capturing Group ((?:(?!^[\[\(]).*(?:\r?\n)*)*) -- MESSAGE
	Global pattern flags
		m modifier: multi line. Causes ^ and $ to match the begin/end of each line (not only begin/end of string)

	If you decide to test this $pattern at https://regex101.com/, please add the g modifier (the flags become /mg) so that the regexp is applied to the whole text.

	This $pattern matches:
		[12:04:34 01.05.2010] User: Message text by User.
		[13.01.2001 00:00:00] Another User: Message text by Another User. Link in text https://example.com/
		(03:01 AM 10 Jan 1980) Yet Another User: Message text by Yet Another User.
		With
		several lines
		of text!
		(00.00.0000 00:00) <Fourth User> : Message text by Fourth User.
		[00.00.0000 00:00] <Fifth User>: Message text by Fifth User.
		[00:00 00.00.0000] <Sixth User> Message text by Sixth User.
		(00:00 00.00.0000) <Seventh User> Message text by Seventh User.
		and many others including multiline ones
		(00:00:00 13.01.2001) EighthUser: Message text by EighthUser.
		and many others including multiline ones

	Tested with Whatsapp logs.
 */

// Parse $text into one entry per chat message; inside an entry
// index 1 is the date, index 3 the username and index 5 the message.
preg_match_all($pattern, $text, $matches, PREG_SET_ORDER);

// Number the participants in order of first appearance (1, 2, 3, ...):
// $names maps each username to its number.
$names	= [];
$c		= 1;

foreach ($matches as $match)
{
	// a username that already has a number keeps it
	if (array_key_exists($match[3], $names))
	{
		continue;
	}

	$names[$match[3]] = $c;
	$c++;
}

// Render each parsed entry as one row of HTML, taking from the entry:
//		[1] - date,
//		[3] - username,
//		[5] - message
// Every row carries the class chat-uN, N being the number stored for the username in $names.
foreach ($matches as $entry)
{
	$row_class = 'chat-u' . $names[$entry[3]];

	if ($options['type'] != 'div')
	{
		// table layout (any type other than 'div'): user | text | time
		$output .= '<tr class="' . $row_class . '">'
			. '<td class="chat-user">' . $entry[3] . '</td>'
			. '<td class="chat-text">' . $entry[5] . '</td>'
			. '<td class="chat-time">' . $entry[1] . '</td>'
			. '</tr>';

		continue;
	}

	// div layout: [time] user: text
	$output .= '<div class="' . $row_class . '">'
		. '[' . $entry[1] . '] <b>' . $entry[3] . '</b>: ' . $entry[5]
		. '</div>';
}

// turn the line feeds of multiline messages into <br> so the line breaks show up in HTML
$output = str_replace("\n", '<br>', $output);

// Optional header line: the chat participants, sorted by name with ksort() and comma separated.
if ($options['user'])
{
	ksort($names);

	// joining the keys and trimming ',' / ' ' at both ends gives the same string
	// as appending "name, " for every participant and trimming afterwards
	$people = trim(implode(', ', array_keys($names)), ', ');

	echo '<p><b>' . $people . ':</b></p>';
}

// Wrap the rendered rows: a plain <div> for the div layout, a <table> inside a <div> otherwise.
echo $options['type'] == 'div'
	? '<div>' . $output . '</div>'
	: '<div><table>' . $output . '</table></div>';


%%

===beauty test===
%%(chat type="table")
[12:04:34 01.05.2010] User: Message text by User.
[13.01.2001 00:00:00] Another User: Message text by Another User. Link in text https://example.com/
(03:01 AM 10 Jan 1980) Yet Another User: Message text by Yet Another User.
With
several lines
of text!
(00.00.0000 00:00) <Fourth User> : Message text by Fourth User.
[00.00.0000 00:00] <Fifth User>: Message text by Fifth User.
[00:00 00.00.0000] <Sixth User> Message text by Sixth User.
(00:00 00.00.0000) <Seventh User> Message text by Seventh User.
and many others including multiline ones
(00:00:00 13.01.2001) EighthUser: Message text by EighthUser.
%%
----

%%(chat type="div" user=1)
[12:04:34 01.05.2010] User: Message text by User.
[13.01.2001 00:00:00] Another User: Message text by Another User. Link in text https://example.com/
(03:01 AM 10 Jan 1980) Yet Another User: Message text by Yet Another User.
With
several lines
of text!
[12:04:34 01.05.2010] User: Message text by User.
(00.00.0000 00:00) <Fourth User> : Message text by Fourth User.
[00.00.0000 00:00] <Fifth User>: Message text by Fifth User.
[00:00 00.00.0000] <Sixth User> Message text by Sixth User.
(00:00 00.00.0000) <Seventh User> Message text by Seventh User.
and many others including multiline ones
(00:00:00 13.01.2001) EighthUser: Message text by EighthUser.
%%
Add more lipstick! 
  1. --highlight row color tr::hover: ...--
  1. --lighter colors, see styles for info formatter--

===Suggestions===
  * add more styles
  * set output time format as argument
  * add other output layouts, e.g. user time \n text