Source for file HtmlString.class.php

Documentation is available at HtmlString.class.php

  1. <?php
  2.  
  3. require_once(dirname(__FILE__)."/String.class.php");
  4.  
  5. /**
  6. * A HtmlString data type. This class allows for HTML-safe string shortening.
  7. *
  8. * @package harmoni.primitives.collections-text
  9. *
  10. * @copyright Copyright &copy; 2005, Middlebury College
  11. * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License (GPL)
  12. *
  13. * @version $Id: HtmlString.class.php,v 1.14 2007/09/04 20:25:26 adamfranco Exp $
  14. */
  15. class HtmlString
  16. extends String
  17. {
  18. var $_children;
  /**
   * Create a new HtmlString holding the passed value.
   *
   * The value is cast to a string and stored in $this->_string.
   *
   * @param string $string The initial text; defaults to the empty string.
   * @access public
   */
  function __construct($string = "") {
      $this->_string = (string) $string;
  }

  /**
   * Legacy class-named constructor.
   *
   * Methods named after their class are no longer treated as constructors
   * as of PHP 8, so the real initialisation lives in __construct(). This
   * method is kept (declared after __construct()) so that code which calls
   * HtmlString() explicitly keeps working.
   *
   * @param string $string The initial text; defaults to the empty string.
   * @access public
   */
  function HtmlString($string = "") {
      $this->__construct($string);
  }
  /**
   * Build an HtmlString that wraps the passed value.
   *
   * Does not read any instance state; documented for static-style calls.
   *
   * @param string $value The text to wrap.
   * @return ref object A newly created HtmlString.
   * @access public
   * @static
   */
  function withValue($value) {
      return new HtmlString($value);
  }
  33.  
  /**
   * Build an HtmlString from a plain string value.
   *
   * Equivalent to withValue(); provided so that string values can be
   * converted through the 'fromString' name.
   *
   * @param string $aString The text to wrap.
   * @return ref object A newly created HtmlString.
   * @access public
   * @static
   */
  function fromString($aString) {
      return new HtmlString($aString);
  }
  47.  
  /**
   * Shorten the string to a number of words, preserving HTML tags
   * while enforcing the closing of html tags.
   *
   * The text is scanned byte by byte:
   * - Markup (tags, comments, CDATA sections) is copied to the output and
   *   never counts towards the word limit.
   * - A word is counted each time a space, tab, CR or LF follows text.
   * - '<', '>' and '&' that are not part of markup or of a character
   *   reference are escaped to '&lt;', '&gt;' and '&amp;'.
   * - Opening tags are pushed on a stack; every closing tag closes the tag
   *   on top of that stack, and tags still open at the end are closed.
   *
   * The result replaces $this->_string.
   *
   * @param integer $numWords Maximum number of words to keep.
   * @param boolean $addElipses If true, '...' is appended when text was
   *        left out.
   * @return void
   * @access public
   * @since 12/12/05
   */
  function trim ( $numWords, $addElipses = true ) {
      $source = (string) $this->_string;
      $length = strlen($source);
      $tags = array();    // stack of the names of the currently open tags
      $wordCount = 0;
      $output = '';
      $inWord = false;

      for ($i = 0; $i < $length && $wordCount < $numWords; $i++) {
          $char = $source[$i];
          switch ($char) {
              case '>':
                  // A '>' met outside of a tag is plain text.
                  $inWord = true;
                  $output .= '&gt;';
                  break;

              case '<':
                  // CDATA sections are copied through untouched. The section
                  // ends at the first ']]>' (or '}}>', which has always been
                  // accepted here) or at the end of the text, and is always
                  // closed with ']]>' in the output.
                  if (substr($source, $i, 9) === '<![CDATA[') {
                      $end = $length;
                      foreach (array(']]>', '}}>') as $terminator) {
                          $position = strpos($source, $terminator, $i);
                          if ($position !== false && $position < $end)
                              $end = $position;
                      }
                      $output .= substr($source, $i, $end - $i).']]>';
                      // Park $i on the terminator's '>'; the loop's $i++
                      // then steps past it.
                      $i = $end + 2;
                      break;
                  }

                  // Terminated comments are copied verbatim up to their
                  // '-->', so a '<' or '>' inside the comment cannot cut it
                  // short. Unterminated comments fall through to the generic
                  // handling below.
                  if (substr($source, $i, 4) === '<!--') {
                      $end = strpos($source, '-->', $i + 2);
                      if ($end !== false) {
                          $output .= substr($source, $i, $end + 3 - $i);
                          $i = $end + 2;
                          break;
                      }
                  }

                  // A '<' that does not start a tag is plain text.
                  if ($this->isInvalidLessThan($source, $i)) {
                      $inWord = true;
                      $output .= '&lt;';
                      break;
                  }

                  // We are at a tag.
                  $tag = $this->getTag($source, $i);
                  $isCloseTag = ($source[$i + 1] == '/');
                  $isSingleTag = $this->isSingleTag($source, $i);

                  // Copy the tag up to and including its '>', escaping any
                  // '&' that is not a character reference. $i is left on
                  // the '>' so that the loop's $i++ continues after the tag.
                  $tagHtml = '';
                  while ($i < $length) {
                      $char = $source[$i];
                      if ($char == '&' && !$this->_isCharacterReferenceAt($source, $i))
                          $tagHtml .= '&amp;';
                      else
                          $tagHtml .= $char;
                      if ($char == '>')
                          break;
                      $i++;
                  }

                  if ($tag == 'comment') {
                      $output .= $tagHtml;
                  } else if ($isCloseTag) {
                      // Whatever the closing tag says, close the tag that is
                      // on top of the stack; drop it if nothing is open.
                      if (count($tags))
                          $output .= '</'.array_pop($tags).'>';
                  } else if ($isSingleTag) {
                      // Enforce trailing slashes in single tags for more
                      // valid HTML.
                      if (substr($tagHtml, -2, 1) != '/')
                          $tagHtml = substr($tagHtml, 0, -1).'/>';
                      $output .= $tagHtml;
                  } else {
                      $output .= $this->ensureNesting($tag, $tags);
                      array_push($tags, $tag);
                      $output .= $tagHtml;
                  }
                  break;

              case " ":
              case "\n":
              case "\r":
              case "\t":
                  // Whitespace directly after text completes a word.
                  if ($inWord) {
                      $wordCount++;
                      $inWord = false;
                  }
                  $output .= $char;
                  break;

              case "&":
                  $inWord = true;
                  if ($this->_isCharacterReferenceAt($source, $i))
                      $output .= $char;
                  else
                      $output .= '&amp;';
                  break;

              default:
                  $inWord = true;
                  $output .= $char;
          }
      }

      // trim off any trailing whitespace
      $output = trim($output);

      // Elipses are only wanted when some of the text was left out.
      $addElipses = ($addElipses && $i < $length);

      // The elipses must not become a direct child of a tag that may only
      // contain other tags, so count how many of the innermost open tags
      // have to be closed before the elipses can be printed.
      $tagsToSkip = 0;
      if ($addElipses) {
          $nestingTags = array("table", "thead", "tbody", "tfoot", "tr",
              "ul", "ol", "dl", "select", "optgroup", "![CDATA[");
          for ($j = count($tags) - 1; $j >= 0; $j--) {
              if (!in_array($tags[$j], $nestingTags))
                  break;
              $tagsToSkip++;
          }
      }

      // Close every tag that is still open, innermost first. The loop is
      // driven by the stack size so that a tag name PHP treats as false
      // (e.g. "0") cannot end the unwinding early.
      while (count($tags) > 0) {
          $tag = array_pop($tags);
          if ($addElipses && $tagsToSkip === 0)
              $output .= dgettext('harmoni', '...');
          $tagsToSkip--;
          $output .= '</'.$tag.'>';
      }
      // Reached with $tagsToSkip === 0 when no tag was open at all or when
      // every open tag had to be skipped.
      if ($addElipses && $tagsToSkip === 0)
          $output .= dgettext('harmoni', '...');

      $this->_string = $output;
  }

  /**
   * Answer true if a character reference starts at the given index.
   *
   * Accepted are named references ('&amp;'), decimal references ('&#8217;')
   * and hexadecimal references ('&#x2019;').
   *
   * @param string $source
   * @param integer $offset // index of the '&'
   * @return boolean
   * @access private
   */
  function _isCharacterReferenceAt ( $source, $offset ) {
      return (bool) preg_match(
          '/^&(#[0-9]{1,7}|#[xX][0-9a-fA-F]{1,6}|[a-zA-Z][a-zA-Z0-9]{1,20});/',
          substr($source, $offset, 25));
  }
  /**
   * Ensure that td tags are inside of tr's, etc.
   *
   * If $tag may only appear inside certain parent tags and the tag on top
   * of the stack is not one of them, the missing wrapper tag(s) are pushed
   * onto the stack and their opening HTML is returned so that the caller
   * can print it in front of $tag. $tag itself is not pushed.
   *
   * $tags is taken by reference: the wrappers that are opened here have to
   * end up on the caller's stack, otherwise they would never be closed.
   * Callers must therefore pass a variable, not a literal.
   *
   * @param string $tag The tag that is about to be opened.
   * @param ref array $tags The stack of currently open tag names.
   * @return string The opening HTML of any wrappers added, or ''.
   * @access public
   * @since 1/27/06
   */
  function ensureNesting ($tag, &$tags) {
      // tag => array(allowed direct parents, wrapper to open otherwise)
      $tableParts = array(array('table'), 'table');
      $rules = array(
          'th'       => array(array('tr'), 'tr'),
          'td'       => array(array('tr'), 'tr'),
          'tr'       => array(array('table', 'tbody', 'thead', 'tfoot'), 'table'),
          'thead'    => $tableParts,
          'tbody'    => $tableParts,
          'tfoot'    => $tableParts,
          'li'       => array(array('ul', 'ol'), 'ul'),
          'dt'       => array(array('dl'), 'dl'),
          'dd'       => array(array('dl'), 'dl'),
          'option'   => array(array('select', 'optgroup'), 'select'),
          'optgroup' => array(array('select'), 'select'),
      );

      if (!isset($rules[$tag]))
          return '';

      $allowedParents = $rules[$tag][0];
      $wrapper = $rules[$tag][1];

      $lastTag = count($tags) ? $tags[count($tags) - 1] : null;
      if (in_array($lastTag, $allowedParents, true))
          return '';

      // The wrapper may itself need a wrapper (a 'tr' needs a 'table').
      $preString = '';
      if (isset($rules[$wrapper]))
          $preString = $this->ensureNesting($wrapper, $tags);

      $preString .= '<'.$wrapper.'>';
      array_push($tags, $wrapper);
      return $preString;
  }
  /**
   * Answer the text with its HTML tags stripped, shortened to a number of
   * words.
   *
   * Words are separated by any whitespace (spaces, tabs, line breaks), the
   * same separators that trim() uses. This method does not modify the
   * object.
   *
   * - If the stripped text has no more than $word_count words it is
   *   returned with surrounding whitespace removed and runs of spaces
   *   collapsed to a single space.
   * - Otherwise the first $word_count words are returned joined by single
   *   spaces and followed by '...' (only '..' is added when the last word
   *   kept already ends in a period).
   *
   * Originally posted to php.net forums
   * by webmaster at joshstmarie dot com (Sep-2005).
   * Modified by Adam Franco (afranco at middlebury dot edu).
   *
   * @param integer $word_count Maximum number of words to keep.
   * @return string
   * @access public
   * @since 11/21/05
   */
  function stripTagsAndTrim ($word_count) {
      $string = preg_replace("/\040+/", " ", trim(strip_tags($this->_string)));
      $words = preg_split('/\s+/', $string, -1, PREG_SPLIT_NO_EMPTY);

      // nothing to do, our string is smaller than the limit.
      if ($word_count >= count($words))
          return $string;

      // trim the string to the word count; a negative count keeps nothing.
      $trimmed = implode(' ', array_slice($words, 0, max(0, (int) $word_count)));

      if (substr($trimmed, -1) == '.')
          return $trimmed.'..';
      return $trimmed.'...';
  }
  /**
   * Clean up the html as much as possible.
   *
   * Runs trim() with the string's byte length as the word limit, so the
   * text is escaped and its tags are balanced by trim().
   *
   * @return void
   * @access public
   * @since 12/14/05
   */
  function clean () {
      // A word needs at least one byte of text plus the whitespace that
      // ends it, so the word count cannot reach the byte length.
      $wordLimit = strlen($this->_string);
      $this->trim($wordLimit);
  }
  /**
   * Answer the tag that starts at the given index.
   *
   * The answer is the tag's name without the leading '<' or '</', e.g.
   * 'p' for both '<p class="x">' and '</p>'. The name ends at the first
   * whitespace character, '/' or '>'. For a comment ('<!--') the string
   * 'comment' is answered. An empty string is answered when nothing
   * follows the '<' (or the '</').
   *
   * @param string $inputString
   * @param integer $tagStart // index of the opening '<'
   * @return string
   * @access private
   * @since 12/13/05
   */
  function getTag ( $inputString, $tagStart ) {
      $length = strlen($inputString);

      // Step over the '<' and, for a closing tag, over the '/' as well.
      $nameStart = $tagStart + 1;
      if ($nameStart < $length && $inputString[$nameStart] == '/')
          $nameStart++;

      if ($nameStart >= $length)
          return '';

      // Case for comments.
      if (substr($inputString, $nameStart, 3) === '!--')
          return 'comment';

      // The name runs up to the first whitespace, '/' or '>'; cutting at a
      // space only would make '<p' + newline + 'class="x">' answer the
      // attributes as part of the name.
      $nameLength = strcspn($inputString, " \t\r\n/>", $nameStart);
      return (string) substr($inputString, $nameStart, $nameLength);
  }
  /**
   * Answer true if the tag begining at $tagStart does not have a close-tag,
   * examples are <br/>, <hr/>, <img src=''/>
   *
   * True is answered for:
   * - comments ('<!-- ... -->'),
   * - tags that are written self-closed (ending in '/>'),
   * - HTML void elements written without the slash, such as <br>, <hr>,
   *   <img src=''> or <input> (the name is compared case-insensitively).
   *
   * Closing tags ('</...>') always answer false.
   *
   * @param string $inputString
   * @param integer $tagStart // index of the opening '<'
   * @return boolean
   * @access private
   * @since 12/13/05
   */
  function isSingleTag ( $inputString, $tagStart ) {
      $bodyStart = $tagStart + 1;

      // Nothing follows the '<'.
      if ($bodyStart >= strlen($inputString))
          return false;

      // if this is a close tag itself, return false
      if ($inputString[$bodyStart] == '/')
          return false;

      // comments never have a close tag
      if (substr($inputString, $bodyStart, 3) === '!--')
          return true;

      // if this is a tag that ends in '/>', return true. The '/' has to be
      // part of the tag, i.e. lie after the opening '<'.
      $nextClose = strpos($inputString, '>', $bodyStart);
      if ($nextClose !== false
          && $nextClose > $bodyStart
          && $inputString[$nextClose - 1] == '/')
          return true;

      // check the tag to allow exceptions for commonly invalid tags such as
      // <br>, <hr>, <img src=''>
      $tag = strtolower($this->getTag($inputString, $tagStart));
      // Keep only the bare name in case whitespace or a slash is still
      // attached to what getTag() answered.
      $tag = (string) substr($tag, 0, strcspn($tag, " \t\r\n/"));
      $singleTags = array ('area', 'base', 'br', 'col', 'embed', 'hr', 'img',
          'input', 'link', 'meta', 'param', 'source', 'track', 'wbr');
      return in_array($tag, $singleTags, true);
  }
  /**
   * Answer true if the '<' doesn't seem to be the start of a tag and is
   * instead an invalid 'less-than' character.
   *
   * This will be the case if:
   * - Nothing follows the '<'
   * - There is a space, tab, line-return, new-line, or '=' following the '<'
   * - Another '<' is found in the string before a '>'
   * - No '>' is found in the rest of the string
   *
   * @param string $inputString
   * @param integer $tagStart // index of the opening '<'
   * @return boolean
   * @access private
   * @since 12/14/05
   */
  function isInvalidLessThan ( $inputString, $tagStart ) {
      $length = strlen($inputString);
      $next = $tagStart + 1;

      // A '<' at the very end of the string cannot start a tag.
      if ($next >= $length)
          return true;

      // if this '<' is followed by one of our invalid following chars.
      // The space is written literally: "\s" is not an escape sequence in
      // a PHP string and would never match a single character.
      $invalidFollowingChars = array(" ", "\t", "\n", "\r", "=");
      if (in_array($inputString[$next], $invalidFollowingChars, true))
          return true;

      // Find the first '<' or '>' after our '<'.
      $stop = $next + strcspn($inputString, '<>', $next);

      // If we have gotten to the end of the string and not found a
      // closing '>', then the tag must be invalid.
      if ($stop >= $length)
          return true;

      // Another '<' before any '>' means ours was not a tag.
      return ($inputString[$stop] == '<');
  }
  407. }

Documentation generated on Wed, 19 Sep 2007 10:24:34 -0400 by phpDocumentor 1.3.0RC3