File indexing completed on 2024-06-16 05:30:14

0001 <?php
0002 /**
0003  * Zend Framework
0004  *
0005  * LICENSE
0006  *
0007  * This source file is subject to the new BSD license that is bundled
0008  * with this package in the file LICENSE.txt.
0009  * It is also available through the world-wide-web at this URL:
0010  * http://framework.zend.com/license/new-bsd
0011  * If you did not receive a copy of the license and are unable to
0012  * obtain it through the world-wide-web, please send an email
0013  * to license@zend.com so we can send you a copy immediately.
0014  *
0015  * @category   Zend
0016  * @package    Zend_Markup
0017  * @subpackage Parser
0018  * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
0019  * @license    http://framework.zend.com/license/new-bsd     New BSD License
0020  * @version    $Id$
0021  */
0022 
0023 /**
0024  * @see Zend_Markup_TokenList
0025  */
0026 // require_once 'Zend/Markup/TokenList.php';
0027 
0028 /**
0029  * @see Zend_Markup_Parser_ParserInterface
0030  */
0031 // require_once 'Zend/Markup/Parser/ParserInterface.php';
0032 
/**
 * BBCode parser: converts a BBCode string into a Zend_Markup_TokenList tree.
 *
 * Parsing runs in two passes. {@link _tokenize()} splits the source into a flat
 * list of text and tag tokens; {@link _createTree()} then nests those tokens
 * using the per-tag "stoppers" declared in {@link $_tags}.
 *
 * Malformed markup never raises an error: anything that cannot be read as a tag
 * is kept as literal text.
 *
 * @category   Zend
 * @package    Zend_Markup
 * @subpackage Parser
 * @copyright  Copyright (c) 2005-2015 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Markup_Parser_Bbcode implements Zend_Markup_Parser_ParserInterface
{
    /**
     * Placeholder substituted for every line break before tokenizing, so that a
     * line break is scanned like a tag and can act as a stopper (see the '*' tag).
     */
    const NEWLINE   = "[newline\0]";

    // there is a parsing difference between the default tags and single tags:
    // only default tags become the current node and wait for a stopper
    const TYPE_DEFAULT = 'default';
    const TYPE_SINGLE  = 'single';

    /**
     * Body of the regex character class used for tag and attribute names:
     * anything except brackets, '=' and whitespace.
     */
    const NAME_CHARSET = '^\[\]=\s';

    // tokenizer states
    const STATE_SCAN       = 0; // reading text, looking for the start of a tag
    const STATE_SCANATTRS  = 1; // inside a tag, looking for an attribute name or the closing ']'
    const STATE_PARSEVALUE = 2; // directly after '=', reading an attribute value

    /**
     * Token tree
     *
     * @var Zend_Markup_TokenList
     */
    protected $_tree;

    /**
     * Current token (the node new children are attached to while building the tree)
     *
     * @var Zend_Markup_Token
     */
    protected $_current;

    /**
     * Source to tokenize (line breaks already replaced by {@link NEWLINE})
     *
     * @var string
     */
    protected $_value = '';

    /**
     * Length of the value, in bytes
     *
     * @var int
     */
    protected $_valueLen = 0;

    /**
     * Current pointer (byte offset into {@link $_value})
     *
     * @var int
     */
    protected $_pointer = 0;

    /**
     * The buffer: literal text collected since the last emitted token
     *
     * @var string
     */
    protected $_buffer = '';

    /**
     * Temporary tag storage: the tag currently being tokenized
     * (keys 'tag', 'name', 'attributes' and, once complete, 'type')
     *
     * @var array
     */
    protected $_temp;

    /**
     * Stoppers that we are searching for, as stopper string => number of open
     * tags waiting for it
     *
     * @var array
     */
    protected $_searchedStoppers = array();

    /**
     * Tag information, keyed by tag name. Unknown tags are registered on the
     * fly by {@link _checkTagDeclaration()}.
     *
     * @var array
     */
    protected $_tags = array(
        'Zend_Markup_Root' => array(
            'type'     => self::TYPE_DEFAULT,
            'stoppers' => array(),
        ),
        '*' => array(
            'type'     => self::TYPE_DEFAULT,
            'stoppers' => array(self::NEWLINE, '[/*]', '[/]'),
        ),
        'hr' => array(
            'type'     => self::TYPE_SINGLE,
            'stoppers' => array(),
        ),
        'code' => array(
            'type'         => self::TYPE_DEFAULT,
            'stoppers'     => array('[/code]', '[/]'),
            'parse_inside' => false
        )
    );

    /**
     * Token array produced by {@link _tokenize()}
     *
     * @var array
     */
    protected $_tokens = array();

    /**
     * State of the tokenizer (one of the STATE_* constants)
     *
     * @var int
     */
    protected $_state = self::STATE_SCAN;


    /**
     * Prepare the parsing of a bbcode string; the real work is done in
     * {@link _tokenize()} and {@link _createTree()}.
     *
     * @param  string $value
     * @return Zend_Markup_TokenList
     * @throws Zend_Markup_Parser_Exception If $value is not a string or is the empty string
     * @see    Zend_Markup_Parser_Exception
     */
    public function parse($value)
    {
        if (!is_string($value)) {
            throw new Zend_Markup_Parser_Exception('Value to parse should be a string.');
        }

        // Compare against '' instead of using empty(): the string "0" is valid input.
        if ($value === '') {
            throw new Zend_Markup_Parser_Exception('Value to parse cannot be left empty.');
        }

        $this->_value = str_replace(array("\r\n", "\r", "\n"), self::NEWLINE, $value);

        // variable initialization for tokenizer
        $this->_valueLen         = strlen($this->_value);
        $this->_pointer          = 0;
        $this->_buffer           = '';
        $this->_temp             = array();
        $this->_state            = self::STATE_SCAN;
        $this->_tokens           = array();

        $this->_tokenize();

        // variable initialization for treebuilder
        $this->_searchedStoppers = array();
        $this->_tree             = new Zend_Markup_TokenList();
        $this->_current          = new Zend_Markup_Token(
            '',
            Zend_Markup_Token::TYPE_NONE,
            'Zend_Markup_Root'
        );

        $this->_tree->addChild($this->_current);

        $this->_createTree();

        return $this->_tree;
    }

    /**
     * Tokenize {@link $_value} into {@link $_tokens}.
     *
     * Each token is an array with at least the keys 'tag' (the raw source text)
     * and 'type'; tag tokens additionally carry 'name' and 'attributes'.
     * Input that starts like a tag but is not well formed (or ends before the
     * closing ']') is kept as literal text instead of being discarded.
     *
     * @return void
     */
    protected function _tokenize()
    {
        // name of the attribute whose value is read in STATE_PARSEVALUE
        $attribute = '';

        while ($this->_pointer < $this->_valueLen) {
            switch ($this->_state) {
                case self::STATE_SCAN:
                    $matches = array();
                    $regex   = '#\G(?<text>[^\[]*)(?<open>\[(?<name>[' . self::NAME_CHARSET . ']+)?)?#';
                    if (!preg_match($regex, $this->_value, $matches, 0, $this->_pointer)) {
                        // The pattern can match the empty string, so this only happens on a
                        // PCRE error. Keep the remainder as text rather than looping forever.
                        $this->_buffer .= substr($this->_value, $this->_pointer);
                        $this->_pointer = $this->_valueLen;
                        break 2;
                    }

                    $this->_pointer += strlen($matches[0]);

                    // not empty(): the text "0" must be kept
                    if (isset($matches['text']) && $matches['text'] !== '') {
                        $this->_buffer .= $matches['text'];
                    }

                    // preg_match() omits trailing groups that did not take part in the match
                    if (!isset($matches['open'])) {
                        // great, no tag, we are ending the string
                        break;
                    }
                    if (!isset($matches['name'])) {
                        // a '[' that is not followed by a tag name is plain text
                        $this->_buffer .= $matches['open'];
                        break;
                    }

                    $this->_temp = array(
                        'tag'        => '[' . $matches['name'],
                        'name'       => $matches['name'],
                        'attributes' => array()
                    );

                    if ($this->_pointer >= $this->_valueLen) {
                        // input ended right after the tag name: no tag, keep it as text
                        $this->_abortTag();
                        break 2;
                    }

                    if ($this->_value[$this->_pointer] === '=') {
                        // [name=value]: the value is stored under the tag's own name
                        $this->_pointer++;

                        $this->_temp['tag'] .= '=';
                        $this->_state        = self::STATE_PARSEVALUE;
                        $attribute           = $this->_temp['name'];
                    } else {
                        $this->_state = self::STATE_SCANATTRS;
                    }
                    break;
                case self::STATE_SCANATTRS:
                    $matches = array();
                    $regex   = '#\G((?<end>\s*\])|\s+(?<attribute>[' . self::NAME_CHARSET . ']+)(?<eq>=?))#';
                    if (!preg_match($regex, $this->_value, $matches, 0, $this->_pointer)) {
                        // Neither a closing ']' nor another attribute follows: this is not a
                        // tag after all. Keep what was consumed as text and resume scanning.
                        // STATE_SCAN always consumes at least one byte, so the loop advances.
                        $this->_abortTag();
                        break;
                    }

                    $this->_pointer += strlen($matches[0]);

                    if (!empty($matches['end'])) {
                        // tag complete: emit pending text first, then the tag itself
                        $this->_flushBuffer();

                        $this->_temp['tag'] .= $matches['end'];
                        $this->_temp['type'] = Zend_Markup_Token::TYPE_TAG;

                        $this->_tokens[] = $this->_temp;
                        $this->_temp     = array();

                        $this->_state = self::STATE_SCAN;
                    } else {
                        // attribute name
                        $attribute = $matches['attribute'];

                        $this->_temp['tag'] .= $matches[0];

                        $this->_temp['attributes'][$attribute] = '';

                        if (empty($matches['eq'])) {
                            $this->_state = self::STATE_SCANATTRS;
                        } else {
                            $this->_state = self::STATE_PARSEVALUE;
                        }
                    }
                    break;
                case self::STATE_PARSEVALUE:
                    $matches = array();
                    $regex   = '#\G((?<quote>"|\')(?<valuequote>.*?)\\2|(?<value>[^\]\s]+))#';
                    if (!preg_match($regex, $this->_value, $matches, 0, $this->_pointer)) {
                        // no value after '=': leave the attribute empty
                        $this->_state = self::STATE_SCANATTRS;
                        break;
                    }

                    $this->_pointer += strlen($matches[0]);

                    if (!empty($matches['quote'])) {
                        $this->_temp['attributes'][$attribute] = $matches['valuequote'];
                    } else {
                        $this->_temp['attributes'][$attribute] = $matches['value'];
                    }
                    $this->_temp['tag'] .= $matches[0];

                    $this->_state = self::STATE_SCANATTRS;
                    break;
            }
        }

        if ($this->_state !== self::STATE_SCAN) {
            // input ended in the middle of a tag: keep the partial tag as text
            $this->_abortTag();
        }

        $this->_flushBuffer();
    }

    /**
     * Give up on the tag currently being tokenized: its raw source text is moved
     * to the text buffer and the tokenizer returns to scanning for text.
     *
     * @return void
     */
    protected function _abortTag()
    {
        if (isset($this->_temp['tag'])) {
            $this->_buffer .= $this->_temp['tag'];
        }

        $this->_temp  = array();
        $this->_state = self::STATE_SCAN;
    }

    /**
     * Emit the buffered text, if any, as a text token and clear the buffer.
     *
     * @return void
     */
    protected function _flushBuffer()
    {
        // not empty(): a buffer holding just "0" is real content
        if ($this->_buffer !== '') {
            $this->_tokens[] = array(
                'tag'  => $this->_buffer,
                'type' => Zend_Markup_Token::TYPE_NONE
            );
            $this->_buffer = '';
        }
    }

    /**
     * Parse the token array ({@link $_tokens}) into a tree below {@link $_current}.
     *
     * Public for backward compatibility; it relies on state set up by
     * {@link parse()} and is not meant to be called on its own.
     *
     * @return void
     */
    public function _createTree()
    {
        foreach ($this->_tokens as $token) {
            // first we want to know if this tag is a stopper, or at least a searched one
            if ($this->_isStopper($token['tag'])) {
                $this->_closeTag($token['tag']);
                continue;
            }

            if ($token['type'] != Zend_Markup_Token::TYPE_TAG) {
                // no tag, just add it as a simple token
                $this->_addTextChild($token['tag']);
                continue;
            }

            if ($token['tag'] === self::NEWLINE) {
                // this is a newline tag, add it as a token
                $this->_addTextChild("\n");
                continue;
            }

            if (isset($token['name']) && ($token['name'][0] === '/')) {
                // a closing tag nobody is waiting for: keep its source text
                $this->_addTextChild($token['tag']);
                continue;
            }

            $currentName = $this->_current->getName();
            if (isset($this->_tags[$currentName]['parse_inside'])
                && !$this->_tags[$currentName]['parse_inside']
            ) {
                // the current tag (e.g. [code]) keeps nested tags as literal text
                $this->_addTextChild($token['tag']);
                continue;
            }

            // add the tag
            $child = new Zend_Markup_Token(
                $token['tag'],
                $token['type'],
                $token['name'],
                $token['attributes'],
                $this->_current
            );
            $this->_current->addChild($child);

            // only default-type tags are entered and wait for a stopper;
            // single tags such as [hr] stay leaf nodes
            if ($this->_getType($token['name']) == self::TYPE_DEFAULT) {
                $this->_current = $child;

                $this->_addToSearchedStoppers($this->_current);
            }
        }
    }

    /**
     * Close the nearest open tag that accepts $tag as one of its stoppers.
     *
     * Open tags nested inside the tag being closed are cloned and re-opened, in
     * their original nesting order, below the closed tag's parent; the innermost
     * clone becomes the current node again.
     *
     * Must only be called when {@link _isStopper()} returned true for $tag.
     *
     * @param  string $tag Raw source text of the stopper token
     * @return void
     */
    protected function _closeTag($tag)
    {
        // find the stopper, remembering the tags we step out of (innermost first)
        $oldItems = array();

        while (!in_array($tag, $this->_tags[$this->_current->getName()]['stoppers'], true)) {
            $oldItems[]     = clone $this->_current;
            $this->_current = $this->_current->getParent();
        }

        // we found the stopper, so stop the tag
        $this->_current->setStopper($tag);
        $this->_removeFromSearchedStoppers($this->_current);
        $this->_current = $this->_current->getParent();

        // add the old items again if there are any (outermost first)
        foreach (array_reverse($oldItems) as $item) {
            /* @var $item Zend_Markup_Token */
            $this->_current->addChild($item);
            $item->setParent($this->_current);
            $this->_current = $item;
        }
    }

    /**
     * Append a plain text token (no name, no attributes) to the current node.
     *
     * @param  string $text
     * @return void
     */
    protected function _addTextChild($text)
    {
        $this->_current->addChild(new Zend_Markup_Token(
            $text,
            Zend_Markup_Token::TYPE_NONE,
            '',
            array(),
            $this->_current
        ));
    }

    /**
     * Check if there is a tag declaration, and if there isn't, add a default one:
     * a TYPE_DEFAULT tag stopped by "[/name]" or "[/]".
     *
     * @param string $name
     *
     * @return void
     */
    protected function _checkTagDeclaration($name)
    {
        if (!isset($this->_tags[$name])) {
            $this->_tags[$name] = array(
                'type'     => self::TYPE_DEFAULT,
                'stoppers' => array(
                    '[/' . $name . ']',
                    '[/]'
                )
            );
        }
    }

    /**
     * Check the tag's type, declaring the tag first if it is unknown
     *
     * @param  string $name
     * @return string One of the TYPE_* values stored in {@link $_tags}
     */
    protected function _getType($name)
    {
        $this->_checkTagDeclaration($name);

        return $this->_tags[$name]['type'];
    }

    /**
     * Check if the tag is a stopper that at least one open tag is waiting for
     *
     * @param  string $tag Raw source text of the token
     * @return bool
     */
    protected function _isStopper($tag)
    {
        $this->_checkTagDeclaration($this->_current->getName());

        if (!empty($this->_searchedStoppers[$tag])) {
            return true;
        }

        return false;
    }

    /**
     * Add to searched stoppers: count every stopper of the given (just opened) tag
     *
     * @param  Zend_Markup_Token $token
     * @return void
     */
    protected function _addToSearchedStoppers(Zend_Markup_Token $token)
    {
        $this->_checkTagDeclaration($token->getName());

        foreach ($this->_tags[$token->getName()]['stoppers'] as $stopper) {
            if (!isset($this->_searchedStoppers[$stopper])) {
                $this->_searchedStoppers[$stopper] = 0;
            }
            ++$this->_searchedStoppers[$stopper];
        }
    }

    /**
     * Remove from searched stoppers: uncount every stopper of the given (just closed) tag
     *
     * @param  Zend_Markup_Token $token
     * @return void
     */
    protected function _removeFromSearchedStoppers(Zend_Markup_Token $token)
    {
        $this->_checkTagDeclaration($token->getName());

        foreach ($this->_tags[$token->getName()]['stoppers'] as $stopper) {
            --$this->_searchedStoppers[$stopper];
        }
    }

}