Viewing file: parser.php (24.84 KB) -rw-rw-r-- Select action/file-type: (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
<?php
if(!defined('DOKU_INC')) define('DOKU_INC',fullpath(dirname(__FILE__).'/../../').'/');
require_once DOKU_INC . 'inc/parser/lexer.php'; require_once DOKU_INC . 'inc/parser/handler.php';
/**
 * Mode categories used by the parser.
 *
 * Each key names a category and maps to the list of mode names in it.
 * Mode classes merge these lists to build the set of modes they accept.
 */
global $PARSER_MODES;
$PARSER_MODES = array();

// containers are complex modes that can contain many other modes
// hr breaks the principle but they shouldn't be used in tables / lists
// so they are put here
$PARSER_MODES['container'] = array('listblock', 'table', 'quote', 'hr');

// some modes are allowed inside the base mode only
$PARSER_MODES['baseonly'] = array('header');

// modes for styling text -- footnote behaves similar to styling
$PARSER_MODES['formatting'] = array(
    'strong', 'emphasis', 'underline', 'monospace',
    'subscript', 'superscript', 'deleted', 'footnote',
);

// modes where the token is simply replaced - they can not contain any
// other modes (the key spelling 'substition' is what the rest of the file reads)
$PARSER_MODES['substition'] = array(
    'acronym', 'smiley', 'wordblock', 'entity', 'camelcaselink',
    'internallink', 'media', 'externallink', 'linebreak', 'emaillink',
    'windowssharelink', 'filelink', 'notoc', 'nocache', 'multiplyentity',
    'quotes', 'rss',
);

// modes which have a start and end token but inside which
// no other modes should be applied
$PARSER_MODES['protected'] = array(
    'preformatted', 'code', 'file', 'php', 'html', 'htmlblock', 'phpblock',
);

// inside this mode no wiki markup should be applied but line endings
// and whitespace aren't preserved
$PARSER_MODES['disabled'] = array('unformatted');

// used to mark paragraph boundaries
$PARSER_MODES['paragraphs'] = array('eol');
//-------------------------------------------------------------------
/**
 * Sets up the Lexer with modes and points it to the Handler
 * For an intro to the Lexer see: wiki:parser
 */
class Doku_Parser {

    // Handler object; passed to the Lexer and finalized after parsing
    var $Handler;

    // Lexer instance, created lazily by addBaseMode()
    var $Lexer;

    // Registered mode objects keyed by mode name ('base' is the root mode)
    var $modes = array();

    // True once connectModes() has run
    var $connected = false;

    /**
     * Registers the base mode and creates the Lexer if none exists yet.
     *
     * @param object $BaseMode mode object stored under the 'base' key
     */
    function addBaseMode(& $BaseMode) {
        $this->modes['base'] = & $BaseMode;
        if ( !$this->Lexer ) {
            // Plain assignment: "=& new" is a parse error from PHP 7.0 on
            $this->Lexer = new Doku_Lexer($this->Handler, 'base', true);
        }
        $this->modes['base']->Lexer = & $this->Lexer;
    }

    /**
     * Registers a mode under the given name.
     *
     * PHP preserves order of associative elements
     * Mode sequence is important
     *
     * @param string $name mode name used as key in $this->modes
     * @param object $Mode mode object; receives a reference to the Lexer
     */
    function addMode($name, & $Mode) {
        if ( !isset($this->modes['base']) ) {
            // By-reference parameters need a real variable, not a bare "new"
            $base = new Doku_Parser_Mode_base();
            $this->addBaseMode($base);
        }
        $Mode->Lexer = & $this->Lexer;
        $this->modes[$name] = & $Mode;
    }

    /**
     * Connects every registered mode to each mode that accepts it.
     * Does nothing when called again after a first run.
     */
    function connectModes() {

        if ( $this->connected ) {
            return;
        }

        foreach ( array_keys($this->modes) as $mode ) {

            // Base isn't connected to anything.
            // Strict comparison so an integer key 0 is not mistaken for 'base'.
            if ( $mode === 'base' ) {
                continue;
            }

            $this->modes[$mode]->preConnect();

            foreach ( array_keys($this->modes) as $cm ) {
                if ( $this->modes[$cm]->accepts($mode) ) {
                    $this->modes[$mode]->connectTo($cm);
                }
            }

            $this->modes[$mode]->postConnect();
        }

        $this->connected = true;
    }

    /**
     * Parses a document and returns the Handler's calls.
     *
     * @param string $doc raw document text
     * @return mixed $this->Handler->calls, or false when no Lexer was set up
     */
    function parse($doc) {
        if ( !$this->Lexer ) {
            return false;
        }

        $this->connectModes();

        // Normalize CRs (only "\r\n" pairs are converted) and pad doc
        $doc = "\n".str_replace("\r\n", "\n", $doc)."\n";
        $this->Lexer->parse($doc);
        $this->Handler->_finalize();
        return $this->Handler->calls;
    }

}
//-------------------------------------------------------------------
/**
 * This class and all the subclasses below are
 * used to reduce the effort required to register
 * modes with the Lexer. For performance these
 * could all be eliminated later perhaps, or
 * the Parser could be serialized to a file once
 * all modes are registered
 *
 * @author Harry Fuecks <hfuecks@gmail.com>
 */
class Doku_Parser_Mode {

    // Lexer the mode registers its patterns with
    var $Lexer;

    // Names of the modes that may appear inside this mode
    var $allowedModes = array();

    /**
     * Returns a number used to determine in which order modes are added.
     * This default only raises a warning; subclasses override it.
     */
    function getSort() {
        $message = 'getSort() not implemented in '.get_class($this);
        trigger_error($message, E_USER_WARNING);
    }

    /**
     * Called before any calls to connectTo
     */
    function preConnect() {
    }

    /**
     * Connects the mode
     */
    function connectTo($mode) {
    }

    /**
     * Called after all calls to connectTo
     */
    function postConnect() {
    }

    /**
     * Tells whether the given mode name is listed in $allowedModes.
     *
     * @param string $mode mode name to look up
     * @return bool
     */
    function accepts($mode) {
        $allowed = (array) $this->allowedModes;
        return in_array($mode, $allowed);
    }

}
//-------------------------------------------------------------------
/**
 * Root mode: accepts every mode category defined in $PARSER_MODES.
 */
class Doku_Parser_Mode_base extends Doku_Parser_Mode {

    /**
     * Builds the list of accepted modes from all categories.
     *
     * Declared as __construct because PHP 8 no longer treats a method
     * named after the class as a constructor.
     */
    function __construct() {
        global $PARSER_MODES;

        $this->allowedModes = array_merge (
                $PARSER_MODES['container'],
                $PARSER_MODES['baseonly'],
                $PARSER_MODES['paragraphs'],
                $PARSER_MODES['formatting'],
                $PARSER_MODES['substition'],
                $PARSER_MODES['protected'],
                $PARSER_MODES['disabled']
            );
    }

    /**
     * Legacy constructor name, kept for callers that invoke it explicitly.
     */
    function Doku_Parser_Mode_base() {
        $this->__construct();
    }

    function getSort() {
        return 0;
    }
}
//-------------------------------------------------------------------
/**
 * Footnote mode, delimited by "((" and "))".
 */
class Doku_Parser_Mode_footnote extends Doku_Parser_Mode {

    /**
     * Builds the list of accepted modes, excluding 'footnote' itself.
     *
     * Declared as __construct because PHP 8 no longer treats a method
     * named after the class as a constructor.
     */
    function __construct() {
        global $PARSER_MODES;

        $this->allowedModes = array_merge (
                $PARSER_MODES['container'],
                $PARSER_MODES['formatting'],
                $PARSER_MODES['substition'],
                $PARSER_MODES['protected'],
                $PARSER_MODES['disabled']
            );

        // array_search() returns false when 'footnote' is absent; using that
        // as an index would unset element 0, so only unset on a real hit.
        $key = array_search('footnote', $this->allowedModes);
        if ( $key !== false ) {
            unset($this->allowedModes[$key]);
        }
    }

    /**
     * Legacy constructor name, kept for callers that invoke it explicitly.
     */
    function Doku_Parser_Mode_footnote() {
        $this->__construct();
    }

    function connectTo($mode) {
        $this->Lexer->addEntryPattern(
            '\x28\x28(?=.*\x29\x29)',$mode,'footnote'
            );
    }

    function postConnect() {
        $this->Lexer->addExitPattern(
            '\x29\x29','footnote'
            );
    }

    function getSort() {
        return 150;
    }
}
//-------------------------------------------------------------------
/**
 * Header mode; its pattern is registered against the 'base' mode only.
 */
class Doku_Parser_Mode_header extends Doku_Parser_Mode {

    /**
     * Registers the header pattern in preConnect rather than connectTo,
     * with 'base' hard-coded as the containing mode.
     */
    function preConnect() {
        // we're not picky about the closing ones, two are enough
        $headerPattern = '[ \t]*={2,}[^\n]+={2,}[ \t]*(?=\n)';
        $this->Lexer->addSpecialPattern($headerPattern, 'base', 'header');
    }

    function getSort() {
        return 50;
    }
}
//-------------------------------------------------------------------
/**
 * Mode for the ~~NOTOC~~ marker.
 */
class Doku_Parser_Mode_notoc extends Doku_Parser_Mode {

    /**
     * Registers the marker as a special pattern inside $mode.
     */
    function connectTo($mode) {
        $marker = '~~NOTOC~~';
        $this->Lexer->addSpecialPattern($marker, $mode, 'notoc');
    }

    function getSort() {
        return 30;
    }
}
//-------------------------------------------------------------------
/**
 * Mode for the ~~NOCACHE~~ marker.
 */
class Doku_Parser_Mode_nocache extends Doku_Parser_Mode {

    /**
     * Registers the marker as a special pattern inside $mode.
     */
    function connectTo($mode) {
        $marker = '~~NOCACHE~~';
        $this->Lexer->addSpecialPattern($marker, $mode, 'nocache');
    }

    function getSort() {
        return 40;
    }
}
//-------------------------------------------------------------------
/**
 * Mode for forced line breaks: two backslashes followed by whitespace.
 */
class Doku_Parser_Mode_linebreak extends Doku_Parser_Mode {

    /**
     * Registers the line break pattern inside $mode.
     */
    function connectTo($mode) {
        // \x5C is the backslash; the whitespace is only looked at, not consumed
        $breakPattern = '\x5C{2}(?=\s)';
        $this->Lexer->addSpecialPattern($breakPattern, $mode, 'linebreak');
    }

    function getSort() {
        return 140;
    }
}
//-------------------------------------------------------------------
/**
 * End-of-line mode, used to mark paragraph boundaries.
 */
class Doku_Parser_Mode_eol extends Doku_Parser_Mode {

    /**
     * Registers the newline pattern inside $mode, except for
     * 'listblock' and 'table', which are skipped.
     */
    function connectTo($mode) {
        $skipped = array('listblock','table');
        if ( !in_array($mode, $skipped) ) {
            $this->Lexer->addSpecialPattern('\n', $mode, 'eol');
        }
    }

    function getSort() {
        return 370;
    }
}
//-------------------------------------------------------------------
/**
 * Horizontal rule mode: a line holding four or more dashes.
 */
class Doku_Parser_Mode_hr extends Doku_Parser_Mode {

    /**
     * Registers the rule pattern inside $mode.
     */
    function connectTo($mode) {
        // leading newline is consumed, trailing newline is only looked at
        $rulePattern = '\n[ \t]*-{4,}[ \t]*(?=\n)';
        $this->Lexer->addSpecialPattern($rulePattern, $mode, 'hr');
    }

    function getSort() {
        return 160;
    }
}
//-------------------------------------------------------------------
/**
 * Generic inline formatting mode; one instance per formatting type.
 */
class Doku_Parser_Mode_formatting extends Doku_Parser_Mode {

    // Formatting type handled by this instance (a key of $formatting)
    var $type;

    // Entry pattern, exit pattern and sort value for each formatting type
    var $formatting = array (
        'strong' => array (
            'entry'=>'\*\*(?=.*\*\*)',
            'exit'=>'\*\*',
            'sort'=>70
            ),

        'emphasis'=> array (
            'entry'=>'//(?=[^\x00]*[^:]//)', //hack for bug #384
            'exit'=>'//',
            'sort'=>80
            ),

        'underline'=> array (
            'entry'=>'__(?=.*__)',
            'exit'=>'__',
            'sort'=>90
            ),

        'monospace'=> array (
            'entry'=>'\x27\x27(?=.*\x27\x27)',
            'exit'=>'\x27\x27',
            'sort'=>100
            ),

        'subscript'=> array (
            'entry'=>'<sub>(?=.*</sub>)',
            'exit'=>'</sub>',
            'sort'=>110
            ),

        'superscript'=> array (
            'entry'=>'<sup>(?=.*</sup>)',
            'exit'=>'</sup>',
            'sort'=>120
            ),

        'deleted'=> array (
            'entry'=>'<del>(?=.*</del>)',
            'exit'=>'</del>',
            'sort'=>130
            ),
        );

    /**
     * Sets the formatting type and the modes allowed inside it.
     *
     * Declared as __construct because PHP 8 no longer treats a method
     * named after the class as a constructor.
     *
     * @param string $type key of $this->formatting; an unknown key raises
     *                     an E_USER_WARNING but construction continues
     */
    function __construct($type) {
        global $PARSER_MODES;

        if ( !array_key_exists($type, $this->formatting) ) {
            trigger_error('Invalid formatting type '.$type, E_USER_WARNING);
        }

        $this->type = $type;

        // formatting may contain other formatting but not itself
        $modes = $PARSER_MODES['formatting'];
        $key = array_search($type, $modes);
        if ( is_int($key) ) {
            unset($modes[$key]);
        }

        $this->allowedModes = array_merge (
                $modes,
                $PARSER_MODES['substition'],
                $PARSER_MODES['disabled']
            );
    }

    /**
     * Legacy constructor name, kept for callers that invoke it explicitly.
     */
    function Doku_Parser_Mode_formatting($type) {
        $this->__construct($type);
    }

    function connectTo($mode) {

        // Can't nest formatting in itself
        if ( $mode == $this->type ) {
            return;
        }

        $this->Lexer->addEntryPattern(
                $this->formatting[$this->type]['entry'],
                $mode,
                $this->type
            );
    }

    function postConnect() {

        $this->Lexer->addExitPattern(
            $this->formatting[$this->type]['exit'],
            $this->type
            );

    }

    function getSort() {
        return $this->formatting[$this->type]['sort'];
    }
}
//-------------------------------------------------------------------
/**
 * List mode: lines indented by two or more spaces, or by tabs,
 * followed by "-" or "*".
 */
class Doku_Parser_Mode_listblock extends Doku_Parser_Mode {

    /**
     * Builds the list of modes allowed inside a list.
     *
     * Declared as __construct because PHP 8 no longer treats a method
     * named after the class as a constructor.
     */
    function __construct() {
        global $PARSER_MODES;

        $this->allowedModes = array_merge (
                $PARSER_MODES['formatting'],
                $PARSER_MODES['substition'],
                $PARSER_MODES['disabled'],
                $PARSER_MODES['protected'] #XXX new
            );

        // $this->allowedModes[] = 'footnote';
    }

    /**
     * Legacy constructor name, kept for callers that invoke it explicitly.
     */
    function Doku_Parser_Mode_listblock() {
        $this->__construct();
    }

    function connectTo($mode) {
        $this->Lexer->addEntryPattern('\n {2,}[\-\*]',$mode,'listblock');
        $this->Lexer->addEntryPattern('\n\t{1,}[\-\*]',$mode,'listblock');

        // the same patterns are also registered inside the list itself
        $this->Lexer->addPattern('\n {2,}[\-\*]','listblock');
        $this->Lexer->addPattern('\n\t{1,}[\-\*]','listblock');

    }

    function postConnect() {
        $this->Lexer->addExitPattern('\n','listblock');
    }

    function getSort() {
        return 10;
    }
}
//-------------------------------------------------------------------
/**
 * Table mode: rows start with "^" or "|" at the beginning of a line.
 */
class Doku_Parser_Mode_table extends Doku_Parser_Mode {

    /**
     * Builds the list of modes allowed inside a table.
     *
     * Declared as __construct because PHP 8 no longer treats a method
     * named after the class as a constructor.
     */
    function __construct() {
        global $PARSER_MODES;

        $this->allowedModes = array_merge (
                $PARSER_MODES['formatting'],
                $PARSER_MODES['substition'],
                $PARSER_MODES['disabled'],
                $PARSER_MODES['protected']
            );
    }

    /**
     * Legacy constructor name, kept for callers that invoke it explicitly.
     */
    function Doku_Parser_Mode_table() {
        $this->__construct();
    }

    function connectTo($mode) {
        $this->Lexer->addEntryPattern('\n\^',$mode,'table');
        $this->Lexer->addEntryPattern('\n\|',$mode,'table');
    }

    function postConnect() {
        $this->Lexer->addPattern('\n\^','table');
        $this->Lexer->addPattern('\n\|','table');
        #$this->Lexer->addPattern(' {2,}','table');
        $this->Lexer->addPattern('[\t ]+','table');
        $this->Lexer->addPattern('\^','table');
        $this->Lexer->addPattern('\|','table');
        $this->Lexer->addExitPattern('\n','table');
    }

    function getSort() {
        return 60;
    }
}
//-------------------------------------------------------------------
/**
 * Unformatted text mode: <nowiki>...</nowiki> and %%...%%.
 */
class Doku_Parser_Mode_unformatted extends Doku_Parser_Mode {

    /**
     * Registers both entry patterns inside $mode; the %% form enters the
     * separate 'unformattedalt' mode.
     */
    function connectTo($mode) {
        $lexer = $this->Lexer;
        $lexer->addEntryPattern('<nowiki>(?=.*</nowiki>)', $mode, 'unformatted');
        $lexer->addEntryPattern('%%(?=.*%%)', $mode, 'unformattedalt');
    }

    /**
     * Registers the exit patterns and maps 'unformattedalt' onto the
     * 'unformatted' handler.
     */
    function postConnect() {
        $lexer = $this->Lexer;
        $lexer->addExitPattern('</nowiki>', 'unformatted');
        $lexer->addExitPattern('%%', 'unformattedalt');
        $lexer->mapHandler('unformattedalt', 'unformatted');
    }

    function getSort() {
        return 170;
    }
}
//-------------------------------------------------------------------
/**
 * Embedded PHP mode: <php> enters 'php', <PHP> enters 'phpblock'.
 */
class Doku_Parser_Mode_php extends Doku_Parser_Mode {

    /**
     * Registers the lower- and upper-case entry patterns inside $mode.
     */
    function connectTo($mode) {
        $lexer = $this->Lexer;
        $lexer->addEntryPattern('<php>(?=.*</php>)', $mode, 'php');
        $lexer->addEntryPattern('<PHP>(?=.*</PHP>)', $mode, 'phpblock');
    }

    /**
     * Registers the matching closing tags as exit patterns.
     */
    function postConnect() {
        $lexer = $this->Lexer;
        $lexer->addExitPattern('</php>', 'php');
        $lexer->addExitPattern('</PHP>', 'phpblock');
    }

    function getSort() {
        return 180;
    }
}
//-------------------------------------------------------------------
/**
 * Embedded HTML mode: <html> enters 'html', <HTML> enters 'htmlblock'.
 */
class Doku_Parser_Mode_html extends Doku_Parser_Mode {

    /**
     * Registers the lower- and upper-case entry patterns inside $mode.
     */
    function connectTo($mode) {
        $lexer = $this->Lexer;
        $lexer->addEntryPattern('<html>(?=.*</html>)', $mode, 'html');
        $lexer->addEntryPattern('<HTML>(?=.*</HTML>)', $mode, 'htmlblock');
    }

    /**
     * Registers the matching closing tags as exit patterns.
     */
    function postConnect() {
        $lexer = $this->Lexer;
        $lexer->addExitPattern('</html>', 'html');
        $lexer->addExitPattern('</HTML>', 'htmlblock');
    }

    function getSort() {
        return 190;
    }
}
//-------------------------------------------------------------------
/**
 * Preformatted mode: lines indented by two spaces or a tab.
 */
class Doku_Parser_Mode_preformatted extends Doku_Parser_Mode {

    /**
     * Registers the entry patterns inside $mode and the continuation
     * patterns inside 'preformatted' itself.
     */
    function connectTo($mode) {
        $lexer = $this->Lexer;

        // Has hard coded awareness of lists: the negative lookahead
        // rejects indentation followed by "*" or "-"
        $lexer->addEntryPattern('\n  (?![\*\-])', $mode, 'preformatted');
        $lexer->addEntryPattern('\n\t(?![\*\-])', $mode, 'preformatted');

        // How to effect a sub pattern with the Lexer!
        $lexer->addPattern('\n  ', 'preformatted');
        $lexer->addPattern('\n\t', 'preformatted');
    }

    /**
     * Registers a plain newline as the exit pattern.
     */
    function postConnect() {
        $this->Lexer->addExitPattern('\n', 'preformatted');
    }

    function getSort() {
        return 20;
    }
}
//-------------------------------------------------------------------
/**
 * Code block mode, closed by </code>.
 */
class Doku_Parser_Mode_code extends Doku_Parser_Mode {

    /**
     * Registers the entry pattern inside $mode. The pattern stops right
     * after "<code", so the rest of the opening tag is not consumed here.
     */
    function connectTo($mode) {
        $entry = '<code(?=.*</code>)';
        $this->Lexer->addEntryPattern($entry, $mode, 'code');
    }

    /**
     * Registers the closing tag as the exit pattern.
     */
    function postConnect() {
        $exit = '</code>';
        $this->Lexer->addExitPattern($exit, 'code');
    }

    function getSort() {
        return 200;
    }
}
//-------------------------------------------------------------------
/**
 * File block mode: <file>...</file>.
 */
class Doku_Parser_Mode_file extends Doku_Parser_Mode {

    /**
     * Registers the entry pattern inside $mode.
     */
    function connectTo($mode) {
        $entry = '<file>(?=.*</file>)';
        $this->Lexer->addEntryPattern($entry, $mode, 'file');
    }

    /**
     * Registers the closing tag as the exit pattern.
     */
    function postConnect() {
        $exit = '</file>';
        $this->Lexer->addExitPattern($exit, 'file');
    }

    function getSort() {
        return 210;
    }
}
//-------------------------------------------------------------------
/**
 * Quote mode: lines starting with one or more ">".
 */
class Doku_Parser_Mode_quote extends Doku_Parser_Mode {

    /**
     * Builds the list of modes allowed inside a quote.
     *
     * Declared as __construct because PHP 8 no longer treats a method
     * named after the class as a constructor.
     */
    function __construct() {
        global $PARSER_MODES;

        $this->allowedModes = array_merge (
                $PARSER_MODES['formatting'],
                $PARSER_MODES['substition'],
                $PARSER_MODES['disabled'],
                $PARSER_MODES['protected'] #XXX new
            );
        #$this->allowedModes[] = 'footnote';
        #$this->allowedModes[] = 'preformatted';
        #$this->allowedModes[] = 'unformatted';
    }

    /**
     * Legacy constructor name, kept for callers that invoke it explicitly.
     */
    function Doku_Parser_Mode_quote() {
        $this->__construct();
    }

    function connectTo($mode) {
        $this->Lexer->addEntryPattern('\n>{1,}',$mode,'quote');
    }

    function postConnect() {
        $this->Lexer->addPattern('\n>{1,}','quote');
        $this->Lexer->addExitPattern('\n','quote');
    }

    function getSort() {
        return 220;
    }
}
//-------------------------------------------------------------------
/**
 * Acronym mode: matches any of the configured acronyms when surrounded
 * by boundary characters.
 */
class Doku_Parser_Mode_acronym extends Doku_Parser_Mode {

    // A list
    var $acronyms = array();

    // Combined pattern, built in preConnect()
    var $pattern = '';

    /**
     * Stores the acronym list.
     *
     * Declared as __construct because PHP 8 no longer treats a method
     * named after the class as a constructor.
     *
     * @param array $acronyms list of acronym strings
     */
    function __construct($acronyms) {
        $this->acronyms = $acronyms;
    }

    /**
     * Legacy constructor name, kept for callers that invoke it explicitly.
     */
    function Doku_Parser_Mode_acronym($acronyms) {
        $this->__construct($acronyms);
    }

    /**
     * Builds the alternation pattern; does nothing for an empty list.
     */
    function preConnect() {
        if(!count($this->acronyms)) return;

        // ASCII control, punctuation and symbol ranges count as boundaries
        $bound = '[\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f]';
        $acronyms = array_map('Doku_Lexer_Escape',$this->acronyms);
        $this->pattern = '(?<=^|'.$bound.')(?:'.join('|',$acronyms).')(?='.$bound.')';
    }

    function connectTo($mode) {
        if(!count($this->acronyms)) return;

        if ( strlen($this->pattern) > 0 ) {
            $this->Lexer->addSpecialPattern($this->pattern,$mode,'acronym');
        }
    }

    function getSort() {
        return 240;
    }
}
//-------------------------------------------------------------------
/**
 * Smiley mode: matches any of the configured smiley strings.
 */
class Doku_Parser_Mode_smiley extends Doku_Parser_Mode {

    // A list
    var $smileys = array();

    // Combined pattern, built once in preConnect()
    var $pattern = '';

    /**
     * Stores the smiley list.
     *
     * Declared as __construct because PHP 8 no longer treats a method
     * named after the class as a constructor.
     *
     * @param array $smileys list of smiley strings
     */
    function __construct($smileys) {
        $this->smileys = $smileys;
    }

    /**
     * Legacy constructor name, kept for callers that invoke it explicitly.
     */
    function Doku_Parser_Mode_smiley($smileys) {
        $this->__construct($smileys);
    }

    /**
     * Builds the alternation of escaped smileys; skipped when the list is
     * empty or the pattern was already built.
     */
    function preConnect() {
        if(!count($this->smileys) || $this->pattern != '') return;

        $sep = '';
        foreach ( $this->smileys as $smiley ) {
            $this->pattern .= $sep.Doku_Lexer_Escape($smiley);
            $sep = '|';
        }
    }

    function connectTo($mode) {
        if(!count($this->smileys)) return;

        if ( strlen($this->pattern) > 0 ) {
            $this->Lexer->addSpecialPattern($this->pattern,$mode,'smiley');
        }
    }

    function getSort() {
        return 230;
    }
}
//-------------------------------------------------------------------
/**
 * Word block mode: matches any of the configured bad words,
 * case-insensitively and only at word boundaries.
 */
class Doku_Parser_Mode_wordblock extends Doku_Parser_Mode {

    // A list
    var $badwords = array();

    // Combined pattern, built once in preConnect()
    var $pattern = '';

    /**
     * Stores the bad word list.
     *
     * Declared as __construct because PHP 8 no longer treats a method
     * named after the class as a constructor.
     *
     * @param array $badwords list of words to match
     */
    function __construct($badwords) {
        $this->badwords = $badwords;
    }

    /**
     * Legacy constructor name, kept for callers that invoke it explicitly.
     */
    function Doku_Parser_Mode_wordblock($badwords) {
        $this->__construct($badwords);
    }

    /**
     * Builds the alternation pattern; skipped when the list is empty or
     * the pattern was already built.
     */
    function preConnect() {

        if ( count($this->badwords) == 0 || $this->pattern != '') {
            return;
        }

        $sep = '';
        foreach ( $this->badwords as $badword ) {
            // (?i)...(?-i) limits case-insensitivity to this one word
            $this->pattern .= $sep.'(?<=\b)(?i)'.Doku_Lexer_Escape($badword).'(?-i)(?=\b)';
            $sep = '|';
        }

    }

    function connectTo($mode) {
        if ( strlen($this->pattern) > 0 ) {
            $this->Lexer->addSpecialPattern($this->pattern,$mode,'wordblock');
        }
    }

    function getSort() {
        return 250;
    }
}
//-------------------------------------------------------------------
/**
 * Entity mode: matches any of the configured entity strings.
 */
class Doku_Parser_Mode_entity extends Doku_Parser_Mode {

    // A list
    var $entities = array();

    // Combined pattern, built once in preConnect()
    var $pattern = '';

    /**
     * Stores the entity list.
     *
     * Declared as __construct because PHP 8 no longer treats a method
     * named after the class as a constructor.
     *
     * @param array $entities list of entity strings
     */
    function __construct($entities) {
        $this->entities = $entities;
    }

    /**
     * Legacy constructor name, kept for callers that invoke it explicitly.
     */
    function Doku_Parser_Mode_entity($entities) {
        $this->__construct($entities);
    }

    /**
     * Builds the alternation of escaped entities; skipped when the list is
     * empty or the pattern was already built.
     */
    function preConnect() {
        if(!count($this->entities) || $this->pattern != '') return;

        $sep = '';
        foreach ( $this->entities as $entity ) {
            $this->pattern .= $sep.Doku_Lexer_Escape($entity);
            $sep = '|';
        }
    }

    function connectTo($mode) {
        if(!count($this->entities)) return;

        if ( strlen($this->pattern) > 0 ) {
            $this->Lexer->addSpecialPattern($this->pattern,$mode,'entity');
        }
    }

    function getSort() {
        return 260;
    }
}
//-------------------------------------------------------------------
/**
 * Implements the 640x480 replacement
 */
class Doku_Parser_Mode_multiplyentity extends Doku_Parser_Mode {

    /**
     * Registers the "digits x digits" pattern inside $mode.
     */
    function connectTo($mode) {
        // digits, an upper- or lower-case x, digits; bounded by \b
        $dimensions = '(?<=\b)\d+[xX]\d+(?=\b)';
        $this->Lexer->addSpecialPattern($dimensions, $mode, 'multiplyentity');
    }

    function getSort() {
        return 270;
    }
}
//-------------------------------------------------------------------
/**
 * Typographic quote mode.
 */
class Doku_Parser_Mode_quotes extends Doku_Parser_Mode {

    /**
     * Registers the quote patterns inside $mode.
     *
     * The single quote patterns are only registered when
     * $conf['typography'] equals 2; the double quote patterns always are.
     */
    function connectTo($mode) {
        global $conf;

        $ws   =  '\s/\#~:+=&%@\-\x28\x29\]\[{}><"\'';   // whitespace
        $punc =  ';,\.?!';

        $withSingleQuotes = ($conf['typography'] == 2);

        if ( $withSingleQuotes ) {
            $lexer = $this->Lexer;
            $lexer->addSpecialPattern(
                "(?<=^|[$ws])'(?=[^$ws$punc])", $mode, 'singlequoteopening'
            );
            $lexer->addSpecialPattern(
                "(?<=^|[^$ws]|[$punc])'(?=$|[$ws$punc])", $mode, 'singlequoteclosing'
            );
            $lexer->addSpecialPattern(
                "(?<=^|[^$ws$punc])'(?=$|[^$ws$punc])", $mode, 'apostrophe'
            );
        }

        $this->Lexer->addSpecialPattern(
            "(?<=^|[$ws])\"(?=[^$ws$punc])", $mode, 'doublequoteopening'
        );
        // every other double quote is registered as a closing one
        $this->Lexer->addSpecialPattern(
            "\"", $mode, 'doublequoteclosing'
        );
    }

    function getSort() {
        return 280;
    }
}
//-------------------------------------------------------------------
/**
 * CamelCase link mode.
 */
class Doku_Parser_Mode_camelcaselink extends Doku_Parser_Mode {

    /**
     * Registers the CamelCase word pattern inside $mode.
     */
    function connectTo($mode) {
        // upper-case run, lower-case run, another upper-case letter, then letters
        $camelCase = '\b[A-Z]+[a-z]+[A-Z][A-Za-z]*\b';
        $this->Lexer->addSpecialPattern($camelCase, $mode, 'camelcaselink');
    }

    function getSort() {
        return 290;
    }
}
//-------------------------------------------------------------------
/**
 * Internal link mode: text enclosed in double square brackets.
 */
class Doku_Parser_Mode_internallink extends Doku_Parser_Mode {

    /**
     * Registers the bracket pattern inside $mode.
     */
    function connectTo($mode) {
        // Word boundaries?
        $bracketed = "\[\[.+?\]\]";
        $this->Lexer->addSpecialPattern($bracketed, $mode, 'internallink');
    }

    function getSort() {
        return 300;
    }
}
//-------------------------------------------------------------------
/**
 * Media mode: text enclosed in double curly braces.
 */
class Doku_Parser_Mode_media extends Doku_Parser_Mode {

    /**
     * Registers the curly brace pattern inside $mode.
     */
    function connectTo($mode) {
        // Word boundaries?
        $braced = "\{\{[^\}]+\}\}";
        $this->Lexer->addSpecialPattern($braced, $mode, 'media');
    }

    function getSort() {
        return 320;
    }
}
//-------------------------------------------------------------------
/**
 * RSS mode: double curly braces whose content starts with "rss>".
 */
class Doku_Parser_Mode_rss extends Doku_Parser_Mode {

    /**
     * Registers the rss pattern inside $mode.
     */
    function connectTo($mode) {
        $feed = "\{\{rss>[^\}]+\}\}";
        $this->Lexer->addSpecialPattern($feed, $mode, 'rss');
    }

    function getSort() {
        return 310;
    }
}
//-------------------------------------------------------------------
/**
 * External link mode: scheme://... URLs plus bare www. / ftp. hosts.
 */
class Doku_Parser_Mode_externallink extends Doku_Parser_Mode {

    // URL schemes that get a scheme:// pattern
    var $schemes = array('http','https','telnet','gopher','wais','ftp','ed2k','irc','ldap');

    // Patterns built once in preConnect()
    var $patterns = array();

    /**
     * Builds one pattern per scheme plus the two host-only patterns.
     * Skipped when the patterns were already built.
     */
    function preConnect() {
        if ( count($this->patterns) ) {
            return;
        }

        $ltrs = '\w';
        $gunk = '/\#~:.?+=&%@!\-';
        $punc = '.:?\-;,';
        $host = $ltrs.$punc;
        $any  = $ltrs.$gunk.$punc;

        // shared tail: the match ends before trailing punctuation
        // that is followed by a non-URL character
        $tail = '(?=['.$punc.']*[^'.$any.'])';

        foreach ( $this->schemes as $scheme ) {
            $this->patterns[] = '\b(?i)'.$scheme.'(?-i)://['.$any.']+?'.$tail;
        }

        $hostPart = '\.['.$host.']+?\.['.$host.']+?['.$any.']+?';
        $this->patterns[] = '\b(?i)www?(?-i)'.$hostPart.$tail;
        $this->patterns[] = '\b(?i)ftp?(?-i)'.$hostPart.$tail;
    }

    /**
     * Registers every built pattern inside $mode.
     */
    function connectTo($mode) {
        foreach ( $this->patterns as $linkPattern ) {
            $this->Lexer->addSpecialPattern($linkPattern, $mode, 'externallink');
        }
    }

    function getSort() {
        return 330;
    }
}
//-------------------------------------------------------------------
/**
 * File link mode: file:// URLs.
 */
class Doku_Parser_Mode_filelink extends Doku_Parser_Mode {

    // Pattern built in preConnect()
    var $pattern;

    /**
     * Builds the file:// pattern.
     */
    function preConnect() {
        $ltrs = '\w';
        $gunk = '/\#~:.?+=&%@!\-';
        $punc = '.:?\-;,';
        $any  = $ltrs.$gunk.$punc;

        // unlike a lookahead, the trailing [^...] consumes the character
        // that follows the link
        $prefix = '\b(?i)file(?-i)://';
        $this->pattern = $prefix.'['.$any.']+?['.$punc.']*[^'.$any.']';
    }

    /**
     * Registers the built pattern inside $mode.
     */
    function connectTo($mode) {
        $this->Lexer->addSpecialPattern($this->pattern, $mode, 'filelink');
    }

    function getSort() {
        return 360;
    }
}
//-------------------------------------------------------------------
/**
 * Windows share link mode: \\server\share style paths.
 */
class Doku_Parser_Mode_windowssharelink extends Doku_Parser_Mode {

    // Pattern built in preConnect()
    var $pattern;

    /**
     * Builds the share pattern: two backslashes, a server name, then one
     * or more backslash-separated segments of word characters or "$".
     */
    function preConnect() {
        $sharePattern = "\\\\\\\\\w+?(?:\\\\[\w$]+)+";
        $this->pattern = $sharePattern;
    }

    /**
     * Registers the built pattern inside $mode.
     */
    function connectTo($mode) {
        $this->Lexer->addSpecialPattern($this->pattern, $mode, 'windowssharelink');
    }

    function getSort() {
        return 350;
    }
}
//-------------------------------------------------------------------
/**
 * Email link mode: an address wrapped in angle brackets.
 */
class Doku_Parser_Mode_emaillink extends Doku_Parser_Mode {

    /**
     * Registers the email pattern inside $mode.
     */
    function connectTo($mode) {
        // pattern below is defined in inc/mail.php
        $address = '<'.PREG_PATTERN_VALID_EMAIL.'>';
        $this->Lexer->addSpecialPattern($address, $mode, 'emaillink');
    }

    function getSort() {
        return 340;
    }
}
//Setup VIM: ex: et ts=4 enc=utf-8 :