Commit 6e97b162 authored by Kasper Skårhøj's avatar Kasper Skårhøj
Browse files

See Changelog: Updates to Indexed Search (mainly), t3lib_cs (bug),...

See Changelog: Updates to Indexed Search (mainly), t3lib_cs (bug), t3lib_tcemain (bug), right-click Context menu, TS option "USERUID_substToken"


git-svn-id: https://svn.typo3.org/TYPO3v4/Core/trunk@515 709f56b5-9817-0410-a4d7-c38de5d9e867
parent 380e2940
2004-11-28 Kasper Skårhøj,,, <kasper@typo3.com>
* Main feature: Lots of updates on Indexed Search extension. The changes are mainly in the indexer, not the search plugin. The work is NOT FINISHED yet and don't update a production site with this work! One main thing to be aware of is that all indexing is done internally as utf-8. You should flush your old index tables before running the new one.
* Fixed bug in t3lib_cs::utf8_strtrunc() (or so)... do'h Martin!
* Fixed bug (spelling mistake) in the hook "processDatamap_preProcessFieldArray"
* Fixed order of configuration forms in Extension Manager
* Added timezone option in TYPO3_CONF_VARS array
* Added right-click feature on context menus. Can be disabled with TYPO3_CONF_VARS if you don't like it. And a rightclick on the page/folder _title_ will also activate the menu! Theoretically it is not valid XHTML. Works in Mozilla and MSIE. Thanks Wolfgang!
* Added TS option "USERUID_substToken"
2004-11-26 Michael Stucki <michael@typo3.org>
* Fixed bug #0000527: Title tag is added even if the page title was empty. Thanks to Hannes Schmid.
......
......@@ -135,6 +135,7 @@ TCEforms:
- "readonly" flag, or user group dependant. See "Message-Id: <200210241441.50295.r.fritz@colorcube.de>"
- ? type:
- "Inverse relations"/"Foreign relations": A "Pseudo field", which lists records REFERING TO this record (foreign relations, eg. many small price-records belonging to ONE shop-article). Possibly this could also EDIT those references (attaching/adding new, removing old, no manual ordering though! - This is what RENE is doing (Message-Id: <E17LO4D-0002hj-00@cube.colorcube>)
- Concealed password fields, support for two fields with the same password being submitted... (JavaScript evaluated?)
- BUGS:
- Ask to save record when you want to add a category with the "Plus" icon.
- CHECK: ###STORAGE_PID### incorrectly calculated?
......@@ -747,6 +748,17 @@ General Crawler ("crawler"):
- From session log we can read out the status-arrays of the threads and display in backend (handler shows as it likes.)
- Start / stop crawler session
For indexing (and caching and publishing!) we basically need to configure traversal through configurable parameters like:
- id
- type
- L (sys_language)
- MP (?)
- Simulate user logins (eg. sending "no-login", "user:kasper, password=blabla", "user:homer, password=blabla2")
- Additional parameter ranges per page (eg. "tx_myext[var1] : 1-7 AND tx_myext[var1] : 0,1 ")
- Command parameter like "&DO_INDEX=1" or "&RECACHE=1" (or both!)
- Use CLI script for crawler?
__________________________________________-
CACHE MANAGEMENT EXTENSION
......
......@@ -519,7 +519,7 @@ class t3lib_BEfunc {
function BEgetRootLine($uid,$clause='') {
$loopCheck = 100;
$theRowArray = Array();
$output=Array();
$output = Array();
while ($uid!=0 && $loopCheck>0) {
$loopCheck--;
$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
......@@ -540,7 +540,7 @@ class t3lib_BEfunc {
break;
}
}
if ($uid==0) {$theRowArray[]=Array('uid'=>0,'title'=>'');}
if ($uid==0) {$theRowArray[] = Array('uid'=>0,'title'=>'');}
if (is_array($theRowArray)) {
reset($theRowArray);
$c=count($theRowArray);
......
......@@ -1680,7 +1680,7 @@ class t3lib_cs {
if ($bc+$i > $len) return substr($str,0,$i);
// fallthru: multibyte char fits into length
}
return substr($str,$len);
return substr($str,0,$len);
}
/**
......
......@@ -117,6 +117,24 @@ class t3lib_folderTree extends t3lib_treeView {
return $theFolderIcon;
}
/**
 * Wraps the title of a folder item in an <a> tag.
 * The link gets an onclick handler calling the JavaScript function jumpTo() and,
 * if $TYPO3_CONF_VARS['BE']['useOnContextMenuHandler'] is set, an additional
 * "oncontextmenu" attribute built from wrapClickMenuOnIcon().
 *
 * @param	string		Title string
 * @param	array		Item record (the "path" key is passed on to the click menu)
 * @param	integer		Bank pointer (which mount point number)
 * @return	string		The title wrapped in <a>...</a>
 * @access private
 */
function wrapTitle($title,$row,$bank=0) {
		// Assemble the JavaScript for the left-click action; the DOM id is prefix + item id + "_" + bank.
	$jumpToParam = $this->getJumpToParam($row);
	$domId = $this->domIdPrefix.$this->getId($row).'_'.$bank;
	$jumpToJS = 'return jumpTo(\''.$jumpToParam.'\',this,\''.$domId.'\');';

		// Collect the attributes of the a-tag:
	$attributes = ' href="#" onclick="'.htmlspecialchars($jumpToJS).'"';

		// Optionally add the context menu handler for right-clicks:
	if ($GLOBALS['TYPO3_CONF_VARS']['BE']['useOnContextMenuHandler']) {
		$contextMenuJS = $GLOBALS['TBE_TEMPLATE']->wrapClickMenuOnIcon('',$row['path'],'',0,'','',TRUE);
		$attributes.= ' oncontextmenu="'.htmlspecialchars($contextMenuJS).'"';
	}

	return '<a'.$attributes.'>'.$title.'</a>';
}
/**
* Returns the id from the record - for folders, this is an md5 hash.
*
......
......@@ -477,7 +477,7 @@ class t3lib_TCEmain {
// Hook: processDatamap_preProcessIncomingFieldArray
foreach($hookObjectsArr as $hookObj) {
if (method_exists($hookObj, 'processDatamap_preProcessIncomingFieldArray')) {
if (method_exists($hookObj, 'processDatamap_preProcessFieldArray')) {
$hookObj->processDatamap_preProcessFieldArray($incomingFieldArray, $table, $id, $this);
}
}
......
......@@ -175,6 +175,7 @@ class t3lib_tsparser_ext extends t3lib_TStemplate {
var $ext_printAll=0;
var $ext_CEformName="forms[0]";
var $ext_defaultOnlineResourceFlag=0;
var $doNotSortCategoriesBeforeMakingForm = FALSE;
// ts analyzer
var $templateTitles=array();
......@@ -1086,7 +1087,7 @@ class t3lib_tsparser_ext extends t3lib_TStemplate {
$help=$this->helpConfig;
$this->rArr=explode(",",$this->setup["resources"].",".implode($this->dirResources,","));
asort($this->categories[$category]);
if (!$this->doNotSortCategoriesBeforeMakingForm) asort($this->categories[$category]);
while(list($name,$type)=each($this->categories[$category])) {
$params = $theConstants[$name];
if (is_array($params)) {
......
......@@ -75,6 +75,7 @@ $TYPO3_CONF_VARS = Array(
'multiplyDBfieldSize' => 1, // Double: 1-5: Amount used to multiply the DB field size when the install tool is evaluating the database size (eg. "2.5"). This is useful if you want to expand the size of fields for utf-8 etc. For western european sites using utf-8 the need should not be for more than twice the normal single-byte size (2) and for chinese / asian languages 3 should suffice.
'setMemoryLimit' => 0, // Integer, memory_limit in MB: If more than 16, TYPO3 will try to use ini_set() to set the memory limit of PHP to the value. This works only if the function ini_set() is not disabled by your sysadmin.
'displayErrors' => 0, // Integer, -1,0,1. 0=Do not display any PHP error messages. 1=Display error messages. -1=Default setting. With this option, you can override the PHP setting "display_errors". It is suggested that you leave this unchanged but enable the "error_log" option in php.ini instead.
'serverTimeZone' => 1 // Integer, GMT offset of the server's time (from time()). Default is "1" which is "GMT+1" (central european time). This value can be used in extensions that are GMT aware and want to convert times to/from other timezones.
),
'EXT' => Array ( // Options related to the Extension Management
'noEdit' => 1, // Boolean: If set, the Extension Manager does NOT allow extension files to be edited! (Otherwise both local and global extensions can be edited.)
......@@ -144,6 +145,7 @@ $TYPO3_CONF_VARS = Array(
'customPermOptions' => array(), // Array with sets of custom permission options. Syntax is; 'key' => array('header' => 'header string, language splitted', 'items' => array('key' => array('label, language splitted', 'icon reference', 'Description text, language splitted'))). Keys cannot contain ":|," characters.
'fileDenyPattern' => '\.php$|\.php.$', // A regular expression that - if it matches a filename - will deny the file upload/rename or whatever in the webspace. Matching with eregi() (case-insensitive).
'interfaces' => 'backend', // This determines which interface options is available in the login prompt and in which order (All options: ",backend,frontend")
'useOnContextMenuHandler' => 1, // Boolean. If set, the context menus (clickmenus) in the backend are activated on right-click - although this is not a XHTML attribute!
'loginLabels' => 'Username|Password|Interface|Log In|Log Out|Backend,Front End|Administration Login on ###SITENAME###|(Note: Cookies and JavaScript must be enabled!)|Important Messages:|Your login attempt did not succeed. Make sure to spell your username and password correctly, including upper/lowercase characters.', // Language labels of the login prompt.
'loginNews' => array(), // In this array you can define news-items for the login screen. To this array, add arrays with assoc keys 'date', 'header', 'content' (HTML content) and for those appropriate value pairs
'XCLASS' => Array(), // See 'Inside TYPO3' document for more information.
......@@ -360,4 +362,4 @@ unset($LOCAL_LANG);
// Setting some global vars:
$EXEC_TIME = time(); // $EXEC_TIME is set so that the rest of the script has a common value for the script execution time
$SIM_EXEC_TIME = $EXEC_TIME; // $SIM_EXEC_TIME is set to $EXEC_TIME but can be altered later in the script if we want to simulate another execution-time when selecting from eg. a database
?>
\ No newline at end of file
?>
......@@ -139,6 +139,24 @@ class localPageTree extends t3lib_browseTree {
}
return $str;
}
/**
 * Wraps the title of a page item in an <a> tag.
 * The link gets an onclick handler calling the JavaScript function jumpTo() and,
 * if $TYPO3_CONF_VARS['BE']['useOnContextMenuHandler'] is set, an additional
 * "oncontextmenu" attribute built from wrapClickMenuOnIcon() for the "pages" table.
 *
 * @param	string		Title string
 * @param	array		Item record (the "uid" key is passed on to the click menu)
 * @param	integer		Bank pointer (which mount point number)
 * @return	string		The title wrapped in <a>...</a>
 * @access private
 */
function wrapTitle($title,$row,$bank=0) {
		// Assemble the JavaScript for the left-click action; the DOM id is prefix + item id + "_" + bank.
	$jumpToParam = $this->getJumpToParam($row);
	$domId = $this->domIdPrefix.$this->getId($row).'_'.$bank;
	$jumpToJS = 'return jumpTo(\''.$jumpToParam.'\',this,\''.$domId.'\');';

		// Collect the attributes of the a-tag:
	$attributes = ' href="#" onclick="'.htmlspecialchars($jumpToJS).'"';

		// Optionally add the context menu handler for right-clicks.
		// Note: the "&bank=" parameter is taken from $this->bank, not from the $bank argument.
	if ($GLOBALS['TYPO3_CONF_VARS']['BE']['useOnContextMenuHandler']) {
		$contextMenuJS = $GLOBALS['TBE_TEMPLATE']->wrapClickMenuOnIcon('','pages',$row['uid'],0,'&bank='.$this->bank,'',TRUE);
		$attributes.= ' oncontextmenu="'.htmlspecialchars($contextMenuJS).'"';
	}

	return '<a'.$attributes.'>'.$title.'</a>';
}
}
......
......@@ -3670,6 +3670,7 @@ EXTENSION KEYS:
// Load tsStyleConfig class and parse configuration template:
$tsStyleConfig = t3lib_div::makeInstance('t3lib_tsStyleConfig');
$tsStyleConfig->doNotSortCategoriesBeforeMakingForm = TRUE;
$theConstants = $tsStyleConfig->ext_initTSstyleConfig(
t3lib_div::getUrl($absPath.'ext_conf_template.txt'),
$relPath,
......
......@@ -249,11 +249,11 @@ if(t3lib_extMgm::isLoaded('obts')) {
*/
class tslib_cObj {
var $align = Array ('center', 'right', 'left');
var $caseConvStrings = array(
'áéúíâêûôîæøåäöü',
'ÁÉÚÍÄËÜÖÏÆØÅÄÖÜ'
/* var $caseConvStrings = array(
'�������',
'��������
);
*/
/**
* Holds ImageMagick parameters and extensions used for compression
*
......@@ -3471,7 +3471,7 @@ class tslib_cObj {
* @param array TypoScript configuration.
* @return string Return string
* @author Thomas Bley (all from moregroupware cvs code / readmessage.inc.php, published under gpl by Thomas)
* @author Kasper Skårhøj
* @author Kasper Skårhøj
*/
function removeBadHTML($text, $conf) {
......@@ -3749,7 +3749,7 @@ class tslib_cObj {
}
}
$locationData = $GLOBALS['TSFE']->id.':'.$this->currentRecord;
$rec='&locationData='.$locationData;
$rec='&locationData='.rawurlencode($locationData);
$hArr = array(
$jumpUrl,
$locationData,
......@@ -6999,7 +6999,7 @@ class tslib_controlTable {
var $cMt = 0; // content margin, top
var $cMb = 1; // content margin, bottom
var $contentW = 0; // sætter en lille gif-spacer nedest i content-framen
var $contentW = 0; // sætter en lille gif-spacer nedest i content-framen
var $tableParams = 'border="0" cellspacing="0" cellpadding="0"';
......@@ -7032,7 +7032,7 @@ class tslib_controlTable {
if ($this->bm) $rows++;
if ($this->content) $rows++;
if ($this->contentW) $rows++;
if (!$rows && $cols) $rows=1; // hvis der slet ingen rækker er sat i midten men der trods alt er nogle kolonner
if (!$rows && $cols) $rows=1; // hvis der slet ingen rækker er sat i midten men der trods alt er nogle kolonner
if ($rows&&$cols) {
$res = chr(10).'<table '.$this->tableParams.'>';
......@@ -7064,7 +7064,7 @@ class tslib_controlTable {
if ($this->rm) { $res.='<td'.$rowspan.' '.$this->rmTDparams.'>'.$this->rm.'</td>'; }
$res.= '</tr>';
// flere end de 2 rækker
// flere end de 2 rækker
$mCount = count($middle);
for($a=1;$a<$mCount;$a++) {
$res.='<tr>'.$middle[$a].'</tr>';
......
......@@ -362,7 +362,7 @@
$this->no_cache = $no_cache ? 1 : 0;
$this->cHash = $cHash;
$this->jumpurl = $jumpurl;
$this->MP = $this->TYPO3_CONF_VARS['FE']['enable_mount_pids'] ? $MP : '';
$this->MP = $this->TYPO3_CONF_VARS['FE']['enable_mount_pids'] ? (string)$MP : '';
$this->RDCT = $RDCT;
$this->clientInfo = t3lib_div::clientInfo();
$this->uniqueString=md5(microtime());
......@@ -1322,7 +1322,7 @@
$GLOBALS['TT']->pull();
$GLOBALS['TT']->push('Cache Row','');
if ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
$this->config = unserialize($row['cache_data']); // Fetches the lowlevel config stored with the cached data
$this->config = (array)unserialize($row['cache_data']); // Fetches the lowlevel config stored with the cached data
$this->content = $row['HTML']; // Getting the content
$this->cacheContentFlag=1; // Setting flag, so we know, that some cached content is gotten.
......@@ -1351,8 +1351,8 @@
'all' => $this->all,
'id' => intval($this->id),
'type' => intval($this->type),
'gr_list' => $this->gr_list,
'MP' => $this->MP,
'gr_list' => (string)$this->gr_list,
'MP' => (string)$this->MP,
'cHash' => $this->cHash_array
)
);
......@@ -2389,8 +2389,16 @@ if (version == "n3") {
function processOutput() {
// Substitutes username mark with the username
if ($this->fe_user->user['uid']) {
// User name:
$token = trim($this->config['config']['USERNAME_substToken']);
$this->content = str_replace($token ? $token : '<!--###USERNAME###-->',$this->fe_user->user['username'],$this->content);
// User uid (if configured):
$token = trim($this->config['config']['USERUID_substToken']);
if ($token) {
$this->content = str_replace($token, $this->fe_user->user['uid'], $this->content);
}
}
// Substitutes get_URL_ID in case of GET-fallback
if ($this->getMethodUrlIdToken) {
......
This diff is collapsed.
<?php
/***************************************************************
* Copyright notice
*
* (c) 2001-2004 Kasper Skaarhoj (kasperYYYY@typo3.com)
* All rights reserved
*
* This script is part of the TYPO3 project. The TYPO3 project is
* free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* The GNU General Public License can be found at
* http://www.gnu.org/copyleft/gpl.html.
* A copy is found in the textfile GPL.txt and important notices to the license
* from the author is found in LICENSE.txt distributed with these scripts.
*
*
* This script is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* This copyright notice MUST APPEAR in all copies of the script!
***************************************************************/
/**
* External standard parsers for indexed_search
*
* @author Kasper Skårhøj <kasperYYYY@typo3.com>
* @coauthor Olivier Simah <noname_paris@yahoo.fr>
*/
/**
* [CLASS/FUNCTION INDEX of SCRIPT]
*
*
*
* 73: class tx_indexed_search_extparse
* 90: function initParser($extension)
* 215: function initBackend($extension)
*
* SECTION: Reading documents (for parsing)
* 261: function readFileContent($ext,$absFile,$cPKey)
* 441: function fileContentParts($ext,$absFile)
* 480: function splitPdfInfo($pdfInfoArray)
* 499: function removeEndJunk($string)
*
* SECTION: Backend analyzer
* 526: function getIcon($extension)
*
* TOTAL FUNCTIONS: 7
* (This index is automatically created/updated by the extension "extdeveval")
*
*/
/**
* External standard parsers for indexed_search
* MUST RETURN utf-8 content!
*
* @author Kasper Skaarhoj <kasperYYYY@typo3.com>
* @package TYPO3
* @subpackage tx_indexedsearch
*/
class tx_indexed_search_extparse {
// This value is also overridden from config.
var $pdf_mode = -20; // zero: whole PDF file is indexed in one. positive value: Indicates number of pages at a time, eg. "5" would means 1-5,6-10,.... Negative integer would indicate (abs value) number of groups. Eg "3" groups of 10 pages would be 1-4,5-8,9-10
// This array is configured in initialization:
var $app = array();
var $pObj; // Reference to parent object (indexer class)
/**
 * Initialize external parser for parsing content.
 * Reads the serialized "indexed_search" extension configuration, checks that the
 * external tool(s) needed for the given file extension are configured (and, unless
 * PHP runs in safe_mode, that the binaries exist) and stores their paths in $this->app.
 * On success the extension is flagged in $this->supportedExtensions.
 *
 * @param	string		File extension
 * @return	boolean		Returns true if extension is supported/enabled, otherwise false.
 */
function initParser($extension) {

		// Read indexer-config; fall back to an empty configuration if it cannot be unserialized:
	$indexerConfig = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']);
	if (!is_array($indexerConfig)) {
		$indexerConfig = array();
	}

		// If windows, apply extension to tool name:
	$exe = (TYPO3_OS == 'WIN') ? '.exe' : '';
	$extOK = FALSE;

		// PCRE pattern matching exactly one slash at the very end of a configured path.
		// (Replaces ereg_replace(), which no longer exists in PHP 7+.)
	$trailingSlash = '/\/$/D';

		// Ignore extensions
	$ignoreExtensions = t3lib_div::trimExplode(',', strtolower($indexerConfig['ignoreExtensions']),1);
	if (in_array($extension, $ignoreExtensions)) {
		$this->pObj->log_setTSlogMessage('Extension "'.$extension.'" was set to be ignored.',1);
		return FALSE;
	}

		// Switch on file extension:
	switch($extension) {
		case 'pdf':
				// PDF
			if ($indexerConfig['pdftools']) {
				$pdfPath = preg_replace($trailingSlash,'',$indexerConfig['pdftools']).'/';
				if (ini_get('safe_mode') || (@is_file($pdfPath.'pdftotext'.$exe) && @is_file($pdfPath.'pdfinfo'.$exe))) {
					$this->app['pdfinfo'] = $pdfPath.'pdfinfo'.$exe;
					$this->app['pdftotext'] = $pdfPath.'pdftotext'.$exe;
						// PDF mode:
					$this->pdf_mode = t3lib_div::intInRange($indexerConfig['pdf_mode'],-100,100);
					$extOK = TRUE;
				} else $this->pObj->log_setTSlogMessage("PDF tools was not found in paths '".$pdfPath."pdftotext' and/or '".$pdfPath."pdfinfo'",3);
			} else $this->pObj->log_setTSlogMessage('PDF tools disabled',1);
		break;
		case 'doc':
				// Catdoc
			if ($indexerConfig['catdoc']) {
				$catdocPath = preg_replace($trailingSlash,'',$indexerConfig['catdoc']).'/';
				if (ini_get('safe_mode') || @is_file($catdocPath.'catdoc'.$exe)) {
					$this->app['catdoc'] = $catdocPath.'catdoc'.$exe;
					$extOK = TRUE;
				} else $this->pObj->log_setTSlogMessage("'catdoc' tool for reading Word-files was not found in paths '".$catdocPath."catdoc'",3);
			} else $this->pObj->log_setTSlogMessage('catdoc tools (Word-files) disabled',1);
		break;
		case 'pps':		// MS PowerPoint(?)
		case 'ppt':		// MS PowerPoint
				// ppthtml
			if ($indexerConfig['ppthtml']) {
				$ppthtmlPath = preg_replace($trailingSlash,'',$indexerConfig['ppthtml']).'/';
				if (ini_get('safe_mode') || @is_file($ppthtmlPath.'ppthtml'.$exe)) {
					$this->app['ppthtml'] = $ppthtmlPath.'ppthtml'.$exe;
					$extOK = TRUE;
				} else $this->pObj->log_setTSlogMessage("'ppthtml' tool for reading Powerpoint-files was not found in paths '".$ppthtmlPath."ppthtml'",3);
			} else $this->pObj->log_setTSlogMessage('ppthtml tools (Powerpoint-files) disabled',1);
		break;
		case 'xls':		// MS Excel
				// Xlhtml
			if ($indexerConfig['xlhtml']) {
				$xlhtmlPath = preg_replace($trailingSlash,'',$indexerConfig['xlhtml']).'/';
				if (ini_get('safe_mode') || @is_file($xlhtmlPath.'xlhtml'.$exe)) {
					$this->app['xlhtml'] = $xlhtmlPath.'xlhtml'.$exe;
					$extOK = TRUE;
				} else $this->pObj->log_setTSlogMessage("'xlhtml' tool for reading Excel-files was not found in paths '".$xlhtmlPath."xlhtml'",3);
			} else $this->pObj->log_setTSlogMessage('xlhtml tools (Excel-files) disabled',1);
		break;
		case 'sxc':		// Open Office Calc.
		case 'sxi':		// Open Office Impress
		case 'sxw':		// Open Office Writer
				// ooo_extract.rb can be found at: http://www.math.umd.edu/~dcarrera/openoffice/misc/tools/ooo_extract.html
				// I had to run this on debian before I could run the ooo_extract.rb script:
				// apt-get install libzlib-ruby1.8
				// apt-get install librexml-ruby1.8
				// ruby + ooo_extract
			if ($indexerConfig['nativeOOMethod']) {
				if (t3lib_extMgm::isLoaded('libunzipped')) {
					$this->app['nativeOOMethod'] = TRUE;
					$extOK = TRUE;
					$this->pObj->log_setTSlogMessage('Using "libunzipped" for extraction of Open Office files, "'.$extension.'".',1);
				} else $this->pObj->log_setTSlogMessage('The extension "libunzipped" was not loaded (for extraction of Open Office files, "'.$extension.'")',2);
			} else {
				if ($indexerConfig['ruby']) {
					$rubyPath = preg_replace($trailingSlash,'',$indexerConfig['ruby']).'/';
					$oooExPath = preg_replace($trailingSlash,'',$indexerConfig['OOoExtract']).'/';
					if (ini_get('safe_mode') || (@is_file($rubyPath.'ruby'.$exe) && @is_file($oooExPath.'ooo_extract.rb'))) {
						$this->app['ruby'] = $rubyPath.'ruby'.$exe;
						$this->app['OOo'] = $oooExPath.'ooo_extract.rb';
						$extOK = TRUE;
					} else $this->pObj->log_setTSlogMessage("'Ruby and OOo_extract' tools for reading OOo documents were not found in paths '".$rubyPath."ruby' OR '".$oooExPath."ooo_extract.rb'",3);
				} else $this->pObj->log_setTSlogMessage('Ruby & OOo_extract tools (OpenOffice-files) disabled',1);
			}
		break;
		case 'rtf':
				// unrtf
			if ($indexerConfig['unrtf']) {
				$unrtfPath = preg_replace($trailingSlash,'',$indexerConfig['unrtf']).'/';
				if (ini_get('safe_mode') || @is_file($unrtfPath.'unrtf'.$exe)) {
					$this->app['unrtf'] = $unrtfPath.'unrtf'.$exe;
					$extOK = TRUE;
				} else $this->pObj->log_setTSlogMessage("'unrtf' tool for reading RTF-files was not found in paths '".$unrtfPath."unrtf'",3);
			} else $this->pObj->log_setTSlogMessage('unrtf tool (RTF-files) disabled',1);
		break;
		case 'txt':		// Raw text
		case 'html':	// PHP strip-tags()
		case 'htm':		// PHP strip-tags()
		case 'csv':		// Raw text
		case 'xml':		// PHP strip-tags()
		case 'jpg':		// PHP EXIF
		case 'jpeg':	// PHP EXIF
		case 'tif':		// PHP EXIF
			$extOK = TRUE;
		break;
	}

		// If extension was OK, register it as supported:
	if ($extOK) {
		$this->supportedExtensions[$extension] = TRUE;
		return TRUE;
	}

		// Unknown extension, or the required tool is disabled / not found:
	return FALSE;
}
/**
 * Initialize external parser for backend modules
 * Doesn't evaluate if parser is configured right - more like returning POSSIBLE supported extensions (for showing icons etc).
 *
 * @param	string		File extension to initialize for.
 * @return	boolean		Returns true if the extension is one of the known file types, otherwise false.
 */
function initBackend($extension) {
	switch($extension) {
		case 'pdf':		// PDF
		case 'doc':		// MS Word files
		case 'pps':		// MS PowerPoint
		case 'ppt':		// MS PowerPoint
		case 'xls':		// MS Excel
		case 'sxc':		// Open Office Calc.
		case 'sxi':		// Open Office Impress
		case 'sxw':		// Open Office Writer
		case 'rtf':		// RTF documents
		case 'txt':		// ASCII Text documents
		case 'html':	// HTML
		case 'htm':		// HTML
		case 'csv':		// Comma Separated Values
		case 'xml':		// Generic XML
		case 'jpg':		// Jpeg images (EXIF comment)
		case 'jpeg':	// Jpeg images (EXIF comment)
		case 'tif':		// TIFF images (EXIF comment)
			return TRUE;
	}

		// Explicitly report unknown extensions as unsupported (previously the function fell through and returned NULL):
	return FALSE;
}
/************************
*
* Reading documents (for parsing)
*
************************/
/**
* Reads the content of an external file being indexed.
*
* @param string File extension, eg. "pdf", "doc" etc.
* @param string Absolute filename of file (must exist and be validated OK before calling function)
* @param string Pointer to section (zero for all other than PDF which will have an indication of pages into which the document should be splitted.)
* @return array Standard content array (title, description, keywords, body keys)
*/
function readFileContent($ext,$absFile,$cPKey) {
unset($contentArr);
// Return immediately if initialization didn't set support up:
if (!$this->supportedExtensions[$ext]) return FALSE;
// Switch by file extension
switch ($ext) {
case 'pdf':
if ($this->app['pdfinfo']) {
// Getting pdf-info:
$cmd = $this->app['pdfinfo'].' '.$absFile;
exec($cmd,$res);
$pdfInfo = $this->splitPdfInfo($res);
if (intval($pdfInfo['pages'])) {
list($low,$high) = explode('-',$cPKey);
// Get pdf content:
$tempFileName = t3lib_div::tempnam('Typo3_indexer'); // Create temporary name
@unlink ($tempFileName); // Delete if exists, just to be safe.
$cmd = $this->app['pdftotext'].' -f '.$low.' -l '.$high.' -enc UTF-8 -q '.$absFile.' '.$tempFileName;
exec($cmd,$res);
if (@is_file($tempFileName)) {
$content = t3lib_div::getUrl($tempFileName);
unlink($tempFileName);
} else {
$this->pObj->log_setTSlogMessage('PDFtoText Failed on this document: '.$absFile.". Maybe the PDF file is locked for printing or encrypted.",2);
}
$contentArr = $this->pObj->splitRegularContent($this->removeEndJunk($content));
}
}
break;
case 'doc':
if ($this->app['catdoc']) {
$cmd = $this->app['catdoc'].' -d utf-8 '.$absFile;
exec($cmd,$res);
$content = implode(chr(10),$res);
$contentArr = $this->pObj->splitRegularContent($this->removeEndJunk($content));
}
break;
case 'pps':
case 'ppt':
if ($this->app['ppthtml']) {
$cmd = $this->app['ppthtml'].' '.$absFile;
exec($cmd,$res);
$content = implode(chr(10),$res);
$content = $this->pObj->convertHTMLToUtf8($content);
$contentArr = $this->pObj->splitHTMLContent($this->removeEndJunk($content));