[CLEANUP] Improve the @param/@return/@var PHPDoc
[Packages/TYPO3.CMS.git] / typo3 / sysext / indexed_search / Classes / Example / CrawlerHook.php
1 <?php
2 namespace TYPO3\CMS\IndexedSearch\Example;
3
4 /*
5 * This file is part of the TYPO3 CMS project.
6 *
7 * It is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License, either version 2
9 * of the License, or any later version.
10 *
11 * For the full copyright and license information, please read the
12 * LICENSE.txt file that was distributed with this source code.
13 *
14 * The TYPO3 project - inspiring people to share!
15 */
16
/**
 * Index search crawler hook example
 *
 * Shows how a custom indexing session can be spread over several calls of
 * indexOperation(): a step counter kept in the session data selects what is
 * indexed in each call (items reached through GET parameters, a file from the
 * file system, an external URL).
 *
 * @author Kasper Skårhøj <kasperYYYY@typo3.com>
 */
class CrawlerHook {

    /**
     * Function is called when an indexing session starts according to the time intervals set for the indexing configuration.
     *
     * @return string Return a text string for the first, initiating queue entry for the crawler.
     */
    public function initMessage() {
        return 'Start of Custom Example Indexing session!';
    }

    /**
     * This will do two things:
     * 1) Carry out actual indexing of content (one or more items)
     * 2) Add one or more new entries into the crawlers queue so we are called again (another instance) for further indexing in the session (optional of course, if all indexing is done, we add no new entries)
     *
     * Only one step is executed per call; the step number is read from and written back to $session_data['step'].
     *
     * @param array $cfgRec Indexing Configuration Record (the record which holds the information that lead to this indexing session...). The keys "pid", "uid" and "set_id" are read here.
     * @param array $session_data Session data variable. Passed by reference. Changed content is saved and passed back upon next instance in the session. Initialized with a "step" counter if it is not an array or has no counter yet.
     * @param array $params Params array from the queue entry. Not used by this example.
     * @param object $pObj Parent Object (from "indexed_search" extension). Must provide getUidRootLineForClosestTemplate() and addQueueEntryForHook(), which are called here.
     * @return void
     */
    public function indexOperation($cfgRec, &$session_data, $params, &$pObj) {
        // Init session data array if not already:
        if (!is_array($session_data)) {
            $session_data = array();
        }
        // Make sure the step counter exists before it is incremented (avoids reading an undefined key):
        if (!isset($session_data['step'])) {
            $session_data['step'] = 0;
        }
        // Increase step counter (this is just an example of how the session data can be used - to track how many instances of indexing is left)
        $session_data['step']++;
        switch ((int)$session_data['step']) {
            case 1:
                // Indexing Example: Content accessed with GET parameters added to URL:
                // Get rootline from the Indexing Record (needed because the indexer relates all search results to a position in the page tree!) [DON'T CHANGE]:
                $rl = $pObj->getUidRootLineForClosestTemplate($cfgRec['pid']);
                // Set up language uid, if any:
                $sys_language_uid = 0;
                // Set up 2 example items to index:
                $exampleItems = array(
                    array(
                        'ID' => '123',
                        'title' => 'Title of Example 1',
                        'content' => 'Vestibulum leo turpis, fringilla sit amet, semper eget, vestibulum ut, arcu. Vestibulum mauris orci, vulputate quis, congue eget, nonummy'
                    ),
                    array(
                        'ID' => 'example2',
                        'title' => 'Title of Example 2',
                        'content' => 'Cras tortor turpis, vulputate non, accumsan a, pretium in, magna. Cras turpis turpis, pretium pulvinar, pretium vel, nonummy eu.'
                    )
                );
                // For each item, index it (this is what you might like to do in batches of like 100 items if all your content spans thousands of items!)
                foreach ($exampleItems as $item) {
                    // Prepare the GET variables array that must be added to the page URL in order to view result:
                    $GETparams = array();
                    parse_str('&itemID=' . rawurlencode($item['ID']), $GETparams);
                    // The example items above carry no "tstamp" / "create_date" keys; fall back to 0 instead of reading undefined array keys.
                    $mtime = isset($item['tstamp']) ? (int)$item['tstamp'] : 0;
                    $crdate = isset($item['create_date']) ? (int)$item['create_date'] : 0;
                    // Prepare indexer (make instance, initialize it, set special features for indexing parameterized content - probably none of this should be changed by you) [DON'T CHANGE]:
                    $indexerObj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance(\TYPO3\CMS\IndexedSearch\Indexer::class);
                    $indexerObj->backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl, $GETparams, FALSE);
                    $indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
                    $indexerObj->forceIndexing = TRUE;
                    // Indexing the content of the item (see \TYPO3\CMS\IndexedSearch\Indexer::backend_indexAsTYPO3Page() for options)
                    $indexerObj->backend_indexAsTYPO3Page($item['title'], '', '', $item['content'], $GLOBALS['LANG']->charSet, $mtime, $crdate, $item['ID']);
                }
                break;
            case 2:
                // Indexing Example: Content accessed directly in file system:
                // Get rootline from the Indexing Record (needed because the indexer relates all search results to a position in the page tree!) [DON'T CHANGE]:
                $rl = $pObj->getUidRootLineForClosestTemplate($cfgRec['pid']);
                // Set up language uid, if any:
                $sys_language_uid = 0;
                // Prepare indexer (make instance, initialize it, set special features for indexing parameterized content - probably none of this should be changed by you) [DON'T CHANGE]:
                $indexerObj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance(\TYPO3\CMS\IndexedSearch\Indexer::class);
                $indexerObj->backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl);
                $indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
                // To avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!)
                $indexerObj->hash['phash'] = -1;
                // Index document:
                $indexerObj->indexRegularDocument('fileadmin/templates/index.html', TRUE);
                break;
            case 3:
                // Indexing Example: Content accessed on External URLs:
                // Each step runs in its own call, so the rootline and language uid set up in the
                // other cases do not exist here and have to be determined again:
                $rl = $pObj->getUidRootLineForClosestTemplate($cfgRec['pid']);
                // Set up language uid, if any:
                $sys_language_uid = 0;
                // Prepare indexer [DON'T CHANGE]:
                $indexerObj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance(\TYPO3\CMS\IndexedSearch\Indexer::class);
                $indexerObj->backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl);
                $indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
                // To avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!)
                $indexerObj->hash['phash'] = -1;
                // Index external URL (HTML only):
                $indexerObj->indexExternalUrl('http://www.google.com/');
                break;
        }
        // Finally, set entry for next indexing instance (if all steps are not completed)
        // NOTE(review): with "<= 3" an entry is also queued after step 3 has run; presumably this leads to
        // one more call in which no case matches and nothing is indexed - confirm whether "< 3" is intended.
        if ($session_data['step'] <= 3) {
            $title = 'Step #' . $session_data['step'] . ' of 3';
            // Just information field. Never mind that the field is called "url" - this is what will be shown in the "crawler" log. Could be a URL - or whatever else tells what that indexing instance will do.
            $pObj->addQueueEntryForHook($cfgRec, $title);
        }
    }

}