Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
services
t3o sites
extensions.typo3.org
extensions.typo3.org
Commits
0c6520a9
Commit
0c6520a9
authored
Feb 10, 2017
by
Andreas Wolf
Browse files
Merge branch '5-configure-solr-indexes' into 'master'
Resolve "Implement indexer for Solr and TYPO3 extension data" Closes
#5
See merge request
!24
parents
7cbd2968
4bfda2dd
Changes
21
Hide whitespace changes
Inline
Side-by-side
.gitignore
View file @
0c6520a9
...
...
@@ -12,4 +12,4 @@ htdocs/index.php
htdocs/typo3conf/ext/*
htdocs/typo3conf/PackageStates.php
!htdocs/typo3conf/ext/ter*
data/etc/solr/
data/etc/solr/
server/solr/mycores/t3o/data
data/etc/solr/server/solr/mycores/t3o/conf/_schema_analysis_stopwords_english.json
0 → 100644
View file @
0c6520a9
{
"initArgs"
:{
"ignoreCase"
:
false
},
"managedList"
:[]}
\ No newline at end of file
data/etc/solr/server/solr/mycores/t3o/conf/admin-extra.html
0 → 100644
View file @
0c6520a9
<!-- The content of this page will be statically included into the top
of the admin page. Uncomment this as an example to see where the content
will show up.
<hr>
<i>This line will appear before the first table</i>
<tr>
<td colspan="2">
This row will be appended to the end of the first table
</td>
</tr>
<hr>
-->
data/etc/solr/server/solr/mycores/t3o/conf/currency.xml
0 → 100644
View file @
0c6520a9
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- Example exchange rates file for CurrencyField type named "currency" in example schema -->
<currencyConfig version="1.0">
  <rates>
    <!-- USD-based rates, taken from http://www.exchangerate.com/ on 2011-09-27 -->
    <rate from="USD" to="ARS" rate="4.333871" comment="ARGENTINA Peso"/>
    <rate from="USD" to="AUD" rate="1.025768" comment="AUSTRALIA Dollar"/>
    <rate from="USD" to="EUR" rate="0.743676" comment="European Euro"/>
    <rate from="USD" to="BRL" rate="1.881093" comment="BRAZIL Real"/>
    <rate from="USD" to="CAD" rate="1.030815" comment="CANADA Dollar"/>
    <rate from="USD" to="CLP" rate="519.0996" comment="CHILE Peso"/>
    <rate from="USD" to="CNY" rate="6.387310" comment="CHINA Yuan"/>
    <rate from="USD" to="CZK" rate="18.47134" comment="CZECH REP. Koruna"/>
    <rate from="USD" to="DKK" rate="5.515436" comment="DENMARK Krone"/>
    <rate from="USD" to="HKD" rate="7.801922" comment="HONG KONG Dollar"/>
    <rate from="USD" to="HUF" rate="215.6169" comment="HUNGARY Forint"/>
    <rate from="USD" to="ISK" rate="118.1280" comment="ICELAND Krona"/>
    <rate from="USD" to="INR" rate="49.49088" comment="INDIA Rupee"/>
    <rate from="USD" to="XDR" rate="0.641358" comment="INTNL MON. FUND SDR"/>
    <rate from="USD" to="ILS" rate="3.709739" comment="ISRAEL Sheqel"/>
    <rate from="USD" to="JPY" rate="76.32419" comment="JAPAN Yen"/>
    <rate from="USD" to="KRW" rate="1169.173" comment="KOREA (SOUTH) Won"/>
    <rate from="USD" to="KWD" rate="0.275142" comment="KUWAIT Dinar"/>
    <rate from="USD" to="MXN" rate="13.85895" comment="MEXICO Peso"/>
    <rate from="USD" to="NZD" rate="1.285159" comment="NEW ZEALAND Dollar"/>
    <rate from="USD" to="NOK" rate="5.859035" comment="NORWAY Krone"/>
    <rate from="USD" to="PKR" rate="87.57007" comment="PAKISTAN Rupee"/>
    <rate from="USD" to="PEN" rate="2.730683" comment="PERU Sol"/>
    <rate from="USD" to="PHP" rate="43.62039" comment="PHILIPPINES Peso"/>
    <rate from="USD" to="PLN" rate="3.310139" comment="POLAND Zloty"/>
    <rate from="USD" to="RON" rate="3.100932" comment="ROMANIA Leu"/>
    <rate from="USD" to="RUB" rate="32.14663" comment="RUSSIA Ruble"/>
    <rate from="USD" to="SAR" rate="3.750465" comment="SAUDI ARABIA Riyal"/>
    <rate from="USD" to="SGD" rate="1.299352" comment="SINGAPORE Dollar"/>
    <rate from="USD" to="ZAR" rate="8.329761" comment="SOUTH AFRICA Rand"/>
    <rate from="USD" to="SEK" rate="6.883442" comment="SWEDEN Krona"/>
    <rate from="USD" to="CHF" rate="0.906035" comment="SWITZERLAND Franc"/>
    <rate from="USD" to="TWD" rate="30.40283" comment="TAIWAN Dollar"/>
    <rate from="USD" to="THB" rate="30.89487" comment="THAILAND Baht"/>
    <rate from="USD" to="AED" rate="3.672955" comment="U.A.E. Dirham"/>
    <rate from="USD" to="UAH" rate="7.988582" comment="UKRAINE Hryvnia"/>
    <rate from="USD" to="GBP" rate="0.647910" comment="UNITED KINGDOM Pound"/>

    <!-- Direct cross-rates between a few common non-USD currencies -->
    <rate from="EUR" to="GBP" rate="0.869914"/>
    <rate from="EUR" to="NOK" rate="7.800095"/>
    <rate from="GBP" to="NOK" rate="8.966508"/>
  </rates>
</currencyConfig>
\ No newline at end of file
data/etc/solr/server/solr/mycores/t3o/conf/elevate.xml
0 → 100644
View file @
0c6520a9
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- If this file is found in the config directory, it will only be
loaded once at startup. If it is found in Solr's data
directory, it will be re-loaded every commit.
-->
<elevate>
  <!-- Example entry: three documents listed for the query text "foo bar" -->
  <query text="foo bar">
    <doc id="1"/>
    <doc id="2"/>
    <doc id="3"/>
  </query>

  <!-- Example entry for the query text "ipod" -->
  <query text="ipod">
    <!-- put the actual ipod at the top -->
    <doc id="MA147LL/A"/>
    <!-- exclude this cable -->
    <doc id="IW-02" exclude="true"/>
  </query>
</elevate>
data/etc/solr/server/solr/mycores/t3o/conf/english/_schema_analysis_stopwords_english.json
0 → 100644
View file @
0c6520a9
{
"initArgs"
:
{
"ignoreCase"
:
true
},
"initializedOn"
:
"2014-04-29T23:08:58.000Z"
,
"managedList"
:
[
"i"
,
"me"
,
"my"
,
"myself"
,
"we"
,
"us"
,
"our"
,
"ours"
,
"ourselves"
,
"you"
,
"your"
,
"yours"
,
"yourself"
,
"yourselves"
,
"he"
,
"him"
,
"his"
,
"himself"
,
"she"
,
"her"
,
"hers"
,
"herself"
,
"it"
,
"its"
,
"itself"
,
"they"
,
"them"
,
"their"
,
"theirs"
,
"themselves"
,
"what"
,
"which"
,
"who"
,
"whom"
,
"this"
,
"that"
,
"these"
,
"those"
,
"am"
,
"is"
,
"are"
,
"was"
,
"were"
,
"be"
,
"been"
,
"being"
,
"have"
,
"has"
,
"had"
,
"having"
,
"do"
,
"does"
,
"did"
,
"doing"
,
"would"
,
"should"
,
"could"
,
"ought"
,
"i'm"
,
"you're"
,
"he's"
,
"she's"
,
"it's"
,
"we're"
,
"they're"
,
"i've"
,
"you've"
,
"we've"
,
"they've"
,
"i'd"
,
"you'd"
,
"he'd"
,
"she'd"
,
"we'd"
,
"they'd"
,
"i'll"
,
"you'll"
,
"he'll"
,
"she'll"
,
"we'll"
,
"they'll"
,
"isn't"
,
"aren't"
,
"wasn't"
,
"weren't"
,
"hasn't"
,
"haven't"
,
"hadn't"
,
"doesn't"
,
"don't"
,
"didn't"
,
"won't"
,
"wouldn't"
,
"shan't"
,
"shouldn't"
,
"can't"
,
"cannot"
,
"couldn't"
,
"mustn't"
,
"let's"
,
"that's"
,
"who's"
,
"what's"
,
"here's"
,
"there's"
,
"when's"
,
"where's"
,
"why's"
,
"how's"
,
"an"
,
"the"
,
"and"
,
"but"
,
"if"
,
"or"
,
"because"
,
"as"
,
"until"
,
"while"
,
"of"
,
"at"
,
"by"
,
"for"
,
"with"
,
"about"
,
"against"
,
"between"
,
"into"
,
"through"
,
"during"
,
"before"
,
"after"
,
"above"
,
"below"
,
"to"
,
"from"
,
"up"
,
"down"
,
"in"
,
"out"
,
"on"
,
"off"
,
"over"
,
"under"
,
"again"
,
"further"
,
"then"
,
"once"
,
"here"
,
"there"
,
"when"
,
"where"
,
"why"
,
"how"
,
"all"
,
"any"
,
"both"
,
"each"
,
"few"
,
"more"
,
"most"
,
"other"
,
"some"
,
"such"
,
"no"
,
"nor"
,
"not"
,
"only"
,
"own"
,
"same"
,
"so"
,
"than"
,
"too"
,
"very"
]
}
\ No newline at end of file
data/etc/solr/server/solr/mycores/t3o/conf/english/protwords.txt
0 → 100644
View file @
0c6520a9
TYPO3
\ No newline at end of file
data/etc/solr/server/solr/mycores/t3o/conf/english/schema.xml
0 → 100644
View file @
0c6520a9
<?xml version="1.0" encoding="UTF-8" ?>
<!--
The schema name property is constructed as follows
tx_solr - The extension key
x-y-z - The extension version this schema is meant to work with
YYYYMMDD - The date the schema file was changed the last time
When changing the schema the name property must be updated. There is a
status report - tx_solr_report_SchemaStatus - checking against this
name property, that status check must be updated as well.
-->
<schema
name=
"tx_solr-6-0-0--20161209"
version=
"1.6"
>
<!-- attribute "name" is the name of this schema and is only used for display purposes.
Applications should change this to reflect the nature of the search collection.
version="1.6" is Solr's version number for the schema syntax and semantics. It should
not normally be changed by applications.
1.0: multiValued attribute did not exist, all fields are multiValued by nature
1.1: multiValued attribute introduced, false by default
1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.
1.3: removed optional field compress feature
1.4: default auto-phrase (QueryParser feature) to off
1.5: omitNorms defaults to true for primitive field types (int, float, boolean, string...)
1.6: useDocValuesAsStored defaults to true.
-->
<!-- Field that uniquely identifies a document. Kept on one line so the
     element's text content is exactly "id", with no surrounding line breaks. -->
<uniqueKey>id</uniqueKey>
<!-- xinclude field types -->
<xi:include href="../general_schema_types.xml"
            xmlns:xi="http://www.w3.org/2001/XInclude"/>
<!-- xinclude fields -->
<xi:include href="../general_schema_fields.xml"
            xmlns:xi="http://www.w3.org/2001/XInclude"/>
<!-- A text field that uses WordDelimiterFilter to enable splitting and matching of
words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
Synonyms and stopwords are customized by external files, and stemming is enabled.
Duplicate tokens at the same position (which may result from Stemmed Synonyms or
WordDelim parts) are removed.
-->
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
  <!-- Index-time chain: catenateWords and catenateNumbers are switched on here. -->
  <analyzer type="index">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="0"
            splitOnCaseChange="1"
            preserveOriginal="1"
            protected="english/protwords.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.ManagedSynonymFilterFactory" managed="english"/>
    <filter class="solr.ManagedStopFilterFactory" managed="english"/>
    <filter class="solr.EnglishPossessiveFilterFactory"/>
    <filter class="solr.SnowballPorterFilterFactory"
            language="English"
            protected="english/protwords.txt"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
  <!-- Query-time chain: same as above except that catenateWords and
       catenateNumbers are switched off.
       NOTE(review): EnglishPossessiveFilterFactory is only present in the
       index chain; confirm this asymmetry is intended. -->
  <analyzer type="query">
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="0"
            catenateNumbers="0"
            catenateAll="0"
            splitOnCaseChange="1"
            preserveOriginal="1"
            protected="english/protwords.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.ManagedSynonymFilterFactory" managed="english"/>
    <filter class="solr.ManagedStopFilterFactory" managed="english"/>
    <filter class="solr.SnowballPorterFilterFactory"
            language="English"
            protected="english/protwords.txt"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
<fieldType name="textTight" class="solr.TextField" positionIncrementGap="100">
  <!-- Single analyzer (no type attribute). Word and number parts are not
       generated; only catenated forms and the original token are kept. -->
  <analyzer>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="0"
            generateNumberParts="0"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="0"
            preserveOriginal="1"
            protected="english/protwords.txt"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.ManagedSynonymFilterFactory" managed="english"/>
    <filter class="solr.ManagedStopFilterFactory" managed="english"/>
    <filter class="solr.EnglishPossessiveFilterFactory"/>
    <filter class="solr.SnowballPorterFilterFactory"
            language="English"
            protected="english/protwords.txt"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Exact matching of words like textWhiteSpaceTokenized,
but with enabled Synonym and Stop Filter
-->
<fieldType name="textExact" class="solr.TextField" positionIncrementGap="100">
  <!-- Single analyzer (no type attribute): whitespace tokens are lowercased,
       then passed through the managed "english" synonym and stop filters.
       No word-delimiter or stemming filter is configured. -->
  <analyzer>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.ManagedSynonymFilterFactory" managed="english"/>
    <filter class="solr.ManagedStopFilterFactory" managed="english"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>
<!-- Setup simple analysis for spell checking -->
<fieldType name="textSpell"
           class="solr.TextField"
           positionIncrementGap="100"
           omitNorms="true">
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <!-- Synonyms are deliberately left out of the index chain so that they
         are not offered as spelling suggestions. -->
    <filter class="solr.ManagedStopFilterFactory" managed="english"/>
    <filter class="solr.StandardFilterFactory"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
  <!-- The query chain matches the index chain, plus the managed "english"
       synonym filter. -->
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.ManagedSynonymFilterFactory" managed="english"/>
    <filter class="solr.ManagedStopFilterFactory" managed="english"/>
    <filter class="solr.StandardFilterFactory"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>
</schema>
\ No newline at end of file
data/etc/solr/server/solr/mycores/t3o/conf/english/synonyms.txt
0 → 100644
View file @
0c6520a9
aaa => aaaa
bbb => bbbb1 bbbb2
ccc => cccc1,cccc2
a\=>a => b\=>b
a\,a => b\,b
fooaaa,baraaa,bazaaa
# Some synonym groups specific to this example
GB,gib,gigabyte,gigabytes
MB,mib,megabyte,megabytes
Television, Televisions, TV, TVs
#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
#after us won't split it into two words.
# Synonym mappings can be used for spelling correction too
pixima => pixma
data/etc/solr/server/solr/mycores/t3o/conf/general_schema_fields.xml
0 → 100644
View file @
0c6520a9
<fields>
<!--
Valid attributes for fields:
name: mandatory - the name for the field
type: mandatory - the name of a previously defined type from the
<types> section
indexed: true if this field should be indexed (searchable or sortable)
stored: true if this field should be retrievable
multiValued: true if this field may contain multiple values per document
omitNorms: (expert) set to true to omit the norms associated with
this field (this disables length normalization and index-time
boosting for the field, and saves some memory). Only full-text
fields or fields that need an index-time boost need norms.
termVectors: [false] set to true to store the term vector for a
given field.
When using MoreLikeThis, fields used for similarity should be
stored for best performance.
termPositions: Store position information with the term vector.
This will increase storage costs.
termOffsets: Store offset information with the term vector. This
will increase storage costs.
default: a value that should be used if no value is specified
when adding a document.
-->
<field
name=
"_version_"
type=
"long"
indexed=
"true"
stored=
"true"
/>
<!--
points to the root document of a block of nested documents.
Required for nested document support
-->
<field
name=
"_root_"
type=
"string"
indexed=
"true"
stored=
"false"
/>
<!--
The document id is derived from a site-specific key (hash) and some
record properties like:
$document->id = $siteHash . '/' . $type . '/' . $record['uid'];
-->
<field
name=
"id"
type=
"string"
indexed=
"true"
stored=
"true"
required=
"true"
/>
<!--
An additional ID used for record collapsing
typically will be $type/$record['uid']
When indexing files, the id field is not generated according to that
scheme, so we need an additional field for collapsing results
-->
<field
name=
"variantId"
type=
"string"
indexed=
"true"
stored=
"true"
/>
<!--
Using these fields we can "connect" the indexed documents to
specific sites.
-->
<field
name=
"site"
type=
"string"
indexed=
"true"
stored=
"true"
docValues=
"true"
/>
<field
name=
"siteHash"
type=
"string"
indexed=
"true"
stored=
"true"
docValues=
"true"
/>
<!--
The application key which will come in handy as soon as other
systems start sending their content to the same index. Thus you
can search "external" systems' content through TYPO3
-->
<field
name=
"appKey"
type=
"string"
indexed=
"true"
stored=
"false"
docValues=
"true"
required=
"true"
/>
<!--
The type by default represents the table name of a record. For
external systems this field could be used differently.
-->
<field
name=
"type"
type=
"string"
indexed=
"true"
stored=
"true"
docValues=
"true"
required=
"true"
/>
<!--
The content hash, better known as cHash for short, is used to store
a hash over the GET parameters used to request a URL and identify a
cache entry for the generated page.
-->
<field
name=
"contentHash"
type=
"string"
indexed=
"false"
stored=
"true"
/>
<!--
Here, default is used to create a "timestamp" field indicating when
the document was indexed.
-->
<field
name=
"indexed"
type=
"date"
indexed=
"true"
stored=
"true"
default=
"NOW/SECOND"
/>
<!-- system fields -->
<field
name=
"uid"
type=
"integer"
indexed=
"true"
stored=
"true"
/>
<field
name=
"pid"
type=
"integer"
indexed=
"true"
stored=
"true"
docValues=
"true"
/>
<field
name=
"typeNum"
type=
"integer"
indexed=
"true"
stored=
"true"
/>
<field
name=
"created"
type=
"date"
indexed=
"true"
stored=
"true"
docValues=
"true"
/>