<?xml version="1.0" encoding="UTF-8" ?>
<!--
	The schema name property is constructed as follows

	tx_solr		- The extension key
	x-y-z		- The extension version this schema is meant to work with
	YYYYMMDD	- The date the schema file was changed the last time

	When changing the schema, the name property must be updated. There is a
	status report - tx_solr_report_SchemaStatus - that checks against this
	name property; that status check must be updated as well.
-->
<schema name="tx_solr-6-0-0--20161209" version="1.6" >
	<!-- attribute "name" is the name of this schema and is only used for display purposes.
		Applications should change this to reflect the nature of the search collection.
		version="1.6" is Solr's version number for the schema syntax and semantics.  It should
		not normally be changed by applications.
		1.0: multiValued attribute did not exist, all fields are multiValued by nature
		1.1: multiValued attribute introduced, false by default
		1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.
		1.3: removed optional field compress feature
		1.4: default auto-phrase (QueryParser feature) to off
		1.5: omitNorms defaults to true for primitive field types (int, float, boolean, string...)
		1.6: useDocValuesAsStored defaults to true.
	-->

	<!-- The "id" field is declared as the unique key of this schema. -->
	<uniqueKey>id</uniqueKey>

	<!-- XInclude the general field type definitions (../general_schema_types.xml). -->
	<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="../general_schema_types.xml"/>

	<!-- XInclude the general field definitions (../general_schema_fields.xml). -->
	<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="../general_schema_fields.xml"/>


	<!--
		fieldType "text": general purpose English full-text analysis.

		Both analyzers tokenize on whitespace and then apply WordDelimiterFilter, which
		enables splitting and matching of words on case changes, alpha-numeric boundaries
		and non-alphanumeric characters, so that a query of "wifi" or "wi fi" could match
		a document containing "Wi-Fi". Synonyms and stop words are taken from the managed
		"english" resources, stemming is enabled, and duplicate tokens at the same
		position (which may result from stemmed synonyms or WordDelimiter parts) are
		removed.

		Differences between the two chains as configured below:
		* catenateWords / catenateNumbers are "1" at index time and "0" at query time.
		* EnglishPossessiveFilterFactory is only part of the index chain.
		  NOTE(review): confirm that this index/query asymmetry is intended.
	-->
	<fieldType name="text" class="solr.TextField" positionIncrementGap="100">

		<!-- Index time chain -->
		<analyzer type="index">
			<tokenizer class="solr.WhitespaceTokenizerFactory"/>

			<!-- Split tokens into parts, additionally keeping the original token. -->
			<filter class="solr.WordDelimiterFilterFactory"
				protected="english/protwords.txt"
				preserveOriginal="1"
				splitOnCaseChange="1"
				generateWordParts="1" generateNumberParts="1"
				catenateWords="1" catenateNumbers="1" catenateAll="0"/>
			<filter class="solr.LowerCaseFilterFactory"/>

			<!-- Managed synonyms and stop words of the "english" resource. -->
			<filter class="solr.ManagedSynonymFilterFactory" managed="english"/>
			<filter class="solr.ManagedStopFilterFactory" managed="english"/>

			<!-- Stemming; words listed in english/protwords.txt are protected. -->
			<filter class="solr.EnglishPossessiveFilterFactory"/>
			<filter class="solr.SnowballPorterFilterFactory"
				language="English"
				protected="english/protwords.txt"/>
			<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
		</analyzer>

		<!-- Query time chain -->
		<analyzer type="query">
			<tokenizer class="solr.WhitespaceTokenizerFactory"/>

			<!-- Same splitting as at index time, but without catenating the parts. -->
			<filter class="solr.WordDelimiterFilterFactory"
				protected="english/protwords.txt"
				preserveOriginal="1"
				splitOnCaseChange="1"
				generateWordParts="1" generateNumberParts="1"
				catenateWords="0" catenateNumbers="0" catenateAll="0"/>
			<filter class="solr.LowerCaseFilterFactory"/>

			<!-- Managed synonyms and stop words of the "english" resource. -->
			<filter class="solr.ManagedSynonymFilterFactory" managed="english"/>
			<filter class="solr.ManagedStopFilterFactory" managed="english"/>

			<!-- Stemming; words listed in english/protwords.txt are protected. -->
			<filter class="solr.SnowballPorterFilterFactory"
				language="English"
				protected="english/protwords.txt"/>
			<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
		</analyzer>
	</fieldType>


	<!--
		fieldType "textTight": less flexible matching, but fewer false matches.
		Probably not ideal for product names, but may be good for SKUs. Dashes can be
		inserted in the wrong place and the value still matches.

		Only one analyzer element without a type attribute is declared. Compared to
		"text", WordDelimiterFilter does not generate word or number parts here
		(generateWordParts / generateNumberParts = "0"); it only catenates them and
		keeps the original token.
	-->
	<fieldType name="textTight" class="solr.TextField" positionIncrementGap="100">
		<analyzer>
			<tokenizer class="solr.WhitespaceTokenizerFactory"/>

			<!-- Catenate parts instead of emitting them individually. -->
			<filter class="solr.WordDelimiterFilterFactory"
				protected="english/protwords.txt"
				preserveOriginal="1"
				generateWordParts="0" generateNumberParts="0"
				catenateWords="1" catenateNumbers="1" catenateAll="0"/>
			<filter class="solr.LowerCaseFilterFactory"/>

			<!-- Managed synonyms and stop words of the "english" resource. -->
			<filter class="solr.ManagedSynonymFilterFactory" managed="english"/>
			<filter class="solr.ManagedStopFilterFactory" managed="english"/>

			<!-- Stemming; words listed in english/protwords.txt are protected. -->
			<filter class="solr.EnglishPossessiveFilterFactory"/>
			<filter class="solr.SnowballPorterFilterFactory"
				language="English"
				protected="english/protwords.txt"/>
			<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
		</analyzer>
	</fieldType>

	<!--
		fieldType "textExact": exact matching of words like textWhiteSpaceTokenized,
		but with the synonym and stop filters enabled. The chain contains no word
		delimiter and no stemming filter; tokens are only lower-cased.
	-->
	<fieldType name="textExact" class="solr.TextField" positionIncrementGap="100">
		<analyzer>
			<tokenizer class="solr.WhitespaceTokenizerFactory"/>

			<filter class="solr.LowerCaseFilterFactory"/>

			<!-- Managed synonyms and stop words of the "english" resource. -->
			<filter class="solr.ManagedSynonymFilterFactory" managed="english"/>
			<filter class="solr.ManagedStopFilterFactory" managed="english"/>

			<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
		</analyzer>
	</fieldType>

	<!--
		fieldType "textSpell": simple analysis for spell checking.
		Uses StandardTokenizer instead of the whitespace tokenizer, declares
		omitNorms="true" and applies no stemming filter. The index and query chains
		differ only in the synonym filter, which is part of the query chain alone.
	-->
	<fieldType name="textSpell" class="solr.TextField" positionIncrementGap="100" omitNorms="true">

		<!-- Index time chain -->
		<analyzer type="index">
			<tokenizer class="solr.StandardTokenizerFactory"/>

			<filter class="solr.LowerCaseFilterFactory"/>

			<!-- Synonyms are left out on purpose: they must not be added as spell suggestions. -->
			<filter class="solr.ManagedStopFilterFactory" managed="english"/>
			<filter class="solr.StandardFilterFactory"/>
			<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
		</analyzer>

		<!-- Query time chain -->
		<analyzer type="query">
			<tokenizer class="solr.StandardTokenizerFactory"/>

			<filter class="solr.LowerCaseFilterFactory"/>

			<!-- Managed synonyms and stop words of the "english" resource. -->
			<filter class="solr.ManagedSynonymFilterFactory" managed="english"/>
			<filter class="solr.ManagedStopFilterFactory" managed="english"/>

			<filter class="solr.StandardFilterFactory"/>
			<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
		</analyzer>
	</fieldType>

</schema>