Skip to content

Commit a5e0259

Browse files
authored
SOLR-17999: Fix the Managed Model Store Initialization in the LLM module (#4056)
1 parent 70d8caa commit a5e0259

7 files changed

Lines changed: 305 additions & 4 deletions

File tree

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc
2+
title: Fix the Text to Vector Managed Model Store Initialization in LLM Module
3+
type: fixed
4+
authors:
5+
- name: Ilaria Petreti
6+
- name: Alessandro Benedetti
7+
links:
8+
- name: SOLR-17999
9+
url: https://issues.apache.org/jira/browse/SOLR-17999

solr/modules/language-models/src/java/org/apache/solr/languagemodels/textvectorisation/update/processor/TextToVectorUpdateProcessorFactory.java

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,21 @@
2121
import org.apache.solr.common.params.RequiredSolrParams;
2222
import org.apache.solr.common.params.SolrParams;
2323
import org.apache.solr.common.util.NamedList;
24+
import org.apache.solr.core.SolrCore;
25+
import org.apache.solr.core.SolrResourceLoader;
2426
import org.apache.solr.languagemodels.textvectorisation.model.SolrTextToVectorModel;
2527
import org.apache.solr.languagemodels.textvectorisation.store.rest.ManagedTextToVectorModelStore;
2628
import org.apache.solr.request.SolrQueryRequest;
2729
import org.apache.solr.response.SolrQueryResponse;
30+
import org.apache.solr.rest.ManagedResource;
31+
import org.apache.solr.rest.ManagedResourceObserver;
2832
import org.apache.solr.schema.DenseVectorField;
2933
import org.apache.solr.schema.FieldType;
3034
import org.apache.solr.schema.IndexSchema;
3135
import org.apache.solr.schema.SchemaField;
3236
import org.apache.solr.update.processor.UpdateRequestProcessor;
3337
import org.apache.solr.update.processor.UpdateRequestProcessorFactory;
38+
import org.apache.solr.util.plugin.SolrCoreAware;
3439

3540
/**
3641
* Vectorises a textual field value and adds the resulting vector to another field.
@@ -47,10 +52,12 @@
4752
*
4853
* *
4954
*/
50-
public class TextToVectorUpdateProcessorFactory extends UpdateRequestProcessorFactory {
55+
public class TextToVectorUpdateProcessorFactory extends UpdateRequestProcessorFactory
56+
implements SolrCoreAware, ManagedResourceObserver {
5157
private static final String INPUT_FIELD_PARAM = "inputField";
5258
private static final String OUTPUT_FIELD_PARAM = "outputField";
5359
private static final String MODEL_NAME = "model";
60+
private ManagedTextToVectorModelStore modelStore = null;
5461

5562
private String inputField;
5663
private String outputField;
@@ -66,6 +73,23 @@ public void init(final NamedList<?> args) {
6673
modelName = required.get(MODEL_NAME);
6774
}
6875

76+
@Override
77+
public void inform(SolrCore core) {
78+
final SolrResourceLoader solrResourceLoader = core.getResourceLoader();
79+
ManagedTextToVectorModelStore.registerManagedTextToVectorModelStore(solrResourceLoader, this);
80+
}
81+
82+
@Override
83+
public void onManagedResourceInitialized(NamedList<?> args, ManagedResource res)
84+
throws SolrException {
85+
if (res instanceof ManagedTextToVectorModelStore) {
86+
modelStore = (ManagedTextToVectorModelStore) res;
87+
}
88+
if (modelStore != null) {
89+
modelStore.loadStoredModels();
90+
}
91+
}
92+
6993
@Override
7094
public UpdateRequestProcessor getInstance(
7195
SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
<?xml version="1.0" ?>
2+
<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
3+
license agreements. See the NOTICE file distributed with this work for additional
4+
information regarding copyright ownership. The ASF licenses this file to
5+
You under the Apache License, Version 2.0 (the "License"); you may not use
6+
this file except in compliance with the License. You may obtain a copy of
7+
the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
8+
by applicable law or agreed to in writing, software distributed under the
9+
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
10+
OF ANY KIND, either express or implied. See the License for the specific
11+
language governing permissions and limitations under the License. -->
12+
13+
<config>
14+
<luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
15+
<dataDir>${solr.data.dir:}</dataDir>
16+
<directoryFactory name="DirectoryFactory"
17+
class="${solr.directoryFactory:solr.MockDirectoryFactory}" />
18+
<schemaFactory class="ClassicIndexSchemaFactory" />
19+
20+
<requestDispatcher>
21+
<requestParsers />
22+
</requestDispatcher>
23+
24+
<query>
25+
<filterCache class="solr.CaffeineCache" size="4096"
26+
initialSize="2048" autowarmCount="0" />
27+
</query>
28+
<requestHandler name="/select" class="solr.SearchHandler" />
29+
30+
<updateHandler class="solr.DirectUpdateHandler2">
31+
<autoCommit>
32+
<maxTime>15000</maxTime>
33+
<openSearcher>false</openSearcher>
34+
</autoCommit>
35+
<autoSoftCommit>
36+
<maxTime>1000</maxTime>
37+
</autoSoftCommit>
38+
<updateLog>
39+
<str name="dir">${solr.data.dir:}</str>
40+
</updateLog>
41+
</updateHandler>
42+
43+
<!-- Request handler for /query with JSON response defaults -->
44+
<requestHandler name="/query" class="solr.SearchHandler">
45+
<lst name="defaults">
46+
<str name="echoParams">explicit</str>
47+
<str name="wt">json</str>
48+
<str name="indent">true</str>
49+
<str name="df">id</str>
50+
</lst>
51+
</requestHandler>
52+
53+
</config>
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
<?xml version="1.0" ?>
2+
<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
3+
license agreements. See the NOTICE file distributed with this work for additional
4+
information regarding copyright ownership. The ASF licenses this file to
5+
You under the Apache License, Version 2.0 (the "License"); you may not use
6+
this file except in compliance with the License. You may obtain a copy of
7+
the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
8+
by applicable law or agreed to in writing, software distributed under the
9+
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
10+
OF ANY KIND, either express or implied. See the License for the specific
11+
language governing permissions and limitations under the License. -->
12+
13+
<config>
14+
<luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
15+
<dataDir>${solr.data.dir:}</dataDir>
16+
<directoryFactory name="DirectoryFactory"
17+
class="${solr.directoryFactory:solr.MockDirectoryFactory}" />
18+
<schemaFactory class="ClassicIndexSchemaFactory" />
19+
20+
<requestDispatcher>
21+
<requestParsers />
22+
</requestDispatcher>
23+
24+
<!-- Query parser used to run vector search queries -->
25+
<queryParser name="knn_text_to_vector"
26+
class="org.apache.solr.languagemodels.textvectorisation.search.TextToVectorQParserPlugin" />
27+
28+
<query>
29+
<filterCache class="solr.CaffeineCache" size="4096"
30+
initialSize="2048" autowarmCount="0" />
31+
</query>
32+
<requestHandler name="/select" class="solr.SearchHandler" />
33+
34+
<updateHandler class="solr.DirectUpdateHandler2">
35+
<autoCommit>
36+
<maxTime>15000</maxTime>
37+
<openSearcher>false</openSearcher>
38+
</autoCommit>
39+
<autoSoftCommit>
40+
<maxTime>1000</maxTime>
41+
</autoSoftCommit>
42+
<updateLog>
43+
<str name="dir">${solr.data.dir:}</str>
44+
</updateLog>
45+
</updateHandler>
46+
47+
<!-- Request handler for /query with JSON response defaults -->
48+
<requestHandler name="/query" class="solr.SearchHandler">
49+
<lst name="defaults">
50+
<str name="echoParams">explicit</str>
51+
<str name="wt">json</str>
52+
<str name="indent">true</str>
53+
<str name="df">id</str>
54+
</lst>
55+
</requestHandler>
56+
57+
</config>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
<?xml version="1.0" ?>
2+
<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
3+
license agreements. See the NOTICE file distributed with this work for additional
4+
information regarding copyright ownership. The ASF licenses this file to
5+
You under the Apache License, Version 2.0 (the "License"); you may not use
6+
this file except in compliance with the License. You may obtain a copy of
7+
the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
8+
by applicable law or agreed to in writing, software distributed under the
9+
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
10+
OF ANY KIND, either express or implied. See the License for the specific
11+
language governing permissions and limitations under the License. -->
12+
13+
<config>
14+
<luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
15+
<dataDir>${solr.data.dir:}</dataDir>
16+
<directoryFactory name="DirectoryFactory"
17+
class="${solr.directoryFactory:solr.MockDirectoryFactory}" />
18+
<schemaFactory class="ClassicIndexSchemaFactory" />
19+
20+
<requestDispatcher>
21+
<requestParsers />
22+
</requestDispatcher>
23+
24+
<query>
25+
<filterCache class="solr.CaffeineCache" size="4096"
26+
initialSize="2048" autowarmCount="0" />
27+
</query>
28+
<requestHandler name="/select" class="solr.SearchHandler" />
29+
30+
<updateHandler class="solr.DirectUpdateHandler2">
31+
<autoCommit>
32+
<maxTime>15000</maxTime>
33+
<openSearcher>false</openSearcher>
34+
</autoCommit>
35+
<autoSoftCommit>
36+
<maxTime>1000</maxTime>
37+
</autoSoftCommit>
38+
<updateLog>
39+
<str name="dir">${solr.data.dir:}</str>
40+
</updateLog>
41+
</updateHandler>
42+
43+
<!-- Request handler for /query with JSON response defaults -->
44+
<requestHandler name="/query" class="solr.SearchHandler">
45+
<lst name="defaults">
46+
<str name="echoParams">explicit</str>
47+
<str name="wt">json</str>
48+
<str name="indent">true</str>
49+
<str name="df">id</str>
50+
</lst>
51+
</requestHandler>
52+
53+
<updateRequestProcessorChain name="textToVector">
54+
<processor class="solr.languagemodels.textvectorisation.update.processor.TextToVectorUpdateProcessorFactory">
55+
<str name="inputField">_text_</str>
56+
<str name="outputField">vector</str>
57+
<str name="model">dummy-1</str>
58+
</processor>
59+
<processor class="solr.RunUpdateProcessorFactory"/>
60+
</updateRequestProcessorChain>
61+
62+
</config>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.solr.languagemodels.textvectorisation.store.rest;
18+
19+
import org.apache.solr.languagemodels.TestLanguageModelBase;
20+
import org.junit.After;
21+
import org.junit.Test;
22+
23+
public class TestManagedModelStoreInitialization extends TestLanguageModelBase {
24+
25+
@After
26+
public void cleanUp() throws Exception {
27+
afterTest();
28+
}
29+
30+
@Test
31+
public void managedModelStore_whenUpdateRequestComponentConfigured_shouldBeInitialized()
32+
throws Exception {
33+
setupTest(
34+
"solrconfig-language-models-update-request-processor-only.xml",
35+
"schema-language-models.xml",
36+
false,
37+
false);
38+
39+
assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/responseHeader/status==0");
40+
assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models==[]");
41+
}
42+
43+
@Test
44+
public void managedModelStore_whenQueryParserComponentConfigured_shouldBeInitialized()
45+
throws Exception {
46+
setupTest(
47+
"solrconfig-language-models-query-parser-only.xml",
48+
"schema-language-models.xml",
49+
false,
50+
false);
51+
52+
assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/responseHeader/status==0");
53+
assertJQ(ManagedTextToVectorModelStore.REST_END_POINT, "/models==[]");
54+
}
55+
56+
@Test
57+
public void managedModelStore_whenNoComponents_shouldNotBeInitialized() throws Exception {
58+
setupTest(
59+
"solrconfig-language-models-no-components.xml", "schema-language-models.xml", false, false);
60+
assertJQ(
61+
ManagedTextToVectorModelStore.REST_END_POINT,
62+
"/responseHeader/status==400",
63+
"/error/msg=='No REST managed resource registered for path "
64+
+ ManagedTextToVectorModelStore.REST_END_POINT
65+
+ "'");
66+
}
67+
}

solr/solr-ref-guide/modules/query-guide/pages/text-to-vector.adoc

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,15 +72,38 @@ This is provided via the `language-models` xref:configuration-guide:solr-modules
7272

7373
== Language Model Configuration
7474

75-
You need to register / configure the plugins provided by the Language Models module that you want to use. This is done in `solrconfig.xml`.
75+
Language Models is a module and therefore its plugins must be configured in `solrconfig.xml`.
7676

77-
* Declaration of the `knn_text_to_vector` query parser.
77+
=== Minimum Requirements
78+
79+
* Enable the `language-models` module to make the Language Models classes available on Solr's classpath.
80+
See xref:configuration-guide:solr-modules.adoc[Solr Module] for more details.
81+
82+
* At least one of the following components must be declared in `solrconfig.xml`:
83+
** the TextToVector Update Processor (index time)
84+
+
85+
[source,xml]
86+
----
87+
<updateRequestProcessorChain name="textToVector">
88+
<processor class="solr.languagemodels.textvectorisation.update.processor.TextToVectorUpdateProcessorFactory">
89+
<str name="inputField">_text_</str>
90+
<str name="outputField">vector</str>
91+
<str name="model">dummy-1</str>
92+
</processor>
93+
<processor class="solr.RunUpdateProcessorFactory"/>
94+
</updateRequestProcessorChain>
95+
----
96+
** the TextToVector Query Parser (query time)
7897
+
7998
[source,xml]
8099
----
81100
<queryParser name="knn_text_to_vector" class="org.apache.solr.languagemodels.textvectorisation.search.TextToVectorQParserPlugin"/>
82101
----
83102

103+
[NOTE]
104+
====
105+
If no component is configured in `solrconfig.xml`, the Text-to-Vector model store will not be registered and requests to `/schema/text-to-vector-model-store` will return an error.
106+
====
84107

85108
== Text to Vector Lifecycle
86109

@@ -122,7 +145,7 @@ Accepted values:
122145
s|Required |Default: none
123146
|===
124147
+
125-
The identifier of your model, this is used by any component that intends to use the model (`knn_text_to_vector` query parser).
148+
The identifier of your model. It is used by any component that intends to use the model (e.g., the `knn_text_to_vector` query parser or the `TextToVectorUpdateProcessorFactory` update processor).
126149

127150
`params`::
128151
+
@@ -410,6 +433,12 @@ Faceting or querying on the boolean 'vectorised' field can also give you a quick
410433

411434

412435
=== Running a Text-to-Vector Query
436+
Before running a Text-to-Vector query, ensure that the `knn_text_to_vector` query parser is declared in `solrconfig.xml`:
437+
[source,xml]
438+
----
439+
<queryParser name="knn_text_to_vector" class="org.apache.solr.languagemodels.textvectorisation.search.TextToVectorQParserPlugin"/>
440+
----
441+
413442
Running a query that vectorises your query text using a model you previously uploaded is simple:
414443

415444
[source,text]

0 commit comments

Comments
 (0)