]> git.aero2k.de Git - tmp/jakarta-migration.git/commitdiff
DRYD-1375: Accent-insensitive full text search. (#391)
author Ray Lee <ray.lee@lyrasis.org>
Tue, 20 Feb 2024 15:14:22 +0000 (10:14 -0500)
committer GitHub <noreply@github.com>
Tue, 20 Feb 2024 15:14:22 +0000 (10:14 -0500)
3rdparty/nuxeo/nuxeo-server/9.10-HF30/config/proto-repo-config.xml
3rdparty/nuxeo/nuxeo-server/9.10-HF30/config/vcsconfig.sql.txt
services/common/src/main/cspace/config/services/tenants/tenant-bindings-proto-unified.xml
services/common/src/main/java/org/collectionspace/services/common/CollectionSpaceServiceContextListener.java
services/common/src/main/java/org/collectionspace/services/common/ServiceMain.java
services/common/src/main/resources/db/postgresql/create_unaccent_text_search_configuration.sql [deleted file]
services/common/src/main/resources/db/postgresql/unaccent_text_search_configuration.sql [new file with mode: 0644]

index 6dd85c1c0ee42a5ae5b9da70858699ec7131fd93..d8b57dac662ab0d8a91b6f94b1d56ba4d32c55cb 100644 (file)
@@ -5,7 +5,7 @@
        tenant bindings file, we will create a corresponding Nuxeo repository config-
        uration file.  You won't have to change the value for most of these elements
        and attributes because they will be set when CollectionSpace starts.
-       
+
        *** NOTE *** One value you may want to change is the 'binaryStore' path which is the location
        that Nuxeo places binary files like images and PDF documents. Be sure to read the comment to
        the right of the <binaryStore> element below.
@@ -16,7 +16,7 @@
 <!-- Values in the following elements are primarily added during execution -->
 <!-- of the 'ant deploy' command, within the Services source code tree. -->
 <!-- (Several values are also added during system startup, into repository-specific -->
-<!-- 'clones' of this prototype configuration file, as noted below.) --> 
+<!-- 'clones' of this prototype configuration file, as noted below.) -->
 <!-- See http://doc.nuxeo.com/display/ADMINDOC/VCS+Configuration for more config details -->
 <component name="config:default-repository">
        <extension target="org.nuxeo.ecm.core.blob.BlobManager" point="configuration">
     <extension target="org.nuxeo.ecm.core.storage.sql.RepositoryService" point="repository">
         <repository name="default" factory="org.nuxeo.ecm.core.storage.sql.ra.PoolingRepositoryFactory">
                        <pool minPoolSize="0" maxPoolSize="20" blockingTimeoutMillis="100" idleTimeoutMinutes="10"/>
-                       <clustering enabled="false" delay="1000"/>                
+                       <clustering enabled="false" delay="1000"/>
                        <xa-datasource>@XA_DATASOURCE@</xa-datasource> <!-- The transactional datasource for Nuxeo -->
                        <noDDL>false</noDDL>
-                       <sqlInitFile>vcsconfig.sql.txt</sqlInitFile> <!-- see https://doc.nuxeo.com/display/ADMINDOC/VCS+Configuration#VCSConfiguration-DatabaseCreationOption -->              
+                       <sqlInitFile>vcsconfig.sql.txt</sqlInitFile> <!-- see https://doc.nuxeo.com/display/ADMINDOC/VCS+Configuration#VCSConfiguration-DatabaseCreationOption -->
                        <aclOptimizations enabled="true"/>
                        <pathOptimizations enabled="true"/>
                        <idType>varchar</idType>
                        <indexing>
-                       <fulltext disabled="false" analyzer="english">
+                       <fulltext disabled="false" analyzer="cspace_english">
                          <index name="default">
                                <!-- all props implied -->
                          </index>
@@ -49,7 +49,7 @@
                          </index>
                        </fulltext>
                        </indexing>
-                       <usersSeparator key="," />                      
+                       <usersSeparator key="," />
                        <property name="ServerName">@DB_SERVER_HOSTNAME@</property>
                        <property name="DatabaseName"></property> <!-- The value of the database name element is inserted during system startup. -->
                        <property name="JDBCOptions">@DB_JDBC_OPTIONS@</property>
index ba241564031038bd51e6afe0e52012ee72b679f7..4b5013751deeb4885e32d4ff7d361bd696e60483 100644 (file)
@@ -4,6 +4,27 @@
 # or https://doc.nuxeo.com/nxdoc/repository-configuration/#page-title
 #
 
+#CATEGORY: first
+
+#
+# Ensure that a configuration exists for the CSpace-specific fulltext analyzer named in
+# proto-repo-config.xml.
+#
+# We modify the configuration later (see unaccent_text_search_configuration.sql). Those
+# modifications can't be done here, because they require having postgres extensions installed,
+# and this script does not run as a superuser, which is required to install extensions.
+#
+# For now, the text search configuration just needs to exist, so Nuxeo will be able to start.
+#
+
+#TEST:
+SELECT 1 FROM pg_ts_config WHERE cfgname = '${fulltextAnalyzer}';
+
+#IF: emptyResult
+CREATE TEXT SEARCH CONFIGURATION ${fulltextAnalyzer} ( COPY = english );
+
+
+
 #CATEGORY: afterTableCreation
 
 #
index 9da61c0a98844415e96f8c98aa289fee5c5a4194..cb233e4b98b708b9ba6856c0f36e513cdd44c8e6 100644 (file)
                        <service:initHandler xmlns:service="http://collectionspace.org/services/config/service">
                                <service:classname>org.collectionspace.services.common.init.RunSqlScripts</service:classname>
                                <service:params>
+                                       <service:property>
+                                               <service:key>sqlScriptName</service:key>
+                                               <service:value>unaccent_text_search_configuration.sql</service:value>
+                                       </service:property>
                                </service:params>
                        </service:initHandler>
                </tenant:serviceBindings>
index 527bc06cf65317da310087b65b1c8b1a6b375119..dd2ce05e514b32e30b9d5a95a85d8f76e0f3f7c1 100644 (file)
@@ -29,6 +29,9 @@ public class CollectionSpaceServiceContextListener implements ServletContextList
             // Upgrade database schema
             svcMain.upgradeDatabase();
 
+            // Create required postgres extensions
+            svcMain.createRequiredExtensions();
+
             // Create required indexes (aka indices) in tables not associated
             // with any specific tenant.
             svcMain.createRequiredIndices();
index dac61e7301f6097c51cd8caaf6a3ae3d65e29743..807e4005d6be44a0c30959725684368a5dae3c38 100644 (file)
@@ -687,6 +687,37 @@ public class ServiceMain {
                }
        }
 
+       /**
+        * Attempts to install the "unaccent" postgres extension in every repository database
+        * named in the tenant bindings.
+        *
+        * Installation is best-effort: a failure for one repository is logged as a warning
+        * (including the underlying cause) and the remaining repositories are still attempted.
+        *
+        * @throws Exception if a tenant binding has a null or empty repository name list.
+        */
+       void createRequiredExtensions() throws Exception {
+               Hashtable<String, TenantBindingType> tenantBindingTypeMap = tenantBindingConfigReader.getTenantBindings();
+               String cspaceInstanceId = getCspaceInstanceId();
+
+               // Loop through all tenants in tenant-bindings.xml
+               for (TenantBindingType tbt : tenantBindingTypeMap.values()) {
+                       List<String> repositoryNameList = ConfigUtils.getRepositoryNameList(tbt);
+
+                       if (repositoryNameList == null || repositoryNameList.isEmpty()) {
+                               String errMsg = "repositoryNameList was empty or null.";
+
+                               logger.error(errMsg);
+
+                               throw new Exception(errMsg);
+                       }
+
+                       // Loop through each repo/DB defined in a tenant bindings file
+                       for (String repositoryName : repositoryNameList) {
+                               try {
+                                       JDBCTools.executeUpdate(JDBCTools.CSADMIN_NUXEO_DATASOURCE_NAME, repositoryName, cspaceInstanceId, "CREATE EXTENSION IF NOT EXISTS \"unaccent\"");
+                               }
+                               catch (Exception e) {
+                                       // Deliberately not rethrown: startup continues without the extension.
+                                       // The cause and the repository are logged so the failure can be
+                                       // diagnosed and the extension installed manually if needed.
+                                       logger.warn("Could not install unaccent postgresql extension in repository " + repositoryName + ". Accent-insensitive full text search is not available without this extension. On some platforms you may need to manually install this extension as a superuser.", e);
+                               }
+                       }
+               }
+       }
+
        /**
         * Create required indexes (aka indices) in database tables not associated
         * with any specific tenant.
@@ -762,7 +793,7 @@ public class ServiceMain {
             for (ServiceBindingType sbt: sbtList) {
                 if (sbt.getName().equalsIgnoreCase(RUNSQLSCRIPTS_SERVICE_NAME)) {
                     runInitHandler(cspaceInstanceId, tbt, sbt);
-                    return;
+                    continue;
                 }
             }
         }
diff --git a/services/common/src/main/resources/db/postgresql/create_unaccent_text_search_configuration.sql b/services/common/src/main/resources/db/postgresql/create_unaccent_text_search_configuration.sql
deleted file mode 100644 (file)
index 6cfd37f..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-DO $$
-BEGIN
-
-       IF NOT EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'unaccent') THEN
-               CREATE EXTENSION unaccent;
-       END IF;
-
-       IF NOT EXISTS (SELECT 1 FROM pg_ts_config WHERE cfgname = 'unaccent_english') THEN
-               CREATE TEXT SEARCH CONFIGURATION unaccent_english ( COPY = english );
-
-               ALTER TEXT SEARCH CONFIGURATION unaccent_english
-                       ALTER MAPPING FOR asciihword, asciiword, hword_asciipart, hword, hword_part, word
-                       WITH unaccent, english_stem;
-       END IF;
-
-END $$;
\ No newline at end of file
diff --git a/services/common/src/main/resources/db/postgresql/unaccent_text_search_configuration.sql b/services/common/src/main/resources/db/postgresql/unaccent_text_search_configuration.sql
new file mode 100644 (file)
index 0000000..62f59d1
--- /dev/null
@@ -0,0 +1,17 @@
+/*
+ * If the unaccent extension is installed, modify the cspace_english text search configuration to
+ * be accent-insensitive.
+ */
+
+DO $$
+BEGIN
+
+       -- Remap the word token types only when both prerequisites are present: the unaccent
+       -- extension and the cspace_english text search configuration. If either is missing,
+       -- nothing is changed.
+       IF EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'unaccent')
+               AND EXISTS (SELECT 1 FROM pg_ts_config WHERE cfgname = 'cspace_english')
+       THEN
+               ALTER TEXT SEARCH CONFIGURATION cspace_english
+                       ALTER MAPPING FOR asciihword, asciiword, hword_asciipart, hword, hword_part, word
+                       WITH unaccent, english_stem;
+       END IF;
+
+END $$;