From: Ray Lee Date: Tue, 20 Feb 2024 15:14:22 +0000 (-0500) Subject: DRYD-1375: Accent-insensitive full text search. (#391) X-Git-Url: https://git.aero2k.de/?a=commitdiff_plain;h=0c4f54399cfc801038e37a431db041cda955db75;p=tmp%2Fjakarta-migration.git DRYD-1375: Accent-insensitive full text search. (#391) --- diff --git a/3rdparty/nuxeo/nuxeo-server/9.10-HF30/config/proto-repo-config.xml b/3rdparty/nuxeo/nuxeo-server/9.10-HF30/config/proto-repo-config.xml index 6dd85c1c0..d8b57dac6 100644 --- a/3rdparty/nuxeo/nuxeo-server/9.10-HF30/config/proto-repo-config.xml +++ b/3rdparty/nuxeo/nuxeo-server/9.10-HF30/config/proto-repo-config.xml @@ -5,7 +5,7 @@ tenant bindings file, we will create a corresponding Nuxeo repository config- uration file. You won't have to change the value for most of these elements and attributes because they will be set when CollectionSpace starts. - + *** NOTE *** One value you may want to change is the 'binaryStore' path which is the location that Nuxeo places binary files like images and PDF documents. Be sure to read the comment to the right of the element below. @@ -16,7 +16,7 @@ - + @@ -29,15 +29,15 @@ - + @XA_DATASOURCE@ false - vcsconfig.sql.txt + vcsconfig.sql.txt varchar - + @@ -49,7 +49,7 @@ - + @DB_SERVER_HOSTNAME@ @DB_JDBC_OPTIONS@ diff --git a/3rdparty/nuxeo/nuxeo-server/9.10-HF30/config/vcsconfig.sql.txt b/3rdparty/nuxeo/nuxeo-server/9.10-HF30/config/vcsconfig.sql.txt index ba2415640..4b5013751 100644 --- a/3rdparty/nuxeo/nuxeo-server/9.10-HF30/config/vcsconfig.sql.txt +++ b/3rdparty/nuxeo/nuxeo-server/9.10-HF30/config/vcsconfig.sql.txt @@ -4,6 +4,27 @@ # or https://doc.nuxeo.com/nxdoc/repository-configuration/#page-title # +#CATEGORY: first + +# +# Ensure that a configuration exists for the CSpace-specific fulltext analyzer named in +# proto-repo-config.xml. +# +# We modify the configuration later (see unaccent_text_search_configuration.sql). 
Those +# modifications can't be done here, because they require having postgres extensions installed, +# and this script does not run as a superuser, which is required to install extensions. +# +# For now, the text search configuration just needs to exist, so Nuxeo will be able to start. +# + +#TEST: +SELECT 1 FROM pg_ts_config WHERE cfgname = '${fulltextAnalyzer}'; + +#IF: emptyResult +CREATE TEXT SEARCH CONFIGURATION ${fulltextAnalyzer} ( COPY = english ); + + + #CATEGORY: afterTableCreation # diff --git a/services/common/src/main/cspace/config/services/tenants/tenant-bindings-proto-unified.xml b/services/common/src/main/cspace/config/services/tenants/tenant-bindings-proto-unified.xml index 9da61c0a9..cb233e4b9 100644 --- a/services/common/src/main/cspace/config/services/tenants/tenant-bindings-proto-unified.xml +++ b/services/common/src/main/cspace/config/services/tenants/tenant-bindings-proto-unified.xml @@ -71,6 +71,10 @@ org.collectionspace.services.common.init.RunSqlScripts + + sqlScriptName + unaccent_text_search_configuration.sql + diff --git a/services/common/src/main/java/org/collectionspace/services/common/CollectionSpaceServiceContextListener.java b/services/common/src/main/java/org/collectionspace/services/common/CollectionSpaceServiceContextListener.java index 527bc06cf..dd2ce05e5 100644 --- a/services/common/src/main/java/org/collectionspace/services/common/CollectionSpaceServiceContextListener.java +++ b/services/common/src/main/java/org/collectionspace/services/common/CollectionSpaceServiceContextListener.java @@ -29,6 +29,9 @@ public class CollectionSpaceServiceContextListener implements ServletContextList // Upgrade database schema svcMain.upgradeDatabase(); + // Create required postgres extensions + svcMain.createRequiredExtensions(); + // Create required indexes (aka indices) in tables not associated // with any specific tenant.
svcMain.createRequiredIndices(); diff --git a/services/common/src/main/java/org/collectionspace/services/common/ServiceMain.java b/services/common/src/main/java/org/collectionspace/services/common/ServiceMain.java index dac61e730..807e4005d 100644 --- a/services/common/src/main/java/org/collectionspace/services/common/ServiceMain.java +++ b/services/common/src/main/java/org/collectionspace/services/common/ServiceMain.java @@ -687,6 +687,37 @@ public class ServiceMain { } } + void createRequiredExtensions() throws Exception { + Hashtable tenantBindingTypeMap = tenantBindingConfigReader.getTenantBindings(); + + // Loop through all tenants in tenant-bindings.xml + + String cspaceInstanceId = getCspaceInstanceId(); + + for (TenantBindingType tbt : tenantBindingTypeMap.values()) { + List repositoryNameList = ConfigUtils.getRepositoryNameList(tbt); + + if (repositoryNameList != null && repositoryNameList.isEmpty() == false) { + // Loop through each repo/DB defined in a tenant bindings file + + for (String repositoryName : repositoryNameList) { + try { + JDBCTools.executeUpdate(JDBCTools.CSADMIN_NUXEO_DATASOURCE_NAME, repositoryName, cspaceInstanceId, "CREATE EXTENSION IF NOT EXISTS \"unaccent\""); + } + catch(Exception e) { + logger.warn("Could not install unaccent postgresql extension. Accent-insensitive full text search is not available without this extension. On some platforms you may need to manually install this extension as a superuser."); + } + } + } else { + String errMsg = "repositoryNameList was empty or null."; + + logger.error(errMsg); + + throw new Exception(errMsg); + } + } + } + /** * Create required indexes (aka indices) in database tables not associated * with any specific tenant. 
@@ -762,7 +793,7 @@ public class ServiceMain { for (ServiceBindingType sbt: sbtList) { if (sbt.getName().equalsIgnoreCase(RUNSQLSCRIPTS_SERVICE_NAME)) { runInitHandler(cspaceInstanceId, tbt, sbt); - return; + continue; } } } diff --git a/services/common/src/main/resources/db/postgresql/create_unaccent_text_search_configuration.sql b/services/common/src/main/resources/db/postgresql/create_unaccent_text_search_configuration.sql deleted file mode 100644 index 6cfd37f88..000000000 --- a/services/common/src/main/resources/db/postgresql/create_unaccent_text_search_configuration.sql +++ /dev/null @@ -1,16 +0,0 @@ -DO $$ -BEGIN - - IF NOT EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'unaccent') THEN - CREATE EXTENSION unaccent; - END IF; - - IF NOT EXISTS (SELECT 1 FROM pg_ts_config WHERE cfgname = 'unaccent_english') THEN - CREATE TEXT SEARCH CONFIGURATION unaccent_english ( COPY = english ); - - ALTER TEXT SEARCH CONFIGURATION unaccent_english - ALTER MAPPING FOR asciihword, asciiword, hword_asciipart, hword, hword_part, word - WITH unaccent, english_stem; - END IF; - -END $$; \ No newline at end of file diff --git a/services/common/src/main/resources/db/postgresql/unaccent_text_search_configuration.sql b/services/common/src/main/resources/db/postgresql/unaccent_text_search_configuration.sql new file mode 100644 index 000000000..62f59d1bc --- /dev/null +++ b/services/common/src/main/resources/db/postgresql/unaccent_text_search_configuration.sql @@ -0,0 +1,17 @@ +/* + * If the unaccent extension is installed, modify the cspace_english text search configuration to + * be accent-insensitive. + */ + +DO $$ +BEGIN + + IF EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'unaccent') THEN + IF EXISTS (SELECT 1 FROM pg_ts_config WHERE cfgname = 'cspace_english') THEN + ALTER TEXT SEARCH CONFIGURATION cspace_english + ALTER MAPPING FOR asciihword, asciiword, hword_asciipart, hword, hword_part, word + WITH unaccent, english_stem; + END IF; + END IF; + +END $$;