PostgreSQL的 initdb 源代码分析之十七
继续分析:
setup_collation()
展开:
/*
 * populate pg_collation
 *
 * Reads the locale names printed by "locale -a", stages every usable one
 * (plus a normalized alias where normalize_locale_name() produces one) in a
 * temporary table, and finally copies the de-duplicated rows into
 * pg_collation.  The SQL is emitted through the PG_CMD_* macros; "cmd" holds
 * the backend command line built from backend_exec/backend_options for
 * template1 (presumably what PG_CMD_OPEN launches -- confirm against the
 * macro definitions).
 *
 * When HAVE_LOCALE_T is not defined, or on WIN32, the function only reports
 * that collations are not supported on this platform.
 */
static void
setup_collation(void)
{
#if defined(HAVE_LOCALE_T) && !defined(WIN32)
	FILE	   *locale_a_handle;
	char		localebuf[NAMEDATALEN];
	int			count = 0;		/* number of locales accepted */

	PG_CMD_DECL;
#endif

	fputs(_("creating collations ... "), stdout);
	fflush(stdout);

#if defined(HAVE_LOCALE_T) && !defined(WIN32)
	snprintf(cmd, sizeof(cmd),
			 "\"%s\" %s template1 >%s",
			 backend_exec, backend_options,
			 DEVNULL);

	locale_a_handle = popen_check("locale -a", "r");
	if (!locale_a_handle)
		return;					/* complaint already printed */

	PG_CMD_OPEN;

	PG_CMD_PUTS("CREATE TEMP TABLE tmp_pg_collation ( "
				" collname name, "
				" locale name, "
				" encoding int) WITHOUT OIDS;\n");

	while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
	{
		size_t		len;
		size_t		i;
		int			enc;
		bool		skip;
		char	   *quoted_locale;
		char		alias[NAMEDATALEN];

		len = strlen(localebuf);

		/*
		 * fgets() keeps the trailing newline; if it is missing, the line did
		 * not fit into localebuf and the name is unusable.
		 */
		if (len == 0 || localebuf[len - 1] != '\n')
		{
			if (debug)
				fprintf(stderr, _("%s: locale name too long, skipped: %s\n"),
						progname, localebuf);
			continue;
		}
		localebuf[len - 1] = '\0';

		/*
		 * Some systems have locale names that don't consist entirely of ASCII
		 * letters (such as "bokmål" or "français").  This is pretty silly,
		 * since we need the locale itself to interpret the non-ASCII
		 * characters.  We can't do much with those, so we filter them out.
		 */
		skip = false;
		for (i = 0; i < len; i++)
		{
			if (IS_HIGHBIT_SET(localebuf[i]))
			{
				skip = true;
				break;
			}
		}
		if (skip)
		{
			if (debug)
				fprintf(stderr, _("%s: locale name has non-ASCII characters, skipped: %s\n"),
						progname, localebuf);
			continue;
		}

		enc = pg_get_encoding_from_locale(localebuf, debug);
		if (enc < 0)
		{
			/* error message printed by pg_get_encoding_from_locale() */
			continue;
		}
		if (!PG_VALID_BE_ENCODING(enc))
			continue;			/* ignore locales for client-only encodings */
		if (enc == PG_SQL_ASCII)
			continue;			/* C/POSIX are already in the catalog */

		count++;

		quoted_locale = escape_quotes(localebuf);

		PG_CMD_PRINTF3("INSERT INTO tmp_pg_collation VALUES (E'%s', E'%s', %d);\n",
					   quoted_locale, quoted_locale, enc);

		/*
		 * Generate aliases such as "en_US" in addition to "en_US.utf8" for
		 * ease of use.  Note that collation names are unique per encoding
		 * only, so this doesn't clash with "en_US" for LATIN1, say.
		 */
		if (normalize_locale_name(alias, localebuf))
		{
			char	   *quoted_alias = escape_quotes(alias);

			PG_CMD_PRINTF3("INSERT INTO tmp_pg_collation VALUES (E'%s', E'%s', %d);\n",
						   quoted_alias, quoted_locale, enc);
			free(quoted_alias);
		}

		/*
		 * Release the escaped copies so they do not accumulate over the
		 * whole locale list.  escape_quotes() hands back a separate string
		 * per call (two results are live in one statement above).
		 * NOTE(review): assumes that storage is malloc-compatible -- confirm
		 * against escape_quotes().
		 */
		free(quoted_locale);
	}

	/* Add an SQL-standard name */
	PG_CMD_PRINTF1("INSERT INTO tmp_pg_collation VALUES ('ucs_basic', 'C', %d);\n", PG_UTF8);

	/*
	 * When copying collations to the final location, eliminate aliases that
	 * conflict with an existing locale name for the same encoding.  For
	 * example, "br_FR.iso88591" is normalized to "br_FR", both for encoding
	 * LATIN1.  But the unnormalized locale "br_FR" already exists for LATIN1.
	 * Prefer the alias that matches the OS locale name, else the first locale
	 * name by sort order (arbitrary choice to be deterministic).
	 *
	 * Also, eliminate any aliases that conflict with pg_collation's
	 * hard-wired entries for "C" etc.
	 */
	PG_CMD_PUTS("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) "
				" SELECT DISTINCT ON (collname, encoding)"
				" collname, "
				" (SELECT oid FROM pg_namespace WHERE nspname = 'pg_catalog') AS collnamespace, "
				" (SELECT relowner FROM pg_class WHERE relname = 'pg_collation') AS collowner, "
				" encoding, locale, locale "
				" FROM tmp_pg_collation"
				" WHERE NOT EXISTS (SELECT 1 FROM pg_collation WHERE collname = tmp_pg_collation.collname)"
				" ORDER BY collname, encoding, (collname = locale) DESC, locale;\n");

	pclose(locale_a_handle);
	PG_CMD_CLOSE;

	check_ok();
	if (count == 0 && !debug)
	{
		printf(_("No usable system locales were found.\n"));
		printf(_("Use the option \"--debug\" to see details.\n"));
	}
#else							/* !HAVE_LOCALE_T || WIN32 */
	printf(_("not supported on this platform\n"));
	fflush(stdout);
#endif   /* !HAVE_LOCALE_T || WIN32 */
}
其实质就是：读取操作系统 `locale -a` 列出的区域设置，过滤掉不可用的名称后，先写入临时表 tmp_pg_collation，再向 pg_collation 系统表中插入对应的排序规则数据。
补充一点,pg_collation 的数据大概是这样的:
pgsql=# \x
Expanded display is on.
pgsql=# select * from pg_collation limit 10;
-[ RECORD 1 ]-+-----------------
collname      | default
collnamespace | 11
collowner     | 10
collencoding  | -1
collcollate   |
collctype     |
-[ RECORD 2 ]-+-----------------
collname      | C
collnamespace | 11
collowner     | 10
collencoding  | -1
collcollate   | C
collctype     | C
-[ RECORD 3 ]-+-----------------
collname      | POSIX
collnamespace | 11
collowner     | 10
collencoding  | -1
collcollate   | POSIX
collctype     | POSIX
-[ RECORD 4 ]-+-----------------
collname      | aa_DJ
collnamespace | 11
collowner     | 10
collencoding  | 6
collcollate   | aa_DJ.utf8
collctype     | aa_DJ.utf8
-[ RECORD 5 ]-+-----------------
collname      | aa_DJ
collnamespace | 11
collowner     | 10
collencoding  | 8
collcollate   | aa_DJ
collctype     | aa_DJ
-[ RECORD 6 ]-+-----------------
collname      | aa_DJ.iso88591
collnamespace | 11
collowner     | 10
collencoding  | 8
collcollate   | aa_DJ.iso88591
collctype     | aa_DJ.iso88591
-[ RECORD 7 ]-+-----------------
collname      | aa_DJ.utf8
collnamespace | 11
collowner     | 10
collencoding  | 6
collcollate   | aa_DJ.utf8
collctype     | aa_DJ.utf8
-[ RECORD 8 ]-+-----------------
collname      | aa_ER
collnamespace | 11
collowner     | 10
collencoding  | 6
collcollate   | aa_ER
collctype     | aa_ER
-[ RECORD 9 ]-+-----------------
collname      | aa_ER.utf8
collnamespace | 11
collowner     | 10
collencoding  | 6
collcollate   | aa_ER.utf8
collctype     | aa_ER.utf8
-[ RECORD 10 ]+-----------------
collname      | aa_ER.utf8@saaho
collnamespace | 11
collowner     | 10
collencoding  | 6
collcollate   | aa_ER.utf8@saaho
collctype     | aa_ER.utf8@saaho

pgsql=#