From: Gurjeet Singh Date: Tue, 1 Jun 2010 17:36:54 +0000 (+0530) Subject: Moved source code from http://pgfoundry.org/projects/pgadviser/ X-Git-Url: http://git.postgresql.org/gitweb/?a=commitdiff_plain;h=dd12ce0a8c2c80c05cba2934b27a3c7242aba97b;p=pg_adviser.git Moved source code from pgfoundry.org/projects/pgadviser/ --- dd12ce0a8c2c80c05cba2934b27a3c7242aba97b diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..1c08881 --- /dev/null +++ b/Makefile @@ -0,0 +1,31 @@ +# +# PostgreSQL Adviser top level makefile +# + +PGFILEDESC = "PostgreSQL Index Advisor" +subdir = contrib/adviser +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global + +all: + $(MAKE) -C index_adviser all + $(MAKE) -C pg_advise all + $(MAKE) -C resources all + @echo "PostgreSQL Index Advisor successfully made. Ready to install." + +install: + $(MAKE) -C index_adviser $@ + $(MAKE) -C pg_advise $@ + $(MAKE) -C resources $@ + @echo "PostgreSQL Index Advisor installed." + +uninstall: + $(MAKE) -C index_adviser $@ + $(MAKE) -C pg_advise $@ + $(MAKE) -C resources $@ + @echo "PostgreSQL Index Advisor uninstalled." + +clean: + $(MAKE) -C index_adviser $@ + $(MAKE) -C pg_advise $@ + $(MAKE) -C resources $@ diff --git a/README.pgadviser b/README.pgadviser new file mode 100644 index 0000000..8417d6f --- /dev/null +++ b/README.pgadviser @@ -0,0 +1,2 @@ +As of now, this module contains only an index adviser. Please read index_adviser/README.index_advisery. + diff --git a/index_adviser/Makefile b/index_adviser/Makefile new file mode 100644 index 0000000..8941341 --- /dev/null +++ b/index_adviser/Makefile @@ -0,0 +1,15 @@ + +MODULE_big = index_adviser +OBJS = index_adviser.o + +DOCS = README.index_adviser + +ifdef USE_PGXS +PGXS := $(shell pg_config --pgxs) +include $(PGXS) +else +subdir = contrib/pgadviser/index_advisor +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/index_adviser/README.index_adviser b/index_adviser/README.index_adviser new file mode 100644 index 0000000..90f55f7 --- /dev/null +++ b/index_adviser/README.index_adviser @@ -0,0 +1,340 @@ + +README for the PostgreSQL Index Adviser + +1. Introduction + ============ + + The Index Adviser is an extension for PostgreSQL, that allows the RDBMS to +recommend a set of potentially useful indexes for a given query. + + Following are the components of the Index Adviser. + + i) An extension to query planner. That is, a new backend module, that +interacts with the planner to come up with suggestions. These suggestions are +then stored in the advise_index table for later analysis. + + ii) The pg_advise_index tool; it is a separate contrib module now. It takes a +workload (a set of SQL queries) as it's input, and outputs SQL statements that, +if executed, will create the indexes suggested by the Index Adviser. + + pg_advise_index also has an option where you can specify the maximum size of +indexes you can afford (disk space restrictions), and it will try to come up +with only those indexes that would fit in that size with the greatest profit. + + iii) A pl/pgsql function, advise_index_show(). This function interprets the +contents of the advise_index table, and outputs SQL statements (similar to +pg_advise_index) that, when executed, will create the indexes suggested by the +Index Adviser. + + iv) A few helper SQL queries like advise_index_summary.sql, that help in +interpreting the Index Adviser output stored in the advise_index table. + + +2. Installation Instructions + ========================= + + i) Apply the patch to the sources, recompile, and install postgres. + + ii) Execute 'make' for this contrib module and the pg_advise_index module. + You can execute 'make install' in the pg_advise_index module; doing that + will install the pg_advise_index binary in the bin/ directory. But, + doing a 'make install' for this (pg_index_adviser) contrib module will + not help, you will have to manually install plugin generated by this + module. Copy the libpg_index_adviser (.dll or .so) file to your + PostgreSQL's $libdir/plugins/ directory. + + iii) Create the table advise_index, using the script advise_index.create.sql + provided in this contrib module. Make sure that the user, under which + you wish to generate advisory, has proper permissions on it (INSERT is + the bare minimum, but having UPDATE and DELETE permissions too wouldn't + hurt). + + iv) Create the advise_index_show() plpgsql function using the script + advise_index_show.create.sql provided in this contrib module. This + function is not a requisite for proper functioning of the Index + Adviser, but it will help you interpret the advisory generated and + inserted into advise_index table by the Adviser. + + Notes + ----- + While working with the Adviser, if you get an error like: + + ERROR: relation "advise_index" does not exist + CONTEXT: SQL statement "insert into advise_index values( 16395, + array[1], 1782.973755, 1752, 11796, now());" + + then it means that the required advise_index table is either not created or + the current user does not have INSERT permissions on it. Please create this + table as mentioned in step (ii) above. + + Also, you should make sure that you do not enable the Index Adviser in + read-only transaction, as that will cause the internal INSERT statements to + throw ERRORs similar to above. + + +3. User Interface + ============== + + If a user intends the RDBMS to recommend indexes, then she must provide it +with a workload, that is, a sample set of queries that are expected to be +executed by the application. + + There are three ways of getting index advisory from the backend: + + i) pg_advise_index tool. + --------------------- + + Create a file that contains all the queries (semicolon terminated; may +be multi-line) that are expected to be executed by the application; and +feed this file to the pg_advise_index tool with appropriate options. + + pg_advise_index -d DB -h host -U user -s 10M -o advisory.sql workload.sql + + pg_advise_index will open a connection with the PostgreSQL server by +setting appropriate session level options that will force the backend to load +the pg_index_adviser plugin. It will then prepend the keywords EXPLAIN to each +of the queries found in the workload file, and execute them against the backend. +For each query EXPLAINed, the backend will generate advice for each index that +might have been beneficial in executing these queries. + + At the end, pg_advise_index will enumerate all the indexes suggested for +the current session, and output the CREATE INDEX statements for each of them. +Optinally, if the -size option was specified, pg_advise_index will output suggestions +for only those indexes, that fit into that size. + + ii) Manually (through psql session) + -------- + + If you wish to execute the queries manually, through psql, then you must +perform all the steps that pg_advise_index performs internally. + + Connect to the database using psql. Make sure you set the environment variable PGOPTIONS for the backend to load the plugin for you. You can use a command similar to: + + $env PGOPTIONS='-c local_preload_libraries=libpg_index_adviser' psql.exe db1 + + EXPLAIN every query that you think will be excuted by the +application. The Index Adviser will store the suggestions in the +advise_index table. + + The EXPLAIN output will contain the normal plan and, if any indexes were +suggested, another plan with the suggested indexes dumped in the server log file +as a LOG output. If you wish to see these messages on your psql screen as well, +then you must set the client_min_messages GUC variable to at least LOG; like so: + + set client_min_messages to log; + +This new plan starts with a line like: + + Index Adviser: Plan using estimates for suggested indexes: + + When you are done EXPLAIN'ing all the queries, execute the following +pl/pgsql function to see the output. (Note: you must have created the +advise_index_show() function using the script available in this contrib module) + + select advise_index_show( pg_backend_pid() ); + + If you wish, you can omit pg_backend_pid() function call inside the +parameter list, and invoke it as follows. Both these invocations have the same +effect: + + select advise_index_show( null ); + + The advise_index_show() can also be helpful in enumerating advisory +generated by any previous runs of the Index Adviser; possibly other +sessions or even previous invocations of pg_advise_index tool. + + For example, the following query will enumerate all the advices +generated till now: + + select E'backend_pid\n' + || E'===========\n' + || backend_pid, + advise_index_show( backend_pid ) + from (select distinct backend_pid + from advise_index as adv + where adv.reloid in (select oid + from pg_class + where relkind = 'r') + ) as v; + + iii) Automatically (at runtime, for every query executed by the application) + ----------------------------------------------------------------------- + + Just make sure that you set the environment varibale PGOPTIONS similar to: + + '-c local_preload_libraries=libpg_index_adviser' + +before you run your application. Either set this at the start of your +application, or set it in the postgresql.conf file (not recommended, but works). + + This will result in the Index Adviser dumping it's advisory in the +advise_index table; and then you can use the above mentioned methods to +interpret the advisory. + + +4. Architecture + ============ + + The Index Adviser consists of a plugin (.dll or .so file), a command line +tool and a pl/pgsql function. The planner calls the plugin, and that in turn +exploits virtual or hypothetical indexes. This kind of indexes are simulated +during the query planning only. + + For an incoming EXPLAIN command, the planner generates the plan and, if the +plugin is loaded, then the query is sent to the Index Adviser for any +suggestions it can make. The Adviser derives a set of potentially-useful indexes +(index candidates) for this query by analyzing the query predicates. These +indexes are inserted into the system catalog as virtual indexes; that is, they +are not created on disk. + + Then, the query is again sent to the planner, and this time the planner +makes it's decisions taking the just-created vitual indexes into account +too. All index candidates used in the final plan represent the +recommendation for the query and are inserted into the advise_index table +by the Adviser. + + The gain of this recommendation is estimated by comparing the execution cost +difference of this plan to the plan generated before virtual indexes were +created. + + Based on the index recommendations collected in the advise_index table the +optimal index configuration for a given set of workloads under disk space +restrictions can be determined by solving the knapsack problem. This is done by +the pg_advise_index tool with a -s (size) option. + + +5. The advise_index table + ====================== + + The Index Adviser inserts it's advisory into a table named advise_index. This +table should be in the excuting user's search_path, and she should have INSERT +permission on it. The user can create this table using the +advise_index.create.sql script provided in this contrib module. + + In this table, the index recommendations are collected for each query +running under control of the Index Adviser. The table is structured as +follows: + +column | type | meaning +-------------+-----------+------------------------------------------------- +reloid | oid | the oid of the base table for this index +attrs | integer[] | an array containing the indexed column numbers +profit | real | the estimated profit of this index for this query +index_size | integer | the estimated size of the index (in disk-pages) +backend_pid | integer | pid of the backend to uniquely identify the source. +timestamp | timestamp | Can be used in conjunction with backend_pid. + +Note: The profit of an index is estimated as the fraction of the overall profit +of all recommended index candidates for a given query + +(i.e. overall_profit = cost(without-virtual-indexes) + - cost(query-with-virtual-indexes)) + +based on the size of this index compared to the overall size of all indexes +recommended for this query. + + Here's a sample of the contents of advise_index table: + + select * from advise_index where backend_pid = pg_backend_pid(); + + reloid | attrs | profit | index_size | backend_pid | timestamp +--------+-------+---------+------------+-------------+------------------------ + 16395 | {1} | 1782.72 | 2608 | 5256 | 2007-01-13 13:08:54.25 + + +6. Backend source code modifications + ================================= + + This section gives a brief description of the modifiations made in +PostgreSQL backend. The following modifications were necessary: + +.) Enable the planner.c to call a post-planner-plugin at the end of planner(). + src\backend\optimizer\plan\planner.c + src\include\optimizer\planner.h + +.) Enable the optimizer to get estimated number of pages of v-index from plugin. + src\backend\optimizer\util\plancat.c + +.) Made a few misc. changes to help the Index Adviser, like a new function to + get string representation of a Plan, exporting a few global variables for the + Windows platform. + src\backend\commands\explain.c + src\include\commands\explain.h + src\include\utils\guc.h + src\include\miscadmin.h + + +7. Index Adviser Internals + ======================= + +1. If the plugin is loaded, then, at the end of the planner() function, backend +calls the main (Driver) function of the plugin: index_adviser(query, plan...); +where 'plan' is the already computed plan for the query. + +2. Then, the query is analyzed (scan_query()) in order to get a list of +potential index candidates, i.e. indexes on columns used in query predicates or +for joining or grouping or sorting. This includes multi-column indexes used in +predicates containing AND conditions. + +3. Next, all irrelevant candidates are removed +(remove_irrelevant_index_candidates()), e.g. indexes which already exist, or that +are trying to index system tables or temporary tables. + +4. The remaining candidates are created as virtual indexes without populating +them with the data (create_virtual_indexes()). + +5. Then, the planner is called and the costs are compared with those of the +previously estimated Plan. + +6. If the cost difference exceeds a given limit, the plan is scanned +(scan_plan()) in order to extract the virtual indexes that were used in this +plan. The referenced virtual indexes represent the index recommendation. + +7. The virtual indexes are removed from the catalog (drop_virtual_indexes()). + +8. Finally, the profit per index is estimated and the recommendation is written +to the advise_index table (save_advise()). + +Note: A major portion of the Index Adviser runs inside a SubTransaction, so that +just rolling the transation back helps in easy reversal of all the catalog +changes that were made as a side effect of creating virtual indexes. + +Also, as of now, the adviser creates only non-unique B-Tree virtual indexes. + +i. Credits + ======= + + Kai-Uwe Sattler (k.sattler@computer.org): + Design. + Multicolumn indexes + pg_advise (now called pg_advise_index tool) + Mario Stiffel (mario.stiffel@ms-ray.de): + Initial implementation. The algorithm worked as follows: + In ExplainOneQuery(), call indexadvisor() before normal call to planner(). + indexadvisor() creates a copy of the Query* and runs planner() on it. + Save the costs returned by planner(). + Invoke scan_query() on the Query* to create index candidates. + Prune irrelevant candidates using remove_irrelevant_index_candidates(). + Create indexes using create_virtual_indexes(); these were real indexes. + Create another copy of the original Query* and call planner() on it. + Calculate the costs saved by using (supposedly) virtual indexes. + Call scan_plan() to mark the virtual indexes used in the new plan. + Call drop_virtual_indexes() to destroy the indexes previously created. + Remove the candidates from the list that are not marked as 'used'. + Call save_advise_to_catalog() to save the advisory (details about the + 'used' candidates) into a catalog table named pg_indexadvisor. + Martin Luehring (mar.lue@web.de): + Improvements for multicolumn indexes. + Bug fixes for memory leaks. + Gurjeet Singh (gurjeet.singh@enterprisedb.com): + Ported code from 7.4.8 to 8.2. + Major code refactoring, to bring it in line with coding practices at PG. + Removed an extra call to planner() and an extra copy of Query*. + Added code to identify BitmapHeapScan, BitmapIndexScan, BitmapAnd, BitmapOr. + Now we also show the new plan with the suggested indexes in EXPLAIN output. + Now the index is not created on disk; no time spent in creating virt-index. + All the catalog changes are done and rolled-back within a sub-transaction. + Eliminated the need for a catalog table. advise_index can be a user table. + Added a keyword to EXPLAIN command: EXPLAIN ADVISE . {reverted} + Removed all GUC variables; replaced with elog messages at DEBUG levels. + Added the ability to advise in execution (non-EXPLAIN, GUC var controlled). diff --git a/index_adviser/TODO.txt b/index_adviser/TODO.txt new file mode 100644 index 0000000..401ba5e --- /dev/null +++ b/index_adviser/TODO.txt @@ -0,0 +1,102 @@ + +TODO: +===== +.) Remove the newly added parameter to DefineIndex(); Done. +.) Remove the newly added member to IndexInfo{}; Done. +.) Remove the newly added member to IndexStmt{}; Done. +.) Order the #include lines alphabetically, in new files. Done. +.) Analyze if we need timestamp column in pg_indexadvisor table. +.) Analyze if we need nattrs column in pg_indexadvisor table. Done; removed. +.) See why we shouldn't consider GROUP-BY clause in the presence on a WHERE + clause (scan_query_for_relevant_attributes())? +.) Eliminate warnings from newly added sources. +.) Write a pl/pgsql function that interprets the results in pg_indexadvisor. + Done. +.) Sanitize the liked-list logic in various functions. + Done for remove_irrelevant_candidates(). + Pending: merge_candidates(), build_composite_candidates(). +.) Somehow recommend to drop indexes that are not much used (Heikki's idea). +.) Investigate the difference in costs (in sample_psql*) across patch versions + 15 and 17; both are based on REL8_2_STABLE! +.) Do not try to insert advisory into advise_index if XactReadOnly (in xact.c) + is true; probably, raise a warning instead! +.) Do not create a virtual index if the base relation has less than a specific + number of pages (eg 2) and/or less than a specific number of tuples (eg 10). +.) Reduce the number of heap_open()s by attaching the Relation to the candidate + structure and heap_close() only when the candidate is being pfree()d. +.) Create new memory context and do everything within that. +.) Propose that _compile_assert() macro be added to the core. +.) Mention that get_relation_info_hook and explain_get_index_name_hook are + registered only when required and are unregistered immediately after that. + This avoids overhead in normal routes. + +.) Also, the 'static global' index_candidates is reset based on the above + assumption, else, we have to reset it at the top of planner_callback() and + ExplainOneQuery_callback(), otherwise the is_virtual_index() calls will + behave abnormally if there was and ERROR thrown for the previous command! + +BUGS: +===== +.) The SELECTs in the pg_advise are returning wrong results, when the same index + is suggested twice, because of the SUM() aggregates. Expected behaviour. Not a bug; intended behaviour. +.) I doubt that on a table t(a,b), for a suggestion of idx(b,a), pg_advise will + suggest idx(a,b). True; bug reproduced, fixed and verified. +.) explain + select * + from t + where b = (select b + from t + where a = (select a + from t + where b = 100 )); +.) explain + select * + from t + where b = (select b + from t + where a = (select a + from t + where b = (select b + from t + where a = (select a + from t + where b = 100)))); + +Wish-list: +========== +.) Make pg_indexadvisor a user table. Done; used SPI interface. + Reason: a normal user cannot do "delete from pg_indexadvisor". + Difficulty: Need to know how to do + "insert into pg_indexadvisor values( 1, ...)" + from within the backend; that is, need to study/invent RSI + (Recursive SQL Interface). + Trial code can be seen by searching for: + exec_simple_query( "insert into index_advisor values( 10 )", + "advisor" /*portal name*/ ); + +.) Make it plugin-based. + Reason: so that someone else with a better idea can replace + this advisor, without having to recompile the server. + Difficulty: This code calls many internal functoions: + index_create(), index_drop(), planner(), etc. + That makes it impossible to compile it standalone. + +.) Remove the dependency on the global variable "index_candidates"; used for + communication between indexadvisor.c and plancat.c. + Reason: Bad coding practice. + Difficulty: Even though I was successful in updating pg_class.relpages for + the virtual indexes, the planner is still calling smgr.c code to + get the number of pages occupied by the index! Hence, I had to + use the global the way I did. + +Files that shouldn't be in final patch: (done. this list is obsolete now) +======================================= +bootparse.y +defrem.h +tablecmds.c +src/backend/commands/indexcmds.c +src/backend/parser/gram.y +utility.c +src/backend/utils/mb/Unicode/euc_kr_to_utf8.map +src/include/nodes/execnodes.h +src/include/nodes/parsenodes.h diff --git a/index_adviser/index_adviser.c b/index_adviser/index_adviser.c new file mode 100644 index 0000000..aa0c062 --- /dev/null +++ b/index_adviser/index_adviser.c @@ -0,0 +1,2357 @@ +/*------------------------------------------------------------------------- + * + * index_adviser.c + * Plugin to recommend potentially useful indexes in a query. + * + * too much time? you do not know what to do next? then search for + * "TODO:" and "FIXME:" comments! + * + * created by Mario Stiffel + * modified and partly reimplemented by Martin Lhring + * Almost completely rewritten by gurjeet.singh@enterprisedb.com + * + *------------------------------------------------------------------------- + */ + +/* ------------------------------------------------------------------------ + * includes (ordered alphabetically) + * ------------------------------------------------------------------------ + */ +#include + +#include "postgres.h" + +#include "access/genam.h" +#include "access/heapam.h" +#include "access/itup.h" +#include "access/nbtree.h" +#include "access/xact.h" +#include "index_adviser.h" +#include "catalog/catalog.h" +#include "catalog/index.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "commands/defrem.h" +#include "commands/explain.h" +#include "executor/execdesc.h" +#include "executor/spi.h" +#include "fmgr.h" /* for PG_MODULE_MAGIC */ +#include "miscadmin.h" +#include "nodes/pg_list.h" +#include "nodes/print.h" +#include "optimizer/planner.h" +#include "optimizer/plancat.h" +#include "parser/parse_coerce.h" +#include "parser/parsetree.h" +#include "storage/lmgr.h" +#include "storage/proc.h" +#include "tcop/dest.h" +#include "tcop/tcopprot.h" +#include "utils/builtins.h" +#include "utils/elog.h" +#include "utils/lsyscache.h" +#include "utils/relcache.h" +#include "utils/syscache.h" + +/* mark this dynamic library to be compatible with PG */ +PG_MODULE_MAGIC; + +#define CREATE_V_INDEXES 1 + +/* ***************************************************************************** + * DEBUG Level : Information dumped + * ------------ ------------------ + * DEBUG1 : code level logging. What came in and what went out of a + * function. candidates generated, cost estimates, etc. + * DEBUG2 : DEBUG1 plus : Profiling info. Time consumed in each of the + * major functions. + * DEBUG3 : Above plus : function enter/leave info. + * ****************************************************************************/ + +#define DEBUG_LEVEL_COST (log_min_messages >= DEBUG2) +#define DEBUG_LEVEL_PROFILE (log_min_messages >= DEBUG2) +#define DEBUG_LEVEL_CANDS (log_min_messages >= DEBUG2) + +/* Index Adviser output table */ +#define IND_ADV_TABL "index_advisory" + +/* IND_ADV_TABL does Not Exist */ +#define IND_ADV_ERROR_NE "relation \""IND_ADV_TABL"\" does not exist." + +/* IND_ADV_TABL is Not a Table or a View */ +#define IND_ADV_ERROR_NTV "\""IND_ADV_TABL"\" is not a table or view." + +#define IND_ADV_ERROR_DET \ + "Index Adviser uses \""IND_ADV_TABL"\" table to store it's advisory. You" \ + " should have INSERT permissions on a table or an (INSERT-able) view named"\ + " \""IND_ADV_TABL"\". Also, make sure that you are NOT running the Index" \ + " Adviser under a read-only transaction." + +#define IND_ADV_ERROR_HINT \ + "Please create the \""IND_ADV_TABL"\" table using the script provided in" \ + " pg_advise_index contrib module." + +/* ***************************************************************************** + * forward declarations of local-only functions + * ****************************************************************************/ + +/* scan_* functions go looking for relevant attributes in the query */ +static List* scan_query( const Query* const query, + List* const opnos, + List* rangeTableStack ); + +static List* scan_generic_node( const Node* const root, + List* const opnos, + List* const rangeTableStack ); + +static List* scan_group_clause( List* const groupList, + List* const targtList, + List* const opnos, + List* const rangeTblStack ); + +static List* build_composite_candidates( List* l1, List* l2 ); + +static List* remove_irrelevant_candidates( List* candidates ); + +static void mark_used_candidates( const Node* const plan, + List* const candidates ); + +static int compare_candidates( const IndexCandidate* c1, + const IndexCandidate* c2 ); + +static List* merge_candidates( List* l1, List* l2 ); + +static List* create_virtual_indexes( List* candidates ); + +static void drop_virtual_indexes( List* candidates ); + +static void save_advise( List* candidates ); + +static void log_candidates( const char* text, List* candidates ); + +/* function used for estimating the size of virtual indexes */ +static int4 estimate_index_pages(Oid rel_oid, Oid ind_oid ); + +static PlannedStmt* planner_callback( Query* query, + int cursorOptions, + ParamListInfo boundParams); + +static void ExplainOneQuery_callback( Query *query, + ExplainStmt *stmt, + const char *queryString, + ParamListInfo params, + TupOutputState *tstate); + +static void get_relation_info_callback( PlannerInfo* root, + Oid relationObjectId, + bool inhparent, + RelOptInfo* rel); + +static const char * +explain_get_index_name_callback( Oid indexId ); + +static PlannedStmt* index_adviser( Query *query, + int cursorOptions, + ParamListInfo boundParams, + PlannedStmt *actual_plan, + bool doingExplain); + +static void resetSecondaryHooks(); +static bool is_virtual_index( Oid oid, IndexCandidate **cand_out ); + +/* ------------------------------------------------------------------------ + * implementations: types and functions for profiling + * ------------------------------------------------------------------------ + */ + +typedef struct { + bool running; + struct timeval start; + struct timeval stop; + unsigned long usec; +} Timer; + +/* Need this to remeber the virtual indexes generated. */ +static List *index_candidates; + +/* Timer for logCandiates; global, since it is called from different places */ +static Timer tLogCandidates; + +/* Global variable to hold a value across calls to mark_used_candidates() */ +static PlannedStmt *plannedStmtGlobal; + +static void +startTimer( Timer* const timer ) +{ + gettimeofday( &(timer->start), NULL ); + + timer->usec = 0; + timer->running = true; +} + +static void +continueTimer( Timer* const timer ) +{ + if( timer->running == false ) + { + gettimeofday( &(timer->start), NULL ); + timer->running = true; + } +} + +static void +stopTimer( Timer* const timer ) +{ + if( timer->running == true ) + { + gettimeofday( &(timer->stop), NULL ); + + timer->usec += ( (unsigned long)timer->stop.tv_sec + - (unsigned long)timer->start.tv_sec ) + * (unsigned long)1000000 + + (unsigned long)timer->stop.tv_usec + - (unsigned long)timer->start.tv_usec; + + timer->running = false; + } +} + +static void +t_reset( Timer *const timer ) +{ + timer->usec = 0; + timer->running = false; +} + +/* + * Since gettimeofday() is expensive, we don't collect profiling data unless + * elog is going to log this. + */ +#define t_start(x) do{ \ + if( DEBUG_LEVEL_PROFILE ) \ + startTimer( &(x) ); \ + }while(0) + +#define t_continue(x) do{ \ + if( DEBUG_LEVEL_PROFILE ) \ + continueTimer( &(x) ); \ + }while(0) + +#define t_stop(x) do{ \ + if( DEBUG_LEVEL_PROFILE ) \ + stopTimer( &(x) ); \ + }while(0) + +/* ------------------------------------------------------------------------ + * implementations: index adviser + * ------------------------------------------------------------------------ + */ + +/* PG calls this func when loading the plugin */ +void +_PG_init(void) +{ + planner_hook = planner_callback; + ExplainOneQuery_hook = ExplainOneQuery_callback; + + resetSecondaryHooks(); + + elog( NOTICE, "IND ADV: plugin loaded" ); +} + +/* PG calls this func when un-loading the plugin (if ever) */ +void +_PG_fini(void) +{ + planner_hook = NULL; + ExplainOneQuery_hook = NULL; + + resetSecondaryHooks(); + + elog( NOTICE, "IND ADV: plugin unloaded." ); +} + +/* Assert that Cost datatype can represent negative values */ +compile_assert( ((Cost)-1) < 0 ); + +/** + * index_adviser + * Takes a query and the actual plan generated by the standard planner for + * that query. It then creates virtual indexes, for the columns used in the + * query, and asks the standard planner to generate a new plan for the query. + * + * If the new plan appears to be cheaper than the actual plan, then it + * saves the information about the virtual indexes, that were used by the + * planner, into an advisory table. + * + * If it is called by the Explain-hook, then it returns the newly generated + * plan (allocated in caller's memory context), so that ExplainOnePlan() can + * generate and send a string representation of the plan to the client. + */ +static PlannedStmt* +index_adviser( Query* queryCopy, + int cursorOptions, + ParamListInfo boundParams, + PlannedStmt *actual_plan, + bool doingExplain) +{ + static int8 SuppressRecursion = 0; /* suppress recursive calls */ + bool saveCandidates = false; + int i; + ListCell *prev, /* temps for list manipulation*/ + *cell, + *next; + List* opnos = NIL; /* contains all vailid operator-ids */ + List* candidates = NIL; /* the resulting candidates */ + + Timer tAdviser; + Timer tRePlan; + Timer tBTreeOperators; + Timer tGenCands; + Timer tMarkUsedCands; + Timer tCreateVInds; + Timer tDropVInds; + Timer tSaveAdvise; + + Cost actualStartupCost; + Cost actualTotalCost; + Cost newStartupCost; + Cost newTotalCost; + Cost startupCostSaved; + Cost totalCostSaved; + float4 startupGainPerc; /* in percentages */ + float4 totalGainPerc; + + ResourceOwner oldResourceOwner; + PlannedStmt *new_plan; + MemoryContext outerContext; + + char *BTreeOps[] = { "=", "<", ">", "<=", ">=", }; + + elog( DEBUG3, "IND ADV: Entering" ); + + /* We work only in Normal Mode, and non-recursively */ + if( IsBootstrapProcessingMode() || SuppressRecursion++ > 0 ) + { + new_plan = NULL; + goto DoneCleanly; + } + + /* remeber the memory context; we might need it to supply the candidate + * list to the transaction that encloses the inner transaction. + */ + outerContext = CurrentMemoryContext; + + /* reset these globals; since an ERROR might have left them unclean */ + t_reset( &tLogCandidates ); + index_candidates = NIL; + + /* save the start-time */ + t_start( tAdviser ); + + /* get the costs without any virtual index */ + actualStartupCost = actual_plan->planTree->startup_cost; + actualTotalCost = actual_plan->planTree->total_cost; + + /* create list containing all operators supported by B-tree */ + t_start( tBTreeOperators ); + for( i=0; i < lengthof(BTreeOps); ++i ) + { + FuncCandidateList opnosResult; + + List* btreeop = list_make1( makeString( BTreeOps[i] ) ); + + /* get the operator-id's to the operator, and collect the operator-id's + * into an array. + */ + /* TODO: find out if the memory of opnosResult is ever freed. */ + for( opnosResult = OpernameGetCandidates( btreeop, '\0' ); + opnosResult != NULL; + opnosResult = lnext(opnosResult) ) + { + opnos = lappend_oid( opnos, opnosResult->oid ); + } + + /* free the Value* (T_String) and the list */ + pfree( linitial( btreeop ) ); + list_free( btreeop ); + } + t_stop( tBTreeOperators ); + + /* Generate index candidates */ + t_start( tGenCands ); + candidates = scan_query( queryCopy, opnos, NULL ); + t_stop( tGenCands ); + + /* the list of operator oids isn't needed anymore */ + list_free( opnos ); + + if (list_length(candidates) == 0) + goto DoneCleanly; + + log_candidates( "Generated candidates", candidates ); + + /* remove all irrelevant candidates */ + candidates = remove_irrelevant_candidates( candidates ); + + if (list_length(candidates) == 0) + goto DoneCleanly; + + log_candidates( "Relevant candidates", candidates ); +#if CREATE_V_INDEXES + /* + * We need to restore the resource-owner after RARCST(), only if we are + * called from the executor; but we do it all the time because, + * (1) Its difficult to determine if we are being called by the executor. + * (2) It is not harmful. + * (3) It is not much of an overhead! + */ + oldResourceOwner = CurrentResourceOwner; + + /* + * Setup an SPI frame around the BeginInternalSubTransaction() and + * RollbackAndReleaseCurrentSubTransaction(), since xact.c assumes that + * BIST()/RARCST() infrastructure is used only by PL/ interpreters (like + * pl/pgsql), and hence it calls AtEOSubXact_SPI(), and that in turn frees + * all the execution context memory of the SPI (which _may_ have invoked the + * adviser). By setting up our own SPI frame here, we make sure that + * AtEOSubXact_SPI() frees this frame's memory. + */ + if( SPI_connect() != SPI_OK_CONNECT ) + { + elog( WARNING, "IND ADV: SPI_connect() call failed" ); + goto DoneCleanly; + } + + /* + * DO NOT access any data-structure allocated between BEGIN/ROLLBACK + * transaction, after the ROLLBACK! All the memory allocated after BEGIN is + * freed in ROLLBACK. + */ + BeginInternalSubTransaction( "index_adviser" ); + + /* now create the virtual indexes */ + t_start( tCreateVInds ); + candidates = create_virtual_indexes( candidates ); + t_stop( tCreateVInds ); +#endif + /* update the global var */ + index_candidates = candidates; + + /* + * Setup the hook in the planner that injects information into base-tables + * as they are prepared + */ + get_relation_info_hook = get_relation_info_callback; + + /* do re-planning using virtual indexes */ + /* TODO: is the plan ever freed? */ + t_start( tRePlan ); + new_plan = standard_planner(queryCopy, cursorOptions, boundParams); + t_stop( tRePlan ); + + /* reset the hook */ + get_relation_info_hook = NULL; +#if CREATE_V_INDEXES + /* remove the virtual-indexes */ + t_start( tDropVInds ); + drop_virtual_indexes( candidates ); + t_stop( tDropVInds ); +#endif + newStartupCost = new_plan->planTree->startup_cost; + newTotalCost = new_plan->planTree->total_cost; + + /* calculate the saved costs */ + startupGainPerc = + actualStartupCost == 0 ? 0 : + (1 - newStartupCost/actualStartupCost) * 100; + + totalGainPerc = + actualTotalCost == 0 ? 0 : + (1 - newTotalCost/actualTotalCost) * 100; + + startupCostSaved = actualStartupCost - newStartupCost; + + totalCostSaved = actualTotalCost - newTotalCost; + + if( startupCostSaved >0 || totalCostSaved > 0 ) + { + /* scan the plan for virtual indexes used */ + t_start( tMarkUsedCands ); + plannedStmtGlobal = new_plan; + + mark_used_candidates( (Node*)new_plan->planTree, candidates ); + + plannedStmtGlobal = NULL; + t_stop( tMarkUsedCands ); + } + + /* Remove unused candidates from the list. */ + for( prev = NULL, cell = list_head(candidates); + cell != NULL; + cell = next ) + { + IndexCandidate *cand = (IndexCandidate*)lfirst( cell ); + + next = lnext( cell ); + + if( !cand->idxused ) + { + pfree( cand ); + candidates = list_delete_cell( candidates, cell, prev ); + } + else + prev = cell; + } + + /* update the global var */ + index_candidates = candidates; + + /* log the candidates used by the planner */ + log_candidates( "Used candidates", candidates ); + + if( list_length( candidates ) > 0 ) + saveCandidates = true; + + /* calculate the share of cost saved by each index */ + if( saveCandidates ) + { + int4 totalSize = 0; + IndexCandidate *cand; + + foreach( cell, candidates ) + totalSize += ((IndexCandidate*)lfirst( cell ))->pages; + + foreach( cell, candidates ) + { + cand = (IndexCandidate*)lfirst( cell ); + + cand->profit = (float4)totalCostSaved + * ((float4)cand->pages/totalSize); + } + } + + /* Print the new plan if debugging. */ + if( saveCandidates && Debug_print_plan ) + elog_node_display( DEBUG1, "plan (using Index Adviser suggestions)", + new_plan, Debug_pretty_print ); + + if( saveCandidates && doingExplain ) + { + MemoryContext oldContext = MemoryContextSwitchTo( outerContext ); + + new_plan = copyObject( new_plan ); + + MemoryContextSwitchTo( oldContext ); + } + else + { + /* TODO1: try to free the new plan node */ + new_plan = NULL; + } +#if CREATE_V_INDEXES + /* + * Undo the metadata changes; for eg. pg_depends entries will be removed + * (from our view). + * + * Again: DO NOT access any data-structure allocated between BEGIN/ROLLBACK + * transaction, after the ROLLBACK! All the memory allocated after BEGIN is + * freed in ROLLBACK. + */ + RollbackAndReleaseCurrentSubTransaction(); + + /* restore the resource-owner */ + CurrentResourceOwner = oldResourceOwner; + + if( SPI_finish() != SPI_OK_FINISH ) + elog( WARNING, "IND ADV: SPI did not FINISH properly." ); +#endif + /* save the advise into the table */ + if( saveCandidates ) + { + t_start( tSaveAdvise ); + + /* catch any ERROR */ + PG_TRY(); + { + save_advise(candidates); + } + PG_CATCH(); + { + /* reset our 'running' state... */ + --SuppressRecursion; + + /* + * Add a detailed explanation to the ERROR. Note that these function + * calls will overwrite the DETAIL and HINT that are already + * associated (if any) with this ERROR. XXX consider errcontext(). + */ + errdetail( IND_ADV_ERROR_DET ); + errhint( IND_ADV_ERROR_HINT ); + + /* ... and re-throw the ERROR */ + PG_RE_THROW(); + } + PG_END_TRY(); + + t_stop( tSaveAdvise ); + } + + /* remove the candidates-list */ + elog( DEBUG1, "IND ADV: Deleting candidate list." ); + if( !saveCandidates || !doingExplain ) + { + foreach( cell, index_candidates ) + pfree( (IndexCandidate*)lfirst( cell ) ); + + list_free( index_candidates ); + + index_candidates = NIL; + } + + t_stop( tAdviser ); + + /* emit debug info */ + elog( DEBUG1, "IND ADV: old cost %.2f..%.2f", actualStartupCost, + actualTotalCost ); + elog( DEBUG1, "IND ADV: new cost %.2f..%.2f", newStartupCost, newTotalCost); + elog( DEBUG1, "IND ADV: cost saved %.2f..%.2f, these are %lu..%lu percent", + startupCostSaved, + totalCostSaved, + (unsigned long)startupGainPerc, + (unsigned long)totalGainPerc ); + + /* print profiler information */ + elog( DEBUG2, "IND ADV: [Prof] * Query String : %s", + debug_query_string ); + elog( DEBUG2, "IND ADV: [Prof] * indexAdviser : %10lu usec", + tAdviser.usec ); + elog( DEBUG2, "IND ADV: [Prof] |-- replanning : %10lu usec", + tRePlan.usec ); + elog( DEBUG2, "IND ADV: [Prof] |-- getBTreeOperators : %10lu usec", + tBTreeOperators.usec ); + elog( DEBUG2, "IND ADV: [Prof] |-- scanQuery : %10lu usec", + tGenCands.usec ); + elog( DEBUG2, "IND ADV: [Prof] |-- scanPlan : %10lu usec", + tMarkUsedCands.usec ); + elog( DEBUG2, "IND ADV: [Prof] |-- createVirtualIndexes : %10lu usec", + tCreateVInds.usec ); + elog( DEBUG2, "IND ADV: [Prof] |-- dropVirtualIndexes : %10lu usec", + tDropVInds.usec ); + elog( DEBUG2, "IND ADV: [Prof] |-- saveAdviseToCatalog : %10lu usec", + ( saveCandidates == true ) ? tSaveAdvise.usec : 0 ); + elog( DEBUG2, "IND ADV: [Prof] |-- log_candidates : %10lu usec", + tLogCandidates.usec ); + +DoneCleanly: + /* allow new calls to the index-adviser */ + --SuppressRecursion; + + elog( DEBUG3, "IND ADV: EXIT" ); + + return doingExplain && saveCandidates ? new_plan : NULL; +} + +/* + * This callback is registered immediately upon loading this plugin. It is + * responsible for taking over control from the planner. + * + * It calls the standard planner and sends the resultant plan to + * index_adviser() for comparison with a plan generated after creating + * hypothetical indexes. + */ +static PlannedStmt* +planner_callback( Query* query, + int cursorOptions, + ParamListInfo boundParams) +{ + Query *queryCopy; + PlannedStmt *actual_plan; + PlannedStmt *new_plan; + + resetSecondaryHooks(); + + /* TODO1 : try to avoid making a copy if the index_adviser() is not going + * to use it; Index Adviser may not use the query copy at all if we are + * running in BootProcessing mode, or if the Index Adviser is being called + * recursively. + */ + + /* planner() scribbles on it's input, so make a copy of the query-tree */ + queryCopy = copyObject( query ); + + /* Generate a plan using the standard planner */ + actual_plan = standard_planner( query, cursorOptions, boundParams ); + + /* send the actual plan for comparison with a hypothetical plan */ + new_plan = index_adviser( queryCopy, cursorOptions, boundParams, + actual_plan, false ); + + /* TODO1: try to free the redundant new_plan */ + + return actual_plan; +} + +/* + * This callback is registered immediately upon loading this plugin. It is + * responsible for taking over control from the ExplainOneQuery() function. + * + * It calls the standard planner and sends the resultant plan to + * index_adviser() for comparison with a plan generated after creating + * hypothetical indexes. + */ +static void +ExplainOneQuery_callback( Query *query, + ExplainStmt *stmt, + const char *queryString, + ParamListInfo params, + TupOutputState *tstate) +{ + Query *queryCopy; + PlannedStmt *actual_plan; + PlannedStmt *new_plan; + ListCell *cell; + + resetSecondaryHooks(); + + /* planner() scribbles on it's input, so make a copy of the query-tree */ + queryCopy = copyObject( query ); + + /* plan the query */ + actual_plan = standard_planner( query, 0, params ); + + /* run it (if needed) and produce output */ + ExplainOnePlan( actual_plan, params, stmt, tstate ); + + /* re-plan the query */ + new_plan = index_adviser( queryCopy, 0, params, actual_plan, true ); + + if ( new_plan ) + { + bool analyze = stmt->analyze; + + stmt->analyze = false; + + explain_get_index_name_hook = explain_get_index_name_callback; + + do_text_output_oneline(tstate, ""); /* separator line */ + do_text_output_oneline(tstate, "** Plan with hypothetical indexes **"); + ExplainOnePlan( new_plan, params, stmt, tstate ); + + explain_get_index_name_hook = NULL; + + stmt->analyze = analyze; + } + + /* The candidates were not destroyed by the Index Adviser, do it now */ + foreach( cell, index_candidates ) + pfree( (IndexCandidate*)lfirst( cell ) ); + + list_free( index_candidates ); + + index_candidates = NIL; + + /* TODO1: try to free the now-redundant new_plan */ +} + +/* + * get_relation_info() calls this callback after it has prepared a RelOptInfo + * for a relation. + * + * The Job of this callback is to fill in the information about the virtual + * index, that get_rel_info() could not load from the catalogs. As of now, the + * number of disk-pages that might be occupied by the virtual index (if created + * on-disk), is the only information that needs to be updated. + */ +static void +get_relation_info_callback( PlannerInfo *root, + Oid relationObjectId, + bool inhparent, + RelOptInfo *rel) +{ +#if CREATE_V_INDEXES + ListCell *cell1; + IndexCandidate *cand; + + foreach( cell1, rel->indexlist ) + { + IndexOptInfo *info = (IndexOptInfo*)lfirst( cell1 ); + + /* We call estimate_index_pages() here, instead of immediately after + * index_create() API call, since rel has been run through + * estimate_rel_size() by the caller! + */ + + if( is_virtual_index( info->indexoid, &cand ) ) + { + /* estimate the size */ + cand->pages = estimate_index_pages(cand->reloid, cand->idxoid); + + info->pages = cand->pages; + } + } +#else + /* This needs implimentation */ + compile_assert( false ); + ListCell *cell1; + + foreach( cell1, index_candidates ) + { + IndexCandidate *cand = (IndexCandidate*)lfirst( cell1 ); + + Form_pg_index index; + IndexOptInfo *info; + int ncolumns; + int i; + + /* + * Extract info from the relation descriptor for the index. + */ + index = indexRelation->rd_index; + + info = makeNode(IndexOptInfo); + + info->indexoid = InvalidOid; + info->rel = rel; + info->ncolumns = ncolumns = cand->ncols; + + /* + * Allocate per-column info arrays. To save a few palloc cycles + * we allocate all the Oid-type arrays in one request. Note that + * the opfamily array needs an extra, terminating zero at the end. + * We pre-zero the ordering info in case the index is unordered. + */ + info->indexkeys = (int *) palloc(sizeof(int) * ncolumns); + info->opfamily = (Oid *) palloc0(sizeof(Oid) * (4 * ncolumns + 1)); + info->opcintype = info->opfamily + (ncolumns + 1); + info->fwdsortop = info->opcintype + ncolumns; + info->revsortop = info->fwdsortop + ncolumns; + info->nulls_first = (bool *) palloc0(sizeof(bool) * ncolumns); + + for (i = 0; i < ncolumns; i++) + { + info->indexkeys[i] = cand->varattno[i]; + info->opfamily[i] = InvalidOid; + info->opcintype[i] = InvalidOid; + } + + info->relam = InvalidOid; + info->amcostestimate = InvalidOid; + info->amoptionalkey = false; + info->amsearchnulls = false; + + for (i = 0; i < ncolumns; i++) + { + if (opt & INDOPTION_DESC) + { + fwdstrat = BTGreaterStrategyNumber; + revstrat = BTLessStrategyNumber; + } + else + { + fwdstrat = BTLessStrategyNumber; + revstrat = BTGreaterStrategyNumber; + } + /* + * Index AM must have a fixed set of strategies for it + * to make sense to specify amcanorder, so we + * need not allow the case amstrategies == 0. + */ + if (fwdstrat > 0) + { + Assert(fwdstrat <= nstrat); + info->fwdsortop[i] = indexRelation->rd_operator[i * nstrat + fwdstrat - 1]; + } + if (revstrat > 0) + { + Assert(revstrat <= nstrat); + info->revsortop[i] = indexRelation->rd_operator[i * nstrat + revstrat - 1]; + } + info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0; + } + + /* + * Fetch the index expressions and predicate, if any. We must + * modify the copies we obtain from the relcache to have the + * correct varno for the parent relation, so that they match up + * correctly against qual clauses. + */ + info->indexprs = RelationGetIndexExpressions(indexRelation); + info->indpred = RelationGetIndexPredicate(indexRelation); + if (info->indexprs && varno != 1) + ChangeVarNodes((Node *) info->indexprs, 1, varno, 0); + if (info->indpred && varno != 1) + ChangeVarNodes((Node *) info->indpred, 1, varno, 0); + info->predOK = false; /* set later in indxpath.c */ + info->unique = false; + + /* + * Estimate the index size. If it's not a partial index, we lock + * the number-of-tuples estimate to equal the parent table; if it + * is partial then we have to use the same methods as we would for + * a table, except we can be sure that the index is not larger + * than the table. + */ + if (info->indpred == NIL) + { + info->pages = RelationGetNumberOfBlocks(indexRelation); + info->tuples = rel->tuples; + } + else + { + estimate_rel_size(indexRelation, NULL, + &info->pages, &info->tuples); + if (info->tuples > rel->tuples) + info->tuples = rel->tuples; + } + + index_close(indexRelation, NoLock); + + indexinfos = lcons(info, indexinfos); + } +#endif +} + +/* Use this function to reset the hooks that are not required to be registered + * all the time; these may have been left registered by the previous call, in + * case of an ERROR. + */ +static void +resetSecondaryHooks() +{ + get_relation_info_hook = NULL; + explain_get_index_name_hook = NULL; +} + +static bool +is_virtual_index( Oid oid, IndexCandidate **cand_out ) +{ + ListCell *cell1; + + foreach( cell1, index_candidates ) + { + IndexCandidate *cand = (IndexCandidate*)lfirst( cell1 ); + + if( cand->idxoid == oid ) + { + if( cand_out ) + *cand_out = cand; + return true; + } + } + + return false; +} + +static const char * +explain_get_index_name_callback(Oid indexId) +{ + StringInfoData buf; + IndexCandidate *cand; + + if( is_virtual_index( indexId, &cand ) ) + { + initStringInfo(&buf); + + appendStringInfo( &buf, ":%d", cand->idxoid ); + + return buf.data; + } + + return NULL; /* allow default behavior */ +} + +/** + * save_advise + * for every candidate insert an entry into IND_ADV_TABL + */ +static void +save_advise( List* candidates ) +{ + StringInfoData query; /* string for Query */ + StringInfoData cols; /* string for Columns */ + Oid advise_oid; + ListCell *cell; + + elog( DEBUG3, "IND ADV: save_advise: ENTER" ); + + Assert( list_length(candidates) != 0 ); + + /* + * Minimal check: check that IND_ADV_TABL is atleast visible to us. There + * are a lot more checks we should do in order to not let the INSERT fail, + * like permissions, datatype mis-match, etc., but we leave those checks + * upto the executor. + */ + + /* find a relation named IND_ADV_TABL on the search path */ + advise_oid = RelnameGetRelid( IND_ADV_TABL ); + + if (advise_oid != InvalidOid) + { +#if 1 + Relation advise_rel = relation_open(advise_oid, AccessShareLock); + + if (advise_rel->rd_rel->relkind != RELKIND_RELATION + && advise_rel->rd_rel->relkind != RELKIND_VIEW) + { + relation_close(advise_rel, AccessShareLock); + + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg( IND_ADV_ERROR_NTV ))); + } + + relation_close(advise_rel, AccessShareLock); +#else + /* + * heap_open() makes sure that the oid does not represent an INDEX or a + * COMPOSITE type, else it will raise an ERROR, which is exactly what we + * want. The comments above heap_open() ask the caller not to assume any + * storage since the returned relation may be a VIEW; but we don't mind, + * since the user may have defined some rules on it to make the INSERTs + * work smoothly! If not, we leave it upto the executor to raise ERROR. + */ + PG_TRY(); + { + heap_close(heap_open(advise_oid, AccessShareLock), AccessShareLock); + } + PG_CATCH(); + { + errmsg( IND_ADV_ERROR_NTV ); + PG_RE_THROW(); + } + PG_END_TRY(); +#endif + } + else + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_TABLE), + errmsg( IND_ADV_ERROR_NE ))); + } + + initStringInfo( &query ); + initStringInfo( &cols ); + + foreach( cell, candidates ) + { + int i; + IndexCandidate* idxcd = (IndexCandidate*)lfirst( cell ); + + if( !idxcd->idxused ) + continue; + + if( cols.len > 0 ) pfree( cols.data ); + + initStringInfo( &cols ); + + for (i = 0; i < idxcd->ncols; ++i) + appendStringInfo( &cols, "%s%d", (i>0?",":""), idxcd->varattno[i]); + + appendStringInfo( &query, "insert into \""IND_ADV_TABL"\" values" + "( %d, array[%s], %f, %d, %d, now());", + idxcd->reloid, + cols.data, + idxcd->profit, + idxcd->pages * BLCKSZ/1024, /* in KBs */ + MyProcPid ); + } /* foreach cell in candidates */ + + if( query.len > 0 ) /* if we generated any SQL */ + { + if( SPI_connect() == SPI_OK_CONNECT ) + { + if( SPI_execute( query.data, false, 0 ) != SPI_OK_INSERT ) + elog( WARNING, "IND ADV: SPI could not INSERT." ); + + if( SPI_finish() != SPI_OK_FINISH ) + elog( WARNING, "IND ADV: SPI did not FINISH properly." ); + } + else + elog( WARNING, "IND ADV: SPI could not CONNECT." ); + } + + if ( query.len > 0 ) pfree( query.data ); + if ( cols.len > 0 ) pfree( cols.data ); + + elog( DEBUG3, "IND ADV: save_advise: EXIT" ); +} + +/** + * remove_irrelevant_candidates + * + * A candidate is irrelevant if it complies with at least one of the following + * conditions: + * (a) it indexes an unsupported relation (system-relations or temp-relations) + * (b) it matches an already present index. + * + * TODO1 Log the candidates as they are pruned, and remove the call to + * log_candidates() in index_adviser() after this function is called. + * + */ +static List* +remove_irrelevant_candidates( List* candidates ) +{ + ListCell *cell = list_head(candidates); + ListCell *prev = NULL; + + while(cell != NULL) + { + ListCell *old_cell = cell; + + Oid base_rel_oid = ((IndexCandidate*)lfirst( cell ))->reloid; + Relation base_rel = heap_open( base_rel_oid, AccessShareLock ); + + /* decide if the relation is unsupported. This check is now done before + * creating a candidate in scan_generic_node(); but still keeping the + * code here. + */ + if((base_rel->rd_istemp == true) + || IsSystemRelation(base_rel)) + { + ListCell *cell2; + ListCell *prev2; + ListCell *next; + + /* remove all candidates indexing currently unsupported relations */ + elog( DEBUG1, + "Index candidate(s) on an unsupported relation (%d) found!", + base_rel_oid ); + + /* Remove all candidates with same unsupported relation */ + for(cell2 = cell, prev2 = prev; cell2 != NULL; cell2 = next) + { + next = lnext(cell2); + + if(((IndexCandidate*)lfirst(cell2))->reloid == base_rel_oid) + { + pfree((IndexCandidate*)lfirst(cell2)); + candidates = list_delete_cell( candidates, cell2, prev2 ); + + if(cell2 == cell) + cell = next; + } + else + { + prev2 = cell2; + } + } + } + else + { + /* remove candidates that match any of already defined indexes */ + + /* get all index Oids */ + ListCell *index_cell; + List *old_index_oids = RelationGetIndexList( base_rel ); + + foreach( index_cell, old_index_oids ) + { + /* open index relation and get the index info */ + Oid old_index_oid = lfirst_oid( index_cell ); + Relation old_index_rel = index_open( old_index_oid, + AccessShareLock ); + IndexInfo *old_index_info = BuildIndexInfo( old_index_rel ); + + /* We ignore expressional indexes and partial indexes */ + if( old_index_rel->rd_index->indisvalid + && old_index_info->ii_Expressions == NIL + && old_index_info->ii_Predicate == NIL ) + { + ListCell *cell2; + ListCell *prev2; + ListCell *next; + + Assert( old_index_info->ii_Expressions == NIL ); + Assert( old_index_info->ii_Predicate == NIL ); + + /* search for a matching candidate */ + for(cell2 = cell, prev2 = prev; + cell2 != NULL; + cell2 = next) + {next = lnext(cell2);{ + + IndexCandidate* cand = (IndexCandidate*)lfirst(cell2); + + signed int cmp = (signed int)cand->ncols + - old_index_info->ii_NumIndexAttrs; + + if(cmp == 0) + { + int i = 0; + do + { + cmp = + cand->varattno[i] + - old_index_info->ii_KeyAttrNumbers[i]; + ++i; + } while((cmp == 0) && (i < cand->ncols)); + } + + if(cmp != 0) + { + + /* current candidate does not match the current + * index, so go to next candidate. + */ + prev2 = cell2; + } + else + { + elog( DEBUG1, + "A candidate matches the index oid of : %d;" + "hence ignoring it.", + old_index_oid ); + + /* remove the candidate from the list */ + candidates = list_delete_cell(candidates, + cell2, prev2); + pfree( cand ); + + if (cell2 == cell) + cell = next; + + break; /* while */ + } + }} /* for */ + } + + /* close index relation and free index info */ + index_close( old_index_rel, AccessShareLock ); + pfree( old_index_info ); + } + + /* free the list of existing index Oids */ + list_free( old_index_oids ); + + /* clear the index-list, else the planner can not see the + * virtual-indexes + * TODO: Really?? Verify this. + */ + base_rel->rd_indexlist = NIL; + base_rel->rd_indexvalid = 0; + } + + /* close the relation */ + heap_close( base_rel, AccessShareLock ); + + /* + * Move the pointer forward, only if the crazy logic above did not do it + * else, cell is already pointing to a new list-element that needs + * processing + */ + if(cell == old_cell) + { + prev = cell; + cell = lnext(cell); + } + } + + return candidates; +} + +/** + * mark_used_candidates + * runs thru the plan to find virtual indexes used by the planner + */ +static void +mark_used_candidates(const Node* const node, List* const candidates) +{ + const ListCell *cell; + bool planNode = true; /* is it a plan-node */ + + elog( DEBUG3, "IND ADV: mark_used_candidates: ENTER" ); + + switch( nodeTag( node ) ) + { + /* if the node is an indexscan */ + case T_IndexScan: + { + /* are there any used virtual-indexes? */ + const IndexScan* const idxScan = (const IndexScan*)node; + + foreach( cell, candidates ) + { + + /* is virtual-index-oid in the IndexScan-list? */ + IndexCandidate* const idxcd = (IndexCandidate*)lfirst( cell ); + const bool used = idxcd->idxoid == idxScan->indexid; + + /* connect the existing value per OR */ + idxcd->idxused = idxcd->idxused || used; + } + } + break; + + /* if the node is a bitmap-index-scan */ + case T_BitmapIndexScan: + { + /* are there any used virtual-indexes? */ + const BitmapIndexScan* const bmiScan = (const BitmapIndexScan*)node; + foreach( cell, candidates ) + { + /* is virtual-index-oid in the BMIndexScan-list? */ + IndexCandidate* const idxcd = (IndexCandidate*)lfirst( cell ); + const bool used = idxcd->idxoid == bmiScan->indexid; + + /* conntect the existing value per OR */ + idxcd->idxused = idxcd->idxused || used; + } + } + break; + + /* if the node is a bitmap-and */ + case T_BitmapAnd: + { + /* are there any used virtual-indexes? */ + const BitmapAnd* const bmiAndScan = (const BitmapAnd*)node; + + foreach( cell, bmiAndScan->bitmapplans ) + mark_used_candidates( (Node*)lfirst( cell ), candidates ); + } + break; + + /* if the node is a bitmap-or */ + case T_BitmapOr: + { + /* are there any used virtual-indexes? */ + const BitmapOr* const bmiOrScan = (const BitmapOr*)node; + + foreach( cell, bmiOrScan->bitmapplans ) + mark_used_candidates( (Node*)lfirst( cell ), candidates ); + } + break; + + case T_SubqueryScan: + { + /* scan subqueryplan */ + const SubqueryScan* const subScan = (const SubqueryScan*)node; + + mark_used_candidates( (const Node*)subScan->subplan, candidates ); + } + break; + + case T_NestLoop: + case T_MergeJoin: + case T_HashJoin: + case T_Join: + { + /* scan join-quals */ + const Join* const join = (const Join*)node; + + foreach( cell, join->joinqual ) + { + const Node* const qualPlan = (const Node*)lfirst( cell ); + mark_used_candidates( qualPlan, candidates ); + } + } + break; + + case T_OpExpr: + { + planNode = false; + const OpExpr* const expr = (const OpExpr*)node; + foreach( cell, expr->args ) + mark_used_candidates( (const Node*)lfirst( cell ), candidates ); + } + break; + + case T_SubPlan: + { + /* scan the subplan */ + planNode = false; + const SubPlan* const subPlan = (const SubPlan*)node; + + mark_used_candidates( (const Node*)&plannedStmtGlobal->subplans[subPlan->plan_id], candidates ); + } + break; + + case T_BoolExpr: + { + planNode = false; + const BoolExpr* const expr = (const BoolExpr*)node; + foreach( cell, expr->args ) + { + const Node* const nodeBool = (const Node*)lfirst( cell ); + mark_used_candidates( nodeBool, candidates ); + } + } + break; + + case T_FunctionScan: + case T_Result: + case T_Append: + case T_TidScan: + case T_Material: + case T_Sort: + case T_Group: + case T_Agg: + case T_Unique: + case T_Hash: + case T_SetOp: + case T_Limit: + case T_Scan: + case T_SeqScan: + case T_BitmapHeapScan: + break; + + case T_FuncExpr: + case T_Const: + case T_Var: + planNode = false; + break; + + /* report non-considered parse-node types */ + default: + { + elog( NOTICE, "IND ADV: unhandled plan-node type: %d; Query: %s\n", + (int)nodeTag( node ), debug_query_string ); + planNode = false; /* stop scanning the tree here */ + } + break; + } + + if( planNode ) + { + const Plan* const plan = (Plan *) node; + + if( plan->initPlan ) + { + ListCell *cell; + + foreach( cell, ((Plan*)node)->initPlan ) + { + SubPlan *subPlan = (SubPlan*)lfirst( cell ); + + mark_used_candidates( (Node*)exec_subplan_get_plan( + plannedStmtGlobal, + subPlan), + candidates ); + } + } + + if( IsA(((Node*)plan), Append) ) + { + Append *appendplan = (Append *)node; + ListCell *cell; + + foreach( cell, appendplan->appendplans ) + { + Plan *child = (Plan*)lfirst( cell ); + + mark_used_candidates( (Node*)child, candidates ); + } + } + + + /* scan left- and right-tree */ + if( outerPlan(plan) ) + mark_used_candidates( (const Node*)outerPlan(plan), candidates ); + + if( innerPlan(plan) ) + mark_used_candidates( (const Node*)innerPlan(plan), candidates ); + + /* walk through the qual-list */ + foreach( cell, plan->qual ) + { + const Node* const nodeQual = (const Node*)lfirst( cell ); + mark_used_candidates( nodeQual, candidates ); + } + } + + elog( DEBUG3, "IND ADV: mark_used_candidates: EXIT" ); +} + +/** + * scan_query + * runs thru the whole query + */ +static List* +scan_query( const Query* const query, + List* const opnos, + List* rangeTableStack ) +{ + const ListCell* cell; + List* candidates = NIL; + List* newCandidates = NIL; + + elog( DEBUG3, "IND ADV: scan_query: ENTER" ); + + /* add the current rangetable to the stack */ + rangeTableStack = lcons( query->rtable, rangeTableStack ); + + /* scan sub-queries */ + foreach( cell, query->rtable ) + { + const RangeTblEntry* const rte = (const RangeTblEntry*)lfirst( cell ); + + if( rte->subquery ) + { + candidates = merge_candidates( candidates, scan_query( + rte->subquery, + opnos, + rangeTableStack)); + } + } + + /* scan "where" from the current query */ + if( query->jointree->quals != NULL ) + { + newCandidates = scan_generic_node( query->jointree->quals, opnos, + rangeTableStack ); + } + + /* if no indexcadidate found in "where", scan "group" */ + if( ( newCandidates == NIL ) && ( query->groupClause != NULL ) ) + { + newCandidates = scan_group_clause( query->groupClause, + query->targetList, + opnos, + rangeTableStack ); + } + + /* if no indexcadidate found in "group", scan "order by" */ + if( ( newCandidates == NIL ) && ( query->sortClause != NULL ) ) + { + newCandidates = scan_group_clause( query->sortClause, + query->targetList, + opnos, + rangeTableStack ); + } + + /* remove the current rangetable from the stack */ + rangeTableStack = list_delete_ptr( rangeTableStack, query->rtable ); + + /* merge indexcandiates */ + candidates = merge_candidates( candidates, newCandidates ); + + elog( DEBUG3, "IND ADV: scan_query: EXIT" ); + + return candidates; +} + +/** + * scan_group_clause + * runs thru the "group-part" + */ +static List* +scan_group_clause( List* const groupList, + List* const targetList, + List* const opnos, + List* const rangeTableStack ) +{ + const ListCell* cell; + List* candidates = NIL; + + elog( DEBUG3, "IND ADV: scan_group_clause: ENTER" ); + + /* scan every entry in the group-list */ + foreach( cell, groupList ) + { + /* convert to group-element */ + const GroupClause* const groupElm = (const GroupClause*)lfirst( cell ); + + /* get the column the group-clause is for */ + const TargetEntry* const targetElm = list_nth( targetList, + groupElm->tleSortGroupRef - 1); + + /* scan the node and get candidates */ + const Node* const node = (const Node*)targetElm->expr; + + candidates = merge_candidates( candidates, scan_generic_node( node, + opnos, + rangeTableStack)); + } + + elog( DEBUG3, "IND ADV: scan_group_clause: EXIT" ); + + return candidates; +} + +/** + * scan_generic_node + * runs thru the given nodes + */ +static List* +scan_generic_node( const Node* const root, + List* const opnos, + List* const rangeTableStack ) +{ + ListCell* cell; + List* candidates = NIL; + + elog( DEBUG3, "IND ADV: scan_generic_node: ENTER" ); + + Assert( root != NULL ); + + switch( nodeTag( root ) ) + { + /* if the node is an aggregate */ + case T_Aggref: + { + const Aggref* const aggref = (const Aggref*)root; + const Node* const list = (const Node*)aggref->args; + + /* The arg list may be NIL in case of count(*) */ + if( list != NULL ) + candidates = scan_generic_node( list, opnos, rangeTableStack ); + } + break; + + /* if the node is a boolean-expression */ + case T_BoolExpr: + { + const BoolExpr* const expr = (const BoolExpr*)root; + + if( expr->boolop != AND_EXPR ) + { + /* non-AND expression */ + foreach( cell, expr->args ) + { + const Node* const node = (const Node*)lfirst( cell ); + candidates = merge_candidates( candidates, + scan_generic_node( node, opnos, + rangeTableStack)); + } + } + else + { + /* AND expression */ + List* compositeCandidates = NIL; + + foreach( cell, expr->args ) + { + const Node* const node = (const Node*)lfirst( cell ); + List *icList; + List *cicList; + + icList = scan_generic_node( node, opnos, rangeTableStack ); + + cicList = build_composite_candidates(candidates, icList); + + candidates = merge_candidates(candidates, icList); + + compositeCandidates = merge_candidates(compositeCandidates, + cicList); + } + + /* now append the composite (multi-col) indexes to the list */ + candidates = merge_candidates(candidates, compositeCandidates); + } + } + break; + + /* if the node is list of other nodes (e.g. group-by expressions) */ + case T_List: + { + List* const list = (List*)root; + foreach( cell, list ) + { + const Node* const node = (const Node*)lfirst( cell ); + + candidates = merge_candidates( candidates, + scan_generic_node( node, opnos, + rangeTableStack)); + } + } + break; + /* if the node is an operator */ + case T_OpExpr: + { + /* get candidates if operator is supported */ + const OpExpr* const expr = (const OpExpr*)root; + + if( list_member_oid( opnos, expr->opno ) ) + { + foreach( cell, expr->args ) + { + const Node* const node = (const Node*)lfirst( cell ); + + candidates = merge_candidates( candidates, + scan_generic_node( node, opnos, + rangeTableStack)); + } + } + } + break; + + /* if this case is reached, the variable is an index-candidate */ + case T_Var: + { + const Var* const expr = (const Var*)root; + List* rt = list_nth( rangeTableStack, expr->varlevelsup ); + const RangeTblEntry* rte = list_nth( rt, expr->varno - 1 ); + + /* only relations are source for index-candidates */ + if( rte->rtekind == RTE_RELATION ) + { + Relation base_rel = heap_open( rte->relid, AccessShareLock ); + + /* We do not support catalog tables and temporary tables */ + if( base_rel->rd_istemp != true + && !IsSystemRelation(base_rel) + /* and don't recommend indexes on hidden/system columns */ + && expr->varattno > 0 + /* and it should have at least two tuples */ + && base_rel->rd_rel->relpages > 1 + && base_rel->rd_rel->reltuples > 1 ) + { + /* create index-candidate and build a new list */ + int i; + IndexCandidate *cand = (IndexCandidate*)palloc0( + sizeof(IndexCandidate)); + + cand->varno = expr->varno; + cand->varlevelsup = expr->varlevelsup; + cand->ncols = 1; + cand->vartype[ 0 ] = expr->vartype; + cand->varattno[ 0 ] = expr->varattno; + for( i = 1; i < INDEX_MAX_KEYS; ++i ) cand->varattno[i] = 0; + cand->reloid = rte->relid; + cand->idxused = false; + + candidates = list_make1( cand ); + } + + heap_close( base_rel, AccessShareLock ); + } + } + break; + + /* subquery in where-clause */ + case T_SubLink: + { + /* convert it to sublink-expression */ + const SubLink* const expr = (const SubLink*)root; + + candidates = scan_generic_node( expr->subselect, opnos, + rangeTableStack ); + + /* scan lefthand expression (if any); [NOT] EXISTS do not have it */ + if( expr->testexpr ) + candidates = merge_candidates(candidates, + scan_generic_node( expr->testexpr, + opnos, + rangeTableStack)); + } + break; + + case T_RelabelType: + { + const RelabelType* const relabeltype = (const RelabelType*)root; + const Node* const node = (const Node*)relabeltype->arg; + + candidates = scan_generic_node( node, opnos, rangeTableStack ); + } + break; + + /* Query found */ + case T_Query: + { + const Query* const query = (const Query*)root; + + candidates = scan_query( query, opnos, rangeTableStack ); + } + break; + + /* ignore some types */ + case T_FuncExpr: + case T_Param: + case T_Const: + break; + + /* report non-considered parse-node types */ + default: + elog( NOTICE, "IND ADV: unhandled parse-node type: %d; Query: %s\n", + (int)nodeTag( root ), debug_query_string ); + break; + } + + elog( DEBUG3, "IND ADV: scan_generic_node: EXIT" ); + + return candidates; +} + + + +/** + * compare_candidates + */ +static int +compare_candidates( const IndexCandidate* _ic1, + const IndexCandidate* _ic2 ) +{ + int result = (signed int)_ic1->reloid - (signed int)_ic2->reloid; + + if( result == 0 ) + { + result = _ic1->ncols - _ic2->ncols; + + if( result == 0 ) + { + int i = 0; + + do + { + result = _ic1->varattno[ i ] - _ic2->varattno[ i ]; + ++i; + } while( ( result == 0 ) && ( i < _ic1->ncols ) ); + } + } + + return result; +} + +/** + * log_candidates + */ +static void +log_candidates( const char* prefix, List* list ) +{ + ListCell *cell; + StringInfoData str;/* output string */ + + /* don't do anything unless we are going to log it */ + if( log_min_messages < DEBUG1 ) + return; + + t_continue( tLogCandidates ); + + initStringInfo( &str ); + + foreach( cell, list ) + { + int i; + const IndexCandidate* const cand = (IndexCandidate*)lfirst( cell ); + + appendStringInfo( &str, " %d_(", cand->reloid ); + + for( i = 0; i < cand->ncols; ++i ) + appendStringInfo( &str, "%s%d", (i>0?",":""), cand->varattno[ i ] ); + + appendStringInfo( &str, ")%c", ((lnext( cell ) != NULL)?',':' ') ); + } + + elog( DEBUG1, "IND ADV: %s: |%d| {%s}", prefix, list_length(list), + str.len ? str.data : "" ); + + if( str.len > 0 ) pfree( str.data ); + + t_stop( tLogCandidates ); +} + +/** + * merge_candidates + * It builds new list out of passed in lists, and then frees the two lists + */ +static List* +merge_candidates( List* list1, List* list2 ) +{ + List *ret; + ListCell *cell1; + ListCell *cell2; + ListCell *prev2; + + if( list_length( list1 ) == 0 && list_length( list2 ) == 0 ) + return NIL; + + elog( DEBUG3, "IND ADV: merge_candidates: ENTER" ); + + /* list1 and list2 are sorted lists of candidates in ascending order */ + + elog( DEBUG1, "IND ADV: ---merge_candidates---" ); + log_candidates( "idxcd-list1", list1 ); + log_candidates( "idxcd-list2", list2 ); + + if( list_length( list1 ) == 0 ) + return list2; + + if( list_length( list2 ) == 0 ) + return list1; + + ret = NIL; + prev2 = NULL; + + for( cell1 = list_head(list1), cell2 = list_head(list2); + (cell1 != NULL) && (cell2 != NULL); + ) + { + const int cmp = compare_candidates( (IndexCandidate*)lfirst( cell1 ), + (IndexCandidate*)lfirst( cell2 ) ); + + if( cmp <= 0 ) + { + /* next candidate comes from list 1 */ + ret = lappend( ret, lfirst( cell1 ) ); + + cell1 = lnext( cell1 ); + + /* if we have found two identical candidates then we remove the + * candidate from list 2 + */ + if( cmp == 0 ) + { + ListCell *next = lnext( cell2 ); + + pfree( (IndexCandidate*)lfirst( cell2 ) ); + list2 = list_delete_cell( list2, cell2, prev2 ); + + cell2 = next; + } + } + else + { + /* next candidate comes from list 2 */ + ret = lappend( ret, lfirst( cell2 ) ); + + prev2 = cell2; + cell2 = lnext( cell2 ); + } + } + + for( ; cell1; cell1 = lnext(cell1) ) + ret = lappend( ret, lfirst(cell1) ); + + for( ; cell2; cell2 = lnext(cell2) ) + ret = lappend( ret, lfirst(cell2) ); + + list_free( list1 ); + list_free( list2 ); + + log_candidates( "merged-list", ret ); + + elog( DEBUG3, "IND ADV: merge_candidates: EXIT" ); + + return ret; +} + +/** + * build_composite_candidates. + * + * @param [IN] list1 is a sorted list of candidates in ascending order. + * @param [IN] list2 is a sorted list of candidates in ascending order. + * + * @returns A new sorted list containing composite candidates. + */ +static List* +build_composite_candidates( List* list1, List* list2 ) +{ + ListCell *cell1 = list_head( list1 ); + ListCell *cell2 = list_head( list2 ); + IndexCandidate *cand1; + IndexCandidate *cand2; + + List* compositeCandidates = NIL; + + elog( DEBUG3, "IND ADV: build_composite_candidates: ENTER" ); + + if( cell1 == NULL || cell2 == NULL ) + goto DoneCleanly; + + elog( DEBUG1, "IND ADV: ---build_composite_candidates---" ); + log_candidates( "idxcd-list1", list1 ); + log_candidates( "idxcd-list2", list2 ); + + /* build list with composite candiates */ + while( ( cell1 != NULL ) && ( cell2 != NULL ) ) + { + int cmp ; + + cand1 = ((IndexCandidate*)lfirst( cell1 )); + cand2 = ((IndexCandidate*)lfirst( cell2 )); + + cmp = cand1->reloid - cand2->reloid; + + if( cmp != 0 ) + { + Oid relOid; + + if( cmp < 0 ) + { + /* advance in list 1 */ + relOid = cand2->reloid; + + do + cell1 = lnext( cell1 ); + while( cell1 != NULL && (relOid > cand1->reloid)); + } + else + { + /* advance in list 2 */ + relOid = cand1->reloid; + + do + cell2 = lnext( cell2 ); + while( cell2 != NULL && ( relOid > cand2->reloid )); + } + } + else + { + /* build composite candidates */ + Oid relationOid = ((IndexCandidate*)lfirst(cell1))->reloid; + ListCell* l1b; + + do + { + cand2 = lfirst( cell2 ); + + l1b = cell1; + do + { + cand1 = lfirst( l1b ); + + /* do not build a composite candidate if the number of + * attributes would exceed INDEX_MAX_KEYS + */ + if( ( cand1->ncols + cand2->ncols ) < INDEX_MAX_KEYS ) + { + + /* test that both candidates do not have any equal + * attributes + * result: i1 == cand1->ncols, if the test succeded + * i1 > cand1->ncols, if the test failed + * (correct: i1==1+cand1->ncols) + */ + int i1, i2; + bool foundCommon = false; + + for(i1 = 0; i1 < cand1->ncols && !foundCommon; ++i1) + for(i2 = 0; i2 < cand2->ncols && !foundCommon; ++i2) + if(cand1->varattno[i1] == cand2->varattno[i2]) + foundCommon = true; + + /* build composite candidates if the previous test + * succeeded + */ + if( !foundCommon ) + { + /* composite candidate 1 is a combination of + * candidates 1,2 AND + * composite candidate 2 is a combination of + * candidate 2,1 + */ + IndexCandidate* cic1 + = (IndexCandidate*)palloc( + sizeof(IndexCandidate)); + IndexCandidate* cic2 + = (IndexCandidate*)palloc( + sizeof(IndexCandidate)); + + /* init some members of composite candidate 1 */ + cic1->varno = -1; + cic1->varlevelsup = -1; + cic1->ncols = cand1->ncols + cand2->ncols; + cic1->reloid = relationOid; + cic1->idxused = false; + + /* init some members of composite candidate 2 */ + cic2->varno = -1; + cic2->varlevelsup = -1; + cic2->ncols = cand1->ncols + cand2->ncols; + cic2->reloid = relationOid; + cic2->idxused = false; + + /* copy attributes of candidate 1 to attributes of + * composite candidates 1,2 + */ + for( i1 = 0; i1 < cand1->ncols; ++i1) + { + cic1->vartype[ i1 ] + = cic2->vartype[cand2->ncols + i1] + = cand1->vartype[ i1 ]; + + cic1->varattno[ i1 ] + = cic2->varattno[cand2->ncols + i1] + = cand1->varattno[ i1 ]; + } + + /* copy attributes of candidate 2 to attributes of + * composite candidates 2,1 + */ + for( i1 = 0; i1 < cand2->ncols; ++i1) + { + cic1->vartype[cand1->ncols + i1] + = cic2->vartype[ i1 ] + = cand2->vartype[ i1 ]; + + cic1->varattno[cand1->ncols + i1] + = cic2->varattno[ i1 ] + = cand2->varattno[ i1 ]; + } + + /* set remaining attributes to null */ + for( i1 = cand1->ncols + cand2->ncols; + i1 < INDEX_MAX_KEYS; + ++i1 ) + { + cic1->varattno[ i1 ] = 0; + cic2->varattno[ i1 ] = 0; + } + + /* add new composite candidates to list */ + signed int cmp = compare_candidates(cic1, cic2); + + if( cmp == 0 ) + { + compositeCandidates = + merge_candidates( list_make1( cic1 ), + compositeCandidates ); + pfree( cic2 ); + } + else + { + List* l; + + if( cmp < 0 ) + l = lcons( cic1, list_make1( cic2 ) ); + else + l = lcons( cic2, list_make1( cic1 ) ); + + compositeCandidates = + merge_candidates(l, compositeCandidates); + } + } + } + + l1b = lnext( l1b ); + + } while( ( l1b != NULL ) && + ( relationOid == ((IndexCandidate*)lfirst( l1b ))->reloid)); + + cell2 = lnext( cell2 ); + + } while( ( cell2 != NULL ) && + ( relationOid == ((IndexCandidate*)lfirst( cell2 ))->reloid ) ); + cell1 = l1b; + } + } + + log_candidates( "composite-l", compositeCandidates ); + +DoneCleanly: + elog( DEBUG3, "IND ADV: build_composite_candidates: EXIT" ); + + return compositeCandidates; +} + +#if CREATE_V_INDEXES +/** + * create_virtual_indexes + * creates an index for every entry in the index-candidate-list. + * + * It may delete some candidates from the list passed in to it. + */ +static List* +create_virtual_indexes( List* candidates ) +{ + ListCell *cell; /* an entry from the candidate-list */ + ListCell *prev, *next; /* for list manipulation */ + char idx_name[ 16 ]; /* contains the name of the current index */ + int idx_count = 0; /* number of the current index */ + IndexInfo* indexInfo; + Oid op_class[INDEX_MAX_KEYS];/* needed for creating indexes */ + + elog( DEBUG3, "IND ADV: create_virtual_indexes: ENTER" ); + + /* fill index-info */ + indexInfo = makeNode( IndexInfo ); + + indexInfo->ii_Expressions = NIL; + indexInfo->ii_ExpressionsState = NIL; + indexInfo->ii_Predicate = NIL; + indexInfo->ii_PredicateState = NIL; + indexInfo->ii_Unique = false; + indexInfo->ii_Concurrent = true; + + /* create index for every list entry */ + for( prev = NULL, cell = list_head(candidates); + (cell && (next = lnext(cell))) || cell != NULL; + cell = next) + { + int i; + + IndexCandidate* const cand = (IndexCandidate*)lfirst( cell ); + + indexInfo->ii_NumIndexAttrs = cand->ncols; + + for( i = 0; i < cand->ncols; ++i ) + { + /* prepare op_class[] */ + op_class[i] = GetDefaultOpClass( cand->vartype[ i ], BTREE_AM_OID ); + + if( op_class[i] == InvalidOid ) + /* don't create this index if couldn't find a default operator*/ + break; + + /* ... and set indexed attribute number */ + indexInfo->ii_KeyAttrNumbers[i] = cand->varattno[i]; + } + + /* if we decided not to create the index above, try next candidate */ + if( i < cand->ncols ) + { + candidates = list_delete_cell( candidates, cell, prev ); + continue; + } + + /* generate indexname */ + sprintf( idx_name, "idx_adv_%d", idx_count ); + + /* create the index without data */ + cand->idxoid = index_create( cand->reloid, idx_name, + InvalidOid, indexInfo, BTREE_AM_OID, + InvalidOid, op_class, NULL, (Datum)0, + false, false, false, true, true ); + + elog( DEBUG1, "IND ADV: virtual index created: oid=%d name=%s size=%d", + cand->idxoid, idx_name, cand->pages ); + + /* increase count for the next index */ + ++idx_count; + prev = cell; + } + + pfree( indexInfo ); + + /* do CCI to make the new metadata changes "visible" */ + CommandCounterIncrement(); + + elog( DEBUG3, "IND ADV: create_virtual_indexes: EXIT" ); + + return candidates; +} +#endif + +#if CREATE_V_INDEXES +/** + * drop_virtual_indexes + * drops all virtual-indexes + */ +static void +drop_virtual_indexes( List* candidates ) +{ + ListCell* cell; /* a entry from the index-candidate-list */ + + elog( DEBUG3, "IND ADV: drop_virtual_indexes: ENTER" ); + + /* drop index for every listentry */ + foreach( cell, candidates ) + { + /* TODO: have a look at implementation of index_drop! citation: + * "NOTE: this routine should now only be called through + * performDeletion(), else associated dependencies won't be cleaned up." + */ + + /* disabling index_drop() call, since it acquires AccessExclusiveLock + * on the base table, and hence causing a deadlock when multiple + * clients are running the same query + */ + +/* IndexCandidate* cand = (IndexCandidate*)lfirst( cell ); + + index_drop( cand->idxoid ); + elog( DEBUG1, "IND ADV: virtual index dropped: oid=%d", cand->idxoid ); +*/ } + + /* do CCI to make the new metadata changes "visible" */ + CommandCounterIncrement(); + + elog( DEBUG3, "IND ADV: drop_virtual_indexes: EXIT" ); +} +#endif + +static int4 +estimate_index_pages(Oid rel_oid, Oid ind_oid ) +{ + Size data_length; + int i; + int natts; + int2 var_att_count; + int4 rel_pages; /* diskpages of heap relation */ + float4 rel_tuples; /* tupes in the heap relation */ + double idx_pages; /* diskpages in index relation */ + + TupleDesc ind_tup_desc; + Relation base_rel; + Relation index_rel; + Form_pg_attribute *atts; + + base_rel = heap_open( rel_oid, AccessShareLock ); + index_rel = index_open( ind_oid, AccessShareLock ); + + rel_pages = base_rel->rd_rel->relpages; + rel_tuples = base_rel->rd_rel->reltuples; + + ind_tup_desc = RelationGetDescr( index_rel ); + + atts = ind_tup_desc->attrs; + natts = ind_tup_desc->natts; + + /* + * These calculations are heavily borrowed from index_form_tuple(), and + * heap_compute_data_size(). The only difference is that, that they have a + * real tuple being inserted, and hence all the VALUES are available, + * whereas, we don't have any of them available here. + */ + + /* + * First, let's calculate the contribution of fixed size columns to the size + * of index tuple + */ + var_att_count = 0; + data_length = 0; + for( i = 0; i < natts; ++i) + { + /* the following is based on att_addlength() macro */ + if( atts[i]->attlen > 0 ) + { + /* no +=; RHS is incrementing data_length by including it in the sum */ + data_length = att_align_nominal(data_length, atts[i]->attalign); + + data_length += atts[i]->attlen; + } + else + if( atts[i]->attlen == -1 ) + { + data_length += atts[i]->atttypmod + VARHDRSZ; + } + else + { /* null terminated data */ + Assert( atts[i]->attlen == -2 ); + ++var_att_count; + } + } + + /* + * Now, estimate the average space occupied by variable-length columns, per + * tuple. This is calculated as: + * Total 'available' space + * minus space consumed by ItemIdData + * minus space consumed by fixed-length columns + */ + if( var_att_count ) + data_length += (((float)rel_pages * (BLCKSZ - (sizeof(PageHeaderData) + - sizeof(ItemIdData) + ) ) ) + - (rel_tuples * sizeof(ItemIdData)) + - (data_length * rel_tuples) + ) + /rel_tuples; + + /* Take into account the possibility that we might have NULL values */ + data_length += IndexInfoFindDataOffset( INDEX_NULL_MASK ); + + idx_pages = (rel_tuples * (data_length + sizeof(ItemIdData))) + /((BLCKSZ - SizeOfPageHeaderData + - sizeof(BTPageOpaqueData) + ) + * ((float)BTREE_DEFAULT_FILLFACTOR/100)); + + idx_pages = ceil( idx_pages ); + + heap_close( base_rel, AccessShareLock ); + index_close( index_rel, AccessShareLock ); + + return (int4)idx_pages; +} diff --git a/index_adviser/index_adviser.h b/index_adviser/index_adviser.h new file mode 100644 index 0000000..8b7b07c --- /dev/null +++ b/index_adviser/index_adviser.h @@ -0,0 +1,41 @@ +/*------------------------------------------------------------------------- + * + * index_advisor.h + * Prototypes for indexadvisor.h + * + *------------------------------------------------------------------------- + */ + +#ifndef INDEX_ADVISER_H +#define INDEX_ADVISER_H 1 + + +#include "postgres.h" + +#include "nodes/print.h" +#include "parser/parsetree.h" +#include "catalog/namespace.h" +#include "executor/executor.h" + +typedef struct { + + Index varno; /* index into the rangetable */ + Index varlevelsup; /* points to the correct rangetable */ + int2 ncols; /* number of indexed columns */ + Oid vartype[INDEX_MAX_KEYS];/* type of the column(s) */ + AttrNumber varattno[INDEX_MAX_KEYS];/* attribute number of the column(s) */ + Oid reloid; /* the table oid */ +//TODO1 remove this member + Oid idxoid; /* the virtual index oid */ + BlockNumber pages; /* the estimated size of index */ + bool idxused; /* was this used by the planner? */ + float4 profit; /* profit made by using this cand */ + +} IndexCandidate; + +extern void _PG_init(void); +extern void _PG_fini(void); + +#define compile_assert(x) extern int _compile_assert_array[(x)?1:-1] + +#endif /* INDEX_ADVISER_H */ diff --git a/pg_advise/Makefile b/pg_advise/Makefile new file mode 100644 index 0000000..2574ae8 --- /dev/null +++ b/pg_advise/Makefile @@ -0,0 +1,12 @@ + +subdir = contrib/pgadviser/pg_advise +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global + +PROGRAM = pg_advise +OBJS = advise_index.o util_funcs.o + +PG_CPPFLAGS = -I$(libpq_srcdir) +PG_LIBS = $(libpq) + +include $(top_srcdir)/contrib/contrib-global.mk diff --git a/pg_advise/advise_index.c b/pg_advise/advise_index.c new file mode 100644 index 0000000..76dee63 --- /dev/null +++ b/pg_advise/advise_index.c @@ -0,0 +1,452 @@ +/* + * pg_advise: the frontend to the indexadvisor + * + */ + +#include +#include +#include +#include +#include + +#include "libpq-fe.h" +#include "advise_index.h" + +#define ADV_MAX_COLS 32 + +static PGconn *init_connection(const char *dbname, const char *host, int port, + const char *user, const char *password) +{ + char conn_str[1024]; + PGconn *conn; + + sprintf(conn_str, "dbname = %s host = %s port = %d user = %s password = %s" + " options = '-c local_preload_libraries=libpg_index_adviser'", + dbname, host, port, user, password); + + conn = PQconnectdb(conn_str); + + if (PQstatus(conn) != CONNECTION_OK) + { + fprintf(stderr, "ERROR: %s", PQerrorMessage(conn)); + return NULL; + } + + return conn; +} + +static int prepare_advisor(PGconn *conn) +{ +#if 0 + PGresult *res; + + res = PQexec(conn, "SET enable_advise_index TO true"); + + if (res == NULL) + { + fprintf(stderr, "%s", PQerrorMessage(conn)); + return -1; + } + + res = PQexec(conn, + "DELETE FROM advise_index WHERE backend_pid = pg_backend_pid()"); + + if (PQresultStatus(res) != PGRES_COMMAND_OK) + { + fprintf(stderr, "ERROR: %s", PQerrorMessage(conn)); + return -1; + } + else + PQclear(res); +#endif + return 0; +} + +static int analyse_workload(PGconn *conn, FILE *file) +{ + PGresult *res; + char *query = NULL; + char line[1024]; + + printf("Analyzing queries "); + + for(;;) + { + if (fgets(line, 1024, file) == NULL) + break; + if (query == NULL) + { + query = (char *)malloc(10*1024); + strcpy(query, "EXPLAIN "); + strcat(query, line); + } + else + { + if (strlen(query) + strlen(line) > 10*1024) + { + fprintf(stderr, "ERROR: Query string too long.\n"); + return -1; + } + + strcat(query, line); + } + + if (strchr(query, ';') != NULL) + { + // printf("query \#%d: %s\n", ++lno, query); + res = PQexec(conn, query); + printf("."); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + fprintf(stderr, "ERROR: %s", PQerrorMessage(conn)); + return -1; + } + else + PQclear(res); + + free(query); + query = NULL; + } + } + printf(" done.\n"); + return 0; +} + +static int read_advisor_output(PGconn *conn, AdvIndexList *index_list) +{ + PGresult *res; + int i; + int num_indexes = 0; + char stmt[1024]; + + res = PQexec(conn, "BEGIN"); + if (PQresultStatus(res) != PGRES_COMMAND_OK) + { + fprintf(stderr, "ERROR: BEGIN failed:\n %s", PQerrorMessage(conn)); + PQclear(res); + return 0; + } + + snprintf(stmt, sizeof(stmt), + "SELECT c.relname," + "attrs AS colids," + "MAX(index_size) AS size_in_pages," + "SUM(profit) AS benefit," + "SUM(profit)/MAX(index_size) AS gain " + "FROM advise_index a," + "pg_class c " + "WHERE a.backend_pid = pg_backend_pid() " + "AND a.reloid = c.oid " + "GROUP BY c.relname, colids " + "ORDER BY gain" + " DESC"); + + res = PQexec(conn, stmt); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + fprintf(stderr, "ERROR: %s", PQerrorMessage(conn)); + PQclear(res); + return 0; + } + + *index_list = (AdvIndexInfo **)malloc(PQntuples(res)*sizeof(AdvIndexInfo*)); + + for (i = 0; i < PQntuples(res); ++i) + { + AdvIndexInfo *index = (AdvIndexInfo *)malloc(sizeof(AdvIndexInfo)); + + index->table = strdup(PQgetvalue( res, i, 0)); + index->col_ids = strdup(PQgetvalue( res, i, 1)); + /* + * size returned by the query is in number of pages. + * TODO: change the backend to dump size in KBs. Done. + */ + index->size = atol(PQgetvalue( res, i, 2)); + index->benefit = atof(PQgetvalue( res, i, 3)); + index->used = false; + + (*index_list)[i] = index; + + printf("size = %d KB, benefit = %f\n", index->size, index->benefit); + ++num_indexes; + } + + PQclear(res); + res = PQexec(conn, "END"); + PQclear(res); + + return num_indexes; +} + +static char* get_column_names(PGconn *conn, const char *table, char *column_ids) +{ + PGresult *res; + int len, colno; + char stmt[512]; + char *idxdef; + char *colnames[ADV_MAX_COLS] = {0}; + char *tok; + + res = PQexec(conn, "BEGIN"); + + if (PQresultStatus(res) != PGRES_COMMAND_OK) + { + fprintf(stderr, "ERROR: BEGIN failed.\n %s", PQerrorMessage(conn)); + PQclear(res); + return NULL; + } + + for( colno = 0, tok = strtok(column_ids, "{,}"); + tok; + ++colno, tok = strtok(NULL, "{,}")) + { + snprintf(stmt, sizeof(stmt), + "SELECT a.attname," + "a.attnum " + "FROM pg_class c," + "pg_attribute a " + "WHERE c.relname = '%s' " + "AND a.attrelid = c.oid " + "AND a.attnum = %s", + table, tok); + + res = PQexec(conn, stmt); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + { + fprintf(stderr, "ERROR: %s", PQerrorMessage(conn)); + PQclear(res); + return NULL; + } + + if (PQntuples(res) != 1) + { + fprintf(stderr, "ERROR: an internal query failed at %s:%d.", + __FILE__, __LINE__ ); + PQclear(res); + return NULL; + } + + colnames[colno] = strdup(PQgetvalue(res, 0, 0)); + } + + res = PQexec(conn, "END"); + PQclear(res); + + len = 1; /* 1 for null terminator */ + for (colno = 0; colno < ADV_MAX_COLS && colnames[colno]; ++colno) + { + len += strlen(colnames[colno]); + if (colno > 0) len += 1; /* for a ',' */ + } + + idxdef = (char *)malloc(len); + idxdef[0] = '\0'; + + for (colno = 0; colno < ADV_MAX_COLS && colnames[colno]; ++colno) + { + if (colno > 0) strcat(idxdef, ","); + strcat(idxdef, colnames[colno]); + free(colnames[colno]); + } + + return idxdef; +} + +static void output_recommendation(PGconn *conn, AdvIndexList index_list, + int len, FILE *sqlfile) +{ + int i; + long size = 0; + + for (i = 0; i < len; ++i) + { + AdvIndexInfo *info = index_list[i]; + + if (!info->used) + continue; + + char *idxdef = get_column_names(conn, info->table, info->col_ids); + + printf("/* %d. %s(%s): size=%d KB, profit=%.2f */\n", + i+1, info->table, idxdef, info->size, info->benefit); + + size += info->size; + + if (sqlfile) + fprintf(sqlfile, "create index idx_%s_%d on %s (%s);\n", + info->table, i+1, info->table, idxdef); + free(idxdef); + } + + printf("/* Total size = %ldKB */\n", size); +} + +static void usage() +{ + puts("This is pg_advise_index, the PostgreSQL index advisor frontend.\n"); + puts("Usage:\n\tadvise_index [options] [workload file]\n"); + puts("Options:"); + puts("\t-d DBNAME specify database name to connect to"); + puts("\t-h HOSTNAME database server host or socket directory " + "(default: \"local socket\")"); + puts("\t-p PORT database server port"); + puts("\t-U NAME database user name"); + puts("\t-o FILENAME name of output file for create index statements"); + puts("\t-s SIZE specify max size of space to be used for indexes " + "(in bytes, opt. with G, M or K)"); +} + +/* return the size (-s option) converted into KBs */ +static long strtosize(const char *s) +{ + long size = 0; + char l = s[strlen(s) - 1]; + + if (l == 'G' || l == 'M' || l == 'k' || l == 'K') + { + char *ns = (char *)malloc(strlen(s) - 1); + strncpy(ns, s, strlen(s) - 1); + size = atol(ns); + free(ns); + } + + switch(l) + { + case 'G': + size *= 1024; + case 'M': + size *= 1024; + case 'k': + case 'K': + break; + default: + size /= 1024; + } + + return size; +} + +int main(int argc, char **argv) +{ + char *dbname = NULL, + *host = NULL, + *user = NULL, + *password = NULL; + + int port = 5432; + PGconn *conn; + long pool_size = 0; + FILE *workload = stdin, + *sqlfile = NULL; + int i, + num_indexes; + char *output_filename = NULL; + + AdvIndexList suggested_indexes; + + /* check arguments */ + int ch; + + while ((ch = getopt(argc, argv, "d:h:p:U:s:o:W:")) != -1) + switch(ch) + { + case 'd': /* database name */ + dbname = optarg; + break; + case 'h': /* database server host */ + host = optarg; + break; + case 'p': /* port */ + port = atoi(optarg); + break; + case 'U': /* username */ + user = optarg; + break; + case 's': /* index pool size */ + pool_size = strtosize(optarg); + printf("poolsize = %ld KB\n", pool_size); + break; + case 'o': /* output file */ + output_filename = optarg; + break; + case 'W': /* TODO: prompt for password */ + break; + case '?': + usage(); + return 0; + break; + default: + usage(); + return 1; + break; + } + + if (dbname == NULL || user == NULL) + { + usage(); + return 1; + } + + argc -= optind; + argv += optind; + + if (argc == 1) + { + workload = fopen(argv[argc-1], "r"); + + if (workload == NULL) + { + fprintf(stderr, "ERROR: cannot open file %s\n", argv[argc-1]); + return 1; + } + + printf("load workload from file '%s'\n", argv[argc-1]); + } + + /* connect to the backend */ + conn = init_connection(dbname, host, port, user, password); + + if (conn == NULL) + return 1; + + if (prepare_advisor(conn) != 0) + { + fprintf(stderr, "ERROR: this PostgreSQL server doesn't support the " + "index advisor.\n"); + PQfinish(conn); + return 1; + } + + analyse_workload(conn, workload); + + if (workload != stdin) + fclose(workload); + + num_indexes = read_advisor_output(conn, &suggested_indexes); + + if (pool_size > 0 && + compute_config_size(suggested_indexes, num_indexes) > pool_size) + { + find_optimal_configuration_greedy(suggested_indexes, num_indexes, + pool_size / 4); + } + else + { + for (i = 0; i < num_indexes; ++i) + suggested_indexes[i]->used = true; + } + + if (output_filename != NULL) + sqlfile = fopen(output_filename, "w"); + else + sqlfile = stdout; + + output_recommendation(conn, suggested_indexes, num_indexes, sqlfile); + + if (output_filename != NULL) + fclose(sqlfile); + + PQfinish(conn); + + return 0; +} diff --git a/pg_advise/advise_index.h b/pg_advise/advise_index.h new file mode 100644 index 0000000..c1bb987 --- /dev/null +++ b/pg_advise/advise_index.h @@ -0,0 +1,27 @@ + +#ifndef ADVISE_INDEX_H +#define ADVISE_INDEX_H + +typedef unsigned char bool; +#define true 1 +#define false 0 + +typedef struct { + char *table; + char *col_ids; /* space saparated column numbers */ + int size; /* in KBs */ + double benefit; + bool used; +} AdvIndexInfo; + +typedef AdvIndexInfo** AdvIndexList; + +extern long compute_config_size(AdvIndexList index_list, int len); + +extern void find_optimal_configuration_greedy(AdvIndexList index_list, int len, + long size_limit); + +extern void find_optimal_configuration_dp(AdvIndexList index_list, int len, + long size_limit); + +#endif /* ADVISE_INDEX_H */ diff --git a/pg_advise/sample_advise_index_session.txt b/pg_advise/sample_advise_index_session.txt new file mode 100644 index 0000000..0714482 --- /dev/null +++ b/pg_advise/sample_advise_index_session.txt @@ -0,0 +1,24 @@ + +$ cat workload.sql +select * from t where a = 100; +select * from t where b = 100; +select * from t where a = 100 and b = 100; +select * from t where a = 100 or b = 100; +select * from t1 where a = 100; +select * from t1 where b = 100; +select * from t1 where a = 100 and b = 100; +select * from t1 where a = 100 or b = 100; +select * from t, t1 where t.a = 100 and t1.a = 100 or t.b = 100 and t1.b = 100; + +$ db/bin/pg_advise_index.exe -d postgres -h localhost -p 5432 -U test -o create_index.sql workload.sql +load workload from file 'workload.sql' +Analyzing queries ......... done. +size = 2608 KB, benefit = 184483000.000000 +size = 2608 KB, benefit = 184481000.000000 +/* 1. t(b): size=2608 KB, profit=184483000.00 */ +/* 2. t(a): size=2608 KB, profit=184481000.00 */ +/* Total size = 5216KB */ + +$ cat create_index.sql +create index idx_t_1 on t (b); +create index idx_t_2 on t (a); diff --git a/pg_advise/util_funcs.c b/pg_advise/util_funcs.c new file mode 100644 index 0000000..76a868b --- /dev/null +++ b/pg_advise/util_funcs.c @@ -0,0 +1,110 @@ +/* + * util_funcs.c + * + */ + +#include +#include +#include "advise_index.h" + +long compute_config_size(AdvIndexList index_list, int len) +{ + int size = 0; + int i; + + for (i = 0; i < len; ++i) + size += index_list[i]->size; + + return size; +} + +/* Note: it sets the 'used' member */ +void find_optimal_configuration_greedy(AdvIndexList index_list, int len, + long size_limit) +{ + int current_size = 0; + double sum_benefit = 0.0; + int i = 0; + + for (i = 0; i < len && current_size <= size_limit; ++i) + if (current_size + index_list[i]->size <= size_limit) + { + index_list[i]->used = true; + sum_benefit += index_list[i]->benefit; + current_size += index_list[i]->size; + } +} + +/* TODO: not called from anywhere. Find a use for it */ +void find_optimal_configuration_dp(AdvIndexList index_list, int len, + long size_limit) +{ + int **cost; + int w, i; + + printf("w = %ld\n", size_limit); + cost = (int **)malloc(sizeof(int) * (len + 1)); + + for (i = 0; i < len+1; ++i) + cost[i] = (int *)malloc(sizeof(int *) * (size_limit * 1)); + for (w = 0; w <= size_limit; ++w) + cost[0][w] = 0; + for (i = 1; i <= len; ++i) + { + cost[i][0] = 0; + + for (w = 1; w <= size_limit; ++w) + { + if (index_list[i-1]->size <= w) + { + if (index_list[i-1]->benefit + + cost[i-1][w - index_list[i-1]->size] + > cost[i-1][w]) + { + cost[i][w] = index_list[i-1]->benefit + + cost[i-1][w - index_list[i-1]->size]; + } + else + cost[i][w] = cost[i-1][w]; + } + else + cost[i][w] = cost[i-1][w]; + } + } + + for ( i = len, w = size_limit; + i > 0 && w > 0; + --i) + { + if (cost[i][w] != cost[i-1][w]) + { + index_list[i-1]->used = true; + w -= index_list[i-1]->size; + } + } + /* + for (i = 0; i < len+1; ++i) + free(cost[i]); + free(cost); + */ +} + +#if DEBUG +void test_optimize() { + AdvIndexList conf = (AdvIndexInfo **)malloc(3 * sizeof(AdvIndexInfo *)); + + conf[0] = (AdvIndexInfo *)malloc(sizeof(AdvIndexInfo)); + conf[0]->benefit = 60; + conf[0]->size = 1; + conf[0]->used = false; + conf[1] = (AdvIndexInfo *)malloc(sizeof(AdvIndexInfo)); + conf[1]->benefit = 100; + conf[1]->size = 2; + conf[1]->used = false; + conf[2] = (AdvIndexInfo *)malloc(sizeof(AdvIndexInfo)); + conf[2]->benefit = 120; + conf[2]->size = 3; + conf[2]->used = false; + optimize_indexset(conf, 3, 5); +} +#endif diff --git a/resources/Makefile b/resources/Makefile new file mode 100644 index 0000000..1c09a91 --- /dev/null +++ b/resources/Makefile @@ -0,0 +1,14 @@ + +DATA = index_advisory.create.sql \ + show_index_advisory.create.sql \ + select_index_advisory.create.sql + +ifdef USE_PGXS +PGXS := $(shell pg_config --pgxs) +include $(PGXS) +else +subdir = contrib/pgadviser/resources +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/resources/index_advisory.create.sql b/resources/index_advisory.create.sql new file mode 100644 index 0000000..1317107 --- /dev/null +++ b/resources/index_advisory.create.sql @@ -0,0 +1,10 @@ + +create table index_advisory( reloid oid, + attrs integer[], + profit real, + index_size integer, + backend_pid integer, + timestamp timestamptz); + +create index IA_reloid on index_advisory( reloid ); +create index IA_backend_pid on index_advisory( backend_pid ); diff --git a/resources/sample_error_messages.sql b/resources/sample_error_messages.sql new file mode 100644 index 0000000..2ef008d --- /dev/null +++ b/resources/sample_error_messages.sql @@ -0,0 +1,80 @@ + +\c postgres test + +drop table if exists advise_index; +drop view if exists advise_index; + +/* create the advise_index same as provided in the contrib module */; +create table advise_index( reloid oid, attrs integer[], profit real, + index_size integer, backend_pid integer, + timestamp timestamptz); + +/* set the client to see the log messages generated by the Adviser */; +set client_min_messages to log; + +/* As expected, the EXPLAIN will work */; +explain select * from t where a = 100; + +select * from advise_index; + +/* Now lets drop the advise_index and see what ERROR it throws */; +drop table if exists advise_index; +drop view if exists advise_index; + +explain select * from t where a = 100; + +/* create another object by the same name (in the same namespace) */; +create index advise_index on t1(a); + +/* advise_index does exist, but its not a table or view! */; +explain select * from t where a = 100; + +/* now create a table named advise_index, but with a different signature! */; +drop index advise_index; + +create table advise_index(a int); + +/* This ERROR comes from the executor, but we still see our DETAIL and HINT */; +explain select * from t where a = 100; + +/* create a table with same signature but different name */; +drop table if exists advise_index; +drop view if exists advise_index; + +drop table if exists advise_index_data cascade; + +create table advise_index_data( reloid oid, attrs integer[], profit real, + index_size integer, backend_pid integer, + timestamp timestamptz); + +/* and a view on that table */; +create view advise_index as select * from advise_index_data; + +/* now try to insert into the view, and notice the ERROR, DETAIL and HINT from executor */; +explain select * from t where a = 100; + +/* now create a RULE on the view that redirects the INSERTs into the table */; +create or replace rule advise_index_insert as +ON INSERT to advise_index +do instead +INSERT into advise_index_data values (new.reloid, new.attrs, new.profit, + new.index_size, new.backend_pid, + new.timestamp) ; + +/* and voila, (internal) INSERT into the view succeeds! */; +explain select * from t where a = 100; + +/* Now, lets try what happens under a read-only transaction */; +begin; + +set transaction_read_only=t; + +show transaction_read_only; + +explain select * from t where a = 100; + +end; + +select * from advise_index; + +select * from advise_index_data; diff --git a/resources/sample_error_messages.txt b/resources/sample_error_messages.txt new file mode 100644 index 0000000..db6dc77 --- /dev/null +++ b/resources/sample_error_messages.txt @@ -0,0 +1,192 @@ + +postgres=> \c postgres test +You are now connected to database "postgres". +postgres=> +postgres=> drop table if exists advise_index; +NOTICE: table "advise_index" does not exist, skipping +DROP TABLE +postgres=> drop view if exists advise_index; +NOTICE: view "advise_index" does not exist, skipping +DROP VIEW +postgres=> +postgres=> /* create the advise_index same as provided in the contrib module */; +postgres=> create table advise_index( reloid oid, attrs integer[], profit real, +postgres(> index_size integer, backend_pid integer, +postgres(> timestamp timestamptz); +CREATE TABLE +postgres=> +postgres=> /* set the client to see the log messages generated by the Adviser */; +postgres=> set client_min_messages to log; +SET +postgres=> +postgres=> /* As expected, the EXPLAIN will work */; +postgres=> explain select * from t where a = 100; +LOG: Index Adviser: Plan using estimates for suggested indexes: + +Index Scan using idx_adv_0 on t (cost=0.00..8.28 rows=1 width=8) + Index Cond: (a = 100) + + QUERY PLAN +---------------------------------------------------- + Seq Scan on t (cost=0.00..1791.00 rows=1 width=8) + Filter: (a = 100) +(2 rows) + +postgres=> +postgres=> select * from advise_index; + reloid | attrs | profit | index_size | backend_pid | timestamp +--------+-------+---------+------------+-------------+------------------------------- + 16395 | {1} | 1782.72 | 2608 | 5256 | 2007-01-13 13:08:53.593+05:30 +(1 row) + +postgres=> +postgres=> /* Now lets drop the advise_index and see what ERROR it throws */; +postgres=> drop table if exists advise_index; +DROP TABLE +postgres=> drop view if exists advise_index; +NOTICE: view "advise_index" does not exist, skipping +DROP VIEW +postgres=> +postgres=> explain select * from t where a = 100; +LOG: Index Adviser: Plan using estimates for suggested indexes: + +Index Scan using idx_adv_0 on t (cost=0.00..8.28 rows=1 width=8) + Index Cond: (a = 100) + +ERROR: relation "advise_index" does not exist. +DETAIL: Index Adviser uses "advise_index" table to store it's advisory. You should have INSERT permissions on a table or an (INSERT-able) view named "advise_index". Also, make sure that you are not running the Index Adviser under a read-only transaction. +HINT: Please create the "advise_index" table using the script provided in pg_advise_index contrib module. +postgres=> +postgres=> /* create another object by the same name (in the same namespace) */; + +postgres=> create index advise_index on t1(a); +CREATE INDEX +postgres=> +postgres=> /* advise_index does exist, but its not a table or view! */; +postgres=> explain select * from t where a = 100; +LOG: Index Adviser: Plan using estimates for suggested indexes: + +Index Scan using idx_adv_0 on t (cost=0.00..8.28 rows=1 width=8) + Index Cond: (a = 100) + +ERROR: "advise_index" is not a table or view. +DETAIL: Index Adviser uses "advise_index" table to store it's advisory. You should have INSERT permissions on a table or an (INSERT-able) view named "advise_index". Also, make sure that you are not running the Index Adviser under a read-only transaction. +HINT: Please create the "advise_index" table using the script provided in pg_advise_index contrib module. +postgres=> +postgres=> /* now create a table named advise_index, but with a different signature! */; +postgres=> drop index advise_index; +DROP INDEX +postgres=> +postgres=> create table advise_index(a int); +CREATE TABLE +postgres=> +postgres=> /* This ERROR comes from the executor, but we still see our DETAIL and HINT */; +postgres=> explain select * from t where a = 100; +LOG: Index Adviser: Plan using estimates for suggested indexes: + +Index Scan using idx_adv_0 on t (cost=0.00..8.28 rows=1 width=8) + Index Cond: (a = 100) + +ERROR: INSERT has more expressions than target columns +DETAIL: Index Adviser uses "advise_index" table to store it's advisory. You should have INSERT permissions on a table or an (INSERT-able) view named "advise_index". Also, make sure that you are not running the Index Adviser under a read-only transaction. +HINT: Please create the "advise_index" table using the script provided in pg_advise_index contrib module. +CONTEXT: SQL statement "insert into "advise_index" values( 16395, array[1], 1782.719482, 2608, 5256, now());" +postgres=> +postgres=> /* create a table with same signature but different name */; +postgres=> drop table if exists advise_index; +DROP TABLE +postgres=> drop view if exists advise_index; +NOTICE: view "advise_index" does not exist, skipping +DROP VIEW +postgres=> +postgres=> drop table if exists advise_index_data cascade; +NOTICE: table "advise_index_data" does not exist, skipping +DROP TABLE +postgres=> +postgres=> create table advise_index_data( reloid oid, attrs integer[], profit real, +postgres(> index_size integer, backend_pid integer, +postgres(> timestamp timestamptz); +CREATE TABLE +postgres=> +postgres=> /* and a view on that table */; +postgres=> create view advise_index as select * from advise_index_data; +CREATE VIEW +postgres=> +postgres=> /* now try to insert into the view, and notice the ERROR, DETAIL and HINT from executor */; +postgres=> explain select * from t where a = 100; +LOG: Index Adviser: Plan using estimates for suggested indexes: + +Index Scan using idx_adv_0 on t (cost=0.00..8.28 rows=1 width=8) + Index Cond: (a = 100) + +ERROR: cannot insert into a view +DETAIL: Index Adviser uses "advise_index" table to store it's advisory. You should have INSERT permissions on a table or an (INSERT-able) view named "advise_index". Also, make sure that you are not running the Index Adviser under a read-only transaction. +HINT: Please create the "advise_index" table using the script provided in pg_advise_index contrib module. +CONTEXT: SQL statement "insert into "advise_index" values( 16395, array[1], 1782.719482, 2608, 5256, now());" +postgres=> +postgres=> /* now create a RULE on the view that redirects the INSERTs into the +table */; +postgres=> create or replace rule advise_index_insert as +postgres-> ON INSERT to advise_index +postgres-> do instead +postgres-> INSERT into advise_index_data values (new.reloid, new.attrs, new.profit, +postgres(> new.index_size, new.backend_pid, +postgres(> new.timestamp) ; +CREATE RULE +postgres=> +postgres=> /* and voila, (internal) INSERT into the view succeeds! */; +postgres=> explain select * from t where a = 100; +LOG: Index Adviser: Plan using estimates for suggested indexes: + +Index Scan using idx_adv_0 on t (cost=0.00..8.28 rows=1 width=8) + Index Cond: (a = 100) + + QUERY PLAN +---------------------------------------------------- + Seq Scan on t (cost=0.00..1791.00 rows=1 width=8) + Filter: (a = 100) +(2 rows) + +postgres=> +postgres=> /* Now, lets try what happens under a read-only transaction */; +postgres=> begin; +BEGIN +postgres=> +postgres=> set transaction_read_only=t; +SET +postgres=> +postgres=> show transaction_read_only; + transaction_read_only +----------------------- + on +(1 row) + +postgres=> +postgres=> explain select * from t where a = 100; +LOG: Index Adviser: Plan using estimates for suggested indexes: + +Index Scan using idx_adv_0 on t (cost=0.00..8.28 rows=1 width=8) + Index Cond: (a = 100) + +ERROR: transaction is read-only +DETAIL: Index Adviser uses "advise_index" table to store it's advisory. You should have INSERT permissions on a table or an (INSERT-able) view named "advise_index". Also, make sure that you are not running the Index Adviser under a read-only transaction. +HINT: Please create the "advise_index" table using the script provided in pg_advise_index contrib module. +CONTEXT: SQL statement "insert into "advise_index" values( 16395, array[1], 1782.719482, 2608, 5256, now());" +postgres=> +postgres=> end; +ROLLBACK +postgres=> +postgres=> select * from advise_index; + reloid | attrs | profit | index_size | backend_pid | timestamp +--------+-------+---------+------------+-------------+------------------------------ + 16395 | {1} | 1782.72 | 2608 | 5256 | 2007-01-13 13:08:54.25+05:30 +(1 row) + +postgres=> +postgres=> select * from advise_index_data; + reloid | attrs | profit | index_size | backend_pid | timestamp +--------+-------+---------+------------+-------------+------------------------------ + 16395 | {1} | 1782.72 | 2608 | 5256 | 2007-01-13 13:08:54.25+05:30 +(1 row) + +postgres=> diff --git a/resources/sample_psql_session.sql b/resources/sample_psql_session.sql new file mode 100644 index 0000000..f3ba473 --- /dev/null +++ b/resources/sample_psql_session.sql @@ -0,0 +1,80 @@ + +create language plpgsql; + +drop schema if exists test cascade; + +drop user if exists test; + +create user test; + +create schema test authorization test; + +\c postgres test + +load '$libdir/plugins/index_adviser'; + +drop view if exists select_index_advisory; +drop function if exists show_index_advisory(index_advisory.backend_pid%type); +drop table if exists index_advisory; + +\i contrib/pgadviser/resources/index_advisory.create.sql + +\i contrib/pgadviser/resources/show_index_advisory.create.sql + +\i contrib/pgadviser/resources/select_index_advisory.create.sql + +drop table if exists t, t1; + +create table t( a int, b int ); + +insert into t select s, 99999-s from generate_series(0,99999) as s; + +analyze t; + +create table t1 as select * from t; + +/* notice no ANALYZE for T1 */; + +select count(*) from t; + +select count(*) from t1; + +/* stop here if you wish to run only pg_advise tool */; + +/* set the client to see the log messages generated by the Adviser */; +set client_min_messages to log; + +explain select * from t where a = 100; + +explain select * from t where b = 100; + +explain select * from t where a = 100 and b = 100; + +explain select * from t where a = 100 or b = 100; + +/* now we shall test with T1 */; + +explain select * from t1 where a = 100; + +explain select * from t1 where b = 100; + +explain select * from t1 where a = 100 and b = 100; + +explain select * from t1 where a = 100 or b = 100; + +/* let's do some sensible join over these two tables */; +explain +select * +from t, + t1 +where t.a = 100 +and t1.a = 100 + or t.b = 100 + and t1.b = 100; + +/* following are the contents of the advise_index table */; +select * from index_advisory; + +/* And following is the summary of the advices generated */; + +select * from select_index_advisory; diff --git a/resources/sample_psql_session.txt b/resources/sample_psql_session.txt new file mode 100644 index 0000000..149451f --- /dev/null +++ b/resources/sample_psql_session.txt @@ -0,0 +1,271 @@ + +Welcome to psql 8.2.1, the PostgreSQL interactive terminal. + +Type: \copyright for distribution terms + \h for help with SQL commands + \? for help with psql commands + \g or terminate with semicolon to execute query + \q to quit + +Warning: Console code page (437) differs from Windows code page (1252) + 8-bit characters may not work correctly. See psql reference + page "Notes for Windows users" for details. + +postgres=# /* +postgres*# You must start the session with something like: +postgres*# +postgres*# env PGOPTIONS='-c local_preload_libraries=libpg_index_adviser' psql postgres +postgres*# */; +postgres=# +postgres=# create language plpgsql; +CREATE LANGUAGE +postgres=# +postgres=# drop schema if exists test cascade; +NOTICE: schema "test" does not exist, skipping +DROP SCHEMA +postgres=# +postgres=# drop user if exists test; +NOTICE: role "test" does not exist, skipping +DROP ROLE +postgres=# +postgres=# create user test; +CREATE ROLE +postgres=# +postgres=# create schema test authorization test; +CREATE SCHEMA +postgres=# +postgres=# \c postgres test +You are now connected to database "postgres" as user "test". +postgres=> +postgres=> drop table if exists advise_index; +NOTICE: table "advise_index" does not exist, skipping +DROP TABLE +postgres=> +postgres=> \i contrib/pg_index_adviser/advise_index.create.sql +CREATE TABLE +postgres=> +postgres=> \d advise_index; + Table "test.advise_index" + Column | Type | Modifiers +-------------+--------------------------+----------- + reloid | oid | + attrs | integer[] | + profit | real | + index_size | integer | + backend_pid | integer | + timestamp | timestamp with time zone | + +postgres=> +postgres=> \i contrib/pg_index_adviser/advise_index_show.create.sql +CREATE FUNCTION +postgres=> +postgres=> select * from advise_index; + reloid | attrs | profit | index_size | backend_pid | timestamp +--------+-------+--------+------------+-------------+----------- +(0 rows) + +postgres=> +postgres=> drop table if exists t, t1; +NOTICE: table "t" does not exist, skipping +NOTICE: table "t1" does not exist, skipping +DROP TABLE +postgres=> +postgres=> create table t( a int, b int ); +CREATE TABLE +postgres=> +postgres=> insert into t select s, 99999-s from generate_series(0,99999) as s; +INSERT 0 100000 +postgres=> +postgres=> analyze t; +ANALYZE +postgres=> +postgres=> create table t1 as select * from t; +SELECT +postgres=> +postgres=> /* notice no ANALYZE for T1 */; +postgres=> +postgres=> select count(*) from t; + count +-------- + 100000 +(1 row) + +postgres=> +postgres=> select count(*) from t1; + count +-------- + 100000 +(1 row) + +postgres=> +postgres=> /* stop here if you wish to run only pg_advise_index tool */; +postgres=> +postgres=> -- set enable_advise_index to true; +postgres=> /* set the client to see the log messages generated by the Adviser */ +; +postgres=> set client_min_messages to log; +SET +postgres=> +postgres=> explain select * from t where a = 100; +LOG: Index Adviser: Plan using estimates for suggested indexes: + +Index Scan using idx_adv_0 on t (cost=0.00..8.28 rows=1 width=8) + Index Cond: (a = 100) + + QUERY PLAN +---------------------------------------------------- + Seq Scan on t (cost=0.00..1791.00 rows=1 width=8) + Filter: (a = 100) +(2 rows) + +postgres=> +postgres=> explain select * from t where b = 100; +LOG: Index Adviser: Plan using estimates for suggested indexes: + +Index Scan using idx_adv_0 on t (cost=0.00..8.28 rows=1 width=8) + Index Cond: (b = 100) + + QUERY PLAN +---------------------------------------------------- + Seq Scan on t (cost=0.00..1791.00 rows=1 width=8) + Filter: (b = 100) +(2 rows) + +postgres=> +postgres=> explain select * from t where a = 100 and b = 100; +LOG: Index Adviser: Plan using estimates for suggested indexes: + +Index Scan using idx_adv_1 on t (cost=0.00..8.28 rows=1 width=8) + Index Cond: (b = 100) + Filter: (a = 100) + + QUERY PLAN +---------------------------------------------------- + Seq Scan on t (cost=0.00..2041.00 rows=1 width=8) + Filter: ((a = 100) AND (b = 100)) +(2 rows) + +postgres=> +postgres=> explain select * from t where a = 100 or b = 100; +LOG: Index Adviser: Plan using estimates for suggested indexes: + +Bitmap Heap Scan on t (cost=8.54..16.21 rows=2 width=8) + Recheck Cond: ((a = 100) OR (b = 100)) + -> BitmapOr (cost=8.54..8.54 rows=2 width=0) + -> Bitmap Index Scan on idx_adv_0 (cost=0.00..4.27 rows=1 width=0) + Index Cond: (a = 100) + -> Bitmap Index Scan on idx_adv_1 (cost=0.00..4.27 rows=1 width=0) + Index Cond: (b = 100) + + QUERY PLAN +---------------------------------------------------- + Seq Scan on t (cost=0.00..2041.00 rows=2 width=8) + Filter: ((a = 100) OR (b = 100)) +(2 rows) + +postgres=> +postgres=> /* now we shall test with T1 */; +postgres=> +postgres=> explain select * from t1 where a = 100; + QUERY PLAN +------------------------------------------------------- + Seq Scan on t1 (cost=0.00..1852.93 rows=525 width=8) + Filter: (a = 100) +(2 rows) + +postgres=> +postgres=> explain select * from t1 where b = 100; + QUERY PLAN +------------------------------------------------------- + Seq Scan on t1 (cost=0.00..1852.93 rows=525 width=8) + Filter: (b = 100) +(2 rows) + +postgres=> +postgres=> explain select * from t1 where a = 100 and b = 100; + QUERY PLAN +----------------------------------------------------- + Seq Scan on t1 (cost=0.00..2115.31 rows=3 width=8) + Filter: ((a = 100) AND (b = 100)) +(2 rows) + +postgres=> +postgres=> explain select * from t1 where a = 100 or b = 100; + QUERY PLAN +-------------------------------------------------------- + Seq Scan on t1 (cost=0.00..2115.31 rows=1047 width=8) + Filter: ((a = 100) OR (b = 100)) +(2 rows) + +postgres=> +postgres=> /* let's do some sensible join over these two tables */; +postgres=> explain +postgres-> select * +postgres-> from t, +postgres-> t1 +postgres-> where t.a = 100 +postgres-> and t1.a = 100 +postgres-> or t.b = 100 +postgres-> and t1.b = 100; +LOG: Index Adviser: Plan using estimates for suggested indexes: + +Nested Loop (cost=8.54..7395.45 rows=1050 width=16) + Join Filter: (((t.a = 100) AND (t1.a = 100)) OR ((t.b = 100) AND (t1.b = 100))) + -> Bitmap Heap Scan on t (cost=8.54..16.21 rows=2 width=8) + Recheck Cond: ((a = 100) OR (b = 100)) + -> BitmapOr (cost=8.54..8.54 rows=2 width=0) + -> Bitmap Index Scan on idx_adv_0 (cost=0.00..4.27 rows=1 width=0) + Index Cond: (a = 100) + -> Bitmap Index Scan on idx_adv_1 (cost=0.00..4.27 rows=1 width=0) + Index Cond: (b = 100) + -> Seq Scan on t1 (cost=0.00..1590.54 rows=104954 width=8) + + QUERY PLAN +----------------------------------------------------------------------------------- + Nested Loop (cost=0.00..368963541.00 rows=1050 width=16) + Join Filter: (((t.a = 100) AND (t1.a = 100)) OR ((t.b = 100) AND (t1.b = 100))) + -> Seq Scan on t (cost=0.00..1541.00 rows=100000 width=8) + -> Seq Scan on t1 (cost=0.00..1590.54 rows=104954 width=8) +(4 rows) + +postgres=> +postgres=> /* following are the contents of the advise_index table */; +postgres=> select * from advise_index; + reloid | attrs | profit | index_size | backend_pid | timestamp +--------+-------+--------------+------------+-------------+------------------------------- + 16395 | {1} | 1782.72 | 2608 | 2272 | 2007-01-13 12:30:02.39+05:30 + 16395 | {2} | 1782.72 | 2608 | 2272 | 2007-01-13 12:30:02.531+05:30 + 16395 | {2} | 2032.72 | 2608 | 2272 | 2007-01-13 12:30:02.578+05:30 + 16395 | {1} | 1012.4 | 2608 | 2272 | 2007-01-13 12:30:02.64+05:30 + 16395 | {2} | 1012.4 | 2608 | 2272 | 2007-01-13 12:30:02.64+05:30 + 16395 | {1} | 1.84478e+008 | 2608 | 2272 | 2007-01-13 12:30:17.984+05:30 + 16395 | {2} | 1.84478e+008 | 2608 | 2272 | 2007-01-13 12:30:17.984+05:30 +(7 rows) + +postgres=> +postgres=> /* And following is the summary of the advices generated */; +postgres=> +postgres=> select E'backend_pid\n' +postgres-> || E'===========\n' +postgres-> || backend_pid, +postgres-> advise_index_show( backend_pid ) +postgres-> from (select distinct backend_pid +postgres(> from advise_index as adv +postgres(> where adv.reloid in (select oid +postgres(> from pg_class +postgres(> where relkind = 'r') +postgres(> ) as v; + ?column? | advise_index_show +-------------+-------------------------------------------------------------------- + backend_pid | /* Index Adviser */ + =========== : /* ============= */ + 2272 : + : /* size: 2608 KB, benefit: 1.84483e+008, gain: 70737.3067484663 */ + : create index idx_t_b on t(b); + : + : /* size: 2608 KB, benefit: 1.84481e+008, gain: 70736.527607362 */ + : create index idx_t_a on t(a); + : +(1 row) + +postgres=> diff --git a/resources/select_index_advisory.create.sql b/resources/select_index_advisory.create.sql new file mode 100644 index 0000000..0a762a2 --- /dev/null +++ b/resources/select_index_advisory.create.sql @@ -0,0 +1,13 @@ + +create view select_index_advisory as +select E'backend_pid\n' + || E'===========\n' + || backend_pid, + show_index_advisory( backend_pid ) +from (select distinct backend_pid + from index_advisory as adv + /* do not consider tables that no longer exist */ + where exists (select oid + from pg_class + where oid = adv.reloid) + ) as v; diff --git a/resources/show_index_advisory.create.sql b/resources/show_index_advisory.create.sql new file mode 100644 index 0000000..5764a90 --- /dev/null +++ b/resources/show_index_advisory.create.sql @@ -0,0 +1,100 @@ + +create function show_index_advisory(p_backend_pid index_advisory.backend_pid%type) returns text +as +$$ +declare + pid p_backend_pid%type; + q_advice text; + r_advice record; + q_column text; + r_column record; + ret text; + + NAMEDATALEN int := 64; + + collist_w_C text; /* Column name list with commas */ + collist_w_U text; /* Column name list with underscores */ + colidlist_w_U text; /* Column id list with underscores */ +begin + if p_backend_pid is null then + pid = pg_backend_pid(); + else + pid = p_backend_pid; + end if; + + ret := '/* Index Adviser */' || E'\n' || + '/* ============= */' || E'\n'; + + q_advice := 'SELECT c.relname, + c.oid as reloid, + a.attrs AS colids, + MAX( a.index_size ) AS size_in_KB, + SUM( a.profit ) AS benefit, + SUM( a.profit )/MAX( a.index_size ) AS gain + FROM index_advisory a, + pg_class c + WHERE a.backend_pid = ' || pid || ' + AND a.reloid = c.oid + GROUP BY c.relname, c.oid, a.attrs + ORDER BY gain + DESC'; + + for r_advice in execute q_advice loop + + ret := ret || + E'\n/* size: ' || r_advice.size_in_KB || ' KB, ' + || 'benefit: ' || r_advice.benefit || ', ' + || 'gain: ' || r_advice.gain || E' */\n'; + + collist_w_C := ''; + collist_w_U := ''; + colidlist_w_U := ''; + + for i in array_lower( r_advice.colids, 1 ) + .. array_upper( r_advice.colids, 1 ) + loop + + q_column := 'SELECT a.attname as name, + a.attnum as id + FROM pg_class c, + pg_attribute a + WHERE c.oid = ' || r_advice.reloid || ' + AND a.attrelid = c.oid + AND a.attnum = ' || r_advice.colids[i] || ' + '; + + execute q_column into r_column; + +-- if ROW_COUNT > 1 then +-- raise EXCEPTION 'an internal query failed'; +-- end if; + + if i <> 1 then + collist_w_C := collist_w_C || ', '; + collist_w_U := collist_w_U || '_'; + colidlist_w_U := colidlist_w_U || '_'; + end if; + + collist_w_C := collist_w_C || r_column.name; + collist_w_U := collist_w_U || r_column.name; + colidlist_w_U := colidlist_w_U || r_column.id; + + end loop; + + ret := ret || 'create index '; + + if (length('idx_' || r_advice.relname || '_' || collist_w_U) + <= NAMEDATALEN) + then + ret := ret || 'idx_' || r_advice.relname || '_' || collist_w_U; + else + ret := ret || 'idx_' || r_advice.reloid || '_' || colidlist_w_U; + end if; + + ret := ret || ' on ' || r_advice.relname || '(' || collist_w_C || E');\n'; + + end loop; + + return ret; +end; +$$ language plpgsql;