Archive

Monthly Archives: December 2014

Cache Invalidation
My previous post with the same title leaves a question: how to invalidate cache and deal with race condition. Colin ‘t Hart suggested to implement “on commit trigger” which is not available in PostgreSQL currently.

In a good book “PostgreSQL Server Programming” by Jim Mlodgenski, Hannu Krosing and Kirk Roybal, there is a statement that we can register C-language function to be called on COMMIT or ROLLBACK events.

I have an idea to have such a C function by which I can store keys for invalidation and later call pgmemcache’s memcache_delete in respond to COMMIT event:

CREATE OR REPLACE FUNCTION mcache_delete(IN t_keys text[])
RETURNS void
AS '$libdir/mcache', 'mcache_delete' LANGUAGE C IMMUTABLE;
psql=# begin;
psql=# ... some work ...
psql=# select mcache_delete(ARRAY['dcb50cad', '1f19bee7']); --store keys for invalidation
psql=# ... other work ...
psql=# select mcache_delete(ARRAY['91f29028']); --store other keys for invalidation
psql=# commit; --delete memcache keys

My code:

#include <postgres.h>
#include <fmgr.h>
#include <utils/array.h>
#include <utils/builtins.h>
#include <catalog/pg_type.h>
#include <access/xact.h>
#include <utils/memutils.h>

#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#endif

//declare pgmemcahe functionality
extern Datum memcache_delete(PG_FUNCTION_ARGS);

void _PG_init(void);
void _PG_fini(void);
static void mcache_xact_callback(XactEvent event, void *arg);
static void mcache_reset(void);

static int mcache_count;
static text** mcache_keys;

PG_FUNCTION_INFO_V1(mcache_delete);
Datum mcache_delete(PG_FUNCTION_ARGS);

Datum mcache_delete(PG_FUNCTION_ARGS) {
  ArrayType* keys;
  Datum* elems;
  bool* nulls;
  int n, i;
  MemoryContext oldCtx;

  if(PG_NARGS() != 1)
    elog(ERROR, "1 argument expected");

  if( !PG_ARGISNULL(0) ){
    keys = PG_GETARG_ARRAYTYPE_P(0);
    deconstruct_array(
    keys, TEXTOID, -1, false, 'i',
    &elems, &nulls, &n);
    if(n>0){
      oldCtx = MemoryContextSwitchTo(TopTransactionContext);
      if(mcache_count == 0)
        mcache_keys = (text**)palloc(n * sizeof(text*));
      else
        mcache_keys = (text**)repalloc( (void*)mcache_keys, (mcache_count+n) * sizeof(text*));
      for(i=0; i<n ; i++)
        mcache_keys[mcache_count+i] = nulls[i] ? NULL : DatumGetTextPCopy(elems[i]);
      mcache_count += n;
      MemoryContextSwitchTo(oldCtx);
    }

  }

  PG_RETURN_VOID();
}

void _PG_init(void) {
  mcache_reset();
  RegisterXactCallback(mcache_xact_callback, NULL);
}

void _PG_fini(void) {
  mcache_reset();
}

static void mcache_xact_callback(XactEvent event, void *arg) {
  int i;
  if( event == XACT_EVENT_COMMIT ) {//commit
    for(i=0; i<mcache_count; i++)
      if(mcache_keys[i])
        DirectFunctionCall1(//call pgmemcache function
          memcache_delete,
          PointerGetDatum(mcache_keys[i]) );
    //note: mcache_keys are allocated in TopTransactionContext
    //and implicitly released by internal Postgresql framework
    mcache_reset();
  } else if ( event == XACT_EVENT_ABORT ) //rollback
    mcache_reset();
}

static void mcache_reset(void) {
  mcache_count=0;
  mcache_keys=NULL;
}

I apologize for my cancelled post titled “Linux System Programming in C”. It is supposed to be posted under other category.

To alleviate database load, my first pick is pgpool-II with memory_cache_enabled turned on. Unfortunately, it does not cache query result from stored procedure.

The alternative is pgmemcache. It provides interface functions to memcached and can be called from within stored procedure.

Thanks to pgpool-II idea, SQL command and result is cached into memcached hash:

  • Taking md5 of SQL as key.
  • Forming set of records to string as value.

Assumption:

  • memcahed server is up and running.
  • pgmemcahe extension is installed in postgersql server.
  • postgresql.conf has been set accordingly.

My case is query result for chart of account (coa) table joining 3 tables:

SELECT
  coa.id,
  tipe.label AS type_label,
  coa.kode,
  coa.label AS coa_label,
  dbcr.label AS dbcr_label
FROM acct.coa coa
INNER JOIN acct.tipe tipe ON coa.tipe=tipe.id
INNER JOIN acct.dbcr dbcr ON coa.dbcr=dbcr.id
OFFSET 0 LIMIT 25;

I wrap it insided plpgsql stored procedure. Here is my custom data type:

CREATE TYPE acct.coa_cache AS
(i_id integer,
t_tipe text,
t_kode text,
t_label text,
t_dbcr text);

Stored Procedure:

CREATE OR REPLACE FUNCTION acct.coa_query_page_cache(OUT d acct.coa_cache)
RETURNS SETOF acct.coa_cache AS
$BODY$
DECLARE
  _sql text;
  _key text;
  _v text;
  _ds acct.coa_cache[];
BEGIN
  _sql:= 'SELECT coa.id'
    || ',tipe.label'
    || ',coa.kode'
    || ',coa.label'
    || ',dbcr.label'
    || ' FROM acct.coa coa'
    || ' INNER JOIN acct.tipe tipe ON coa.tipe=tipe.id'
    || ' INNER JOIN acct.dbcr dbcr ON coa.dbcr=dbcr.id'
    || ' OFFSET 0 LIMIT 10';
  _key:=md5(_sql); --taking md5 as key
  _v:=memcache_get(_key); --get hash value by key
  IF _v IS NOT NULL THEN --hit
    raise notice 'hit';
    _ds:=NULLIF(_v,'')::acct.coa_cache[];--convert string to acct.coa_cache array
    RETURN QUERY SELECT * FROM UNNEST(_ds);
  ELSE --miss
    raise notice 'miss';
    FOR d IN EXECUTE _sql
    LOOP
      _ds:= _ds || d;
      RETURN NEXT;
    END LOOP;
    PERFORM memcache_set(_key, _ds::text);--register key/value to memcached
  END IF;
  RETURN;
END;$BODY$
  LANGUAGE plpgsql STABLE SECURITY DEFINER;

Let us check how it works with psql.

psql# \timing on
Timing is on.

First, check the raw SQL performance:

psql# SELECT coa.id,
tipe.label AS tipe_label,
coa.kode,
coa.label,
dbcr.label AS dbcr_label
FROM acct.coa coa
INNER JOIN acct.tipe tipe ON coa.tipe=tipe.id
INNER JOIN acct.dbcr dbcr ON coa.dbcr=dbcr.id
OFFSET 0 LIMIT 10;
id | tipe_label | kode | label | dbcr_label
----+------------+-----------+-----------------------------+------------
1 | AKTIVA | 1 | AKTIVA | DEBET
2 | AKTIVA | 1.1 | AKTIVA LANCAR | DEBET
3 | AKTIVA | 1.1.1 | KAS | DEBET
4 | AKTIVA | 1.1.1.1 | KAS DANA ZAKAT | DEBET
5 | AKTIVA | 1.1.1.2 | Kas Dana Infaq | DEBET
6 | AKTIVA | 1.1.1.2.1 | Kas Dana Infaq Umum | DEBET
7 | AKTIVA | 1.1.1.2.2 | Kas Dana Infaq Yatim Dhuafa | DEBET
8 | AKTIVA | 1.1.1.2.3 | KAS DANA INFAQ QURBAN | DEBET
9 | AKTIVA | 1.1.1.3 | Kas Dana Kemanusiaan | DEBET
10 | AKTIVA | 1.1.1.3.1 | Kas Dana Kemanusiaan Umum | DEBET
(10 rows)

Time: 6.652 ms

Second run: 1.290 ms
Third run: 1.248 ms
Fourth run: 1.238 ms
Fifth run: 1.263 ms
Looks it get saturated at 1.2 ms for long run.

How it looks like for memcached version:

psql# select * from acct.coa_query_page_cache();
NOTICE: miss
i_id | t_tipe | t_kode | t_label | t_dbcr
------+--------+-----------+-----------------------------+--------
1 | AKTIVA | 1 | AKTIVA | DEBET
2 | AKTIVA | 1.1 | AKTIVA LANCAR | DEBET
3 | AKTIVA | 1.1.1 | KAS | DEBET
4 | AKTIVA | 1.1.1.1 | KAS DANA ZAKAT | DEBET
5 | AKTIVA | 1.1.1.2 | Kas Dana Infaq | DEBET
6 | AKTIVA | 1.1.1.2.1 | Kas Dana Infaq Umum | DEBET
7 | AKTIVA | 1.1.1.2.2 | Kas Dana Infaq Yatim Dhuafa | DEBET
8 | AKTIVA | 1.1.1.2.3 | KAS DANA INFAQ QURBAN | DEBET
9 | AKTIVA | 1.1.1.3 | Kas Dana Kemanusiaan | DEBET
10 | AKTIVA | 1.1.1.3.1 | Kas Dana Kemanusiaan Umum | DEBET
(10 rows)

Time: 3.488 ms
siazcenter=# select * from acct.coa_query_page_cache();
NOTICE: hit
i_id | t_tipe | t_kode | t_label | t_dbcr
------+--------+-----------+-----------------------------+--------
1 | AKTIVA | 1 | AKTIVA | DEBET
2 | AKTIVA | 1.1 | AKTIVA LANCAR | DEBET
3 | AKTIVA | 1.1.1 | KAS | DEBET
4 | AKTIVA | 1.1.1.1 | KAS DANA ZAKAT | DEBET
5 | AKTIVA | 1.1.1.2 | Kas Dana Infaq | DEBET
6 | AKTIVA | 1.1.1.2.1 | Kas Dana Infaq Umum | DEBET
7 | AKTIVA | 1.1.1.2.2 | Kas Dana Infaq Yatim Dhuafa | DEBET
8 | AKTIVA | 1.1.1.2.3 | KAS DANA INFAQ QURBAN | DEBET
9 | AKTIVA | 1.1.1.3 | Kas Dana Kemanusiaan | DEBET
10 | AKTIVA | 1.1.1.3.1 | Kas Dana Kemanusiaan Umum | DEBET
(10 rows)

Time: 0.846 ms

Third run: 0.826 ms (hit)
Fourth run: 0.838 ms (hit)
Fifth run: 0.815 ms (hit)

The longest 3.488 ms is for the first run when I miss memcached key and start register query key/value. Next run is faster, saturated at 0.8 ms for long run. The more important thing is that my database load is reduced in write once, read many times scenario.

Invalidation
How to invalidate memcached hash when underlying tables are updated. In my case, I write statement trigger and assign it to underlying tables. It will delete key/value from memcached whenever any underlying tables are modified.