Provided by: libcdb-dev_0.78_amd64 bug

NAME

       cdb - Constant DataBase library

SYNOPSIS

        #include <cdb.h>
        cc ... -lcdb

DESCRIPTION

       cdb is a library to create and access Constant DataBase files.  A file stores (key,value)
       pairs and is used to quickly find a value based on a given key.  Cdb files are create-once
       files, that is, once created, a file cannot be updated, only recreated from scratch -- this
       is why the database is called constant.  A cdb file is optimized for quick access.  The
       format of such a file is described in the cdb(5) manpage.  This manual page corresponds to
       version 0.78 of the tinycdb package.

       The library defines two non-interlaced interfaces: one for querying existing cdb file data
       (read-only mode) and one for creating such a file (almost write-only).  Strictly speaking,
       those modes allow a very limited set of opposite operations as well (e.g. in query mode, it
       is possible to update a key's value).

       All routines in this library are thread-safe as no global data is used, except for the
       errno variable for error indication.

       cdb datafiles may be moved between  systems  safely,  since  format  does  not  depend  on
       architecture.

QUERY MODE

       There are two query modes available.  The first uses a structure that represents a cdb
       database, just like the FILE structure in the stdio library, and the other works with a
       plain filedescriptor.  The first mode is more sophisticated and flexible, and usually
       somewhat faster.  It uses mmap(2) internally.  This mode may look more "natural" or
       object-oriented compared to the second one.

       The following routine works with either mode:

       unsigned cdb_unpack(buf)
          const unsigned char buf[4];
              helper  routine  to  convert 32-bit integer from internal representation to machine
              format.  May be used to handle application integers in a portable way.  There is no
              error return.

   Query Mode 1
       All query operations in the first mode deal with a common data structure, struct cdb,
       associated with an open file descriptor.  This structure is opaque to the application.

       The following routines exist for accessing a cdb database:

       int cdb_init(cdbp, fd)
          struct cdb *cdbp;
          int fd;
              initializes structure given by cdbp pointer and  associates  it  with  opened  file
              descriptor  fd.   Memory  allocation  for  structure itself if needed and file open
              operation should be done by application.  File fd should be opened at  least  read-
              only,  and  should  be seekable.  Routine returns 0 on success or negative value on
              error.

       void cdb_free(cdbp)
          struct cdb *cdbp;
              frees internal resources held by the structure.  Note that this routine does not
              close the file.

       int cdb_fileno(cdbp)
         const struct cdb *cdbp;
              returns filedescriptor associated with cdb (as was passed to cdb_init()).

       int cdb_read(cdbp, buf, len, pos)
       int cdb_readdata(cdbp, buf, len, pos)
       int cdb_readkey(cdbp, buf, len, pos)
          const struct cdb *cdbp;
          void *buf;
          unsigned len;
          unsigned pos;
              reads data from the cdb file, starting at position pos of length len, placing the
              result in buf.  This routine may be used to get the actual value found by cdb_find()
              or other routines that return the position and length of data.  Returns 0 on success
              or a negative value on error.  Routines cdb_readdata() and cdb_readkey() are
              shorthands to read the current (after e.g. cdb_find()) data and key respectively,
              using cdb_read().

       const void *cdb_get(cdbp, len, pos)
       const void *cdb_getdata(cdbp)
       const void *cdb_getkey(cdbp)
          const struct cdb *cdbp;
          unsigned len;
          unsigned pos;
              Internally, the cdb library uses a memory-mapped region to access the on-disk
              database.  cdb_get() allows accessing internal memory in a way similar to
              cdb_read() but without extra copying and buffer allocation.  Returns a pointer to
              the actual data on success or NULL on error (position points outside of the
              database).  Routines cdb_getdata() and cdb_getkey() are shorthands to access the
              current (after e.g. cdb_find()) data and key respectively, using cdb_get().

       int cdb_find(cdbp, key, klen)
       unsigned cdb_datapos(cdbp)
       unsigned cdb_datalen(cdbp)
       unsigned cdb_keypos(cdbp)
       unsigned cdb_keylen(cdbp)
          struct cdb *cdbp;
          const void *key;
          unsigned klen;
              attempts to find a key given by the (key,klen) parameters.  If the key exists in the
              database, the routine returns 1 and places the position and length of the value
              associated with this key in internal fields inside the cdbp structure, to be
              accessible by the cdb_datapos(cdbp) and cdb_datalen(cdbp) routines.  If the key is
              not in the database, cdb_find() returns 0.  On error, a negative value is returned.
              Data pointers (available via cdb_datapos() and cdb_datalen()) get updated only in
              case of a successful search.  Note that using cdb_find() it is possible to look up
              only the first record with a given key.

       int cdb_findinit(cdbfp, cdbp, key, klen)
       int cdb_findnext(cdbfp)
         struct cdb_find *cdbfp;
         const struct cdb *cdbp;
         const void *key;
         unsigned klen;
              sequential-find routines that use a separate structure.  It is possible to have more
              than one record with the same key in a database, and these routines allow
              enumerating all of them.  cdb_findinit() initializes the search structure pointed to
              by cdbfp.  It will return a negative value on error or a non-negative value on
              success.  cdb_findnext() attempts to find the next (first when called right after
              cdb_findinit()) matching key, setting the value position and length in the cdbfp
              structure.  It will return a positive value if the given key was found, 0 if there
              are no more such keys, or a negative value on error.  To access the value position
              and length after a successful call to cdb_findnext() (when it returned a positive
              result), use the cdb_datapos(cdbp) and cdb_datalen(cdbp) routines.  It is an error
              to continue using cdb_findnext() after it returned 0 or an error condition
              (cdb_findinit() should be called again).  Current data pointers (available via
              cdb_datapos() and cdb_datalen()) get updated only on a successful search.

       void cdb_seqinit(cptr, cdbp)
       int cdb_seqnext(cptr, cdbp)
         unsigned *cptr;
         struct cdb *cdbp;
              sequential enumeration of all records stored in a cdb file.  cdb_seqinit()
              initializes the current data pointer cptr to point before the first record in the
              cdb file.  cdb_seqnext() updates the data pointers in cdbp to point to the next
              record and updates cptr, returning a positive value on success, 0 on end of data
              condition and a negative value on error.  The current record will be available
              after a successful operation using cdb_datapos(cdbp) and cdb_datalen(cdbp) (for the
              data) and cdb_keypos(cdbp) and cdb_keylen(cdbp) (for the key of the record).  Data
              pointers get updated only in case of a successful operation.

   Query Mode 2
       In this mode, one needs to open a cdb file using one of the standard system calls (such as
       open(2)) to obtain a filedescriptor, and then pass that filedescriptor to cdb routines.
       Available methods to query a cdb database using only a filedescriptor include:

       int cdb_seek(fd, key, klen, dlenp)
         int fd;
         const void *key;
         unsigned klen;
         unsigned *dlenp;
              searches a cdb database (as pointed to by fd filedescriptor) for  a  key  given  by
              (key,  klen),  and positions file pointer to start of data associated with that key
              if found, so that next read operation  from  this  filedescriptor  will  read  that
              value,  and  places  length  of  value,  in bytes, to variable pointed to by dlenp.
              Returns positive value if operation was successful, 0 if  key  was  not  found,  or
              negative  value  on  error.   To read the data from a cdb file, cdb_bread() routine
              below can be used.

       int cdb_bread(fd, buf, len)
         int fd;
         void *buf;
         int len;
              reads data from a file (as pointed to by the fd filedescriptor) and places len bytes
              from this file into the buffer pointed to by buf.  Returns 0 if exactly len bytes
              were read, or a negative value in case of error or end-of-file.  This routine
              ignores interrupt errors (EINTR).  Sets the errno variable to EIO in case of an
              end-of-file condition (when there are fewer than len bytes available to read).

   Notes
       Note that value of any given key may be updated in place by  another  value  of  the  same
       size,  by  writing  to  file  at  position found by cdb_find() or cdb_seek().  However one
       should be very careful when doing so, since write operation may not  succeed  in  case  of
       e.g.  power  failure,  thus leaving corrupted data.  When database is (re)created, one can
       guarantee that no incorrect data will be written to database, but not with inplace update.
       Note also that it is not possible to update any key or to change length of value.

CREATING MODE

       A cdb database file should usually be created in two steps: first, a temporary file is
       created and written to disk, and second, that temporary file is renamed to its permanent
       place.  The Unix rename(2) call is an atomic operation: it removes the destination file,
       if any, AND renames the other file in one step.  This way it is guaranteed that readers
       will not see an incomplete database.  To prevent multiple simultaneous updates, locking
       may also be used.

       All routines used to create a cdb database work with a struct cdb_make object that is
       opaque to the application.  The application may assume that struct cdb_make has at least
       the same member(s) as published in struct cdb above.

       int cdb_make_start(cdbmp, fd)
          struct cdb_make *cdbmp;
          int fd;
              initializes  structure  to  create a database.  File fd should be opened read-write
              and should be seekable.  Returns 0 on success or negative value on error.

       int cdb_make_add(cdbmp, key, klen, val, vlen)
          struct cdb_make *cdbmp;
          const void *key, *val;
          unsigned klen, vlen;
              adds a record with key (key,klen) and value (val,vlen) to the database.  Returns 0
              on success or a negative value on error.  Note that this routine does not check
              whether the given key already exists, but cdb_find() will not see a second record
              with the same key.  It is not possible to continue building the database if
              cdb_make_add() returned an error indicator.

       int cdb_make_finish(cdbmp)
          struct cdb_make *cdbmp;
              finalizes the database file, constructing all needed indexes, and frees memory
              structures.  It does not close the filedescriptor.  Returns 0 on success or a
              negative value on error.

       int cdb_make_exists(cdbmp, key, klen)
          struct cdb_make *cdbmp;
          const void *key;
          unsigned klen;
              This routine attempts to find the key given by (key,klen) in a not-yet-complete
              database.  It may significantly slow down the whole process, and currently it
              flushes the internal buffer to disk on every call with a key whose hash value
              already exists in the db.  Returns 0 if such a key doesn't exist, 1 if it does, or a
              negative value on error.  Note that the database file should be opened read-write
              (not write-only) to use this routine.  If cdb_make_exists() returned an error, it
              may not be possible to continue constructing the database.

       int cdb_make_find(cdbmp, key, klen, mode)
          struct cdb_make *cdbmp;
          const void *key;
          unsigned klen;
          int mode;
              This routine attempts to find the key given by (key,klen) in the database being
              created.  If the given key already exists, the action specified by mode will be
              performed:

              CDB_FIND
                     checks whether the given record is already in the database.

              CDB_FIND_REMOVE
                     removes all matching records by re-writing the database file accordingly.

              CDB_FIND_FILL0
                     fills all matching records with zeros and removes them from  index  so  that
                     the  records  in  question  will  not  be findable with cdb_find().  This is
                     faster than CDB_FIND_REMOVE, but leaves zero "gaps" in the database.  Lastly
                     inserted records, if matched, are always removed.

              If no matching keys were found, the routine returns 0.  In case at least one record
              has been found/removed, a positive value will be returned.  On error, a negative
              value will be returned and errno will be set appropriately.  When cdb_make_find()
              returns a negative value in case of error, it is not possible to continue
              constructing the database.

              cdb_make_exists() is the same as calling cdb_make_find() with mode set to CDB_FIND.

       int cdb_make_put(cdbmp, key, klen, val, vlen, mode)
          struct cdb_make *cdbmp;
          const void *key, *val;
          unsigned klen, vlen;
          int mode;
              This  is  a  somewhat combined cdb_make_exists() and cdb_make_add() routines.  mode
              argument controls how repeated (already existing) keys will be treated:

              CDB_PUT_ADD
                     no duplicate  checking  will  be  performed.   This  mode  is  the  same  as
                     cdb_make_add() routine does.

              CDB_PUT_REPLACE
                     If  the  key  already  exists,  it  will be removed from the database before
                     adding new key,value pair.  This requires moving data in the file,  and  can
                     be  quite  slow  if  the  file  is  large.  All matching old records will be
                     removed this  way.   This  is  the  same  as  calling  cdb_make_find()  with
                     CDB_FIND_REMOVE mode argument followed by calling cdb_make_add().

              CDB_PUT_REPLACE0
                     If the key already exists and it isn't the last record in the file, the old
                     record will be zeroed out before adding the new key,value pair.  This is a
                     lot faster than CDB_PUT_REPLACE, but some extra data will still be present
                     in the file.  The data -- the old record -- will not be accessible by normal
                     searches, but will appear in sequential database traversal.  This is the
                     same as calling cdb_make_find() with the CDB_FIND_FILL0 mode argument
                     followed by cdb_make_add().

              CDB_PUT_INSERT
                     add the key,value pair only if such a key does not exist in the database.
                     Note that since a query (see query mode above) will find the first added
                     record, this mode is somewhat useless (but allows reducing database size in
                     case of repeated keys).  This is the same as calling cdb_make_exists(),
                     followed by cdb_make_add() if the key was not found.

              CDB_PUT_WARN
                     add  key,value  pair  unconditionally,  but  also  check if this key already
                     exists.  This is equivalent of cdb_make_exists() to check existence  of  the
                     given key, unconditionally followed by cdb_make_add().

              If any error occurred during operations, the routine will return a negative integer
              and will set the global variable errno to indicate the reason for failure.  In case
              of successful operation and no duplicates found, the routine will return 0.  If any
              duplicates have been found or removed (which, in case of CDB_PUT_INSERT mode,
              indicates that the new record was not added), the routine will return a positive
              value.  If an error occurred and cdb_make_put() returned a negative value, it is not
              possible to continue the database construction process.

              As  with  cdb_make_exists() and cdb_make_find(), usage of this routine with any but
              CDB_PUT_ADD mode can significantly slow down database creation process,  especially
              when mode is equal to CDB_PUT_REPLACE0.

       void cdb_pack(num, buf)
          unsigned num;
          unsigned char buf[4];
              helper routine that is used internally to convert machine integer num to the
              internal form to be stored in the datafile.  A 32-bit integer is stored in 4 bytes
              in little-endian byte order.  May be used to handle application data.  There is no
              error return.

       unsigned cdb_hash(buf, len)
          const void *buf;
          unsigned len;
              helper routine that calculates cdb hash value of given bytes.  CDB hash function is
                hash[n] = (hash[n-1] + (hash[n-1] << 5)) ^ buf[n]
              starting with
                hash[-1] = 5381

ERRORS

       The cdb library may set errno to the following values on error:

       EPROTO database file is corrupted in some way

       EINVAL the same as EPROTO above if system lacks EPROTO constant

       EINVAL mode argument for cdb_make_put() is invalid

       EEXIST mode argument for cdb_make_put() is CDB_PUT_INSERT, and key already exists

       ENOMEM not enough memory to complete operation (cdb_make_finish and cdb_make_add)

       EIO    set  by  cdb_bread and cdb_seek if a cdb file is shorter than expected or corrupted
              in some other way.

EXAMPLES

       Note: in all examples below, error checking is not shown for brevity.

   Query Mode
        int fd;
        struct cdb cdb;
        char *key, *data;
        unsigned keylen, datalen;

        /* opening the database */
        fd = open(filename, O_RDONLY);
        cdb_init(&cdb, fd);
        /* initialize key and keylen here */

        /* single-record search. */
        if (cdb_find(&cdb, key, keylen) > 0) {
          datalen = cdb_datalen(&cdb);
          data = malloc(datalen + 1);
          cdb_read(&cdb, data, datalen, cdb_datapos(&cdb));
          data[datalen] = '\0';
          printf("key=%s data=%s\n", key, data);
          free(data);
        }
        else
          printf("key=%s not found\n", key);

        /* multiple record search */
        struct cdb_find cdbf;
        int n;
        cdb_findinit(&cdbf, &cdb, key, keylen);
        n = 0;
        while(cdb_findnext(&cdbf) > 0) {
          datalen = cdb_datalen(&cdb);
          data = malloc(datalen + 1);
          cdb_read(&cdb, data, datalen, cdb_datapos(&cdb));
          data[datalen] = '\0';
          printf("key=%s data=%s\n", key, data);
          free(data);
          ++n;
        }
        printf("key=%s %d records found\n", key, n);

        /* sequential database access */
        unsigned pos;
        int n;
        cdb_seqinit(&pos, &cdb);
        n = 0;
        while(cdb_seqnext(&pos, &cdb) > 0) {
          keylen = cdb_keylen(&cdb);
          key = malloc(keylen + 1);
          cdb_read(&cdb, key, keylen, cdb_keypos(&cdb));
          key[keylen] = '\0';
          datalen = cdb_datalen(&cdb);
          data = malloc(datalen + 1);
          cdb_read(&cdb, data, datalen, cdb_datapos(&cdb));
          data[datalen] = '\0';
          ++n;
          printf("record %d: key=%s data=%s\n", n, key, data);
          free(data); free(key);
        }
        printf("total records found: %d\n", n);

        /* close the database */
        cdb_free(&cdb);
        close(fd);

        /* simplistic query mode */
        fd = open(filename, O_RDONLY);
        if (cdb_seek(fd, key, keylen, &datalen) > 0) {
          data = malloc(datalen + 1);
          cdb_bread(fd, data, datalen);
          data[datalen] = '\0';
          printf("key=%s data=%s\n", key, data);
        }
        else
          printf("key=%s not found\n", key);
        close(fd);

   Create Mode
        int fd;
        struct cdb_make cdbm;
        char *key, *data;
        unsigned keylen, datalen;

        /* initialize the database */
        fd = open(filename, O_RDWR|O_CREAT|O_TRUNC, 0644);
        cdb_make_start(&cdbm, fd);

        while(have_more_data()) {
          /* initialize key and data */
          if (cdb_make_exists(&cdbm, key, keylen) == 0)
            cdb_make_add(&cdbm, key, keylen, data, datalen);
          /* or use cdb_make_put() with appropriate flags */
        }

        /* finalize and close the database */
        cdb_make_finish(&cdbm);
        close(fd);

SEE ALSO

       cdb(5), cdb(1), dbm(3), db(3), open(2).

AUTHOR

       The tinycdb package was written by Michael Tokarev <mjt@corpit.ru>, based on ideas from,
       and sharing the file format with, the original cdb library by Dan Bernstein.

LICENSE

       Public domain.

                                             Jun 2006                                      cdb(3)