Provided by: libcdb-dev_0.77build1_i386 bug

NAME

       cdb - Constant DataBase library

SYNOPSIS

        #include <cdb.h>
        cc ... -lcdb

DESCRIPTION

       cdb is a library to create and access Constant DataBase files.  A cdb
       file stores (key,value) pairs and is used to quickly find a value
       based on a given key.  Cdb files are create-once files, that is, once
       created, a file cannot be updated, only recreated from scratch -- this
       is why the database is called constant.  A cdb file is optimized for
       quick access.  The format of such a file is described in the cdb(5)
       manpage.  This manual page corresponds to version 0.77 of the tinycdb
       package.

       The library defines two separate interfaces: one for querying an
       existing cdb file (read-only mode) and one for creating such a file
       (almost write-only).  Strictly speaking, each mode also allows a very
       limited set of the opposite operations (e.g. in query mode, it is
       possible to update a key's value).

       All routines in this library are thread-safe, as no global data is
       used except for the errno variable for error indication.

       cdb datafiles may be moved between systems safely,  since  format  does
       not depend on architecture.

QUERY MODE

       There are two query modes available.  The first uses a structure that
       represents a cdb database, just like the FILE structure in the stdio
       library, and the other works with a plain filedescriptor.  The first
       mode is more sophisticated and flexible, and usually somewhat faster.
       It uses mmap(2) internally.  This mode may look more "natural" or
       object-oriented compared to the second one.

       The following routine works with either mode:

       unsigned cdb_unpack(buf)
          const unsigned char buf[4];
              helper  routine  to  convert  32-bit   integer   from   internal
              representation  to  machine  format.   May  be  used  to  handle
              application integers in a  portable  way.   There  is  no  error
              return.

   Query Mode 1
       All query operations in the first mode deal with a common data
       structure, struct cdb, associated with an open file descriptor.  This
       structure is opaque to the application.

       The following routines exist for accessing a cdb database:

       int cdb_init(cdbp, fd)
          struct cdb *cdbp;
          int fd;
              initializes the structure given by the cdbp pointer and
              associates it with the opened file descriptor fd.  Memory
              allocation for the structure itself (if needed) and the file
              open operation should be done by the application.  File fd
              should be opened at least read-only, and should be seekable.
              The routine returns 0 on success or a negative value on error.

       void cdb_free(cdbp)
          struct cdb *cdbp;
              frees internal resources held by the structure.  Note that this
              routine does not close the file.

       int cdb_fileno(cdbp)
         const struct cdb *cdbp;
              returns  filedescriptor  associated  with  cdb (as was passed to
              cdb_init()).

       int cdb_read(cdbp, buf, len, pos)
       int cdb_readdata(cdbp, buf, len, pos)
       int cdb_readkey(cdbp, buf, len, pos)
          const struct cdb *cdbp;
          void *buf;
          unsigned len;
          unsigned pos;
              reads data from the cdb file, starting at position pos and of
              length len, placing the result in buf.  This routine may be used
              to get the actual value found by cdb_find() or other routines
              that return the position and length of data.  Returns 0 on
              success or a negative value on error.  Routines cdb_readdata()
              and cdb_readkey() are shorthands to read the current (after
              e.g. cdb_find()) data and key respectively, using cdb_read().

       const void *cdb_get(cdbp, len, pos)
       const void *cdb_getdata(cdbp)
       const void *cdb_getkey(cdbp)
          const struct cdb *cdbp;
          unsigned len;
          unsigned pos;
              Internally, the cdb library uses a memory-mapped region to
              access the on-disk database.  cdb_get() allows access to this
              internal memory in a way similar to cdb_read() but without
              extra copying and buffer allocation.  Returns a pointer to the
              actual data on success or NULL on error (the position points
              outside of the database).  Routines cdb_getdata() and
              cdb_getkey() are shorthands to access the current (after e.g.
              cdb_find()) data and key respectively, using cdb_get().

       int cdb_find(cdbp, key, klen)
       unsigned cdb_datapos(cdbp)
       unsigned cdb_datalen(cdbp)
       unsigned cdb_keypos(cdbp)
       unsigned cdb_keylen(cdbp)
          struct cdb *cdbp;
          const void *key;
          unsigned klen;
              attempts to find a key given by the (key,klen) parameters.  If
              the key exists in the database, the routine returns 1 and
              stores the position and length of the value associated with
              this key in internal fields inside the cdbp structure, where
              they are accessible via the cdb_datapos(cdbp) and
              cdb_datalen(cdbp) routines.  If the key is not in the database,
              cdb_find() returns 0.  On error, a negative value is returned.
              Data pointers (available via cdb_datapos() and cdb_datalen())
              are updated only in case of a successful search.  Note that
              with cdb_find() it is possible to look up only the first record
              with a given key.

       int cdb_findinit(cdbfp, cdbp, key, klen)
       int cdb_findnext(cdbfp)
         struct cdb_find *cdbfp;
         const struct cdb *cdbp;
         const void *key;
         unsigned klen;
              sequential-find routines that use a separate structure.  It is
              possible to have more than one record with the same key in a
              database, and these routines allow enumerating all of them.
              cdb_findinit() initializes the search structure pointed to by
              cdbfp.  It returns a negative value on error or a non-negative
              value on success.  cdb_findnext() attempts to find the next
              (the first, when called right after cdb_findinit()) matching
              key, setting the value position and length in the cdbfp
              structure.  It returns a positive value if the given key was
              found, 0 if there are no more such keys, or a negative value on
              error.  To access the value position and length after a
              successful call to cdb_findnext() (when it returned a positive
              result), use the cdb_datapos(cdbp) and cdb_datalen(cdbp)
              routines.  It is an error to continue using cdb_findnext()
              after it returned 0 or an error condition (cdb_findinit()
              should be called again).  Current data pointers (available via
              cdb_datapos() and cdb_datalen()) are updated only on a
              successful search.

       void cdb_seqinit(cptr, cdbp)
       int cdb_seqnext(cptr, cdbp)
         unsigned *cptr;
         struct cdb *cdbp;
              sequential enumeration of all records stored in a cdb file.
              cdb_seqinit() initializes the current data pointer cptr to
              point before the first record in the cdb file.  cdb_seqnext()
              updates the data pointers in cdbp to point to the next record
              and updates cptr, returning a positive value on success, 0 on
              end of data and a negative value on error.  After a successful
              operation, the current record is available using
              cdb_datapos(cdbp) and cdb_datalen(cdbp) (for the data) and
              cdb_keypos(cdbp) and cdb_keylen(cdbp) (for the key of the
              record).  Data pointers are updated only in case of a
              successful operation.

   Query Mode 2
       In this mode, one needs to open a cdb file using one of the standard
       system calls (such as open(2)) to obtain a filedescriptor, and then
       pass that filedescriptor to the cdb routines.  Available methods to
       query a cdb database using only a filedescriptor include:

       int cdb_seek(fd, key, klen, dlenp)
         int fd;
         const void *key;
         unsigned klen;
         unsigned *dlenp;
              searches a cdb database (as pointed to by the fd
              filedescriptor) for a key given by (key, klen), and positions
              the file pointer at the start of the data associated with that
              key if found, so that the next read operation from this
              filedescriptor will read that value, and stores the length of
              the value, in bytes, in the variable pointed to by dlenp.
              Returns a positive value if the operation was successful, 0 if
              the key was not found, or a negative value on error.  To read
              the data from a cdb file, the cdb_bread() routine below can be
              used.

       int cdb_bread(fd, buf, len)
         int fd;
         void *buf;
         int len;
              reads len bytes from a file (as pointed to by the fd
              filedescriptor) into the buffer pointed to by buf.  Returns 0
              if exactly len bytes were read, or a negative value in case of
              error or end-of-file.  This routine ignores interrupt errors
              (EINTR).  Sets the errno variable to EIO in case of an
              end-of-file condition (when there are fewer than len bytes
              available to read).

   Notes
       Note that the value of any given key may be updated in place by
       another value of the same size, by writing to the file at the
       position found by cdb_find() or cdb_seek().  However, one should be
       very careful when doing so, since the write operation may not succeed
       in case of e.g. power failure, thus leaving corrupted data.  When a
       database is (re)created, one can guarantee that no incorrect data
       will be written to the database, but not with an in-place update.
       Note also that it is not possible to update any key or to change the
       length of a value.

CREATING MODE

       A cdb database file should usually be created in two steps: first, a
       temporary file is created and written to disk, and second, that
       temporary file is renamed to its permanent place.  The Unix rename(2)
       call is an atomic operation: it removes the destination file, if any,
       AND renames the other file in one step.  This way it is guaranteed
       that readers will not see an incomplete database.  To prevent
       multiple simultaneous updates, locking may also be used.

       All routines used to create a cdb database work with a struct
       cdb_make object that is opaque to the application.  The application
       may assume that struct cdb_make has at least the same member(s) as
       published in struct cdb above.

       int cdb_make_start(cdbmp, fd)
          struct cdb_make *cdbmp;
          int fd;
              initializes structure to create a database.  File fd  should  be
              opened  read-write and should be seekable.  Returns 0 on success
              or negative value on error.

       int cdb_make_add(cdbmp, key, klen, val, vlen)
          struct cdb_make *cdbmp;
          const void *key, *val;
          unsigned klen, vlen;
              adds a record with key (key,klen) and value (val,vlen) to the
              database.  Returns 0 on success or a negative value on error.
              Note that this routine does not check whether the given key
              already exists, but cdb_find() will not see a second record
              with the same key.  It is not possible to continue building a
              database if cdb_make_add() returned an error indicator.

       int cdb_make_finish(cdbmp)
          struct cdb_make *cdbmp;
              finalizes the database file, constructing all needed indexes,
              and frees memory structures.  It does not close the
              filedescriptor.  Returns 0 on success or a negative value on
              error.

       int cdb_make_exists(cdbmp, key, klen)
          struct cdb_make *cdbmp;
          const void *key;
          unsigned klen;
              This routine attempts to find the key given by (key,klen) in a
              not-yet-complete database.  It may significantly slow down the
              whole process, and currently it flushes the internal buffer to
              disk on every call with a key whose hash value already exists
              in the database.  Returns 0 if no such key exists, 1 if it
              does, or a negative value on error.  Note that the database
              file should be opened read-write (not write-only) to use this
              routine.  If cdb_make_exists() returned an error, it may not be
              possible to continue constructing the database.

       int cdb_make_find(cdbmp, key, klen, mode)
          struct cdb_make *cdbmp;
          const void *key;
          unsigned klen;
          int mode;
              This routine attempts to find the key given by (key,klen) in
              the database being created.  If the given key already exists,
              an action specified by mode will be performed:

              CDB_FIND
                     checks whether the given record is already in the
                     database.

              CDB_FIND_REMOVE
                     removes all matching records by re-writing  the  database
                     file accordingly.

              CDB_FIND_FILL0
                     fills  all  matching  records with zeros and removes them
                     from index so that the records in question  will  not  be
                     findable   with   cdb_find().    This   is   faster  than
                     CDB_FIND_REMOVE, but leaves zero "gaps" in the  database.
                     Lastly inserted records, if matched, are always removed.

              If no matching keys were found, the routine returns 0.  If at
              least one record has been found/removed, a positive value will
              be returned.  On error, a negative value will be returned and
              errno will be set appropriately.  When cdb_make_find() returns
              a negative value to indicate an error, it is not possible to
              continue constructing the database.

              cdb_make_exists() is the same as  calling  cdb_make_find()  with
              mode set to CDB_FIND.

       int cdb_make_put(cdbmp, key, klen, val, vlen, mode)
          struct cdb_make *cdbmp;
          const void *key, *val;
          unsigned klen, vlen;
          int mode;
              This routine is a combination of the cdb_make_exists() and
              cdb_make_add() routines.  The mode argument controls how
              repeated (already existing) keys will be treated:

              CDB_PUT_ADD
                     no duplicate checking will be performed.  This mode
                     behaves the same as the cdb_make_add() routine.

              CDB_PUT_REPLACE
                     If the key already exists, it will be  removed  from  the
                     database before adding new key,value pair.  This requires
                     moving data in the file, and can be  quite  slow  if  the
                     file  is large.  All matching old records will be removed
                     this way.  This is the same  as  calling  cdb_make_find()
                     with  CDB_FIND_REMOVE  mode  argument followed by calling
                     cdb_make_add().

              CDB_PUT_REPLACE0
                     If the key already exists and it isn't the last record in
                     the file, the old record will be zeroed out before adding
                     the new key,value pair.  This is a lot faster than
                     CDB_PUT_REPLACE, but some extra data will still be
                     present in the file.  The data -- the old record -- will
                     not be accessible by normal searches, but will appear in
                     sequential database traversal.  This is the same as
                     calling cdb_make_find() with the CDB_FIND_FILL0 mode
                     argument followed by cdb_make_add().

              CDB_PUT_INSERT
                     adds the key,value pair only if such a key does not
                     exist in the database.  Note that since a query (see
                     query mode above) will find the first added record, this
                     mode is somewhat useless (but allows reducing the
                     database size in case of repeated keys).  This is the
                     same as calling cdb_make_exists(), followed by
                     cdb_make_add() if the key was not found.

              CDB_PUT_WARN
                     adds the key,value pair unconditionally, but also checks
                     whether this key already exists.  This is equivalent to
                     calling cdb_make_exists() to check the existence of the
                     given key, unconditionally followed by cdb_make_add().

              If any error occurred during the operation, the routine will
              return a negative integer and will set the global variable
              errno to indicate the reason for the failure.  In case of a
              successful operation with no duplicates found, the routine will
              return 0.  If any duplicates have been found or removed (which,
              in case of CDB_PUT_INSERT mode, indicates that the new record
              was not added), the routine will return a positive value.  If
              an error occurred and cdb_make_put() returned a negative value,
              it is not possible to continue the database construction
              process.

              As  with  cdb_make_exists()  and  cdb_make_find(), usage of this
              routine with any but CDB_PUT_ADD  mode  can  significantly  slow
              down database creation process, especially when mode is equal to
              CDB_PUT_REPLACE0.

       void cdb_pack(num, buf)
          unsigned num;
          unsigned char buf[4];
              helper routine that is used internally to convert the machine
              integer num to the internal form to be stored in a datafile.
              The 32-bit integer is stored in 4 bytes in network byte order.
              May be used to handle application data.  There is no error
              return.

       unsigned cdb_hash(buf, len)
          const void *buf;
          unsigned len;
              helper  routine  that  calculates cdb hash value of given bytes.
              CDB hash function is
                hash[n] = (hash[n-1] + (hash[n-1] << 5)) ^ buf[n]
              starting with
                hash[-1] = 5381

ERRORS

       The cdb library may set errno to the following values on error:

       EPROTO database file is corrupted in some way

       EINVAL the same as EPROTO above if system lacks EPROTO constant

       EINVAL flag argument for cdb_make_put() is invalid

       EEXIST flag argument for  cdb_make_put()  is  CDB_PUT_INSERT,  and  key
              already exists

       ENOMEM not  enough  memory  to  complete operation (cdb_make_finish and
              cdb_make_add)

       EIO    set by cdb_bread and cdb_seek if a  cdb  file  is  shorter  than
              expected or corrupted in some other way.

EXAMPLES

       Note: in all examples below, error checking is not shown for brevity.

   Query Mode
        int fd;
        struct cdb cdb;
        char *key, *data;
        unsigned keylen, datalen;

        /* opening the database */
        fd = open(filename, O_RDONLY);
        cdb_init(&cdb, fd);
        /* initialize key and keylen here */

        /* single-record search. */
        if (cdb_find(&cdb, key, keylen) > 0) {
          datalen = cdb_datalen(&cdb);
          data = malloc(datalen + 1);
          cdb_read(&cdb, data, datalen, cdb_datapos(&cdb));
          data[datalen] = '\0';
          printf("key=%s data=%s\n", key, data);
          free(data);
        }
        else
          printf("key=%s not found\n", key);

        /* multiple record search */
        struct cdb_find cdbf;
        int n;
        cdb_findinit(&cdbf, &cdb, key, keylen);
        n = 0;
        while(cdb_findnext(&cdbf) > 0) {
          datalen = cdb_datalen(&cdb);
          data = malloc(datalen + 1);
          cdb_read(&cdb, data, datalen, cdb_datapos(&cdb));
          data[datalen] = '\0';
          printf("key=%s data=%s\n", key, data);
          free(data);
          ++n;
        }
        printf("key=%s %d records found\n", key, n);

        /* sequential database access */
        unsigned cpos;
        int n;
        cdb_seqinit(&cpos, &cdb);
        n = 0;
        while(cdb_seqnext(&cpos, &cdb) > 0) {
          keylen = cdb_keylen(&cdb);
          key = malloc(keylen + 1);
          cdb_read(&cdb, key, keylen, cdb_keypos(&cdb));
          key[keylen] = '\0';
          datalen = cdb_datalen(&cdb);
          data = malloc(datalen + 1);
          cdb_read(&cdb, data, datalen, cdb_datapos(&cdb));
          data[datalen] = '\0';
          ++n;
          printf("record %d: key=%s data=%s\n", n, key, data);
          free(data); free(key);
        }
        printf("total records found: %d\n", n);

        /* close the database */
        cdb_free(&cdb);
        close(fd);

        /* simplistic query mode */
        fd = open(filename, O_RDONLY);
        if (cdb_seek(fd, key, keylen, &datalen) > 0) {
          data = malloc(datalen + 1);
          cdb_bread(fd, data, datalen);
          data[datalen] = '\0';
          printf("key=%s data=%s\n", key, data);
        }
        else
          printf("key=%s not found\n", key);
        close(fd);

   Create Mode
        int fd;
        struct cdb_make cdbm;
        char *key, *data;
        unsigned keylen, datalen;

        /* initialize the database */
        fd = open(filename, O_RDWR|O_CREAT|O_TRUNC, 0644);
        cdb_make_start(&cdbm, fd);

        while(have_more_data()) {
          /* initialize key and data */
          if (cdb_make_exists(&cdbm, key, keylen) == 0)
            cdb_make_add(&cdbm, key, keylen, data, datalen);
          /* or use cdb_make_put() with appropriate flags */
        }

        /* finalize and close the database */
        cdb_make_finish(&cdbm);
        close(fd);

SEE ALSO

       cdb(5), cdb(1), dbm(3), db(3), open(2).

AUTHOR

       The tinycdb package was written by Michael Tokarev <mjt@corpit.ru>.
       It is based on the ideas of, and shares its file format with, the
       original cdb library by Dan Bernstein.

LICENSE

       Public domain.

                                   Jun 2006                             cdb(3)