Provided by: pavuk_0.9.35-6.1_amd64 bug

NAME

       pavuk  -  HTTP,  HTTP  over SSL, FTP, FTP over SSL and Gopher recursive document retrieval
       program

SYNOPSIS

       pavuk [-mode {normal | resumeregets | singlepage | singlereget | sync | dontstore | ftpdir
       |  mirror}]  [-X]  [-runX]  [-bg/-nobg] [-prefs/-noprefs] [-h] [-v] [-progress/-noprogress]
       [-stime/-nostime] [-xmaxlog $nr] [-logfile $file]  [-slogfile  $file]  [-auth_file  $file]
       [-msgcat $dir] [-language $str] [-gui_font $font] [-quiet/-verbose] [-read_css/-noread_css]
       [-cdir $dir] [-scndir $dir] [-scenario $str] [-dumpscn $filename] [-lmax $nr] [-dmax  $nr]
       [-leave_level  $nr]  [-maxsize $nr] [-minsize $nr] [-asite $list] [-dsite $list] [-adomain
       $list] [-ddomain $list] [-asfx $list] [-dsfx  $list]  [-aprefix  $list]  [-dprefix  $list]
       [-amimet $list]  [-dmimet  $list]  [-pattern  $pattern] [-url_pattern $pattern] [-rpattern
       $regexp] [-url_rpattern $regexp]  [-skip_pattern  $pattern]  [-skip_url_pattern  $pattern]
       [-skip_rpattern  $regexp]  [-skip_url_rpattern  $regexp]  [-newer_than $time] [-older_than
       $time]    [-schedule    $time]    [-reschedule     $nr]     [-dont_leave_site/-leave_site]
       [-dont_leave_dir/-leave_dir]   [-http_proxy   $site[:$port]]   [-ftp_proxy  $site[:$port]]
       [-ssl_proxy  $site[:$port]]  [-gopher_proxy   $site[:$port]]   [-ftp_httpgw/-noftp_httpgw]
       [-ftp_dirtyproxy/-noftp_dirtyproxy]     [-gopher_httpgw/-nogopher_httpgw]    [-noFTP/-FTP]
       [-noHTTP/-HTTP]  [-noSSL/-SSL]   [-noGopher/-Gopher]   [-FTPdir/-noFTPdir]   [-noCGI/-CGI]
       [-FTPlist/-noFTPlist]             [-FTPhtml/-noFTPhtml]            [-noRelocate/-Relocate]
       [-force_reget/-noforce_reget]        [-nocache/-cache]         [-check_size/-nocheck_size]
       [-noRobots/-Robots]  [-noEnc/-Enc]  [-auth_name  $user] [-auth_passwd $pass] [-auth_scheme
       1/2/3/4/user/Basic/Digest/NTLM] [-auth_reuse_nonce/-no_auth_reuse_nonce] [-http_proxy_user
       $user]    [-http_proxy_pass   $pass]   [-http_proxy_auth   1/2/3/4/user/Basic/Digest/NTLM]
       [-auth_reuse_proxy_nonce/-no_auth_reuse_proxy_nonce] [-ssl_key_file $file] [-ssl_cert_file
       $file]  [-ssl_cert_passwd $pass] [-from $email] [-send_from/-nosend_from] [-identity $str]
       [-auto_referer/-noauto_referer] [-referer/-noreferer]  [-alang  $list]  [-acharset  $list]
       [-retry  $nr]  [-nregets  $nr]  [-nredirs $nr] [-rollback $nr] [-sleep $nr] [-timeout $nr]
       [-preserve_time/-nopreserve_time]                        [-preserve_perm/-nopreserve_perm]
       [-preserve_slinks/-nopreserve_slinks]   [-bufsize   $nr]  [-maxrate  $nr]  [-minrate  $nr]
       [-user_condition     $str]     [-cookie_file     $file]      [-cookie_send/-nocookie_send]
       [-cookie_recv/-nocookie_recv]    [-cookie_update/-nocookie_update]    [-cookies_max   $nr]
       [-disabled_cookie_domains      $list]       [-disable_html_tag       $TAG,[$ATTRIB][;...]]
       [-enable_html_tag  $TAG,[$ATTRIB][;...]]   [-tr_del_chr  $str]  [-tr_str_str  $str1 $str2]
       [-tr_chr_chr   $chrset1   $chrset2]   [-index_name   $str]   [-store_index/-nostore_index]
       [-store_name  $str]  [-debug/-nodebug]  [-debug_level  $level] [-browser $str] [-urls_file
       $file] [-file_quota  $nr]  [-trans_quota  $nr]  [-fs_quota  $nr]  [-enable_js/-disable_js]
       [-fnrules    $t   $m   $r]   [-store_info/-nostore_info]   [-all_to_local/-noall_to_local]
       [-sel_to_local/-nosel_to_local]     [-all_to_remote/-noall_to_remote]      [-url_strategie
       $strategie]     [-remove_adv/-noremove_adv]    [-adv_re    $RE]    [-check_bg/-nocheck_bg]
       [-send_if_range/-nosend_if_range] [-sched_cmd $str] [-unique_log/-nounique_log] [-post_cmd
       $str]  [-ssl_version  $v] [-unique_sslid/-nounique_sslid] [-aip_pattern $re] [-dip_pattern
       $re]  [-use_http11/-nouse_http11]  [-local_ip  $addr]  [-request  $req]  [-formdata  $req]
       [-httpad  $str]  [-nthreads  $nr]  [-immesg/-noimmesg]  [-dumpfd  $nr]  [-dump_urlfd  $nr]
       [-unique_name/-nounique_name]           [-leave_site_enter_dir/-dont_leave_site_enter_dir]
       [-max_time        $nr]        [-del_after/-nodel_after]        [-singlepage/-nosinglepage]
       [-dump_after/-nodump_after]  [-dump_response/-nodump_response]  [-auth_ntlm_domain   $str]
       [-auth_proxy_ntlm_domain      $str]      [-js_pattern      $re]     [-follow_cmd     $str]
       [-retrieve_symlink/-noretrieve_symlink] [-js_transform $p $t $h $a] [-js_transform2 $p  $t
       $h  $a] [-ftp_proxy_user $str] [-ftp_proxy_pass $str] [-limit_inlines/-dont_limit_inlines]
       [-ftp_list_options               $str]               [-fix_wuftpd_list/-nofix_wuftpd_list]
       [-post_update/-nopost_update] [-info_dir $dir] [-mozcache_dir $dir] [-aport $list] [-dport
       $list]  [-hack_add_index/-nohack_add_index]  [-default_prefix  $str]   [-rsleep/-norsleep]
       [-ftp_login_handshake  $host  $handshake] [-js_script_file $file] [-dont_touch_url_pattern
       $pat] [-dont_touch_url_rpattern $pat] [-dont_touch_tag_rpattern $pat]  [-tag_pattern  $tag
       $attrib     $url]    [-tag_rpattern    $tag    $attrib    $url]    [-nss_cert_dir    $dir]
       [-nss_accept_unknown_cert/-nonss_accept_unknown_cert]
       [-nss_domestic_policy/-nss_export_policy]   [-[no]verify]  [-tlogfile  $file]  [-trelative
       {object | program}] [-transparent_proxy FQDN[:port]] [-transparent_ssl_proxy  FQDN[:port]]
       [-sdemo] [-noencode] [URLs]

       pavuk -mode {normal | singlepage | singlereget} [-base_level $nr]

       pavuk -mode sync [-ddays $nr] [-subdir $dir] [-remove_old/-noremove_old]

       pavuk -mode resumeregets [-subdir $dir]

       pavuk  -mode  linkupdate  [-X]  [-h]  [-v]  [-cdir  $dir]  [-subdir  $dir]  [-scndir $dir]
       [-scenario $str]

       pavuk -mode reminder [-remind_cmd $str]

       pavuk      -mode       mirror       [-subdir       $dir]       [-remove_old/-noremove_old]
       [-remove_before_store/-noremove_before_store] [-always_mdtm/-noalways_mdtm]

DESCRIPTION

       This  manual  page  describes  how  to  use pavuk. Pavuk can be used to mirror contents of
       internet/intranet servers and to maintain copies in a  local  tree  of  documents.   Pavuk
       stores  retrieved  documents in locally mapped disk space. The structure of the local tree
       is the same as the one on the remote server. Each supported service (protocol) has its own
       subdirectory  in the local tree.  Each referenced server has its own subdirectory in these
       protocol subdirectories, named after the host followed by the port number on which the
       service resides, delimited by the _ character (as in host_port); this can be changed.
       With the option -fnrules you can change the default layout of the local document tree,
       without losing link consistency.
       With pavuk it is possible to have up-to-date copies of remote documents in the local  disk
       space.
       As  of  version  0.3pl2,  pavuk  can  automatically  restart broken connections, and reget
       partial content from an FTP server (which must support the REST command), from a  properly
       configured HTTP/1.1 server, or from a HTTP/1.0 server which supports Ranges.
       As  of  version  0.6 it is possible to handle configurations via so called scenarios.  The
       best way to create such a configuration file is to use the X Window interface  and  simply
       save the created configuration. The other way is to use the -dumpscn switch.
       As of version 0.7pl1 it is possible to store authentication information into an authinfo
       file, which pavuk can then parse and use.
       As of version 0.8pl4 pavuk can fetch documents for  use  in  a  local  proxy/cache  server
       without storing them to local documents tree.
       As  of  version  0.9pl4  pavuk  supports  SOCKS  (4/5)  proxies  if  you have the required
       libraries.
       As of version 0.9pl12 pavuk can preserve permissions of remote files and  symbolic  links,
       so it can be used for powerful FTP mirroring.
       Pavuk  supports  SSL  connections  to  FTP  servers, if you specify ftps:// URL instead of
       ftp://.
       Pavuk can automatically handle file names with characters that are unsafe for the
       filesystem.  This is currently implemented only for the Win32 platform, and it is hard
       coded.
       Pavuk  can  now  use  HTTP/1.1  protocol  for communication with HTTP servers.  It can use
       persistent connections, so one TCP connection can be used to transfer several documents
       without closing it.  This feature saves network bandwidth and also speeds up network
       communication.
       Pavuk can do configurable POST requests to HTTP servers and support  also  file  uploading
       via HTTP POST request.
       Pavuk can automatically fill in HTML forms it finds, if the user supplies data for their
       fields beforehand with the option -formdata.
       Pavuk can run  configurable  number  of  concurrently  running  downloading  threads  when
       compiled with multithreading support.

Format of supported URLs

       HTTP
       http://[[user][:password]@]host[:port][/document]
       [[user][:password]@]host[:port][/document]

       HTTPS
       https://[[user][:password]@]host[:port][/document]
       ssl[.domain][:port][/document]

       FTP
       ftp://[[user][:password]@]host[:port][/relative_path][;type=x]
       ftp://[[user][:password]@]host[:port][//absolute_path][;type=x]
       ftp[.domain][:port][/document][;type=x]

       FTPS
       ftps://[[user][:password]@]host[:port][/relative_path][;type=x]
       ftps://[[user][:password]@]host[:port][//absolute_path][;type=x]
       ftps[.domain][:port][/document][;type=x]

       Gopher
       gopher://host[:port][/type[document]]
       gopher[.domain][:port][/type[document]]

Default mapping of URLs to local filenames

       HTTP
       http://[[user][:password]@]host[:port][/document][?query]
       to
       http/host_port/[document][?query]

       HTTPS
       https://[[user][:password]@]host[:port][/document][?query]
       to
       https/host_port/[document][?query]

       FTP
       ftp://[[user][:password]@]host[:port][/path]
       to
       ftp/host_port/[path]

       FTPS
       ftps://[[user][:password]@]host[:port][/path]
       to
       ftps/host_port/[path]

       Gopher
       gopher://host[:port][/type[document]]
       to
       gopher/host_port/[type[document]]

       NOTE: Pavuk will use the string with which it queries the target server as the name of the
       results file. This file name may, in some cases, contain punctuations such as $,?,=,& etc.
       Such  punctuation  can  cause problems when you are trying to browse downloaded files with
       your browser or you are trying to process downloaded files  with  shell  scripts  or  view
       files with file management utilities which reference the name of the results file.  If you
       believe that this may be causing problems for you, then you can remove all punctuation from
       the  result  file  name  with  the option: -tr_del_chr [:punct:] or with other options for
       adjusting filenames.

OPTIONS

        All options are case insensitive.

List of options chapters

       Mode
       Help
       Indicate/Logging/Interface options
       Netli options
       Special start
       Scenario/Task options
       Directory options
       Preserve options
       Proxy options
       Proxy Authentification
       Protocol/Download Option
       Authentification
       Site/Domain/Port Limitation Options
       Limitation Document properties
       Limitation Document name
       Limitation Protocol Option
       Other Limitation Options
       Javascript support
       Cookie
       HTML rewriting engine tuning options
       Filename/URL Conversion Option
       Other Options

Mode

       -mode {normal, linkupdate, sync, singlepage, singlereget, resumeregets, dontstore, reminder, ftpdir, mirror}
              Set operation mode.
              normal - retrieves recursive documents
              linkupdate - update remote URLs in local HTML documents to local URLs if these URLs
              exist in the local tree
              sync  - synchronize remote documents with local tree (if a local copy of a document
              is older than remote, the document is retrieved again, otherwise nothing happens)
              singlepage - URL is retrieved as one page with all inline objects  (picture,  sound
              ...)  this mode is now obsoleted by -singlepage option.
              resumeregets  -  pavuk scans the local tree for files that were not retrieved fully
              and retrieves them again (uses partial get if possible)
              singlereget - get URL until it is retrieved in full
              dontstore - transfer page from server, but don't store it to the local tree.   This
              mode is suitable for fetching pages that are held in a local proxy/cache server.
              reminder - used to inform the user about changed documents
              ftpdir - used to list the contents of FTP directories

              default operation mode is normal mode.

Help

       -h     Print long verbose help message

       -v     Show version information and configuration at compilation time.

Indicate/Logging/Interface options

       -quiet Don't show any messages on the screen.

       -verbose
              Force to show output messages on the screen (default)

       -progress/-noprogress
              Show retrieving progress while running in the terminal (default is progress off)

       -stime/-nostime
              Show start and end time of transfer. (by default this information is not shown)

       -xmaxlog $nr
              Maximum  number  of log lines in the Log widget. 0 means unlimited.  This option is
              available only when compiled with the GTK+ GUI. (default value is 0)

       -logfile $file
              File where all produced messages are stored.

       -unique_log/-nounique_log
              When logfile as specified with the option  -logfile  is  already  used  by  another
              process,  try to generate new unique name for the log file. (default is this option
              turned off)

       -slogfile $file
              File to store short logs in. This  file  contains  one  line  of  information  per
              processed document.  This is meant to be used in connection with any sort of script
              to produce some statistics, for validating links on your website, or for generating
              simple  sitemaps.  Multiple pavuk processes can use this file concurrently, without
              overwriting each other's entries.  Record structure:

              - PID of pavuk process
              - TIME current time
              - COUNTER in the format current/total number of URLs
              - STATUS contains the type of the error: FATAL, ERR,
                WARN or OK
              - ERRCODE is the number code of the error
                (see errcode.h in pavuk sources)
              - URL of the document
              - PARENTURL first parent document of this URL
                (when it doesn't have parent - [none])
              - FILENAME is the name of the local file the
                document is saved under
              - SIZE size of requested document if known
              - DOWNLOAD_TIME time taken to download this
                document, in the format seconds.milliseconds
              - HTTPRESP contains the first line of the HTTP server
                response

       -language $str
              Native language that pavuk should use for communication with its user  (works  only
              when there is a message catalog for that language) GNU gettext support (for message
              internationalization) must also be compiled in. Default language is taken from your
              NLS environment variables.

       -gui_font $font
              Font used in the GUI interface. To list available X fonts use the xlsfonts command.
              This option is available only when compiled with GTK+ GUI support.

Netli options

       -[no]read_css
              Enable or disable fetching objects mentioned in style sheets.

       -[no]verify
              Enable or disable verifying server CERTS in SSL mode.

       -tlogfile $file
              Turn on Netli logging with output to specified file.

       -trelative {object | program}
              Make Netli timings relative to the start of the first object or the program.

       -transparent_proxy FQDN[:port]
              When processing URL, send the original, but send it to the IP address at FQDN

       -transparent_ssl_proxy FQDN[:port]
              When processing HTTPS URL, send the original, but send it to the IP address at FQDN

       -sdemo Output in sdemo compatible format. This is only used by sdemo. (For now  it  simply
              means output '-1' rather than '*'  when measurements are invalid.)

       -noencode
              Do not escape characters that are "unsafe" in URLS.

Special start

       -X     Start  program  with X Window interface (if compiled with support for GTK+).  Pavuk
              as default starts without GUI, and behaves as regular commandline tool.

       -runX  When used together with the -X option, pavuk starts processing of URLs  immediately
              after  the  GUI  window is launched. Without the -X given, this option doesn't have
              any effect.  Only available when compiled with GTK+ support.

       -bg/-nobg
              This option allows pavuk to detach from its terminal and run  in  background  mode.
              Pavuk  will  not  output  any  messages  to  the  terminal then. If you want to see
              messages, you have to use the -logfile option to specify  a  file  where  messages
              will be written.  By default pavuk runs in the foreground.

       -check_bg/-nocheck_bg
              Normally,  programs sent into the background after being run in foreground continue
              to output messages to the terminal.  If this option is activated, pavuk  checks  if
              it  is running as background job and will not write any messages to the terminal in
              this case. After it becomes a foreground job again, it will start writing  messages
              to  terminal  in  the  normal  way.  This option is available only when your system
              supports retrieving of terminal info via tc*() functions.

       -prefs/-noprefs
              When you turn this option on, pavuk will preserve all settings  when  exiting,  and
              when  you  run  pavuk with GUI interface again, all settings will be restored.  The
              settings will be stored in the ~/.pavuk_prefs file. By default pavuk won't restore
              its options when started.  This option is available only when compiled with GTK+.

       -schedule $time
              Execute pavuk at the time specified as parameter. The Format of the $time parameter
              is YYYY.MM.DD.hh.mm.  You need a properly configured scheduling with the at command
              on  your  system  for  using  this  option.   If default configuration (at -f %f %t
              %d.%m.%Y) of scheduling command won't work on your system, try to  adjust  it  with
              -sched_cmd option.

       -reschedule $nr
              Execute  pavuk  periodically  with  $nr hours period.  You need properly configured
              scheduling with the at command on your system for using this option.

       -sched_cmd $str
              Command to use for scheduling. Pavuk explicitly supports scheduling with at.  $str
              should  contain  regular  characters and macros, escaped by % character.  Supported
              macros are:
                 %f
                  - for script filename
                 %t
                  - for time (in format HH:MM)
                  - all macros as supported by the strftime() function

       -urls_file $file
              If you use  this  option,  pavuk  will  read  URLs  from  $file  before  it  starts
              processing.   In this file, each URL needs to be on a separate line. After the last
              URL, a single dot . followed by a LF (line-feed) character denotes the end.   Pavuk
              will  start  processing  right after all URLs have been read.  If $file is given as
              the - character, standard input will be read.

       -store_info/-nostore_info
              This option causes pavuk to store information about each document into  a  separate
              file in the .pavuk_info directory. This file is used to store the original URL from
              which the document was downloaded. For files that are downloaded via HTTP or  HTTPS
              protocols,  the whole HTTP response header is stored there. I recommend to use this
              option when you are using options that change  the  default  layout  of  the  local
              document  tree, because this info file helps pavuk to map the local filename to the
              URL. This option is also very useful when different URLs have the same filename  in
              the  local tree. When this occurs, pavuk detects this using info files, and it will
              prefix the local name with numbers. By default, storing of this extra information
              is disabled.

       -info_dir $dir
              With this option you can set the location of a separate directory for storing the
              info files created when the -store_info option is used. This is useful when you
              don't want to mix the info files with regular document files in the destination
              directory. The structure of the info files is preserved; they are just stored in a
              different directory.

       -request $req
              With this option you can specify extended information for starting URLs, such as
              query data for POST or GET requests.  The current syntax of this option is:
              URL:["]$url["]   [METHOD:["]{GET|POST}["]]   [ENCODING:["]{u|m}["]]
              [FIELD:["]variable=value["]]   [FILE:["]variable=filename["]]
              [LNAME:["]local_filename["]]

              - URL: specifies request URL
              - METHOD: specifies request method for URL and is
                one of GET or POST.
              - ENCODING: specifies encoding for request body data.
                  m is for multipart/form-data encoding
                  u is for application/x-www-form-urlencoded
                  encoding
              - FIELD: specifies field of request data in format
                  variable=value. For encoding of special characters
                  in variable and value you can use same encoding
                  as is used in application/x-www-form-urlencoded
                  encoding.
              - FILE: specifies special field of query, which is
                  used to specify file for POST based file upload.
              - LNAME: specifies localname for this request
       When you need to use inside the FIELD: and FILE: fields of request  specification  special
       characters,  you  should use the application/x-www-form-urlencoded encoding of characters.
       It means all nonASCII characters, quote character ("),  space  character  (  ),  ampersand
       character (&), percent character (%) and equal character (=) should be encoded in form %xx
       where xx is hexadecimal representation of ASCII value  of  character.  So  for  example  %
       character should be encoded like %25.

       -formdata $req
              This  option  gives  you  chance  to  specify  contents for HTML forms found during
              traversing document tree.
               Current syntax of this option is same as for -request option,  but  ENCODING:  and
              METHOD: are meaningless in this option semantics.
               In  URL:  you  have to specify HTML form action URL, which will be matched against
              action URLs found in processed HTML documents. If  pavuk  finds  action  URL  which
              matches that supplied in -formdata option, pavuk will construct GET or POST request
              from data supplied in this option and from default form field  values  supplied  in
              HTML  document. Values supplied on the command line take precedence over those
              supplied in the HTML file.

       -nthreads $nr
              By means of this option you can specify how many concurrent threads  will  download
              documents. Default pavuk executes 3 concurrent downloading threads.  This option is
              available only when pavuk is compiled to support multithreading.

       -immesg/-noimmesg
              Pavuk's default behavior when running multiple downloading threads is to buffer all
              output messages in a memory buffer and flush that buffered data only when a thread
              finishes processing one document. With this option you can change this behavior to
              see the messages immediately when they are produced. It is only useful when you
              want to debug some special cases in a multithreading environment.  This option is
              available only when pavuk is compiled to support multithreading.

       -dumpfd $nr
              For scripting it is sometimes useful to be able to download a document directly to
              a pipe or variable instead of storing it to a regular file. In such a case you can
              use this option to dump data, for example to stdout ($nr = 1).

       -dump_after/-nodump_after
              While  using -dumpfd option in multithreaded pavuk, it is required to dump document
              in one moment because documents downloaded in multiple threads  can  overlap.  This
              option  is  also  useful  when  you want to dump document after pavuk adjusts links
              inside HTML documents.

       -dump_response/-nodump_response
              This option has effect only when used with the -dumpfd option. It is used to dump
              HTTP response headers.

       -dump_urlfd $nr
              When you use this option, pavuk will output all URLs found in HTML documents to
              file descriptor $nr. You can use this option to extract and convert all URLs to
              absolute.

Scenario/Task options

       -scenario $str
              Name  of  scenario to load and/or run. Scenarios are files with a structure similar
              to the .pavukrc file.  Scenarios contain saved configurations. You can use  it  for
              periodical  mirroring.  Parameters from scenarios specified at the command line can
              be overwritten by command line parameters.  To be able to use this option, you need
              to specify scenario base directory with option -scndir.

       -dumpscn $filename
              Store  actual configuration into scenario file with name $filename.  This is useful
              to quickly create pre-configured scenarios for manual editing.

Directory options

       -msgcat $dir
              Directory which contains the message  catalog  for  pavuk.   If  you  do  not  have
              permission  to  store  a  pavuk message catalog in the system directory, you should
              simply create similar structure of directories in your home directory as it  is  on
              your system.

              For example:

              Your native language is German, and your home directory is /home/jano.

              You  should  at first create the directory /home/jano/locales/de/LC_MESSAGES/, then
              put the German pavuk.mo there and set -msgcat to /home/jano/locales/.  If you  have
              properly  set  locale environment values, you will see pavuk speaking German.  This
              option is available only when you compiled in  support  for  GNU  gettext  messages
              internationalization.

       -cdir $dir
              Directory where all retrieved documents are stored. If not specified, the
              current directory is used. If the specified directory doesn't  exist,  it  will  be
              created.

       -scndir $dir
              Directory  in  which  your scenarios are stored.  You must use this option when you
              are loading or storing scenario files.

Preserve options

       -preserve_time/-nopreserve_time
              Store downloaded document with same  modification  time  as  on  the  remote  site.
              Modification  time  will  be  set only when such information is available (some FTP
              servers do not support the MDTM command, and some documents  on  HTTP  servers  are
              created online so pavuk can't retrieve the modification time of this document).  By
              default the modification time of documents isn't preserved.

       -preserve_perm/-nopreserve_perm
              Store downloaded document with the same permissions as on the  remote  site.   This
              option  has  effect  only  when downloading a file through FTP protocol and assumes
              that the -ftplist option is used. By default permissions are not preserved.

       -preserve_slinks/-nopreserve_slinks
              Set symbolic links to point exactly to same location as on the remote server; don't
              do  any relocations.  This option has effect only when downloading file through FTP
              protocol and assumes that the -ftplist option is used.  Default symbolic links  are
              not  preserved, and are retrieved as regular documents with full contents of linked
              file.

              For example, assume that on the FTP server ftp.xx.org  there  is  a  symbolic  link
              /pub/pavuk/pavuk-current.tgz,  which  points  to /tmp/pub/pavuk-0.9pl11.tgz.  Pavuk
              will create symbolic link ftp/ftp.xx.org_21/pub/pavuk/pavuk-current.tgz
              if the option -preserve_slinks is used, this symbolic link will point to
              /tmp/pub/pavuk-0.9pl11.tgz
              if the option -preserve_slinks is not used, this symbolic link will point to
               ../../tmp/pub/pavuk-0.9pl11.tgz

       -retrieve_symlink/-noretrieve_symlink
              Retrieve files behind symbolic links instead of replicating symlinks in local tree.

Proxy options

       -http_proxy $site[:$port]
              If  this  parameter  is  used,  then all HTTP requests are going through this proxy
              server. This is useful if your site resides behind a firewall, or if  you  want  to
              use  a  HTTP proxy cache server. The default port number is 8080.  Pavuk allows you
              to specify multiple HTTP proxies (using multiple -http_proxy options) and  it  will
              rotate proxies with roundrobin priority disabling proxies with errors.

       -nocache/-cache
              Use  this  option  whenever you want to get the document directly from the site and
              not from your HTTP proxy cache server. Default pavuk allows  transfer  of  document
              copies from cache.

       -ftp_proxy $site[:$port]
              If  this  parameter  is  used,  then  all FTP requests are going through this proxy
              server.  This is useful when your site resides behind a firewall, or if you want to
              use  FTP  proxy cache server.  The default port number is 22.  Pavuk supports three
              different types of proxies for FTP, see the options  -ftp_httpgw,  -ftp_dirtyproxy.
              If  none  of  the mentioned options is used, then pavuk assumes a regular FTP proxy
              with USER user@host connecting to remote FTP server.

       -ftp_httpgw/-noftp_httpgw
              The specified FTP proxy is a HTTP gateway for the FTP protocol. Default  FTP  proxy
              is regular FTP proxy.

       -ftp_dirtyproxy/-noftp_dirtyproxy
              The  specified  FTP  proxy  is a HTTP proxy which supports a CONNECT request (pavuk
              should use full FTP protocol, except for active data connections).  Default FTP
              proxy is regular FTP proxy.  If both -ftp_dirtyproxy and -ftp_httpgw are specified,
              -ftp_dirtyproxy is preferred.

       -gopher_proxy $site[:$port]
              Gopher gateway or proxy/cache server.

       -gopher_httpgw/-nogopher_httpgw
              The specified Gopher proxy server is a HTTP  gateway  for  Gopher  protocol.   When
              -gopher_proxy  is  set  and  this  -gopher_httpgw option isn't used, pavuk is using
              proxy as HTTP tunnel with CONNECT request to open connections to Gopher servers.

       -ssl_proxy $site[:$port]
              SSL proxy (tunneling) server [as that in CERN httpd  +  patch  or  in  Squid]  with
              enabled  CONNECT request (at least on port 443). This option is available only when
              compiled with SSL support (you need the SSLeay or OpenSSL libraries with
              development headers).

Proxy Authentification

       -http_proxy_user $user
              Username for HTTP proxy authentification.

       -http_proxy_pass $pass
              Password for HTTP proxy authentification.

       -http_proxy_auth {1/2/3/4/user/Basic/Digest/NTLM}
              Authentification  scheme  for  proxy  access.  Similar  meaning as the -auth_scheme
              option (see help for this option for more details).  Default is 2 (Basic scheme).

       -auth_proxy_ntlm_domain $str
              NT or LM domain used for authorization against HTTP proxy server when NTLM
              authentification  scheme  is  required. This option is available only when compiled
              with OpenSSL or libdes libraries.

       -auth_reuse_proxy_nonce/-noauth_reuse_proxy_nonce
              When using HTTP Proxy Digest access  authentification  scheme  use  first  received
              nonce value in multiple following requests.

       -ftp_proxy_user $user
              Username for FTP proxy authentification.

       -ftp_proxy_pass $pass
              Password for FTP proxy authentification.

Protocol/Download Options

       -ftp_passive
              Uses passive ftp when downloading via ftp.

       -ftp_active
              Uses active ftp when downloading via ftp.

       -active_ftp_port_range $min:$max
              This  option  permits to specify the ports used for active ftp. This permits easier
              firewall configuration since the range of ports can be restricted.

              Pavuk will randomly choose a number from within the specified range until  an  open
              port  is  found.  Should  no open ports be found within the given range, pavuk will
              default to a normal kernel-assigned port,  and  a  message  (debug  level  net)  is
              output.

              The  port  range  selected must be in the non-privileged range (eg. greater than or
              equal to 1024); it is STRONGLY RECOMMENDED that the chosen range be large enough to
              handle  many  simultaneous  active connections (for example, 49152-65534, the IANA-
              registered ephemeral port range).

       -always_mdtm/-noalways_mdtm
              Force pavuk to always use "MDTM" to determine the file modification time and  never
              use cached times determined when listing the remote files.

       -remove_before_store/-noremove_before_store
              Force  unlink'ing  of files before new content is stored to a file. This is helpful
              if the local files are hardlinked to some other directory and after  mirroring  the
              hardlinks are checked. All "broken" hardlinks indicate a file update.

       -retry $nr
              Set the number of attempts to transfer a processed document.  The default is 1, which
              means pavuk will retry once to get documents which failed on the first attempt.

       -nregets $nr
              Set the number of allowed regets on a single document,  after  a  broken  transfer.
              Default value for this option is 2.

       -nredirs $nr
              Set the number of allowed HTTP redirects (use this to prevent loops).  The default
              value for this option is 5, which conforms to the HTTP specification.

       -force_reget/-noforce_reget
              Force reget'ing of the whole document after  a  broken  transfer  when  the  server
              doesn't  support  retrieving of partial content.  Pavuk default behavior is to stop
              getting documents which don't allow restarting of transfer from specified position.

       -timeout $nr
              Timeout for stalled connections in minutes. This value is also used for  connection
              timeouts.  For  sub-minute  timeouts  you  can use floating point numbers.  Default
              timeout is 0, and that means timeout checking is disabled.

       -noRobots/-Robots
              This switch suppresses the use  of  the  robots.txt  standard,  which  is  used  to
              restrict  access  of  Web  robots  to  some locations on the web server. Default is
              allowed checking of robots.txt files on HTTP servers.  Enable  this  option  always
              when  you  are  downloading  huge  sets  of  pages with unpredictable layout.  This
              prevents you from upsetting server administrators :-).

       -noEnc/-Enc
              This switch suppresses using of gzip or compress or deflate encoding in transfer. I
              don't  know  if some servers are broken or what, but they are propagating that MIME
              type application/gzip or application/compress as encoded.  Turn  this  option  off,
              when you don't have libz support compiled in and also lack the gzip program which is
              used to decode documents encoded this way.  By default decoding of downloaded
              documents is disabled.

       -check_size/-nocheck_size
              The  option -nocheck_size should be used if you are trying to download pages from a
              HTTP server which sends a  wrong  Content-Length:  field  in  the  MIME  header  of
              response.   Default  pavuk  behavior  is  to  check  this  field  and complain when
              something is wrong.

       -maxrate $nr
              If you don't want to give all your transfer bandwidth to pavuk, use this option  to
              set  pavuk's  maximum transfer rate. This option accepts a floating point number to
              specify the transfer rate in kB/s. If you want to get optimal settings, you also have
              to play with the size of the read buffer (option -bufsize) because pavuk is doing
              flow control only at application level.  By default pavuk uses full bandwidth.

       -minrate $nr
              If you hate slow transfer rates, this option allows you  to  break  transfers  with
              slow  speed.  You  can  set  the  minimum transfer rate, and if the connection gets
              slower than the given rate, the transfer will be stopped. The minimum transfer rate
              is given in kB/s.  At default pavuk doesn't check this limit.

       -bufsize $nr
              This  option  is  used to specify the size of the read buffer (default size: 32kB).
              If you have a very fast connection, you may increase the size of the buffer to  get
              a  better read performance. If you need to decrease the transfer rate, you may need
              to decrease the size of the buffer and set  the  maximum  transfer  rate  with  the
              -maxrate option. This option accepts the size of the buffer in kB.

       -fs_quota $nr
              If  you  are  running pavuk on a multiuser system, you may need to avoid filling up
              your file system. This option lets you specify how much space must remain free.  If
              pavuk detects an underrun of the free space, it will stop downloading files.
              Specify this quota in kB. The default value is 0, which means no checking of this
              quota.

       -file_quota $nr
              This  option is useful when you want to limit downloading of big files, but want to
              download at least $nr kilobytes from big files.  A big file  will  be  transferred,
              and  when it reaches the specified size, transfer will break. Such document will be
              processed as properly downloaded, so be careful when using this option.  At default
              pavuk is transferring full size of documents.

       -trans_quota $nr
              If  you  are aware that your selection should address a big amount of data, you can
              use this option to limit the amount  of  transferred  data.   Default  is  by  size
              unlimited transfer.

       -max_time $nr
              Set maximum amount of time for program run. After time is exceeded, pavuk will stop
              downloading. Time is specified in  minutes.  Default  value  is  0,  and  it  means
              downloading time is not limited.

       -url_strategy $strategy
              This  option  allows  you to specify a downloading order for URLs in document tree.
              This option accepts the following strings as parameters :

              level - will order URLs as it loads it from HTML files (default)
              leveli - as previous, but inline objects URLs come first
              pre - will insert URLs from actual HTML document at start, before other
              prei - as previous, but inline objects URLs come first

       -send_if_range/-nosend_if_range
              Send If-Range: header in  HTTP  request.  I  found  out,  that  some  HTTP  servers
              (greetings,  MS  :-)) are sending different ETag: fields in different responses for
              the same, unchanged document. This causes problems when pavuk attempts to  reget  a
              document from such a server: pavuk will remember the old ETag value and use it in
              following requests for this document.  If the server checks it with  the  new  ETag
              value  and  it differs, it will refuse to send only part of the document, and start
              the download from scratch.

       -ssl_version $v
              Set required SSL protocol version for SSL communication.  $v is one of ssl2, ssl23,
              ssl3  or  tls1.   This  option  is  available  only when compiled with SSL support.
              Default is ssl23.

       -unique_sslid/-nounique_sslid
              This option can be used if you want to use a unique SSL ID for  all  SSL  sessions.
              Default  pavuk  behavior  is  to  negotiate  each  time  new  session  ID  for each
              connection.  This option is available only when compiled with SSL support.

       -use_http11/-nouse_http11
              This option is used to switch between HTTP/1.0 and HTTP/1.1 protocol used with HTTP
              servers. Currently HTTP/1.1 is not used by default because its implementation
              is very fresh and not 100% tested. Even so, using HTTP/1.1 is highly
              recommended, because it is faster than HTTP/1.0 and uses less network bandwidth for
              initiating connections. In a future version I will make HTTP/1.1 the
              default.

       -local_ip $addr
              You  can  use  this  option  when  you  want to use specified network interface for
              communication with other hosts. This option is suitable for multihomed  hosts  with
              several  network  interfaces. Address should be entered as regular IP address or as
              host name.

       -identity $str
              This option allows you to specify content of User-Agent:  field  of  HTTP  request.
              This is useful when scripts on a remote server return different documents for the
              same URL to different browsers, or if some HTTP server refuses to serve documents to
              Web robots like pavuk. By default pavuk sends the string pavuk/$VERSION in the
              User-Agent: field.

       -auto_referer/-noauto_referer
              This  option  forces  pavuk  to send HTTP Referer: header field with starting URLs.
              Content of this field will be self URL. Using this option is required, when  remote
              server checks the Referer: field.  By default pavuk won't send Referer: field with
              starting URLs.

       -referer/-noreferer
              This option allows to enable and disable the transmission of HTTP  Referer:  header
              field. At default pavuk sends Referer: field.

       -httpad $str
              In some cases you may want to add user defined fields to HTTP/HTTPS requests.  This
              option is exactly for this purpose. In $str you can  directly  specify  content  of
              additional  header.  If  you  specify  only  raw  header,  it will be used only for
              starting requests. When you want  to  use  this  header  with  each  request  while
              crawling, prefix the header with + character.

       -del_after/-nodel_after
              This  option  allows  you  to  delete  FILES  from  REMOTE server, when download is
              properly finished. At default is this option off.

       -FTPlist/-noFTPlist
              When option -FTPlist will be used, pavuk will retrieve content of  FTP  directories
              with  FTP  command  LIST  instead of NLST. So the same listing will be retrieved as
              with "ls -l" UNIX command.  This  option  is  required  if  you  need  to  preserve
              permissions of remote files or you need to preserve symbolic links.  Pavuk supports
              wide listing on FTP servers with regular  BSD  or  SYSV  style  "ls  -l"  directory
              listing,  on  FTP  servers with EPFL listing format, VMS style listing, DOS/Windows
              style listing and Novell listing format.  Default pavuk behavior is to use NLST for
              FTP directory listings.

       -ftp_list_options $str
              Some  FTP  servers  require to supply extra options to LIST or NLST FTP commands to
              show all files and directories properly. But be sure not to use any  extra  options
              which  can  reformat  output  of  the listing. Useful is especially -a option which
              force FTP server to show also dot files  and  directories  and  with  broken  WuFTP
              servers it also helps to produce full directory listings not just files.

       -fix_wuftpd/-nofix_wuftpd
              This option is the result of several attempts to get the -remove_old option working
              properly with WuFTPd server when the -FTPlist option is used. The problem is
              that the FTP command LIST on WuFTPd doesn't mind when trying to list a nonexisting
              directory, and indicates success in FTP response  code.   When  you  activate  this
              option,  pavuk  uses extra FTP command (STAT -d dir) to check whether the directory
              really exists. Don't use this option until you are sure that you really need it!

Authentification

       -auth_file $file
              File where you have stored authentification information for access to some service.
              For file structure see below in FILES section.

       -auth_name $user
              If  you  are using this parameter, program is doing authentification with each HTTP
              access to document. Use this only if you know that only one HTTP  server  could  be
              accessed  or  use  -asite option to specify site to which you use authentification.
              Else your auth parameters will be sent to each accessed HTTP server.

       -auth_passwd $passwd
              Value of this parameter is used as password for authentification

       -auth_scheme {1/2/3/4/user/Basic/Digest/NTLM}
              This parameter specifies used authentification scheme.
              1 or user means user authentification scheme is used  as  defined  in  HTTP/1.0  or
              HTTP/1.1.  Password and user name are sent unencoded.
              2  or  Basic  means  Basic  authentification scheme is used as defined in HTTP/1.0.
              Password and user name are sent BASE64 encoded.
              3 or Digest means Digest access authentification scheme based on MD5  checksums  as
              defined in RFC2069.
              4  or  NTLM means NTLM proprietary access authentification scheme used by Microsoft
              IIS or Proxy servers.  When you use this scheme, you must also  specify  NT  or  LM
              domain  with  option -auth_ntlm_domain. This scheme is supported only when compiled
              with OpenSSL or libdes libraries.

       -auth_ntlm_domain $str
              NT or LM domain used for authorization against HTTP server when NTLM authentification
              scheme  is  required.  This  option is available only when compiled with OpenSSL or
              libdes libraries.

       -auth_reuse_nonce/-noauth_reuse_nonce
              While using HTTP Digest access authentification scheme  use  first  received  nonce
              value in more following requests.  Default pavuk negotiates nonce for each request.

       -ssl_key_file $file
              File  with  public  key  for  SSL  certificate  (learn  more from SSLeay or OpenSSL
              documentation) This option is available only when compiled with  SSL  support  (you
              need SSLeay or OpenSSL libraries and development headers)

       -ssl_cert_file $file
              Certificate  file  in  PEM format (learn more from SSLeay or OpenSSL documentation)
              This option is available only when compiled with SSL support (you need SSLeay or
              OpenSSL libraries and development headers)

       -ssl_cer_passwd $str
              Password   used  to  generate  certificate  (learn  more  from  SSLeay  or  OpenSSL
              documentation) This option is available only when compiled with  SSL  support  (you
              need SSLeay or OpenSSL libraries and development headers)

       -nss_cert_dir $dir
              Config  directory  for  NSS  (Netscape  SSL  implementation)  certificates. Usually
              ~/.netscape (created by Netscape communicator/navigator) or profile directory below
              ~/.mozilla  (created by Mozilla browser). The directory should contain cert7.db and
              key3.db files. If you use neither Mozilla nor Netscape, you must create these files by
              utilities  distributed  with  NSS  libraries. Pavuk opens certificate database only
              readonly.  This option is available only when pavuk is compiled  with  SSL  support
              provided by Netscape NSS SSL implementation.

       -nss_accept_unknown_cert/-nonss_accept_unknown_cert
              By default pavuk will reject connections to SSL servers whose certificate is not
              stored in local certificate database  (set  by  -nss_cert_dir  option).   You  must
              explicitly  force  pavuk  to allow connection to servers with unknown certificates.
              This option is available only when pavuk is compiled with SSL support  provided  by
              Netscape NSS SSL implementation.

       -nss_domestic_policy/-nss_export_policy
              Selects  sets  of  ciphers  allowed/disabled  by  USA export rules.  This option is
              available only when pavuk is compiled with SSL support provided by Netscape NSS SSL
              implementation.

       -from $email
              This  parameter  is  used  when  accessing  anonymous  FTP server as password or is
              optionally inserted into From  field  in  HTTP  request.  If  not  specified  pavuk
              discovers this from USER environment variable and from site hostname.

       -send_from/-nosend_from
              This  option  is  used  for  enabling  or disabling sending of user identification,
              entered in -from option, as FTP anonymous user password and  From:  field  of  HTTP
              request.  By default this option is off.

       -ftp_login_handshake $host $handshake
              When  you  need to use nonstandard login procedure for some of FTP servers, you can
              use this option to change default pavuk login procedure. To allow more flexibility,
              you  can  assign  the  login  procedure  to  some  server  or to all. When $host is
              specified as empty string (""), then attached login procedure is assigned to all
              FTP  servers  besides those having assigned own login procedures. In the $handshake
              parameter you can specify exact login procedure specified by FTP commands  followed
              by expected FTP response codes delimited with backslash (\) characters.
              For  example  this  is  default  login procedure when logging in regular ftp server
              without going through proxy  server  :  USER  %u\331\PASS  %p\230.  There  are  two
              commands  followed  by  two  response  codes.  After USER command pavuk expects FTP
              response code 331 and after PASS command pavuk expects  from  server  FTP  response
              code  230.  In  ftp commands you can use following macros which will be replaced by
              respective values:

               %u - user name used to access FTP server
               %p - password used to access FTP server
               %U - user name used to access FTP proxy server
               %P - password used to access FTP proxy server
               %h - hostname of FTP server
               %s - port number on which FTP server listens

Site/Domain/Port Limitation Options

       -asite $list
              Specify comma separated list of allowed sites on  which  referenced  documents  are
              stored.

       -dsite $list
              Specify  comma  separated list of disallowed sites.  Previous parameter is opposite
              to this one. If both are used the last occurrence of them is used to be valid.

       -adomain $list
              Specify comma separated list of allowed domains on which referenced  documents  are
              stored.

       -ddomain $list
              Specify  comma separated list of disallowed domains. Previous parameter is opposite
              to this one. If both are used the last occurrence of them is used to be valid.

       -aport $list
              In $list, you can write comma separated list of  ports  from  which  you  allow  to
              download documents.

       -dport $list
              This  option  is  opposite  option to previous option. It is used to specify denied
              ports. If both -aport and -dport options are used the last occurrence  of  them  is
              used to be valid and all other occurrences will be omitted.

Limitation Document properties

       -amimet $list
              List  of  comma  separated  allowed  MIME  types. You can use with this option also
              wildcard patterns.

       -dmimet $list
              List of comma separated disallowed MIME types. You can use with  this  option  also
              wildcard  patterns.   Previous  parameter is opposite to this one. If both are used
              the last occurrence of them is used to be valid.

       -maxsize $nr
              Maximum allowed size of document.  This option is applied only when pavuk is able
              to detect the document size before starting the transfer.  Default value is 0, and it
              means this limit isn't applied.

       -minsize $nr
              Minimal allowed size of document.  This option is applied only when pavuk is able
              to detect the document size before starting the transfer.  Default value is 0, and it
              means this limit isn't applied.

       -newer_than $time
              Allow only transfer of documents with modification time  newer  than  specified  in
              parameter  $time. Format of $time is: YYYY.MM.DD.hh:mm.  To apply this option pavuk
              must be able to detect modification time of document.

       -older_than $time
              Allow only transfer of documents with modification time  older  than  specified  in
              parameter  $time. Format of $time is: YYYY.MM.DD.hh:mm.  To apply this option pavuk
              must be able to detect modification time of document.

       -noCGI/-CGI
              This switch prevents the transfer of dynamically generated parametric documents
              through CGI interface. This is detected by the occurrence of a ? character inside the
              URL.  Default pavuk behavior is to allow transfer of URLs with query strings.

       -alang $list
              This allows you to specify an ordered comma separated list of preferred natural
              languages. This option works only with HTTP and HTTPS protocol using the Accept-
              Language: MIME field.

       -acharset $list
              This option allows you to enter a comma separated list of preferred encodings of
              transferred documents. This works only with HTTP and HTTPS URLs and only if such
              document encodings are located on destination server.
              example: -acharset iso-8859-2,windows-1250,utf8

Limitation Document name

       -asfx $list
              This parameter allows you to specify set of suffixes used to restrict selection  of
              documents which will be processed.

       -dsfx $list
              Set  of  suffixes  that  are used to specify restriction on selection of documents.
              This one is inverse to previous option. They are segregating each other.

       -aprefix $list, -dprefix $list
              These two options allow you to specify set of allowed or disallowed prefixes of
              documents. They are segregating each other.

       -pattern $pattern
              This option allows you to specify wildcard pattern for documents. All documents are
              tested if they match this pattern.

       -rpattern $reg_exp
              This is equal option as previous, but this  uses  regular  expressions.   Available
              only on platforms which have any supported RE implementation.

       -skip_pattern $pattern
              This  option  allows  you  to specify wildcard pattern for documents that should be
              skipped.  All documents are tested if they match this pattern.

       -skip_rpattern $reg_exp
              This is equal option as previous, but this  uses  regular  expressions.   Available
              only on platforms which have any supported RE implementation.

       -url_pattern $pattern
              This option allows you to specify wildcard pattern for URLs. All URLs are tested if
              they match this pattern.
              Example:
              -url_pattern http://\*.idata.sk:\*/~ondrej/\* . this option enables all  HTTP  URLs
              from domain .idata.sk on all ports which are located under /~ondrej/.

       -url_rpattern $reg_exp
              This  is  equal  option  as previous, but this uses regular expressions.  Available
              only on platforms which have any supported RE implementation.

       -skip_url_pattern $pattern
              This option allows you to specify wildcard pattern for URLs that should be skipped.
              All URLs are tested if they match this pattern.

       -skip_url_rpattern $reg_exp
              This  is  equal  option  as previous, but this uses regular expressions.  Available
              only on platforms which have any supported RE implementation.

       -aip_pattern $re
              This option allows you to limit set of transferred documents by server IP  address.
              IP  address  can  be specified as regular expressions, so it is possible to specify
              set of IP addresses by one expression.  Available only on platforms which have  any
              supported RE implementation.

       -dip_pattern $re
              This option is similar to the previous option, but is used to specify a set of disallowed IP
              addresses.  Available only on platforms which have any supported RE implementation.

       -tag_pattern $tag $attrib $url
              More powerful version of -url_pattern option for more precise matching  of  allowed
              URLs  based  on  HTML  tag name pattern, HTML tag attribute name pattern and on URL
              pattern. You can use in all three parameters of this option wildcard patterns, thus
              something   like   -tag_pattern  '*'  '*'  url_pattern  is  equal  to  -url_pattern
              url_pattern. The $tag and $attrib parameters are always matched against uppercase
              strings.  For example if you want just let pavuk follow only regular links ignoring
              any stylesheets, images, etc., use option -tag_pattern A HREF '*'.

       -tag_rpattern $tag $attrib $url
              This is variation on the -tag_pattern.  It  uses  regular  expression  patterns  in
              parameters instead of wildcard patterns used in the previous option.

Limitation Protocol Option

       -noHTTP/-HTTP
              This switch suppresses all transfers through HTTP protocol.  By default transfer
              through HTTP is enabled.

       -noSSL/-SSL
              This switch suppresses all transfers through HTTPS protocol (HTTP protocol over
              SSL).  By default transfer through HTTPS is enabled.  This option is available only
              when compiled with SSL support (you need SSLeay or OpenSSL libraries and
              development headers).

       -noGopher/-Gopher
              Suppress all transfers through Gopher Internet protocol.  By default transfer
              through Gopher is enabled.

       -noFTP/-FTP
              This switch prevents processing documents located on all FTP servers.  By default
              transfer through FTP is enabled.

       -noFTPS/-FTPS
              This switch prevents processing documents located on all FTP servers accessed
              through SSL.  By default transfer through FTPS is enabled.  This option is available
              only when compiled with SSL support (you need SSLeay or OpenSSL libraries and
              development headers).

       -FTPhtml/-noFTPhtml
              By using of option -FTPhtml you can force pavuk to process  HTML  files  downloaded
              with FTP protocol.  At default pavuk won't parse HTML files from FTP servers.

       -FTPdir/-noFTPdir
              Force  recursive  processing  of  FTP  directories  too.   At  default is recursive
              downloading from FTP servers denied.

       -disable_html_tag $TAG,[$ATTRIB][;...]
       -enable_html_tag $TAG,[$ATTRIB][;...]
              Enable or disable processing of particular HTML tags or attributes.  By default all
              supported HTML tags are enabled.

              For  example  if  you  don't  want  to  process  all  images  you should use option
              -disable_html_tag 'IMG,SRC;INPUT,SRC;BODY,BACKGROUND' .

Other Limitation Options

       -subdir $dir
              Subdirectory  of  local  tree  directory,  to  limit  some  of  the  modes   {sync,
              resumeregets, linkupdate} in its tree scan.

       -dont_leave_site/-leave_site
              (Don't)  leave starting site. At default pavuk can span host when recursing through
              WWW tree.

       -dont_leave_dir/-leave_dir
              (Don't) leave starting directory. If -dont_leave_dir option is used pavuk will stay
              only  in  starting  directory (including its own subdirectories).  At default pavuk
              can leave starting directories.

       -leave_site_enter_dir/-dont_leave_site_enter_dir
              If you are downloading WWW tree which spans multiple hosts with huge trees, you may
              want  to  allow  downloading  of  document  which  are in directory hierarchy below
              directory which we visited as first on  each  site.  To  obtain  this,  use  option
              -dont_leave_site_enter_dir.  As  default  pavuk  will  go  also to higher directory
              levels on that site.

       -lmax $nr
              Set maximum allowed level of tree traverse. Default is set to 0, which means that
              pavuk can traverse ad infinitum. As of version 0.8pl1 inline objects of HTML pages
              are placed at same level as parent HTML page.

       -leave_level $nr
              Maximum level of documents outside from site of starting URL.  Default is set to 0,
              and 0 means that checking is not applied.

       -site_level $nr
              Maximum level of sites outside from site of starting URL.  Default is set to 0, and
              0 means that checking is not applied.

       -dmax $nr
              Set maximum allowed number of documents that are processed.  Default  value  is  0.
              That means no restrictions are used in number of processed documents.

       -singlepage/-nosinglepage
              Using  option  -singlepage  allows  you  to  transfer  just HTML pages with all its
              inlined objects (pictures, sounds, frame documents, ...).  As default  is  disabled
              single page transfer. This option makes -mode singlepage option obsolete.

       -limit_inlines/-dont_limit_inlines
              With this option you can control whether limiting options apply also to inline
              objects (pictures, sounds, ...). This is useful when you want to download specified
              set of HTML pages with all inline objects without any restrictions.

       -user_condition $str
              Script or program name for user's own conditions. You can write any script which
              decides by its exit value whether to download URL or not. Script gets from pavuk
              any number of options, with this meaning :

                 -url $url - processed URL
                 -parent $url - any number of parent URLs
                 -level $nr - level of this URL from starting URL
                 -size $nr - size of requested URL
                 -date $datenr - modification time of requested URL in format YYYYMMDDhhmmss

              The  exit  status  0 of script or program means that current URL should be rejected
              and nonzero exit status means that URL should be accepted.
              Warning : use user conditions only if required because of big slowdowns  caused  by
              forking scripts for each checked URL.

       -follow_cmd $str
              This option allows you to specify script or program which can by its exit status
              decide whether to follow URLs from current HTML document. This script will be
              called after download of each HTML document. The script will get following options
              as its parameters:

                 -url $url - URL of current HTML document
                 -infile $file - local file where is stored HTML document

              The exit status 0 of script or program means that URLs from current  document  will
              be  disallowed,  other  exit status means, that pavuk can follow links from current
              HTML document.

Javascript support

       Support for scripting languages like JavaScript or VBScript in pavuk is done in a bit
       hacky way. There is no interpreter for these languages, so not all things will work.
       The whole support which pavuk has for these scripting languages is based on regular
       expression patterns specified by user. Pavuk searches for these patterns in DOM event
       attributes of HTML tags, in javascript:... URLs, in inline scripts in HTML documents
       enclosed between <script></script> tags and in separate javascript files. Support for
       scripting languages is only available when pavuk is compiled with proper regular
       expression library (POSIX/GNU/PCRE).

       -enable_js/-disable_js
              These options are used to enable or disable processing of Javascript parts of HTML
              documents. You must enable this option to be able to use processing of javascript
              patterns.

       -js_pattern $re
              With  this  option  you  are  specifying  what  patterns  match interested parts of
              Javascript for extracting URLs. The parameter must be RE pattern with  exactly  one
              subpattern  which match exactly the URL part. For example to match URL in following
              type of javascript expressions :
                document.b1.src='pics/button1_pre.jpg'
              you can use this pattern
                "^document.[a-zA-Z0-9_]*.src[ ]*=[ ]*'(.*)'$"

       -js_transform $p $t $h $a
              This option is similar to previous, but you can use custom transform rules for  the
              URL  parts  of  patterns and also specify the exact HTML tag and attribute where to
              look for this pattern. The $p is the  pattern  to  match  the  interested  part  of
              script.  The  $t is transform rule for the URL, in this parameter the $x parts will
              be replaced by x-th subpattern of the $p pattern. The $h parameter  is  exact  HTML
              tag  or  "*"  when this apply to javascript: URLs or DOM event attribs or "" (empty
              string) when this apply to javascript body of HTML document or  separate  JS  file.
              The  $a  parameter  is exact HTML attrib of tag or "" (empty string) when this rule
              apply to javascript body.

       -js_transform2 $p $t $h $a
              This option is very similar to previous. The meaning of all parameters is same,
              just the pattern $p can have only one substring which will be used in the transform
              rule $t. This is required to allow rewriting of URL parts of the tags and scripts.
              This option can also be used to force pavuk to recognize HTML tag/attribute pairs
              which pavuk does not support.

Cookie

       -cookie_file $file
              File where are stored cookie infos. This file  must  be  in  Netscape  cookie  file
              format (generated with Netscape Navigator or Communicator ...).

       -cookie_send/-nocookie_send
              Use  collected  cookies  in  HTTP/HTTPS  requests.   Pavuk will not send at default
              cookies.

       -cookie_recv/-nocookie_recv
              Store received cookies from HTTP/HTTPS responses  into  memory  cookie  cache.   At
              default pavuk will not remember received cookies.

       -cookie_update/-nocookie_update
              Update  cookie  file on disk and synchronize it with changes made by any concurrent
              processes.  At default pavuk will not update cookie file on disk.

       -cookies_max $nr
              Maximum number of cookies in memory cookie cache.  Default value  is  0,  and  that
              means no restrictions for cookies number.

       -disabled_cookie_domains $list
              Comma-separated list of cookie domains which are not permitted to send cookies
              stored into cookie cache.

       -cookie_check/-nocookie_check
              Check when receiving cookie, if cookie domain is equal to domain of server which
              sends this cookie. By default pavuk checks if server is setting cookies for its
              own domain, and if it tries to set cookie for foreign domain pavuk will complain
              about that and will reject such cookie.

HTML rewriting engine tuning options

       -noRelocate/-Relocate
              This switch prevents the program from rewriting relative URLs to absolute, after
              HTML document is transferred. Default pavuk behavior is to maintain link consistency
              of HTML documents. So always when HTML document is downloaded pavuk will rewrite all
              URLs to point to local document if it is available and if it is not available it
              will point to remote document. After document is properly downloaded, pavuk will
              update links in HTML documents, which point to this one.

       -all_to_local/-noall_to_local
              This option forces pavuk to change all URLs inside  HTML  document  to  local  URLs
              immediately after download of document. Default is this option disabled.

       -sel_to_local/-nosel_to_local
              This  option  forces  pavuk  to  change  all  URLs, which accomplish conditions for
              download, to local inside HTML document immediately after download of document.   I
              recommend  to use this option, when you are sure, that transfer will be without any
              problems. This option can save a lot of processor time.   Default  is  this  option
              disabled.

       -all_to_remote/-noall_to_remote
              This  option  forces  pavuk  to change all URLs inside HTML document to remote URLs
              immediately after download of document.  Default is this option disabled.

       -post_update/-nopost_update
              This option is especially designed to allow in -fnrules option doing rules based on
              MIME type of document. This option forces pavuk to generate local names for
              documents just after pavuk knows what is the MIME type of document. This has big
              impact on the rewriting engine of links inside HTML documents. This option causes
              malfunction of other options for controlling the link rewriting engine. Use this
              option only when you know what you are doing :-)

       -dont_touch_url_pattern $pat
              This option serves to deny rewriting and processing of particular URLs in HTML
              documents by pavuk HTML rewriting engine. This option accepts wildcard patterns to
              specify such URLs. Matching is done against untouched URLs so when the URL is
              relative, you must use pattern which matches the relative URL, when it is absolute,
              you must use pattern which matches the absolute URL.

       -dont_touch_url_rpattern $pat
              This  option  is  variation  on previous option. This one uses regular patterns for
              matching of URLs instead  of  wildcard  patterns  used  by  -dont_touch_url_pattern
              option.  This  option  is  available  only  when pavuk is compiled with support for
              regular expression patterns.

       -dont_touch_tag_rpattern $pat
              This option is variation on previous option, just matching is made on full HTML tag
              with  included <>. This option accepts regular expression patterns. It is available
              only when pavuk is compiled with support for regular expression patterns.

Filename/URL Conversion Option

       -tr_del_chr $str
              All characters found in $str will be deleted from local  name  of  document.   $str
              should contain escape sequences similar like in tr command:
              \n - newline
              \r - carriage return
              \t - horizontal tab space
              \0xXX - hexadecimal  ASCII value
              [:upper:] - all uppercase letters
              [:lower:] - all lowercase letters
              [:alpha:] - all letters
              [:alnum:] - all letters and digits
              [:digit:] - all digits
              [:xdigit:] - all hexadecimal digits
              [:space:] - all horizontal and vertical whitespace
              [:blank:] - all horizontal whitespace
              [:cntrl:] - all control characters
              [:print:] - all printable characters including space
              [:nprint:] - all non printable characters
              [:punct:] - all punctuation characters
              [:graph:] - all printable characters excluding space

       -tr_str_str $str1 $str2
              String $str1 from local name of document will be replaced with $str2.

       -tr_chr_chr $chrset1 $chrset2
              Characters from $chrset1 from local name of document will be replaced with
              corresponding character from $chrset2. $chrset1 and $chrset2 should have same
              syntax as $str in -tr_del_chr option.

       -store_name $str
              When  you  want  to  change local filename of first file downloaded with singlepage
              mode, you should use this option.

       -index_name $str
              With this option you can change directory index name. As default is used _._.html .

       -store_index/-nostore_index
              With option -nostore_index you can deny storing of directory indexes into HTML
              files.

       -fnrules $t $m $r
              This is a very powerful option! This option is used to flexibly change layout of
              local document tree. It accepts three parameters. First parameter $t is used to say
              what  type  is  following pattern.  F is used for wildcard pattern (uses fnmatch())
              and  R  is  used  for  regular  expression  pattern   (using   any   supported   RE
              implementation).  Second parameter is matching pattern used to select URLs for this
              rule.  If URL match this  pattern,  then  local  name  for  this  URL  is  computed
              following  rules  of  third  parameter.  And third parameter is local name building
              rule. Pavuk now supports two kinds of local name  building  rules.  One  is  simple
              based  only  on  simple macros and other more complicated extended rule, which also
              enables to perform several functions.  Recognition between those two kinds of rules
              is  done  by  looking  at first character of rule.  In case when first character is
              '(', rule is extended and in all other cases it is the simple kind of rule.

              Simple rule should contain literals or escaped macros.  Macros  are  escaped  by  %
              character or by $ character.

              Here is list of recognized macros:

              $x  -  where  x  is any positive number. This macro is replaced with x-th substring
              matched by RE pattern. (If you use this you need to understand RE !)
              %i - is replaced with protocol id (http, https, ftp, gopher)
              %p - is replaced with password. (use this only when usable)
              %u - is replaced with username.
              %h - is replaced with host name.
              %m - is replaced with domain name.
              %r - is replaced with port number.
              %d - is replaced with path to document.
              %n - is replaced with document name.
              %b - is replaced with basename of document (without extension).
              %e - is replaced with extension.
              %s - is replaced with searchstring.
              %M - is replaced with MIME type of document. When you are  using  this  macro,  you
              *must* use also -post_update option else it won't work.
              %E - is replaced with default extension assigned to MIME type of document. When you
              are using this macro, you *must* use also -post_update option else it won't work.
              %x - where x is positive number. This macro is replaced with  x-th  directory  from
              path to document from beginning.
              %-x  -  where x is positive number. This macro is replaced with x-th directory from
              path to document from end.

              Here is an example. If you want to place documents into separate directories by
              extension, you should use following fnrules option:
              -fnrules F '*' '/%e/%n'

              Extended rule always begins with character '('. It uses some kind of LISP like
              syntax.

              Here are base rules for writing extended rules :
              - the local filename for a rule of this kind is the return value of the toplevel
              function
              - each function is enclosed inside round braces ()
              - first token right after opening brace is function name
              - each function have nonzero fixed number of parameters
              - each function returns numeric or string value
              - function parameters are separated by any number of space characters
              - parameter of function should be string, number, macro or other function
              - string is always quoted with "
              -  each  numeric  parameter  can  be in any encoding supported by strtod() function
              (octal, decimal, hexadecimal, ...)
              - there is no implicit conversion from number to string
              - each macro is prefixed by % character and is one character long
              - each macro is replaced by its string representation from current URL
              - function parameters are typed strictly
              - toplevel function must return string value

              Extended rule supports full set of % escaped macros supported  with  simple  rules,
              plus two following addition macros :
              %U - URL string
              %o - default localname for URL

              Here is description of all supported functions

              sc - concat two string parameters
                 - accepts two string parameters
                 - returns string value
              ss - substring from string
                 - accepts three parameters.
                   - first is string from which we want to cut subpart
                   - second is number which represents starting position in string
                   - third is number which represents ending position in string
                 - returns string value
              hsh - compute modulo hash value from string with specified base
                 - accepts two parameters
                   - first is string for which we are computing the hash value
                   - second is numeric value for base of modulo hash
                 - returns numeric value
              md5 - compute MD5 checksum for string
                 - accepts one string value
                 - returns string which represents MD5 checksum
              lo - convert all characters inside string to lower case
                 - accepts one string value
                 - returns string value
              up - convert all characters inside string to upper case
                 - accepts one string value
                 - returns string value
              ue  -  encode  unsafe  characters  in  string  with same encoding which is used for
              encoding unsafe characters inside URL (%xx) As default  are  encoded  all  nonascii
              values when this function is used.
                 - accepts two string values
                   - first is string which we want to encode
                   - second is string which contains unsafe characters
                 - return string value
              dc  -  delete  unwanted  characters  from  string  (have  similar  functionality as
              -tr_del_chr option)
                 - accepts two string values
                   - first is string from which we want delete
                   - second is string which contains characters we want to delete.
                 - returns string value
              tc - replace character with other character in string (have  similar  functionality
              as -tr_chr_chr option)
                 - accepts three string values
                   - first is string inside which we want to replace characters
                   - second is set of characters which we want to replace
                   - third is set of characters with which we are replacing
                 - returns string value
              ts  -  replace  some  string  inside  string  with  any  other string (have similar
              functionality as -tr_str_str option)
                 - accepts three string values
                   - first is string inside which we want to replace string
                   - second is the from string
                   - third is to string
                 - returns string value
              spn - calculate initial length of string  which  contains  only  specified  set  of
              characters.  (have same functionality as strspn() libc function)
                 - accepts two string values
                   - first is input string
                   - second is set of acceptable characters
                 - returns numeric value
              cspn  -  calculate  initial length of string which doesn't contain specified set of
              characters.  (have same functionality as strcspn() libc function)
                 - accepts two string values
                   - first is input string
                   - second is set of unacceptable characters
                 - returns numeric value
              sl - calculate length of string
                 - accepts one string value
                 - returns numeric value
              ns - convert number to string by format
                 - accepts two parameters
                   - first parameter is format string same as for printf() function
                   - second is number which we want to convert
                 - returns string value
              lc - return position of last occurrence of specified character inside string
                 - accepts two string parameters
                   - first string which we are searching in
                   - second string contains character for which we are looking for
                 - returns numeric value
              + - add two numeric values
                 - accepts two numeric values
                 - returns numeric value
              - - subtract two numeric values
                 - accepts two numeric values
                 - returns numeric value
              % - modulo addition
                 - accepts two numeric values
                 - returns numeric value
              * - multiply two numeric values
                 - accepts two numeric values
                 - returns numeric value
              / - divide two numeric values
                 - accepts two numeric values
                 - returns numeric value
              rmpar - remove parameter from query string
                - accepts two strings
                  - first string is string which we are adjusting
                  - second parameter is name of parameter which should be removed
                - returns adjusted string
              getval - get query string parameter value
                - accepts two strings
                  - first string is query string from which to get the parameter
                    value (usually %s)
                  - second string is name of parameter for which we want to get
                    the value
                - returns value of the parameter or empty string when the parameter
                  doesn't exist
              sif - logical decision
                - accepts three parameters
                  - first is numeric and when it is zero then result of this decision
                    is result of second parameter, else result is result of third
                    parameter
                  - second parameter is string
                  - third parameter is string
                - returns string result of decision
              ! - logical not
                - accepts one numeric parameter
                - returns negation of parameter
              & - logical and
                - accept two numeric parameters
                - returns logical and of parameters
              | - logical or
                - accept two numeric parameters
                - returns logical or of parameters
              getext - get file extension
                - accept one string (filename or path)
                - return string containing extension of parameter
              seq - compare two strings
                - accepts two strings for comparison
                - returns numeric value 0 - if different 1 - if equal
              jsf - execute JavaScript function
                - accepts one string parameter which holds name of
                  JavaScript function specified in script loaded with
                  -js_script_file option.
                - returns string value equal to return value of
                  JavaScript function
                - this function is available only when pavuk is compiled
                  with support for JavaScript bindings

              For example, if you are mirroring very huge number of internet sites into same
              local directory, too many entries in one directory could cause performance
              problems. You may use for example hsh or md5 functions to generate one additional
              level of hash directories based on hostname with one of following options :

              -fnrules F '*' '(sc (ns "%02d/" (hsh %h 100)) %o)'
              -fnrules F '*' '(sc (ss (md5 %h) 0 2) %o)'

       -base_level $nr
              Number of directory levels to omit in local tree.

              For example, if when downloading URL ftp://ftp.idata.sk/pub/unix/www/pavuk-0.7pl1.tgz
              you enter -base_level 4 at command line, the file will be stored in local tree as
              www/pavuk-0.7pl1.tgz, not as ftp/ftp.idata.sk_21/pub/unix/www/pavuk-0.7pl1.tgz as
              it normally would be.

       -default_prefix $str
              Default prefix of mirrored directory. This option is used only when you are  trying
              to  synchronize  content of remote directory which was downloaded using -base_level
              option. Also you must use directory based synchronization  method,  not  URL  based
              synchronization  method.  This  is especially useful, when used in conjunction with
              -remove_old option.

       -remove_adv/-noremove_adv
              This option  is  used  for  turn  on/off  of  removing  HTML  tags  which  contains
              advertisement  banners.   The  banners  are  not  removed  from  HTML file, but are
              commented out.  Such URLs also will not be downloaded.   This  option  have  effect
              only  when  used  with  option  -adv_re.   Default  is  turned off.  This option is
              available only  when  your  system  have  support  for  one  of  supported  regular
              expressions implementation.

       -adv_re $RE
              This   option  is  used  to  specify  regular  expressions  for  matching  URLs  of
              advertisement banners.  For example : -adv_re http://ad.doubleclick.net/.*  is used
              to  match  all files from server ad.doubleclick.net.  This option is available only
              when your system have any supported regular expressions implementation.

       -unique_name/-nounique_name
              Pavuk as default always attempts to assign to unique URL unique local filename.  If
              this behavior is not wanted, you can use option -nounique_name to disable this.

Other Options

       -sleep $nr
              This option allows you to specify number of seconds during that the program will be
              suspended between two transfers. Useful to deny server overload.  Default value for
              this option is 0.

       -rsleep/-norsleep
              When this option is active, pavuk randomizes the sleep time between transfers
              in interval between zero and value specified with -sleep option. By default this
              option is inactive.

       -ddays $nr
              If  document  has  modification  time  later  as  $nr days, then in sync mode pavuk
              attempts to retrieve newer copy of document from remote server. Default value is 0.

       -remove_old/-noremove_old
              Remove improper documents (those which don't exist on remote site). This option
              has effect only when used in directory based sync mode. When used with URL based
              sync mode, pavuk will not remove any old files which were excluded from document
              tree and are not referenced in any HTML document. You must also use option
              -subdir, to let pavuk find files which belong to current mirror. By default pavuk
              won't remove any old files.

       -browser $str
              is  used to set your browser command (in URL tree dialog you can use right click to
              raise menu, from which you can start  browser  on  actually  selected  URL).   This
              option  is  available only when compiled with GTK GUI and with support for URL tree
              preview.

       -debug/-nodebug
              turns on displaying of debug messages. This option is available only when compiled
              with -DDEBUG. If -debug option is used pavuk will output verbose information about
              documents, whole protocol level information, locking information and more (depends
              on -debug_level setup). This option is used just as a trigger to enable output of
              debug messages selected by -debug_level option. Default is debug mode turned off.

       -debug_level $level
              Set level of required  debug  informations.  $level  can  be  numeric  value  which
              represent  binary  mask  for  requested  debug  levels,  or comma separated list of
              supported debug levels.  Currently pavuk supports following debug levels :
              html - for HTML parser debugging
              protos - to see server side protocol messages
              protoc - to see client side protocol messages
              procs - to see some special procedure calls
              locks - for debugging of documents locking
              net - for debugging some low level network stuff
              misc - for miscellaneous unsorted debug messages
              user - for verbose user level messages
              all - request all currently supported debug levels
              mtlock - locking of resources in multithreading environment
              mtthr - launching/waking/sleeping/stopping of threads in multithreaded environment
              protod - for DEBUGGING of POST requests
              limits - for debugging limiting options, you will see  the  reason  why  particular
              URLs are rejected by pavuk and which option caused this.
              ssl - to enable verbose reporting about SSL related things.

       -remind_cmd $str
              This  option  have  effect  only  when  running  pavuk in reminder mode. To command
              specified with this option pavuk sends result of running reminder mode.  There  are
              listed  URLs  which  are  changed  and  URLs which have any errors.  Default remind
              command is "mailx user@server -s \"pavuk reminder result\"" .

       -nscache_dir $dir
              Path to Netscape browser cache directory. If you specify this path, pavuk  attempts
              to find out whether the URL is in this cache.  If the URL is there it will be fetched
              from the cache, else pavuk will download it from the net. The cache directory index
              file must be named index.db and must be located in the cache directory.  To support
              this feature, pavuk has to be linked with BerkeleyDB 1.8x .

       -mozcache_dir $dir
              Path to Mozilla browser cache directory. Same functionality as  with  the  previous
              option, just for a different browser with different cache formats.  Pavuk  supports
              both formats of Mozilla browser disk cache (old for versions <0.9 and new  used  in
              versions >=0.9).  The old format cache directory must contain the  cache  directory
              index database named cache.db.  The new format cache directory must contain the map
              file _CACHE_MAP_ and three block files _CACHE_001_, _CACHE_002_, _CACHE_003_.  To
              support old Mozilla cache format, pavuk has to be linked with BerkeleyDB 1.8x.  The
              new Mozilla cache format doesn't require any external library.

       -post_cmd $str
              Post-processing command, which will be executed after  successful  download  of  a
              document.  This command may process the document in some way.  While  this  command
              is running, pavuk keeps the document locked, so there is no chance that some  other
              pavuk process will modify the document.  This post-processing command will get three
              additional parameters from pavuk.
                 - local name of document
                 - 1/0 1 if document is HTML document, 0 if not
                 - original URL of this document

       -hack_add_index/-nohack_add_index
              This is a slightly hacky option.  It forces pavuk to also add  to  the  URL  queue
              the directory indexes of all queued documents. This allows pavuk to  download  more
              documents from a site than it is able to achieve by normal  traversing  of  HTML
              documents.  A bit dirty but useful in some cases.

       -js_script_file $file
              Pavuk optionally has a builtin JavaScript interpreter  to  allow  high  level
              customization of some internal procedures. Currently you are allowed  to  customize
              two things with your own JavaScript functions.  You can use it to set precise
              limiting options, or you can write your own functions which can be used inside rules
              of the -fnrules option.  With this option you can load a JavaScript script with
              functions into pavuk's internal JavaScript interpreter. To learn more about  these
              capabilities read the separate document jsbind.txt which comes with pavuk sources in
              the toplevel directory.  This option is available only when you have compiled pavuk
              with support for JavaScript bindings.

EXIT STATUS

       As of version 0.9pl29 pavuk has changed indication of status by exit codes.   In  earlier
       versions  exit  status 0 was for no error and nonzero exit status was something like count
       of failed documents.  In all versions since 0.9pl29 the following exit codes are defined:

           0 - no error, everything is OK
           1 - error in configuration of pavuk options or
               error in config files
           2 - some error occurred while downloading documents

ENVIRONMENTAL VARIABLES

       USER   variable is used to construct email address from user and hostname

       LC_* or LANG
              used to set internationalized environment

       PAVUKRC_FILE
              with  this  variable  you  can  specify  alternative  location  for  your   pavukrc
              configuration file.

REQUIRED EXTERNAL PROGRAMS

       at     is used for scheduling.

       gunzip is used to decode gzip or compress encoded documents.

BUGS

       If you find any, please let me know.

FILES

       @SYSCONFDIR@/pavukrc

       ~/.pavukrc

       ~/.pavuk_prefs

              These  files  are  used as default configuration files.  You may specify there some
              constant values like your proxy server or your preferred WWW browser. Configuration
              options  reflect  command line options.  Not all parameters are suitable for use in
              default configuration file.  You should select only some of them, which you  really
              need.

              File   ~/.pavuk_prefs   is   special   file  which  contains  automatically  stored
              configuration.  This file is used only when running  GUI  interface  of  pavuk  and
              option -prefs is active.

              The first file parsed (if present) is @SYSCONFDIR@/pavukrc, then ~/.pavukrc (if
              present), then ~/.pavuk_prefs (if present).  Finally the command line is parsed. The
              precedence is as follows :

              - highest -
              Entered in user interface
              Entered in command line
              ~/.pavuk_prefs
              ~/.pavukrc
              @SYSCONFDIR@/pavukrc
              - lowest -

              Here is a table of config file option - command line option pairs.

              MaxLevel:                  --->  -lmax
              MaxDocs:                   --->  -dmax
              MaxSize:                   --->  -maxsize
              MinSize:                   --->  -minsize
              SleepBetween:              --->  -sleep
              MaxRetry:                  --->  -retry
              MaxRegets:                 --->  -nregets
              MaxRedirections:           --->  -nredirs
              CommTimeout:               --->  -timeout
              RegetRollbackAmount:       --->  -rollback
              DocExpiration:             --->  -ddays
              UseCache:                  --->  -nocache
              UseRobots:                 --->  -noRobots
              AllowFTP:                  --->  -noFTP
              AllowHTTP:                 --->  -noHTTP
              AllowSSL:                  --->  -noSSL
              AllowGopher:               --->  -noGopher
              AllowCGI:                  --->  -noCGI
              AllowGZEncoding:           --->  -noEnc
              AllowFTPRecursion:         --->  -FTPdir
              ForceReget:                --->  -force_reget
              Debug:                     --->  -debug
              AllowedSites:              --->  -asite
              DisallowedSites:           --->  -dsite
              AllowedDomains:            --->  -adomain
              DisallowedDomains:         --->  -ddomain
              AllowedPrefixes:           --->  -aprefix
              DisallowedPrefixes:        --->  -dprefix
              AllowedSuffixes:           --->  -asfx
              DisallowedSuffixes:        --->  -dsfx
              AllowedMIMETypes:          --->  -amimet
              DisallowedMIMETypes:       --->  -dmimet
              PreferredLanguages:        --->  -alang
              PreferredCharset:          --->  -acharset
              WorkingDir:                --->  -cdir
              WorkingSubDir:             --->  -subdir
              HTTPAuthorizationScheme:   --->  -auth_scheme
              HTTPAuthorizationName:     --->  -auth_name
              HTTPAuthorizationPassword: --->  -auth_passwd
              AuthReuseDigestNonce:      --->  -auth_reuse_nonce
              SSLCertPassword:           --->  -ssl_cert_passwd
              SSLCertFile:               --->  -ssl_cert_file
              SSLKeyFile:                --->  -ssl_key_file
              EmailAddress:              --->  -from
              MatchPattern:              --->  -pattern
              REMatchPattern:            --->  -rpattern
              SkipMatchPattern:          --->  -skip_pattern
              SkipREMatchPattern:        --->  -skip_rpattern
              URLMatchPattern:           --->  -url_pattern
              URLREMatchPattern:         --->  -url_rpattern
              SkipURLMatchPattern:       --->  -skip_url_pattern
              SkipURLREMatchPattern:     --->  -skip_url_rpattern
              DefaultMode:               --->  -mode
              FTPProxy:                  --->  -ftp_proxy
              HTTPProxy:                 --->  -http_proxy
              SSLProxy:                  --->  -ssl_proxy
              GopherProxy:               --->  -gopher_proxy
              FTPViaHTTPProxy:           --->  -ftp_httpgw
              GopherViaHTTPProxy:        --->  -gopher_httpgw
              HTTPProxyUser:             --->  -http_proxy_user
              HTTPProxyPass:             --->  -http_proxy_pass
              HTTPProxyAuth:             --->  -http_proxy_auth
              AuthReuseProxyDigestNonce: --->  -auth_reuse_proxy_nonce
              Browser:                   --->  -browser
              ScenarioDir:               --->  -scndir
              ShowProgress:              --->  -progress
              XMaxLogSize:               --->  -xmaxlog
              LogFile:                   --->  -logfile
              RemoveOldDocuments:        --->  -remove_old
              AuthFile:                  --->  -auth_file
              BaseLevel:                 --->  -base_level
              FTPDirtyProxy:             --->  -ftp_dirtyproxy
              ActiveFTPData:             --->  -ftp_active/-ftp_passive
              ActiveFTPPortRange:        --->  -active_ftp_port_range
              AlwaysMDTM:                --->  -always_mdtm/-noalways_mdtm
              RemoveBeforeStore:         --->  -(no)remove_before_store
              ShowDownloadTime:          --->  -stime
              NLSMessageCatalogDir:      --->  -msgcat
              Quiet:                     --->  -quiet/-verbose
              NewerThan:                 --->  -newer_than
              OlderThan:                 --->  -older_than
              Reschedule:                --->  -reschedule
              DontLeaveSite:             --->  -dont_leave_site/-leave_site
              DontLeaveDir:              --->  -dont_leave_dir/-leave_dir
              PreserveTime:              --->  -preserve_time/-nopreserve_time
              LeaveLevel:                --->  -leave_level
              GUIFont:                   --->  -gui_font
              UserCondition:             --->  -user_condition
              CookieFile:                --->  -cookie_file
              CookieSend:                --->  -cookie_send/-nocookie_send
              CookieRecv:                --->  -cookie_recv/-nocookie_recv
              CookieUpdate:              --->  -cookie_update/-nocookie_update
              CookiesMax:                --->  -cookies_max
              CookieCheckDomain:         --->  -cookie_check/-nocookie_check
              DisabledCookieDomains:     --->  -disabled_cookie_domains
              DisableHTMLTag:            --->  -disable_html_tag
              EnableHTMLTag:             --->  -enable_html_tag
              TrDeleteChar:              --->  -tr_del_chr
              TrStrToStr:                --->  -tr_str_str
              TrChrToChr:                --->  -tr_chr_chr
              IndexName:                 --->  -index_name
              StoreName:                 --->  -store_name
              PreservePermisions:        --->  -preserve_perm/-nopreserve_perm
              PreserveAbsoluteSymlinks:  --->  -preserve_slinks/-nopreserve_slinks
              FTPListCMD:                --->  -FTPlist/-noFTPlist
              MaxRate:                   --->  -maxrate
              MinRate:                   --->  -minrate
              ReadBufferSize:            --->  -bufsize
              BgMode:                    --->  -bg/-nobg
              CheckSize:                 --->  -check_size/-nocheck_size
              SLogFile:                  --->  -slogfile
              Identity:                  --->  -identity
              SendFromHeader:            --->  -send_from/-nosend_from
              RunX:                      --->  -runX
              FnameRules:                --->  -fnrules
              StoreDocInfoFiles:         --->  -store_info/-nostore_info
              AllLinksToLocal:           --->  -all_to_local/-noall_to_local
              AllLinksToRemote:          --->  -all_to_remote/-noall_to_remote
              SelectedLinksToLocal:      --->  -sel_to_local/-nosel_to_local
              ReminderCMD:               --->  -remind_cmd
              AutoReferer:               --->  -auto_referer/-noauto_referer
              URLsFile:                  --->  -urls_file
              UsePreferences:            --->  -prefs/-noprefs
              FTPhtml:                   --->  -FTPhtml/-noFTPhtml
              StoreDirIndexFile:         --->  -store_index/-nostore_index
              Language:                  --->  -language
              FileSizeQuota:             --->  -file_quota
              TransferQuota:             --->  -trans_quota
              FSQuota:                   --->  -fs_quota
              EnableJS:                  --->  -enable_js/-disable_js
              UrlSchedulingStrategy:     --->  -url_strategy
              NetscapeCacheDir:          --->  -nscache_dir
              RemoveAdvertisement:       --->  -remove_adv/-noremove_adv
              AdvBannerRE:               --->  -adv_re
              CheckIfRunnigAtBackground: --->  -check_bg/-nocheck_bg
              SendIfRange:               --->  -send_if_range/-nosend_if_range
              SchedulingCommand:         --->  -sched_cmd
              UniqueLogName:             --->  -unique_log/-nounique_log
              PostCommand:               --->  -post_cmd
              SSLVersion:                --->  -ssl_version
              UniqueSSLID:               --->  -unique_sslid/-nounique_sslid
              AddHTTPHeader:             --->  -httpad
              StatisticsFile:            --->  -statfile
              WaitOnExit:                --->  -ewait
              AllowedIPAdrressPattern:   --->  -aip_pattern
              DisallowedIPAdrressPattern:--->  -dip_pattern
              SiteLevel:                 --->  -site_level
              UseHTTP11:                 --->  -use_http11
              MaxRunTime:                --->  -max_time
              LocalIP:                   --->  -local_ip
              RequestInfo:               --->  -request
              HashSize:                  --->  -hash_size
              NumberOfThreads:           --->  -nthreads
              ImmediateMessages:         --->  -immesg/-noimmesg
              HTMLFormData:              --->  -formdata
              DumpFD:                    --->  -dumpfd
              DumpUrlFD:                 --->  -dump_urlfd
              DeleteAfterTransfer:       --->  -del_after/-nodel_after
              UniqueDocName:             --->  -unique_name/-nounique_name
              LeaveSiteEnterDirectory:   --->  -leave_site_enter_dir/-dont_leave_site_enter_dir
              SinglePage:                --->  -singlepage/-nosinglepage
              NTLMAuthorizationDomain:   --->  -auth_ntlm_domain
              NTLMProxyAuthorizationDomain:
                                         --->  -auth_proxy_ntlm_domain
              JavascriptPattern:         --->  -js_pattern
              FollowCommand:             --->  -follow_cmd
              RetrieveSymlinks:          --->  -retrieve_symlink/-noretrieve_symlink
              JSTransform:               --->  -js_transform
              JSTransform2:              --->  -js_transform2
              FTPProxyUser:              --->  -ftp_proxy_user
              FTPProxyPassword:          --->  -ftp_proxy_pass
              LimitInlineObjects:        --->  -limit_inlines/-dont_limit_inlines
              FTPListOptions:            --->  -ftp_list_options
              FixWuFTPDBrokenLISTcmd:    --->  -fix_wuftpd_list/-nofix_wuftpd_list
              PostUpdate:                --->  -post_update/-nopost_update
              SeparateInfoDir:           --->  -info_dir
              MozillaCacheDir:           --->  -mozcache_dir
              AllowedPorts:              --->  -aport
              DisallowedPorts:           --->  -dport
              HackAddIndex:              --->  -hack_add_index/-nohack_add_index
              JavaScriptFile:            --->  -js_script_file
              FtpLoginHandshake:         --->  -ftp_login_handshake
              NSSCertDir:                --->  -nss_cert_dir
              NSSAcceptUnknownCert:      --->  -nss_accept_unknown_cert/-nonss_accept_unknown_cert
              NSSDomesticPolicy:         --->  -nss_domestic_policy/-nss_export_policy
              DontTouchUrlREPattern:     --->  -dont_touch_url_rpattern
              DontTouchUrlPattern:       --->  -dont_touch_url_pattern
              DontTouchTagREPattern:     --->  -dont_touch_tag_rpattern
              HTMLTagPattern:            --->  -tag_pattern
              HTMLTagREPattern:          --->  -tag_rpattern
              URL:                       --->  one URL (more lines with URL:
                                               ... means more URL's)

       A line which begins with '#' is a comment.
       TrStrToStr: and TrChrToChr: must contain two quoted strings.  All parameter names are case
       insensitive. If any option is missing here, try to look inside the config.c source file.

       See pavukrc.sample file for example

       .pavuk_authinfo

              The file should contain as many authentication records as you need.   Records  are
              separated by any number of empty lines.  Parameter names are case insensitive.

              Structure of record:

              Proto: <proto ID>    ---> identification of protocol
                                        (ftp/http/https/..)
                                   - required field
              Host: <host[:port]>  ---> host name
                                   - required field
              User: <user>         ---> name of user
                                   - optional
              Pass: <password>     ---> password for user
                                   - optional
              Base: <path>         ---> base prefix of document path
                                   - optional
              Realm: <name>        ---> realm for HTTP authorization
                                   - optional
              NTLMDomain: <domain> ---> NT/LM domain for NTLM authorization
                                   - optional
              Type: <type>         ---> HTTP authentication scheme
                                             - 1/user   - user auth scheme
                                             - 2/Basic  - Basic auth scheme (default)
                                             - 3/Digest - Digest auth scheme
                                             - 4/NTLM   - NTLM auth scheme
                                   - optional

       see pavuk_authinfo.sample file for example

       ~/.pavuk_keys
              this is the file where information about configurable menu option shortcuts is stored.
              This is available only when compiled with Gtk+1.2 and higher.

       ~/.pavuk_remind_db
              this file contains information about URLs for running in reminder mode.  The structure
              of this file is very simple. Each line contains information about one URL.  The first
              entry in a line is the last known modification time of the URL (stored  in  time_t
              format - number of seconds from 1.1.1970 GMT).  The second entry is the URL.

EXAMPLE COMMAND LINE

       pavuk -mode mirror -nobg -store_info -info_dir
       /mirror/info -nthreads 1 -cdir /mirror/incoming -subdir
       /mirror/incoming -preserve_time -nopreserve_perm
       -nopreserve_slinks -noretrieve_symlink -force_reget
       -noRobots -trans_quota 16384 -maxsize 16777216
       -max_time 28 -nodel_after -remove_before_store -ftpdir
       -ftplist -ftp_list_options -a -dont_leave_site
       -dont_leave_dir -all_to_local -remove_old -nostore_index
       -active_ftp_port_range 57344:65535 -always_mdtm
       -ftp_passive -base_level 2 http://<my_host>/doc/

SEE ALSO

       Look into the ChangeLog file for more information about new features in particular versions
       of pavuk.

AUTHOR

       Main development Ondrejicka Stefan
       Look into CREDITS file of sources for additional information.

AVAILABILITY

       pavuk is available from http://pavuk.sourceforge.net/