Provided by: pavuk_0.9.35-4_amd64 bug

NAME

       pavuk - HTTP, HTTP over SSL, FTP, FTP over SSL and Gopher recursive document retrieval program

SYNOPSIS

       pavuk  [-mode  {normal  |  resumeregets | singlepage | singlereget | sync | dontstore | ftpdir | mirror}]
       [-X] [-runX] [-bg/-nobg] [-prefs/-noprefs] [-h] [-v] [-progress/-noprogress]  [-stime/-nostime]  [-xmaxlog
       $nr]  [-logfile  $file]  [-slogfile  $file] [-auth_file $file] [-msgcat $dir] [-language $str] [-gui_font
       $font] [-quiet/-verbose] [-read_css/-noread_css] [-cdir $dir] [-scndir $dir]  [-scenario  $str]  [-dumpscn
       $filename]  [-lmax  $nr]  [-dmax  $nr]  [-leave_level  $nr]  [-maxsize $nr] [-minsize $nr] [-asite $list]
       [-dsite $list] [-adomain $list] [-ddomain $list] [-asfx $list] [-dsfx $list] [-aprefix  $list]  [-dprefix
       $list]  [-amimet  $list]  [-dmimet  $list] [-pattern $pattern] [-url_pattern $pattern] [-rpattern $regexp]
       [-url_rpattern $regexp] [-skip_pattern $pattern] [-skip_url_pattern  $pattern]  [-skip_rpattern  $regexp]
       [-skip_url_rpattern  $regexp] [-newer_than $time] [-older_than $time] [-schedule $time] [-reschedule $nr]
       [-dont_leave_site/-leave_site]  [-dont_leave_dir/-leave_dir]  [-http_proxy   $site[:$port]]   [-ftp_proxy
       $site[:$port]]   [-ssl_proxy  $site[:$port]]  [-gopher_proxy  $site[:$port]]  [-ftp_httpgw/-noftp_httpgw]
       [-ftp_dirtyproxy/-noftp_dirtyproxy]   [-gopher_httpgw/-nogopher_httpgw]   [-noFTP/-FTP]   [-noHTTP/-HTTP]
       [-noSSL/-SSL]     [-noGopher/-Gopher]     [-FTPdir/-noFTPdir]     [-noCGI/-CGI]     [-FTPlist/-noFTPlist]
       [-FTPhtml/-noFTPhtml]     [-noRelocate/-Relocate]     [-force_reget/-noforce_reget]     [-nocache/-cache]
       [-check_size/-nocheck_size]  [-noRobots/-Robots]  [-noEnc/-Enc]  [-auth_name  $user] [-auth_passwd $pass]
       [-auth_scheme 1/2/3/4/user/Basic/Digest/NTLM] [-auth_reuse_nonce/-no_auth_reuse_nonce]  [-http_proxy_user
       $user]       [-http_proxy_pass       $pass]       [-http_proxy_auth       1/2/3/4/user/Basic/Digest/NTLM]
       [-auth_reuse_proxy_nonce/-no_auth_reuse_proxy_nonce]   [-ssl_key_file   $file]   [-ssl_cert_file   $file]
       [-ssl_cert_passwd     $pass]     [-from     $email]     [-send_from/-nosend_from]     [-identity    $str]
       [-auto_referer/-noauto_referer] [-referer/-noreferer]  [-alang  $list]  [-acharset  $list]  [-retry  $nr]
       [-nregets     $nr]     [-nredirs     $nr]     [-rollback     $nr]    [-sleep    $nr]    [-timeout    $nr]
       [-preserve_time/-nopreserve_time] [-preserve_perm/-nopreserve_perm] [-preserve_slinks/-nopreserve_slinks]
       [-bufsize  $nr]   [-maxrate   $nr]   [-minrate   $nr]   [-user_condition   $str]   [-cookie_file   $file]
       [-cookie_send/-nocookie_send]       [-cookie_recv/-nocookie_recv]       [-cookie_update/-nocookie_update]
       [-cookies_max   $nr]   [-disabled_cookie_domains    $list]    [-disable_html_tag    $TAG,[$ATTRIB][;...]]
       [-enable_html_tag  $TAG,[$ATTRIB][;...]]   [-tr_del_chr  $str]  [-tr_str_str  $str1  $str2]  [-tr_chr_chr
       $chrset1 $chrset2] [-index_name $str] [-store_index/-nostore_index] [-store_name $str]  [-debug/-nodebug]
       [-debug_level  $level] [-browser $str] [-urls_file $file] [-file_quota $nr] [-trans_quota $nr] [-fs_quota
       $nr]     [-enable_js/-disable_js]     [-fnrules      $t      $m      $r]      [-store_info/-nostore_info]
       [-all_to_local/-noall_to_local]     [-sel_to_local/-nosel_to_local]     [-all_to_remote/-noall_to_remote]
       [-url_strategie   $strategie]   [-remove_adv/-noremove_adv]   [-adv_re    $RE]    [-check_bg/-nocheck_bg]
       [-send_if_range/-nosend_if_range]   [-sched_cmd   $str]   [-unique_log/-nounique_log]   [-post_cmd  $str]
       [-ssl_version    $v]    [-unique_sslid/-nounique_sslid]    [-aip_pattern    $re]    [-dip_pattern    $re]
       [-use_http11/-nouse_http11]  [-local_ip $addr] [-request $req] [-formdata $req] [-httpad $str] [-nthreads
       $nr]    [-immesg/-noimmesg]    [-dumpfd    $nr]    [-dump_urlfd    $nr]     [-unique_name/-nounique_name]
       [-leave_site_enter_dir/-dont_leave_site_enter_dir]      [-max_time     $nr]     [-del_after/-nodel_after]
       [-singlepage/-nosinglepage]         [-dump_after/-nodump_after]         [-dump_response/-nodump_response]
       [-auth_ntlm_domain   $str]   [-auth_proxy_ntlm_domain   $str]   [-js_pattern   $re]   [-follow_cmd  $str]
       [-retrieve_symlink/-noretrieve_symlink] [-js_transform  $p  $t  $h  $a]  [-js_transform2  $p  $t  $h  $a]
       [-ftp_proxy_user  $str]  [-ftp_proxy_pass  $str]  [-limit_inlines/-dont_limit_inlines] [-ftp_list_options
       $str] [-fix_wuftpd_list/-nofix_wuftpd_list] [-post_update/-nopost_update] [-info_dir $dir] [-mozcache_dir
       $dir]  [-aport  $list]  [-dport   $list]   [-hack_add_index/-nohack_add_index]   [-default_prefix   $str]
       [-rsleep/-norsleep]      [-ftp_login_handshake      $host     $handshake]     [-js_script_file     $file]
       [-dont_touch_url_pattern   $pat]   [-dont_touch_url_rpattern   $pat]   [-dont_touch_tag_rpattern    $pat]
       [-tag_pattern   $tag   $attrib   $url]   [-tag_rpattern   $tag   $attrib   $url]   [-nss_cert_dir   $dir]
       [-nss_accept_unknown_cert/-nonss_accept_unknown_cert]           [-nss_domestic_policy/-nss_export_policy]
       [-[no]verify]   [-tlogfile  $file]  [-trelative  {object  |  program}]  [-transparent_proxy  FQDN[:port]]
       [-transparent_ssl_proxy FQDN[:port]] [-sdemo] [-noencode] [URLs]

       pavuk -mode {normal | singlepage | singlereget} [-base_level $nr]

       pavuk -mode sync [-ddays $nr] [-subdir $dir] [-remove_old/-noremove_old]

       pavuk -mode resumeregets [-subdir $dir]

       pavuk -mode linkupdate [-X] [-h] [-v] [-cdir $dir] [-subdir $dir] [-scndir $dir] [-scenario $str]

       pavuk -mode reminder [-remind_cmd $str]

       pavuk         -mode          mirror          [-subdir          $dir]          [-remove_old/-noremove_old]
       [-remove_before_store/-noremove_before_store] [-always_mdtm/-noalways_mdtm]

DESCRIPTION

       This  manual  page  describes how to use pavuk. Pavuk can be used to mirror contents of internet/intranet
       servers and to maintain copies in a local tree of documents.  Pavuk stores retrieved documents in locally
       mapped disk space. The structure of the local tree is the same as the one  on  the  remote  server.  Each
       supported  service (protocol) has its own subdirectory in the local tree.  Each referenced server has its
       own subdirectory in these protocol subdirectories, named after the host followed by the port number on
       which the service resides, delimited by the _ character (this delimiter can be changed). With the
       option -fnrules you can change the default layout of the local document tree, without losing link
       consistency.
       With pavuk it is possible to have up-to-date copies of remote documents in the local disk space.
       As of version 0.3pl2, pavuk can automatically restart broken connections, and reget partial content  from
       an  FTP server (which must support the REST command), from a properly configured HTTP/1.1 server, or from
       a HTTP/1.0 server which supports Ranges.
       As of version 0.6 it is possible to handle configurations via so  called  scenarios.   The  best  way  to
       create  such  a  configuration  file  is  to  use  the  X  Window  interface  and simply save the created
       configuration. The other way is to use the -dumpscn switch.
       As of version 0.7pl1 it is possible to store authentification information into an  authinfo  file,  which
       pavuk can then parse and use.
       As of version 0.8pl4 pavuk can fetch documents for use in a local proxy/cache server without storing them
       to local documents tree.
       As of version 0.9pl4 pavuk supports SOCKS (4/5) proxies if you have the required libraries.
       As  of  version  0.9pl12  pavuk can preserve permissions of remote files and symbolic links, so it can be
       used for powerful FTP mirroring.
       Pavuk supports SSL connections to FTP servers, if you specify ftps:// URL instead of ftp://.
       Pavuk can automatically handle file names containing characters that are unsafe for the filesystem. This
       is currently implemented only for the Win32 platform and is hard coded.
       Pavuk can now use the HTTP/1.1 protocol for communication with HTTP servers. It can use persistent
       connections, so one TCP connection can be used to transfer several documents without closing it. This
       feature saves network bandwidth and also speeds up network communication.
       Pavuk can do configurable POST requests to HTTP servers and also supports file uploading via HTTP POST
       requests.
       Pavuk can automatically fill in HTML forms it finds, if the user supplies data for their fields
       beforehand with the option -formdata.
       Pavuk can run a configurable number of concurrent downloading threads when compiled with
       multithreading support.

Format of supported URLs

       HTTP
       http://[[user][:password]@]host[:port][/document]
       [[user][:password]@]host[:port][/document]

       HTTPS
       https://[[user][:password]@]host[:port][/document]
       ssl[.domain][:port][/document]

       FTP
       ftp://[[user][:password]@]host[:port][/relative_path][;type=x]
       ftp://[[user][:password]@]host[:port][//absolute_path][;type=x]
       ftp[.domain][:port][/document][;type=x]

       FTPS
       ftps://[[user][:password]@]host[:port][/relative_path][;type=x]
       ftps://[[user][:password]@]host[:port][//absolute_path][;type=x]
       ftps[.domain][:port][/document][;type=x]

       Gopher
       gopher://host[:port][/type[document]]
       gopher[.domain][:port][/type[document]]

Default mapping of URLs to local filenames

       HTTP
       http://[[user][:password]@]host[:port][/document][?query]
       to
       http/host_port/[document][?query]

       HTTPS
       https://[[user][:password]@]host[:port][/document][?query]
       to
       https/host_port/[document][?query]

       FTP
       ftp://[[user][:password]@]host[:port][/path]
       to
       ftp/host_port/[path]

       FTPS
       ftps://[[user][:password]@]host[:port][/path]
       to
       ftps/host_port/[path]

       Gopher
       gopher://host[:port][/type[document]]
       to
       gopher/host_port/[type[document]]

       NOTE: Pavuk will use the string with which it queries the target server as the name of the results  file.
       This  file  name may, in some cases, contain punctuations such as $,?,=,& etc. Such punctuation can cause
       problems when you are trying to browse downloaded files with your browser or you are  trying  to  process
       downloaded files with shell scripts or view files with file management utilities which reference the name
       of  the  results  file.  If you believe that this may be causing problems for you, then you can remove all
       punctuation from the result file name with the option: -tr_del_chr [:punct:] or with  other  options  for
       adjusting filenames.

OPTIONS

        All options are case insensitive.

List of options chapters

       Mode
       Help
       Indicate/Logging/Interface options
       Netli options
       Special start
       Scenario/Task options
       Directory options
       Preserve options
       Proxy options
       Proxy Authentification
       Protocol/Download Options
       Authentification
       Site/Domain/Port Limitation Options
       Limitation Document properties
       Limitation Document name
       Limitation Protocol Option
       Other Limitation Options
       Javascript support
       Cookie
       HTML rewriting engine tuning options
       Filename/URL Conversion Option
       Other Options

Mode

       -mode {normal, linkupdate, sync, singlepage, singlereget, resumeregets, dontstore, reminder, ftpdir, mirror}
              Set operation mode.
              normal - retrieves documents recursively
              linkupdate  -  update remote URLs in local HTML documents to local URLs if these URLs exist in the
              local tree
              sync - synchronize remote documents with local tree (if a local copy of a document is  older  than
              remote, the document is retrieved again, otherwise nothing happens)
              singlepage - URL is retrieved as one page with all inline objects (picture, sound ...). This mode
              is now obsoleted by the -singlepage option.
              resumeregets - pavuk scans the local tree for files that were not retrieved  fully  and  retrieves
              them again (uses partial get if possible)
              singlereget - get URL until it is retrieved in full
              dontstore  -  transfer  page  from  server,  but  don't  store it to the local tree.  This mode is
              suitable for fetching pages that are held in a local proxy/cache server.
              reminder - used to inform the user about changed documents
              ftpdir - used to list the contents of FTP directories

              default operation mode is normal mode.

Help

       -h     Print long verbose help message

       -v     Show version information and the configuration used at compilation time.

Indicate/Logging/Interface options

       -quiet Don't show any messages on the screen.

       -verbose
              Force to show output messages on the screen (default)

       -progress/-noprogress
              Show retrieving progress while running in the terminal (default is progress off)

       -stime/-nostime
              Show start and end time of transfer. (by default this information is not shown)

       -xmaxlog $nr
              Maximum number of log lines in the Log widget. 0 means unlimited.  This option is  available  only
              when compiled with the GTK+ GUI. (default value is 0)

       -logfile $file
              File where all produced messages are stored.

       -unique_log/-nounique_log
              When the log file specified with the option -logfile is already used by another process, try to
              generate a new unique name for the log file. (by default this option is turned off)

       -slogfile $file
              File to store short logs in. This file contains one line of information per processed document.
              This is meant to be used in connection with any sort of script to produce some statistics, for
              validating links on your website, or for generating simple sitemaps. Multiple pavuk processes can
              use this file concurrently, without overwriting each other's entries. Record structure:

              - PID of pavuk process
              - TIME current time
              - COUNTER in the format current/total number of URLs
              - STATUS contains the type of the error: FATAL, ERR,
                WARN or OK
              - ERRCODE is the number code of the error
                (see errcode.h in pavuk sources)
              - URL of the document
              - PARENTURL first parent document of this URL
                (when it doesn't have parent - [none])
              - FILENAME is the name of the local file the
                document is saved under
              - SIZE size of requested document if known
              - DOWNLOAD_TIME time taken to download this
                document, in the format seconds.milliseconds
              - HTTPRESP contains the first line of the HTTP server
                response

       -language $str
              Native language that pavuk should use for communication with its user (works only when there is  a
              message  catalog  for  that  language) GNU gettext support (for message internationalization) must
              also be compiled in. Default language is taken from your NLS environment variables.

       -gui_font $font
              Font used in the GUI interface. To list available X fonts use the xlsfonts command.   This  option
              is available only when compiled with GTK+ GUI support.

Netli options

       -[no]read_css
              Enable or disable fetching objects mentioned in style sheets.

       -[no]verify
              Enable or disable verifying server CERTS in SSL mode.

       -tlogfile $file
              Turn on Netli logging with output to specified file.

       -trelative {object | program}
              Make Netli timings relative to the start of the first object or the program.

       -transparent_proxy FQDN[:port]
              When processing URL, send the original, but send it to the IP address at FQDN

       -transparent_ssl_proxy FQDN[:port]
              When processing HTTPS URL, send the original, but send it to the IP address at FQDN

       -sdemo Output  in  sdemo  compatible  format. This is only used by sdemo. (For now it simply means output
              '-1' rather than '*'  when measurements are invalid.)

       -noencode
              Do not escape characters that are "unsafe" in URLS.

Special start

       -X     Start program with X Window interface (if compiled with  support  for  GTK+).   Pavuk  as  default
              starts without GUI, and behaves as regular commandline tool.

       -runX  When  used  together with the -X option, pavuk starts processing of URLs immediately after the GUI
              window is launched. Without the -X given, this option doesn't have  any  effect.   Only  available
              when compiled with GTK+ support.

       -bg/-nobg
              This option allows pavuk to detach from its terminal and run in background mode. Pavuk will then
              not output any messages to the terminal. If you want to see messages, you have to use the
              -logfile option to specify a file where messages will be written. By default pavuk executes in
              the foreground.

       -check_bg/-nocheck_bg
              Normally, programs sent into the background after being  run  in  foreground  continue  to  output
              messages  to  the  terminal.   If  this  option  is  activated,  pavuk  checks if it is running as
              background job and will not write any messages to the terminal in this case. After  it  becomes  a
              foreground  job  again, it will start writing messages to terminal in the normal way.  This option
              is available only when your system supports retrieving of terminal info via tc*() functions.

       -prefs/-noprefs
              When you turn this option on, pavuk will preserve all settings when exiting, and when you run
              pavuk with the GUI interface again, all settings will be restored. The settings will be stored in
              the ~/.pavuk_prefs file. By default pavuk won't restore its options when started. This option is
              available only when compiled with GTK+.

       -schedule $time
              Execute  pavuk  at  the  time  specified  as  parameter.  The  Format  of  the  $time parameter is
              YYYY.MM.DD.hh.mm.  You need a properly configured scheduling with the at command  on  your  system
              for  using  this  option.   If  default configuration (at -f %f %t %d.%m.%Y) of scheduling command
              won't work on your system, try to adjust it with -sched_cmd option.

       -reschedule $nr
              Execute pavuk periodically with $nr hours period.  You need properly  configured  scheduling  with
              the at command on your system for using this option.

       -sched_cmd $str
              Command to use for scheduling. Pavuk explicitly supports scheduling with at. $str should contain
              regular characters and macros, escaped by the % character. Supported macros are:
                 %f
                  - for script filename
                 %t
                  - for time (in format HH:MM)
                  - all macros as supported by the strftime() function

       -urls_file $file
              If you use this option, pavuk will read URLs from $file before  it  starts  processing.   In  this
              file, each URL needs to be on a separate line. After the last URL, a single dot . followed by a LF
              (line-feed) character denotes the end.  Pavuk will start processing right after all URLs have been
              read.  If $file is given as the - character, standard input will be read.

       -store_info/-nostore_info
              This  option  causes  pavuk  to  store information about each document into a separate file in the
              .pavuk_info directory. This file is used to store the original URL from  which  the  document  was
              downloaded.  For  files  that  are downloaded via HTTP or HTTPS protocols, the whole HTTP response
              header is stored there. I recommend to use this option when you are using options that change  the
              default  layout  of  the  local document tree, because this info file helps pavuk to map the local
              filename to the URL. This option is also very useful when different URLs have the same filename in
              the local tree. When this occurs, pavuk detects this using info files,  and  it  will  prefix  the
              local name with numbers. By default, storing of this extra information is disabled.

       -info_dir $dir
              With this option you can set the location of a separate directory for storing the info files
              created when the -store_info option is used. This is useful when you don't want to mix info files
              with regular document files in the destination directory. The structure of the info files is
              preserved; they are just stored in a different directory.

       -request $req
              With this option you can specify extended information for starting URLs. With this option you
              can specify query data for POST or GET. The current syntax of this option is: URL:["]$url["]
              [METHOD:["]{GET|POST}["]] [ENCODING:["]{u|m}["]] [FIELD:["]variable=value["]]
              [FILE:["]variable=filename["]] [LNAME:["]local_filename["]]

              - URL: specifies request URL
              - METHOD: specifies request method for URL and is
                one of GET or POST.
              - ENCODING: specifies encoding for request body data.
                  m is for multipart/form-data encoding
                  u is for application/x-www-form-urlencoded
                  encoding
              - FIELD: specifies field of request data in format
                  variable=value. For encoding of special characters
                  in variable and value you can use same encoding
                  as is used in application/x-www-form-urlencoded
                  encoding.
              - FILE: specifies special field of query, which is
                  used to specify file for POST based file upload.
              - LNAME: specifies localname for this request
       When you need to use special characters inside the FIELD: and FILE: fields of the request specification,
       you should use the application/x-www-form-urlencoded encoding of characters. This means that all
       non-ASCII characters, the quote character ("), space character ( ), ampersand character (&), percent
       character (%) and equal character (=) should be encoded in the form %xx, where xx is the hexadecimal
       representation of the ASCII value of the character. So, for example, the % character should be encoded
       as %25.

       -formdata $req
              This option gives you chance to specify contents for HTML forms found during  traversing  document
              tree.
               Current  syntax  of  this  option  is  same as for -request option, but ENCODING: and METHOD: are
              meaningless in this option semantics.
               In URL: you have to specify HTML form action URL, which will be matched against action URLs found
              in processed HTML documents. If pavuk finds action URL which matches that  supplied  in  -formdata
              option,  pavuk  will  construct  GET  or  POST  request from data supplied in this option and from
              default form field values supplied in the HTML document. Values supplied on the command line take
              precedence over those supplied in the HTML file.

       -nthreads $nr
              By means of this option you can specify how many concurrent threads will download documents.
              By default pavuk executes 3 concurrent downloading threads. This option is available only when
              pavuk is compiled to support multithreading.

       -immesg/-noimmesg
              Pavuk's default behavior when running multiple downloading threads is to buffer all output
              messages in a memory buffer and flush the buffered data only when a thread finishes processing
              one document. With this option you can change this behavior to see the messages immediately when
              they are produced. It is only useful when you want to debug some special cases in a
              multithreading environment. This option is available only when pavuk is compiled to support
              multithreading.

       -dumpfd $nr
              For scripting it is sometimes useful to be able to download a document directly to a pipe or
              variable instead of storing it to a regular file. In such a case you can use this option to dump
              data, for example, to stdout ($nr = 1).

       -dump_after/-nodump_after
              While using the -dumpfd option in multithreaded pavuk, it is necessary to dump each document all
              at once, because documents downloaded in multiple threads can overlap. This option is also useful
              when you want to dump a document after pavuk adjusts links inside HTML documents.

       -dump_response/-nodump_response
              This option has effect only when used with the -dumpfd option. It is used to dump HTTP response
              headers.

       -dump_urlfd $nr
              When you use this option, pavuk will output all URLs found in HTML documents to the file
              descriptor $nr. You can use this option to extract and convert all URLs to absolute.

Scenario/Task options

       -scenario $str
              Name of scenario to load and/or run. Scenarios are files with a structure similar to the  .pavukrc
              file.  Scenarios contain saved configurations. You can use it for periodical mirroring. Parameters
              from scenarios specified at the command line can be overwritten by command line parameters.  To be
              able to use this option, you need to specify scenario base directory with option -scndir.

       -dumpscn $filename
              Store  actual  configuration  into  scenario  file with name $filename.  This is useful to quickly
              create pre-configured scenarios for manual editing.

Directory options

       -msgcat $dir
              Directory which contains the message catalog for pavuk.  If you do not have permission to store  a
              pavuk  message  catalog  in  the  system  directory, you should simply create similar structure of
              directories in your home directory as it is on your system.

              For example:

              Your native language is German, and your home directory is /home/jano.

              You should at first create the directory /home/jano/locales/de/LC_MESSAGES/, then put  the  German
              pavuk.mo  there  and  set  -msgcat  to  /home/jano/locales/.   If  you  have  properly  set locale
              environment values, you will see pavuk speaking German.  This option is available  only  when  you
              compiled in support for GNU gettext messages internationalization.

       -cdir $dir
              Directory where all retrieved documents are stored. If not specified, the current directory is
              used. If the specified directory doesn't exist, it will be created.

       -scndir $dir
              Directory  in  which  your scenarios are stored.  You must use this option when you are loading or
              storing scenario files.

Preserve options

       -preserve_time/-nopreserve_time
              Store downloaded document with same modification time as on the  remote  site.  Modification  time
              will  be  set  only  when  such information is available (some FTP servers do not support the MDTM
              command, and some documents on HTTP servers  are  created  online  so  pavuk  can't  retrieve  the
              modification time of this document).  By default modification time of documents isn't preserved.

       -preserve_perm/-nopreserve_perm
              Store downloaded document with the same permissions as on the remote site.  This option has effect
              only when downloading a file through FTP protocol and assumes that the -ftplist option is used. By
              default permissions are not preserved.

       -preserve_slinks/-nopreserve_slinks
              Set  symbolic  links  to  point  exactly  to  same  location as on the remote server; don't do any
              relocations.  This option has effect only when downloading file through FTP protocol  and  assumes
              that the -ftplist option is used. By default symbolic links are not preserved, and are retrieved
              as regular documents with the full contents of the linked file.

              For example, assume that on the FTP server ftp.xx.org there is a symbolic  link  /pub/pavuk/pavuk-
              current.tgz,  which  points  to  /tmp/pub/pavuk-0.9pl11.tgz.   Pavuk  will  create  symbolic  link
              ftp/ftp.xx.org_21/pub/pavuk/pavuk-current.tgz
              If the option -preserve_slinks is used, this symbolic link will point to
              /tmp/pub/pavuk-0.9pl11.tgz
              If the option -preserve_slinks is not used, this symbolic link will point to
               ../../tmp/pub/pavuk-0.9pl11.tgz

       -retrieve_symlink/-noretrieve_symlink
              Retrieve files behind symbolic links instead of replicating symlinks in local tree.

Proxy options

       -http_proxy $site[:$port]
              If  this  parameter  is  used, then all HTTP requests are going through this proxy server. This is
              useful if your site resides behind a firewall, or if you want to use a HTTP  proxy  cache  server.
              The  default  port  number  is  8080.   Pavuk  allows  you to specify multiple HTTP proxies (using
              multiple -http_proxy options) and it  will  rotate  proxies  with  roundrobin  priority  disabling
              proxies with errors.

       -nocache/-cache
              Use  this  option  whenever  you want to get the document directly from the site and not from your
              HTTP proxy cache server. By default pavuk allows transfer of document copies from cache.

       -ftp_proxy $site[:$port]
              If this parameter is used, then all FTP requests are going through this  proxy  server.   This  is
              useful  when  your  site  resides behind a firewall, or if you want to use FTP proxy cache server.
              The default port number is 22.  Pavuk supports three different types of proxies for FTP,  see  the
              options  -ftp_httpgw,  -ftp_dirtyproxy.   If  none  of  the  mentioned options is used, then pavuk
              assumes a regular FTP proxy with USER user@host connecting to remote FTP server.

       -ftp_httpgw/-noftp_httpgw
              The specified FTP proxy is a HTTP gateway for the FTP protocol. By default the FTP proxy is a
              regular FTP proxy.

       -ftp_dirtyproxy/-noftp_dirtyproxy
              The specified FTP proxy is a HTTP proxy which supports a CONNECT request (pavuk should use the
              full FTP protocol, except for active data connections). By default the FTP proxy is a regular FTP
              proxy. If both -ftp_dirtyproxy and -ftp_httpgw are specified, -ftp_dirtyproxy is preferred.

       -gopher_proxy $site[:$port]
              Gopher gateway or proxy/cache server.

       -gopher_httpgw/-nogopher_httpgw
              The  specified  Gopher  proxy server is a HTTP gateway for Gopher protocol.  When -gopher_proxy is
              set and this -gopher_httpgw option isn't used, pavuk is using proxy as HTTP  tunnel  with  CONNECT
              request to open connections to Gopher servers.

       -ssl_proxy $site[:$port]
              SSL  proxy  (tunneling)  server  [as  that in CERN httpd + patch or in Squid] with enabled CONNECT
              request (at least on port 443). This option is available only when compiled with SSL support  (you
              need the SSLeay or OpenSSL libraries with development headers).

Proxy Authentication

       -http_proxy_user $user
              Username for HTTP proxy authentication.

       -http_proxy_pass $pass
              Password for HTTP proxy authentication.

       -http_proxy_auth {1/2/3/4/user/Basic/Digest/NTLM}
              Authentication scheme for proxy access. Similar meaning as the -auth_scheme option (see help for
              this option for more details).  Default is 2 (Basic scheme).

       -auth_proxy_ntlm_domain $str
              NT or LM domain used for authorization against HTTP proxy server when NTLM authentication scheme
              is required. This option is available only when compiled with OpenSSL or libdes libraries.

       -auth_reuse_proxy_nonce/-noauth_reuse_proxy_nonce
              When using HTTP Proxy Digest access authentication scheme use first received nonce value in
              multiple following requests.

       -ftp_proxy_user $user
              Username for FTP proxy authentication.

       -ftp_proxy_pass $pass
              Password for FTP proxy authentication.

Protocol/Download Options

       -ftp_passive
              Uses passive ftp when downloading via ftp.

       -ftp_active
              Uses active ftp when downloading via ftp.

       -active_ftp_port_range $min:$max
              This  option  permits  to  specify  the  ports  used  for active ftp. This permits easier firewall
              configuration since the range of ports can be restricted.

              Pavuk will randomly choose a number from within the specified range until an open port  is  found.
              Should  no  open  ports  be  found  within the given range, pavuk will default to a normal kernel-
              assigned port, and a message (debug level net) is output.

              The port range selected must be in the non-privileged range (i.e. greater than or equal to 1024);
              it  is  STRONGLY  RECOMMENDED  that  the  chosen range be large enough to handle many simultaneous
              active connections (for example, 49152-65534, the IANA-registered ephemeral port range).

       -always_mdtm/-noalways_mdtm
              Force pavuk to always use "MDTM" to determine the file modification time and never use cached
              times determined when listing the remote files.

       -remove_before_store/-noremove_before_store
              Force  unlink'ing  of  files  before new content is stored to a file. This is helpful if the local
              files are hardlinked to some other directory and after mirroring the hardlinks  are  checked.  All
              "broken" hardlinks indicate a file update.

       -retry $nr
              Set the number of attempts to transfer processed document.  Default is 1, which means pavuk will
              retry once to get documents which failed on first attempt.

       -nregets $nr
              Set the number of allowed regets on a single document, after a broken transfer.  Default value for
              this option is 2.

       -nredirs $nr
              Set the number of allowed HTTP redirects (use this to prevent redirect loops).  Default value for
              this option is 5, which conforms to the HTTP specification.

       -force_reget/-noforce_reget
              Force reget'ing of the whole document after a broken transfer  when  the  server  doesn't  support
              retrieving  of  partial  content.  Pavuk default behavior is to stop getting documents which don't
              allow restarting of transfer from specified position.

       -timeout $nr
              Timeout for stalled connections in minutes. This value is also used for connection  timeouts.  For
              sub-minute timeouts you can use floating point numbers.  Default timeout is 0, and that means
              timeout checking is disabled.

       -noRobots/-Robots
              This switch suppresses the use of the robots.txt standard, which is used to restrict access of Web
              robots to some locations on the web server. Default is allowed checking  of  robots.txt  files  on
              HTTP  servers.  Enable  this  option  always  when  you  are  downloading  huge sets of pages with
              unpredictable layout.  This prevents you from upsetting server administrators :-).

       -noEnc/-Enc
              This switch suppresses using of gzip or compress or deflate encoding in transfer. I don't know  if
              some  servers  are  broken  or  what,  but they are propagating that MIME type application/gzip or
              application/compress as encoded. Turn this option off when you don't have libz support compiled
              in and also lack the gzip program which is used to decode documents encoded this way.  By default
              decoding of downloaded documents is disabled.

       -check_size/-nocheck_size
              The  option  -nocheck_size  should  be used if you are trying to download pages from a HTTP server
              which sends a wrong Content-Length: field in the MIME header of response.  Default pavuk  behavior
              is to check this field and complain when something is wrong.

       -maxrate $nr
              If  you  don't  want  to give all your transfer bandwidth to pavuk, use this option to set pavuk's
              maximum transfer rate. This option accepts a floating point number to specify the transfer rate in
              kB/s. If you want to get optimal settings, you also have to play with the size of the read buffer
              (option -bufsize) because pavuk is doing flow control only at application level.  By default pavuk
              uses full bandwidth.

       -minrate $nr
              If  you  hate  slow transfer rates, this option allows you to break transfers with slow speed. You
              can set the minimum transfer rate, and if the connection gets slower  than  the  given  rate,  the
              transfer  will  be  stopped. The minimum transfer rate is given in kB/s.  At default pavuk doesn't
              check this limit.

       -bufsize $nr
              This option is used to specify the size of the read buffer (default size: 32kB).  If  you  have  a
              very fast connection, you may increase the size of the buffer to get a better read performance. If
              you  need  to  decrease the transfer rate, you may need to decrease the size of the buffer and set
              the maximum transfer rate with the -maxrate option. This option accepts the size of the buffer  in
              kB.

       -fs_quota $nr
              If you are running pavuk on a multiuser system, you may need to avoid filling up your file system.
              This option lets you specify how much space must remain free. If pavuk detects an underrun of the
              free space, it will stop downloading files. Specify this quota in kB. Default value is 0, and that
              means no checking of this quota.

       -file_quota $nr
              This option is useful when you want to limit downloading of big files, but  want  to  download  at
              least  $nr  kilobytes  from  big  files.   A big file will be transferred, and when it reaches the
              specified size, transfer will break. Such document will be processed as properly downloaded, so be
              careful when using this option.  At default pavuk is transferring full size of documents.

       -trans_quota $nr
              If you are aware that your selection should address a big amount of data, you can use this  option
              to limit the amount of transferred data.  By default the amount of transferred data is
              unlimited.

       -max_time $nr
              Set  maximum  amount of time for program run. After time is exceeded, pavuk will stop downloading.
              Time is specified in minutes. Default value is 0, and it means downloading time is not limited.

       -url_strategy $strategy
              This option allows you to specify a downloading order for URLs  in  document  tree.   This  option
              accepts the following strings as parameters :

              level - will order URLs as it loads it from HTML files (default)
              leveli - as previous, but inline objects URLs come first
              pre - will insert URLs from actual HTML document at start, before other
              prei - as previous, but inline objects URLs come first

       -send_if_range/-nosend_if_range
              Send If-Range: header in HTTP request. I found out, that some HTTP servers (greetings, MS :-)) are
              sending  different  ETag:  fields  in  different  responses for the same, unchanged document. This
              causes problems when pavuk attempts to reget a document from such a server:  pavuk  will  remember
              the old ETag value and uses it in following requests for this document.  If the server checks it
              with the new ETag value and it differs, it will refuse to send only  part  of  the  document,  and
              start the download from scratch.

       -ssl_version $v
              Set  required SSL protocol version for SSL communication.  $v is one of ssl2, ssl23, ssl3 or tls1.
              This option is available only when compiled with SSL support.  Default is ssl23.

       -unique_sslid/-nounique_sslid
              This option can be used if you want to use a unique SSL ID for all  SSL  sessions.  Default  pavuk
              behavior  is  to negotiate each time new session ID for each connection.  This option is available
              only when compiled with SSL support.

       -use_http11/-nouse_http11
              This option is used to switch between HTTP/1.0 and HTTP/1.1 protocol used with HTTP servers.
              Currently HTTP/1.1 is not the default because its implementation is very new and not 100%
              tested. Even so, using HTTP/1.1 is highly recommended, because it is faster than HTTP/1.0 and
              uses less network bandwidth for initiating connections. In a future version I will make
              HTTP/1.1 the default.

       -local_ip $addr
              You can use this option when you want to use specified network interface  for  communication  with
              other hosts. This option is suitable for multihomed hosts with several network interfaces. Address
              should be entered as regular IP address or as host name.

       -identity $str
              This option allows you to specify content of User-Agent: field of HTTP request.  This is useful
              when scripts on the remote server return different documents for the same URL depending on the
              browser, or if some HTTP server refuses to serve documents to Web robots like pavuk. By default
              pavuk sends the string pavuk/$VERSION in the User-Agent: field.

       -auto_referer/-noauto_referer
              This  option  forces pavuk to send HTTP Referer: header field with starting URLs.  Content of this
              field will be self URL. Using this option is required, when  remote  server  checks  the  Referer:
              field.  At default pavuk won't send Referer: field with starting URLs.

       -referer/-noreferer
              This  option  allows  to  enable  and  disable  the transmission of HTTP Referer: header field. At
              default pavuk sends Referer: field.

       -httpad $str
              In some cases you may want to add user defined fields to  HTTP/HTTPS  requests.   This  option  is
              exactly  for  this  purpose. In $str you can directly specify content of additional header. If you
              specify only raw header, it will be used only for starting requests. When you  want  to  use  this
              header with each request while crawling, prefix the header with + character.

       -del_after/-nodel_after
              This  option allows you to delete FILES from REMOTE server, when download is properly finished. At
              default is this option off.

       -FTPlist/-noFTPlist
              When option -FTPlist will be used, pavuk will retrieve content of FTP directories with FTP command
              LIST instead of NLST. So the same listing will be retrieved as with "ls -l"  UNIX  command.   This
              option  is  required  if  you need to preserve permissions of remote files or you need to preserve
              symbolic links.  Pavuk supports wide listing on FTP servers with regular BSD or SYSV style "ls -l"
              directory listing, on FTP servers with EPFL listing format, VMS style listing,  DOS/Windows  style
              listing and Novell listing format.  Default pavuk behavior is to use NLST for FTP directory
              listings.

       -ftp_list_options $str
              Some FTP servers require to supply extra options to LIST or NLST FTP commands to  show  all  files
              and  directories  properly.  But be sure not to use any extra options which can reformat output of
              the listing. Especially useful is the -a option, which forces FTP server to show also dot files and
              directories  and  with  broken  WuFTP servers it also helps to produce full directory listings not
              just files.

       -fix_wuftpd/-nofix_wuftpd
              This option is the result of several attempts to get the -remove_old option working properly with
              WuFTPd server when the -FTPlist option is used. The problem is that FTP command LIST on WuFTPd
              doesn't mind when trying to list a nonexistent directory, and indicates success in FTP response
              code.  When you activate this option, pavuk uses extra FTP command (STAT -d dir) to check whether
              the directory really exists. Don't use this option until you are sure that you really need it!

Authentication

       -auth_file $file
              File where you have stored authentication information for access to some service.  For file
              structure see below in FILES section.

       -auth_name $user
              If you are using this parameter, program is doing authentication with each HTTP access to
              document. Use this only if you know that only one HTTP server could be accessed or use -asite
              option to specify site to which you use authentication. Else your auth parameters will be sent
              to each accessed HTTP server.

       -auth_passwd $passwd
              Value of this parameter is used as password for authentication.

       -auth_scheme {1/2/3/4/user/Basic/Digest/NTLM}
              This parameter specifies used authentication scheme.
              1 or user means user authentication scheme is used as defined in HTTP/1.0 or HTTP/1.1.  Password
              and user name are sent unencoded.
              2 or Basic means Basic authentication scheme is used as defined in HTTP/1.0.  Password and user
              name are sent BASE64 encoded.
              3 or Digest means Digest access authentication scheme based on MD5 checksums as defined in
              RFC2069.
              4 or NTLM means NTLM proprietary access authentication scheme used by Microsoft IIS or Proxy
              servers.   When  you  use  this  scheme,  you  must  also  specify  NT  or  LM  domain with option
              -auth_ntlm_domain. This scheme is supported only when compiled with OpenSSL or libdes libraries.

       -auth_ntlm_domain $str
              NT or LM domain used for authorization against HTTP server when NTLM authentication scheme is
              required. This option is available only when compiled with OpenSSL or libdes libraries.

       -auth_reuse_nonce/-noauth_reuse_nonce
              While using HTTP Digest access authentication scheme use first received nonce value in multiple
              following requests.  Default pavuk negotiates nonce for each request.

       -ssl_key_file $file
              File with public key for SSL certificate (learn more from SSLeay or  OpenSSL  documentation)  This
              option  is available only when compiled with SSL support (you need SSLeay or OpenSSL libraries and
              development headers)

       -ssl_cert_file $file
              Certificate file in PEM format (learn more from SSLeay or OpenSSL documentation)  This  option  is
              available  only  when  compiled  with  SSL  support  (you  need  SSLeay  or  OpenSSL libraries and
              development headers)

       -ssl_cert_passwd $str
              Password used to generate certificate (learn more  from  SSLeay  or  OpenSSL  documentation)  This
              option  is available only when compiled with SSL support (you need SSLeay or OpenSSL libraries and
              development headers)

       -nss_cert_dir $dir
              Config directory for NSS (Netscape SSL implementation) certificates. Usually ~/.netscape  (created
              by  Netscape  communicator/navigator)  or  profile  directory below ~/.mozilla (created by Mozilla
              browser). The directory should contain cert7.db and key3.db files. If you don't  use  Mozilla  nor
              Netscape,  you  must  create  these files by utilities distributed with NSS libraries. Pavuk opens
              certificate database only readonly.  This option is available only when pavuk is compiled with SSL
              support provided by Netscape NSS SSL implementation.

       -nss_accept_unknown_cert/-nonss_accept_unknown_cert
              By default pavuk will reject connection to SSL server whose certificate is not stored in local
              certificate  database  (set  by  -nss_cert_dir  option).  You must explicitly force pavuk to allow
              connection to servers with unknown certificates.  This option is  available  only  when  pavuk  is
              compiled with SSL support provided by Netscape NSS SSL implementation.

       -nss_domestic_policy/-nss_export_policy
              Selects  sets of ciphers allowed/disabled by USA export rules.  This option is available only when
              pavuk is compiled with SSL support provided by Netscape NSS SSL implementation.

       -from $email
              This parameter is used when accessing anonymous FTP server as password or is  optionally  inserted
              into  From  field  in  HTTP  request.  If not specified pavuk discovers this from USER environment
              variable and from site hostname.

       -send_from/-nosend_from
              This option is used for enabling or disabling sending of user  identification,  entered  in  -from
              option, as FTP anonymous user password and From: field of HTTP request.  As default is this option
              off.

       -ftp_login_handshake $host $handshake
              When  you need to use nonstandard login procedure for some of FTP servers, you can use this option
              to change default pavuk login procedure. To allow more  flexibility,  you  can  assign  the  login
              procedure  to  some  server or to all. When $host is specified as empty string (""), then attached
              login procedure is assigned to all FTP servers besides those having assigned own login procedures.
              In the $handshake parameter you can specify  exact  login  procedure  specified  by  FTP  commands
              followed by expected FTP response codes delimited with backslash (\) characters.
              For  example  this  is  default  login  procedure when logging in regular ftp server without going
              through proxy server : USER %u\331\PASS %p\230. There are two commands followed  by  two  response
              codes. After USER command pavuk expects FTP response code 331 and after PASS command pavuk expects
              from  server  FTP  response  code  230. In ftp commands you can use following macros which will be
              replaced by respective values:

               %u - user name used to access FTP server
               %p - password used to access FTP server
               %U - user name used to access FTP proxy server
               %P - password used to access FTP proxy server
               %h - hostname of FTP server
               %s - port number on which FTP server listens

Site/Domain/Port Limitation Options

       -asite $list
              Specify comma separated list of allowed sites on which referenced documents are stored.

       -dsite $list
              Specify comma separated list of disallowed sites.  Previous parameter is opposite to this one.  If
              both are used the last occurrence of them is used to be valid.

       -adomain $list
              Specify comma separated list of allowed domains on which referenced documents are stored.

       -ddomain $list
              Specify comma separated list of disallowed domains. Previous parameter is opposite to this one. If
              both are used the last occurrence of them is used to be valid.

       -aport $list
              In $list, you can write comma separated list of ports from which you allow to download documents.

       -dport $list
              This  option  is  opposite  option to previous option. It is used to specify denied ports. If both
              -aport and -dport options are used the last occurrence of them is used to be valid and  all  other
              occurrences will be omitted.

Limitation Document properties

       -amimet $list
              List of comma separated allowed MIME types. You can use with this option also wildcard patterns.

       -dmimet $list
              List  of  comma  separated  disallowed  MIME  types.  You  can  use with this option also wildcard
              patterns.  Previous parameter is opposite to this one. If both are used  the  last  occurrence  of
              them is used to be valid.

       -maxsize $nr
              Maximum allowed size of document.  This option is applied only when pavuk is able to detect the
              size of the document before starting the transfer.  Default value is 0, and it means this limit
              isn't applied.

       -minsize $nr
              Minimal allowed size of document.  This option is applied only when pavuk is able to detect the
              size of the document before starting the transfer.  Default value is 0, and it means this limit
              isn't applied.

       -newer_than $time
              Allow  only  transfer of documents with modification time newer than specified in parameter $time.
              Format of $time is: YYYY.MM.DD.hh:mm.   To  apply  this  option  pavuk  must  be  able  to  detect
              modification time of document.

       -older_than $time
              Allow  only  transfer of documents with modification time older than specified in parameter $time.
              Format of $time is: YYYY.MM.DD.hh:mm.   To  apply  this  option  pavuk  must  be  able  to  detect
              modification time of document.

       -noCGI/-CGI
              This switch prevents the transfer of dynamically generated parametric documents through the CGI
              interface.  This is detected with occurrence of ? character inside URL.  Default pavuk behavior
              is to allow transfer of URLs with query strings.

       -alang $list
              This allows you to specify ordered comma separated list of preferred natural languages.  This
              option works only with HTTP and HTTPS protocol using Accept-Language: MIME field.

       -acharset $list
              This option allows you to enter comma separated list of preferred encoding of transferred
              documents. This works only with HTTP and HTTPS URLs and only if such document encodings are
              located on destination server.
              example: -acharset iso-8859-2,windows-1250,utf8

Limitation Document name

       -asfx $list
              This parameter allows you to specify set of suffixes used to restrict selection of documents which
              will be processed.

       -dsfx $list
              Set  of  suffixes  that  are  used  to specify restriction on selection of documents.  This one is
              inverse to previous option. The two options are mutually exclusive.

       -aprefix $list, -dprefix $list
              These two options allow you to specify set of allowed or disallowed prefixes of documents. They
              are mutually exclusive.

       -pattern $pattern
              This option allows you to specify wildcard pattern for documents. All documents are tested if they
              match this pattern.

       -rpattern $reg_exp
              This is equal option as previous, but this uses regular expressions.  Available only on  platforms
              which have any supported RE implementation.

       -skip_pattern $pattern
              This  option  allows  you  to  specify wildcard pattern for documents that should be skipped.  All
              documents are tested if they match this pattern.

       -skip_rpattern $reg_exp
              This is equal option as previous, but this uses regular expressions.  Available only on  platforms
              which have any supported RE implementation.

       -url_pattern $pattern
              This  option  allows  you  to specify wildcard pattern for URLs. All URLs are tested if they match
              this pattern.
              Example:
              -url_pattern http://\*.idata.sk:\*/~ondrej/\* . This option enables  all  HTTP  URLs  from  domain
              .idata.sk on all ports which are located under /~ondrej/.

       -url_rpattern $reg_exp
              This  is equal option as previous, but this uses regular expressions.  Available only on platforms
              which have any supported RE implementation.

       -skip_url_pattern $pattern
              This option allows you to specify wildcard pattern for URLs that should be skipped.  All URLs  are
              tested if they match this pattern.

       -skip_url_rpattern $reg_exp
              This  is equal option as previous, but this uses regular expressions.  Available only on platforms
              which have any supported RE implementation.

       -aip_pattern $re
              This option allows you to limit set of transferred documents by server IP address.  IP address can
              be specified as regular expressions, so it is possible to specify  set  of  IP  addresses  by  one
              expression.  Available only on platforms which have any supported RE implementation.

       -dip_pattern $re
              This option is similar to the previous option, but is used to specify set of disallowed IP
              addresses.  Available only on platforms which have any supported RE implementation.

       -tag_pattern $tag $attrib $url
              More powerful version of -url_pattern option for more precise matching of allowed  URLs  based  on
              HTML  tag  name  pattern,  HTML  tag attribute name pattern and on URL pattern. You can use in all
              three parameters of this option wildcard  patterns,  thus  something  like  -tag_pattern  '*'  '*'
              url_pattern  is  equal  to  -url_pattern  url_pattern.  The $tag and $attrib parameters are always
              matched against uppercase strings. For example if you want pavuk to follow only regular links,
              ignoring any stylesheets, images, etc., use option -tag_pattern A HREF '*'.

       -tag_rpattern $tag $attrib $url
              This  is  variation on the -tag_pattern. It uses regular expression patterns in parameters instead
              of wildcard patterns used in the previous option.

Limitation Protocol Option

       -noHTTP/-HTTP
              This switch suppresses all transfers through HTTP protocol.  Default is transfer through HTTP
              enabled.

       -noSSL/-SSL
              This switch suppresses all transfers through HTTPS protocol (HTTP protocol over SSL).  Default is
              transfer through HTTPS enabled.  This option is available only when compiled with SSL support (you
              need SSLeay or OpenSSL libraries and development headers).

       -noGopher/-Gopher
              Suppress all transfers through Gopher Internet protocol.  Default is transfer through Gopher
              enabled.

       -noFTP/-FTP
              This switch prevents processing documents located on all FTP servers.  Default is transfer
              through FTP enabled.

       -noFTPS/-FTPS
              This switch prevents processing documents located on all FTP servers accessed through SSL.
              Default is transfer through FTPS enabled.  This option is available only when compiled with SSL
              support (you need SSLeay or OpenSSL libraries and development headers).

       -FTPhtml/-noFTPhtml
              By using of option -FTPhtml you can  force  pavuk  to  process  HTML  files  downloaded  with  FTP
              protocol.  At default pavuk won't parse HTML files from FTP servers.

       -FTPdir/-noFTPdir
              Force  recursive  processing of FTP directories too.  At default is recursive downloading from FTP
              servers denied.

       -disable_html_tag $TAG,[$ATTRIB][;...]
              -enable_html_tag $TAG,[$ATTRIB][;...]  Enable or disable processing of  particular  HTML  tags  or
              attributes.  At default all supported HTML tags are enabled.

              For  example  if  you  don't  want  to  process all images you should use option -disable_html_tag
              'IMG,SRC;INPUT,SRC;BODY,BACKGROUND' .

Other Limitation Options

       -subdir $dir
              Subdirectory of local tree directory, to limit some of the modes {sync, resumeregets,  linkupdate}
              in its tree scan.

       -dont_leave_site/-leave_site
              (Don't) leave starting site. At default pavuk can span host when recursing through WWW tree.

       -dont_leave_dir/-leave_dir
              (Don't)  leave  starting  directory.  If  -dont_leave_dir  option  is used pavuk will stay only in
              starting directory (including its own  subdirectories).   At  default  pavuk  can  leave  starting
              directories.

       -leave_site_enter_dir/-dont_leave_site_enter_dir
              If  you are downloading WWW tree which spans multiple hosts with huge trees, you may want to allow
              downloading of documents which are in directory hierarchy below directory which we visited as first
              on each site. To obtain this, use option -dont_leave_site_enter_dir. As default pavuk will go also
              to higher directory levels on that site.

       -lmax $nr
              Set maximum allowed level of tree traverse. Default is set to 0, which means that pavuk can
              traverse ad infinitum.  As of version 0.8pl1 inline objects of HTML pages are placed at same level
              as parent HTML page.

       -leave_level $nr
              Maximum  level  of  documents outside from site of starting URL.  Default is set to 0, and 0 means
              that checking is not applied.

       -site_level $nr
              Maximum level of sites outside from site of starting URL.  Default is set to 0, and 0  means  that
              checking is not applied.

       -dmax $nr
              Set  maximum  allowed  number of documents that are processed.  Default value is 0.  That means no
              restrictions are used in number of processed documents.

       -singlepage/-nosinglepage
              Using option -singlepage allows you to transfer just HTML  pages  with  all  its  inlined  objects
              (pictures,  sounds,  frame  documents,  ...).   As  default is disabled single page transfer. This
              option makes -mode singlepage option obsolete.

       -limit_inlines/-dont_limit_inlines
              With this option you can control whether limiting options apply also to inline objects  (pictures,
              sounds, ...). This is useful when you want to download specified set of HTML pages with all inline
              objects without any restrictions.

       -user_condition $str
              Script  or program name for users own conditions.  You can write any script which should with exit
              value decide if download URL or not.  Script gets from pavuk any  number  of  options,  with  this
              meaning :

                 -url $url - processed URL
                 -parent $url - any number of parent URLs
                 -level $nr - level of this URL from starting URL
                 -size $nr - size of requested URL
                 -date $datenr - modification time of requested URL in format YYYYMMDDhhmmss

              The  exit status 0 of script or program means that current URL should be rejected and nonzero exit
              status means that URL should be accepted.
              Warning : use user conditions only if required because of big slowdowns caused by forking  scripts
              for each checked URL.

       -follow_cmd $str
              This option allows you to specify script or program which can by its exit status decide whether to
              follow  URLs  from  current  HTML document. This script will be called after download of each HTML
              document.  The script will get following options as its parameters:

                 -url $url - URL of current HTML document
                 -infile $file - local file where is stored HTML document

              The exit status 0 of script or program means that URLs from current document will  be  disallowed,
              other exit status means, that pavuk can follow links from current HTML document.

Javascript support

       Support  for  scripting languages like JavaScript or VBScript in pavuk is done in a bit hacky way. There is no
       interpreter for these languages, so not all things will work. The whole support  which  pavuk  has  for  these
       scripting  languages  is  based  on  regular expression patterns specified by user. Pavuk searches for these
       patterns in DOM event attributes of HTML  tags,  in  javascript:...  URLs,  in  inline  scripts  in  HTML
       documents  enclosed  between  <script></script>  tags  and  in  separate  javascript  files.  Support for
       scripting languages is only available when pavuk is  compiled  with  proper  regular  expression  library
       (POSIX/GNU/PCRE).

       -enable_js/-disable_js
              These  options  are used to enable or disable processing of Javascript parts of HTML documents. You
              must enable this option to be able to use processing of javascript patterns.

       -js_pattern $re
              With this option you are specifying  what  patterns  match  interested  parts  of  Javascript  for
              extracting  URLs. The parameter must be RE pattern with exactly one subpattern which match exactly
              the URL part. For example to match URL in following type of javascript expressions :
                document.b1.src='pics/button1_pre.jpg'
              you can use this pattern
                "^document.[a-zA-Z0-9_]*.src[ ]*=[ ]*'(.*)'$"

       -js_transform $p $t $h $a
              This option is similar to previous, but you can use custom transform rules for the  URL  parts  of
              patterns  and also specify the exact HTML tag and attribute where to look for this pattern. The $p
              is the pattern to match the interested part of script. The $t is transform rule for  the  URL,  in
              this  parameter  the  $x  parts  will  be  replaced  by  x-th subpattern of the $p pattern. The $h
              parameter is exact HTML tag or "*" when this apply to javascript: URLs or DOM event attribs or  ""
              (empty  string)  when  this  apply to javascript body of HTML document or separate JS file. The $a
              parameter is exact HTML attrib of tag or "" (empty string) when  this  rule  apply  to  javascript
              body.

       -js_transform2 $p $t $h $a
              This  option  is very similar to previous. The meaning of all parameters is same, just the pattern
              $p can have only one substring which will be used in the transform rule $t. This  is  required  to
              allow  rewriting of URL parts of the tags and scripts. This option can also be used to force pavuk
              to recognize HTML tag/attribute pairs which pavuk does not support.

Cookie

       -cookie_file $file
              File where are stored cookie infos. This file must be in Netscape cookie  file  format  (generated
              with Netscape Navigator or Communicator ...).

       -cookie_send/-nocookie_send
              Use collected cookies in HTTP/HTTPS requests.  Pavuk will not send at default cookies.

       -cookie_recv/-nocookie_recv
              Store  received cookies from HTTP/HTTPS responses into memory cookie cache.  At default pavuk will
              not remember received cookies.

       -cookie_update/-nocookie_update
              Update cookie file on disk and synchronize it with changes made by any concurrent  processes.   At
              default pavuk will not update cookie file on disk.

       -cookies_max $nr
              Maximum  number  of  cookies  in  memory  cookie  cache.   Default  value  is 0, and that means no
              restrictions for cookies number.

       -disabled_cookie_domains $list
              Comma-separated list of cookie domains which are not permitted to  send  cookies  stored  into  cookie
              cache.

       -cookie_check/-nocookie_check
              Check  when  receiving  cookie,  if  cookie  domain  is equal to domain of server which sends this
              cookie. At default pavuk checks if server is setting cookies for its domain, and if it tries to set
              cookie for foreign domain pavuk will complain about that and will reject such cookie.

HTML rewriting engine tuning options

       -noRelocate/-Relocate
              This switch prevents the program to rewrite relative URLs to  absolute,  after  HTML  document  is
              transferred. Default  pavuk  behavior is to maintain link consistency of HTML documents. So always
              when HTML document is downloaded pavuk will rewrite all URLs to point to local document if  it  is
              available  and if it is not available it will point to remote document. After document is properly
              downloaded, pavuk will update links in HTML documents, which point to this one.

       -all_to_local/-noall_to_local
              This option forces pavuk to change all URLs inside HTML document to local URLs  immediately  after
              download of document. Default is this option disabled.

       -sel_to_local/-nosel_to_local
              This  option  forces  pavuk to change all URLs, which accomplish conditions for download, to local
              inside HTML document immediately after download of document.  I recommend to use this option, when
              you are sure, that transfer will be without any problems. This option can save a lot of  processor
              time.  Default is this option disabled.

       -all_to_remote/-noall_to_remote
              This  option forces pavuk to change all URLs inside HTML document to remote URLs immediately after
              download of document.  Default is this option disabled.

       -post_update/-nopost_update
              This option is especially designed to allow in -fnrules option doing rules based on MIME  type  of
              document.  This  option  forces pavuk to generate local names for documents just after pavuk knows
              what is the MIME type of document. This have big impact on the rewriting engine  of  links  inside
              HTML documents. This option causes dysfunction of other options for controlling the link rewriting
              engine. Use this option only when you know what you are doing :-)

       -dont_touch_url_pattern $pat
              This option serves to deny rewriting and processing of particular URLs in HTML documents by pavuk
              HTML  rewriting  engine.  This  option accepts wildcard patterns to specify such URLs. Matching is
              done against untouched URLs so when the URL is relative, you must use  pattern  which  matches  the
              relative URL, when it is absolute, you must use absolute URL.

       -dont_touch_url_rpattern $pat
              This  option  is variation on previous option. This one uses regular patterns for matching of URLs
              instead of wildcard patterns used by -dont_touch_url_pattern option. This option is available only
              when pavuk is compiled with support for regular expression patterns.

       -dont_touch_tag_rpattern $pat
              This option is variation on previous option, just matching is made on full HTML tag with  included
              <>.  This  option accepts regular expression patterns. It is available only when pavuk is compiled
              with support for regular expression patterns.

Filename/URL Conversion Option

       -tr_del_chr $str
              All characters found in $str will be deleted from local name of  document.   $str  should  contain
              escape sequences similar like in tr command:
              \n - newline
              \r - carriage return
              \t - horizontal tab space
              \0xXX - hexadecimal  ASCII value
              [:upper:] - all uppercase letters
              [:lower:] - all lowercase letters
              [:alpha:] - all letters
              [:alnum:] - all letters and digits
              [:digit:] - all digits
              [:xdigit:] - all hexadecimal digits
              [:space:] - all horizontal and vertical whitespace
              [:blank:] - all horizontal whitespace
              [:cntrl:] - all control characters
              [:print:] - all printable characters including space
              [:nprint:] - all non printable characters
              [:punct:] - all punctuation characters
              [:graph:] - all printable characters excluding space

       -tr_str_str $str1 $str2
              String $str1 from local name of document will be replaced with $str2.

       -tr_chr_chr $chrset1 $chrset2
              Characters from $chrset1 from local name of document will be replaced with corresponding character
              from $chrset2. $chrset1 and $chrset2 should have same syntax as $str in -tr_del_chr option.

       -store_name $str
              When  you  want to change local filename of first file downloaded with singlepage mode, you should
              use this option.

       -index_name $str
              With this option you can change directory index name. As default is used _._.html .

       -store_index/-nostore_index
              With option -nostore_index you should deny storing of directory indexes into HTML files.

       -fnrules $t $m $r
              This is a very powerful option! This option is used to flexible change layout  of  local  document
              tree.  It  accepts  three  parameters.  First  parameter  $t is used to say what type is following
              pattern.  F is used for wildcard pattern (uses fnmatch()) and R is  used  for  regular  expression
              pattern  (using  any  supported  RE implementation).  Second parameter is matching pattern used to
              select URLs for this rule.  If URL match this pattern, then local name for this  URL  is  computed
              following  rules  of  third parameter.  And third parameter is local name building rule. Pavuk now
              supports two kinds of local name building rules. One is simple based only  on  simple  macros  and
              other   more  complicated  extended  rule,  which  also  enables  to  perform  several  functions.
              Recognition between those two kinds of rules is done by looking at first character  of  rule.   In
              case when first character is '(', rule is extended and in all other cases it is the simple kind of
              rule.

              Simple  rule should contain literals or escaped macros.  Macros are escaped by % character or by $
              character.

              Here is list of recognized macros:

              $x - where x is any positive number. This macro is replaced with  x-th  substring  matched  by  RE
              pattern. (If you use this you need to understand RE !)
              %i - is replaced with protocol id (http, https, ftp, gopher)
              %p - is replaced with password. (use this only when usable)
              %u - is replaced with username.
              %h - is replaced with host name.
              %m - is replaced with domain name.
              %r - is replaced with port number.
              %d - is replaced with path to document.
              %n - is replaced with document name.
              %b - is replaced with basename of document (without extension).
              %e - is replaced with extension.
              %s - is replaced with searchstring.
              %M  -  is  replaced with MIME type of document. When you are using this macro, you *must* use also
              -post_update option else it won't work.
              %E - is replaced with default extension assigned to MIME type of document. When you are using this
              macro, you *must* use also -post_update option else it won't work.
              %x - where x is positive number. This macro is replaced with x-th directory from path to  document
              from beginning.
              %-x - where x is positive number. This macro is replaced with x-th directory from path to document
              from end.

              Here  is  example. If you want place document into single directories by extension, you should use
              following fnrules option:
              -fnrules F '*' '/%e/%n'

              Extended rule always begins with character '('. It uses some kind of LISP like syntax.

              Here are base rules for writing extended rules :
              - the local filename for this kind of rule is the return value of the function
              - each function is enclosed inside round braces ()
              - first token right after opening brace is function name
              - each function have nonzero fixed number of parameters
              - each function returns numeric or string value
              - function parameters are separated by any number of space characters
              - parameter of function should be string, number, macro or other function
              - string is ever quoted with "
              -  each  numeric  parameter can be in any encoding supported by strtod() function (octal, decimal,
              hexadecimal, ...)
              - there is no implicit conversion from number to string
              - each macro is prefixed by % character and is one character long
              - each macro is replaced by its string representation from current URL
              - function parameters are typed strictly
              - toplevel function must return string value

              Extended rule supports full set of  %  escaped  macros  supported  with  simple  rules,  plus  two
              following addition macros :
              %U - URL string
              %o - default localname for URL

              Here is description of all supported functions

              sc - concat two string parameters
                 - accepts two string parameters
                 - returns string value
              ss - substring from string
                 - accepts three parameters.
                   - first is string from which we want to cut subpart
                   - second is number which represents starting position in string
                   - third is number which represents ending position in string
                 - returns string value
              hsh - compute modulo hash value from string with specified base
                 - accepts two parameters
                   - first is string for which we are computing the hash value
                   - second is numeric value for base of modulo hash
                 - returns numeric value
              md5 - compute MD5 checksum for string
                 - accepts one string value
                 - returns string which represents MD5 checksum
              lo - convert all characters inside string to lower case
                 - accepts one string value
                 - returns string value
              up - convert all characters inside string to upper case
                 - accepts one string value
                 - returns string value
              ue  -  encode  unsafe  characters  in  string with same encoding which is used for encoding unsafe
              characters inside URL (%xx). By default all non-ASCII values are encoded when this function is used.
                 - accepts two string values
                   - first is string which we want to encode
                   - second is string which contains unsafe characters
                 - return string value
              dc - delete unwanted characters from string (have similar functionality as -tr_del_chr option)
                 - accepts two string values
                   - first is string from which we want delete
                   - second is string which contains characters we want to delete.
                 - returns string value
              tc - replace character with other character in string (have similar functionality  as  -tr_chr_chr
              option)
                 - accepts three string values
                   - first is string inside which we want to replace characters
                   - second is set of characters which we want to replace
                   - third is set of characters with which we are replacing
                 - returns string value
              ts  -  replace  some  string  inside  string  with any other string (have similar functionality as
              -tr_str_str option)
                 - accepts three string values
                   - first is string inside which we want to replace string
                   - second is the from string
                   - third is to string
                 - returns string value
              spn - calculate initial length of string which contains only specified set of  characters.   (have
              same functionality as strspn() libc function)
                 - accepts two string values
                   - first is input string
                   - second is set of acceptable characters
                 - returns numeric value
              cspn  -  calculate  initial  length  of  string which doesn't contain specified set of characters.
              (have same functionality as strcspn() libc function)
                 - accepts two string values
                   - first is input string
                   - second is set of unacceptable characters
                 - returns numeric value
              sl - calculate length of string
                 - accepts one string value
                 - returns numeric value
              ns - convert number to string by format
                 - accepts two parameters
                   - first parameter is format string same as for printf() function
                   - second is number which we want to convert
                 - returns string value
              lc - return position of last occurrence of specified character inside string
                 - accepts two string parameters
                   - first string which we are searching in
                   - second string contains character for which we are looking for
                 - returns numeric value
              + - add two numeric values
                 - accepts two numeric values
                 - returns numeric value
              - - subtract two numeric values
                 - accepts two numeric values
                 - returns numeric value
              % - modulo addition
                 - accepts two numeric values
                 - returns numeric value
              * - multiply two numeric values
                 - accepts two numeric values
                 - returns numeric value
              / - divide two numeric values
                 - accepts two numeric values
                 - returns numeric value
              rmpar - remove parameter from query string
                - accepts two strings
                  - first string is string which we are adjusting
                  - second parameter is name of parameter which should be removed
                - returns adjusted string
              getval - get query string parameter value
                - accepts two strings
                  - first string is query string from which to get the parameter
                    value (usually %s)
                  - second string is name of parameter for which we want to get
                    the value
                - returns value of the parameter or empty string when the parameter
                  doesn't exists
              sif - logical decision
                - accepts three parameters
                  - first is numeric and when it is zero then result of this decision
                    is result of second parameter, else result is result of third
                    parameter
                  - second parameter is string
                  - third parameter is string
                - returns string result of decision
              ! - logical not
                - accepts one numeric parameter
                - returns negation of parameter
              & - logical and
                - accept two numeric parameters
                - returns logical and of parameters
              | - logical or
                - accept two numeric parameters
                - returns logical or of parameters
              getext - get file extension
                - accept one string (filename or path)
                - return string containing extension of parameter
              seq - compare two strings
                - accepts two strings for comparison
                - returns numeric value 0 - if different 1 - if equal
              jsf - execute JavaScript function
                - accepts one string parameter which holds name of
                  JavaScript function specified in script loaded with
                  -js_script_file option.
                - returns string value equal to return value of
                  JavaScript function
                - this function is available only when pavuk is compiled
                  with support for JavaScript bindings

              For example, if you are mirroring very huge number of internet sites into  same  local  directory,
              too  many entries in one directory could cause performance problems. You may use for example hsh
              or md5 functions to generate one additional level of hash directories based on hostname  with  one
              of following options :

              -fnrules F '*' '(sc (ns "%02d/" (hsh %h 100)) %o)'
              -fnrules F '*' '(sc (ss (md5 %h) 0 2) %o)'

       -base_level $nr
              Number of directory levels to omit in local tree.

              For  example  when  downloading  URL ftp://ftp.idata.sk/pub/unix/www/pavuk-0.7pl1.tgz you enter at
              command  line  -base_level  4  in  local   tree   will   be   created   www/pavuk-0.7pl1.tgz   not
              ftp/ftp.idata.sk_21/pub/unix/www/pavuk-0.7pl1.tgz as normally.

       -default_prefix $str
              Default  prefix of mirrored directory. This option is used only when you are trying to synchronize
              content of remote directory which was downloaded using  -base_level  option.  Also  you  must  use
              directory  based  synchronization method, not URL based synchronization method. This is especially
              useful, when used in conjunction with -remove_old option.

       -remove_adv/-noremove_adv
              This option is used for turn on/off of removing HTML tags which  contains  advertisement  banners.
              The  banners  are  not  removed from HTML file, but are commented out.  Such URLs also will not be
              downloaded.  This option have effect only when used with option -adv_re.  Default is  turned  off.
              This  option  is  available  only  when  your  system  have  support  for one of supported regular
              expressions implementation.

       -adv_re $RE
              This option is used to specify regular expressions for matching  URLs  of  advertisement  banners.
              For  example  :  -adv_re  http://ad.doubleclick.net/.*   is  used  to  match all files from server
              ad.doubleclick.net.  This option is available only when your system  have  any  supported  regular
              expressions implementation.

       -unique_name/-nounique_name
              Pavuk  as default always attempts to assign to unique URL unique local filename.  If this behavior
              is not wanted, you can use option -nounique_name to disable this.

Other Options

       -sleep $nr
              This option allows you to specify number of seconds during that  the  program  will  be  suspended
              between two transfers. Useful to deny server overload.  Default value for this option is 0.

       -rsleep/-norsleep
              When  this  option  is  active,  pavuk randomizes the sleep time between transfers in interval
              between zero and value specified with -sleep option. Default is this option inactive.

       -ddays $nr
              If document has modification time later as $nr days, then in sync mode pavuk attempts to  retrieve
              newer copy of document from remote server. Default value is 0.

       -remove_old/-noremove_old
              Remove  improper  documents  (those  which don't exist on remote site).  This option have effect
              only when used in directory based sync mode.  When used with URL based sync mode, pavuk  will  not
              remove  any  old  files  which were excluded from document tree and are not referenced in any HTML
              document.  You must also use option -subdir, to let pavuk find  files  which  belongs  to  current
              mirror.  As default pavuk won't remove any old files.

       -browser $str
              is  used  to  set  your browser command (in URL tree dialog you can use right click to raise menu,
              from which you can start browser on actually selected URL).  This option is  available  only  when
              compiled with GTK GUI and with support for URL tree preview.

       -debug/-nodebug
              turns  on  displaying of debug messages. This option is available only when compiled with -DDEBUG.
              If -debug option is used pavuk will output verbose information  about  documents,  whole  protocol
              level  information, locking informations and more (depends on -debug_level setup). This option is
              used just like trigger to enable  output  of  debug  messages  selected  by  -debug_level  option.
              Default is debug mode turned off.

       -debug_level $level
              Set  level of required debug informations. $level can be numeric value which represent binary mask
              for requested debug levels, or comma separated list of supported debug  levels.   Currently  pavuk
              supports following debug levels :
              html - for HTML parser debugging
              protos - to see server side protocol messages
              protoc - to see client side protocol messages
              procs - to see some special procedure calls
              locks - for debugging of documents locking
              net - for debugging some low level network stuff
              misc - for miscellaneous unsorted debug messages
              user - for verbose user level messages
              all - request all currently supported debug levels
              mtlock - locking of resources in multithreading environment
              mtthr - launching/waking/sleeping/stopping of threads in multithreaded environment
              protod - for DEBUGGING of POST requests
              limits  - for debugging limiting options, you will see the reason why particular URLs are rejected
              by pavuk and which option caused this.
              ssl - to enable verbose reporting about SSL related things.

       -remind_cmd $str
              This option have effect only when running pavuk in reminder mode. To command specified  with  this
              option  pavuk  sends result of running reminder mode.  There are listed URLs which are changed and
              URLs which have any errors.  Default remind command is  "mailx  user@server  -s  \"pavuk  reminder
              result\"" .

       -nscache_dir $dir
              Path  to Netscape browser cache directory. If you specify this path, pavuk attempts to find out if
              you have URL in this cache.  If URL is there it will be fetched else pavuk will download  it  from
              net.  The  cache  directory  index  file  must  be named index.db and must be located in the cache
              directory.  To support this feature, pavuk have to be linked with BerkeleyDB 1.8x .

       -mozcache_dir $dir
              Path to Mozilla browser cache directory. Same functionality as  with  previous  option,  just  for
              different  browser  with  different cache formats.  Pavuk supports both formats of Mozilla browser
              disk cache (old for versions <0.9 and new used in 0.9=<).  The old  format  cache  directory  must
              contain  cache  directory  index database with name cache.db. The new format cache directory must
              contain map file _CACHE_MAP_, and three block files  _CACHE_001_,  _CACHE_002_,  _CACHE_003_.   To
              support  old Mozilla cache format, pavuk have to be linked with BerkeleyDB 1.8x. New Mozilla cache
              format doesn't require any external library.

       -post_cmd $str
              Post-processing command, which will be executed  after  successful  download  of  document.   This
              command may somehow handle with document. During time of running this command, pavuk leaves actual
              document  locked,  so there isn't chance that some other pavuk process will modify document.  This
              postprocessing command will get three additional parameters from pavuk.
                 - local name of document
                 - 1/0 1 if document is HTML document, 0 if not
                 - original URL of this document

       -hack_add_index/-nohack_add_index
              This is a somewhat hacky option. It forces pavuk to also add to the URL queue the directory
              indexes of all queued documents. This allows pavuk to download more documents from a site
              than it is able to reach by normal traversal of HTML documents.  A bit dirty, but useful in
              some cases.

       -js_script_file $file
              Pavuk optionally has a built-in JavaScript interpreter to allow high level customization of
              some internal procedures. Currently you can customize two things with your own JavaScript
              functions: you can use them to set precise limiting options, or you can write your own
              functions which can be used inside rules of the -fnrules option.  With this option you can
              load a JavaScript script with functions into pavuk's internal JavaScript interpreter. To learn
              more about these capabilities, read the separate document jsbind.txt, which comes with the
              pavuk sources in the toplevel directory.  This option is available only when you have
              compiled pavuk with support for JavaScript bindings.

EXIT STATUS

       As of version 0.9pl29 pavuk has changed the indication of status by exit codes.  In earlier versions
       exit status 0 meant no error and a nonzero exit status was something like the count of failed
       documents.  In all versions from 0.9pl29 onwards the following exit codes are defined:

           0 - no error, everything is OK
           1 - error in configuration of pavuk options or
               error in config files
           2 - some error occurred while downloading documents

ENVIRONMENTAL VARIABLES

       USER   variable is used to construct email address from user and hostname

       LC_* or LANG
              used to set internationalized environment

       PAVUKRC_FILE
              with this variable you can specify alternative location for your pavukrc configuration file.

REQUIRED EXTERNAL PROGRAMS

       at     is used for scheduling.

       gunzip is used to decode gzip or compress encoded documents.

BUGS

       If you find any, please let me know.

FILES

       @SYSCONFDIR@/pavukrc

       ~/.pavukrc

       ~/.pavuk_prefs

              These files are used as default configuration files.  You may specify constant values there,
              such as your proxy server or your preferred WWW browser. Configuration options mirror the
              command line options.  Not all parameters are suitable for use in a default configuration
              file.  You should select only those which you really need.

              The file ~/.pavuk_prefs is a special file which contains automatically stored configuration.
              This file is used only when running the GUI interface of pavuk and the option -prefs is active.

              The first file parsed is @SYSCONFDIR@/pavukrc (if present), then ~/.pavukrc (if present), then
              ~/.pavuk_prefs (if present).  The command line is parsed last. The precedence is as follows:

              - highest -
              Entered in user interface
              Entered in command line
              ~/.pavuk_prefs
              ~/.pavukrc
              @SYSCONFDIR@/pavukrc
              - lowest -

              Here is table of config file - command line options pairs.

              MaxLevel:                  --->  -lmax
              MaxDocs:                   --->  -dmax
              MaxSize:                   --->  -maxsize
              MinSize:                   --->  -minsize
              SleepBetween:              --->  -sleep
              MaxRetry:                  --->  -retry
              MaxRegets:                 --->  -nregets
              MaxRedirections:           --->  -nredirs
              CommTimeout:               --->  -timeout
              RegetRollbackAmount:       --->  -rollback
              DocExpiration:             --->  -ddays
              UseCache:                  --->  -nocache
              UseRobots:                 --->  -noRobots
              AllowFTP:                  --->  -noFTP
              AllowHTTP:                 --->  -noHTTP
              AllowSSL:                  --->  -noSSL
              AllowGopher:               --->  -noGopher
              AllowCGI:                  --->  -noCGI
              AllowGZEncoding:           --->  -noEnc
              AllowFTPRecursion:         --->  -FTPdir
              ForceReget:                --->  -force_reget
              Debug:                     --->  -debug
              AllowedSites:              --->  -asite
              DisallowedSites:           --->  -dsite
              AllowedDomains:            --->  -adomain
              DisallowedDomains:         --->  -ddomain
              AllowedPrefixes:           --->  -aprefix
              DisallowedPrefixes:        --->  -dprefix
              AllowedSuffixes:           --->  -asfx
              DisallowedSuffixes:        --->  -dsfx
              AllowedMIMETypes:          --->  -amimet
              DisallowedMIMETypes:       --->  -dmimet
              PreferredLanguages:        --->  -alang
              PreferredCharset:          --->  -acharset
              WorkingDir:                --->  -cdir
              WorkingSubDir:             --->  -subdir
              HTTPAuthorizationScheme:   --->  -auth_scheme
              HTTPAuthorizationName:     --->  -auth_name
              HTTPAuthorizationPassword: --->  -auth_passwd
              AuthReuseDigestNonce:      --->  -auth_reuse_nonce
              SSLCertPassword:           --->  -ssl_cert_passwd
              SSLCertFile:               --->  -ssl_cert_file
              SSLKeyFile:                --->  -ssl_key_file
              EmailAddress:              --->  -from
              MatchPattern:              --->  -pattern
              REMatchPattern:            --->  -rpattern
              SkipMatchPattern:          --->  -skip_pattern
              SkipREMatchPattern:        --->  -skip_rpattern
              URLMatchPattern:           --->  -url_pattern
              URLREMatchPattern:         --->  -url_rpattern
              SkipURLMatchPattern:       --->  -skip_url_pattern
              SkipURLREMatchPattern:     --->  -skip_url_rpattern
              DefaultMode:               --->  -mode
              FTPProxy:                  --->  -ftp_proxy
              HTTPProxy:                 --->  -http_proxy
              SSLProxy:                  --->  -ssl_proxy
              GopherProxy:               --->  -gopher_proxy
              FTPViaHTTPProxy:           --->  -ftp_httpgw
              GopherViaHTTPProxy:        --->  -gopher_httpgw
              HTTPProxyUser:             --->  -http_proxy_user
              HTTPProxyPass:             --->  -http_proxy_pass
              HTTPProxyAuth:             --->  -http_proxy_auth
              AuthReuseProxyDigestNonce: --->  -auth_reuse_proxy_nonce
              Browser:                   --->  -browser
              ScenarioDir:               --->  -scndir
              ShowProgress:              --->  -progress
              XMaxLogSize:               --->  -xmaxlog
              LogFile:                   --->  -logfile
              RemoveOldDocuments:        --->  -remove_old
              AuthFile:                  --->  -auth_file
              BaseLevel:                 --->  -base_level
              FTPDirtyProxy:             --->  -ftp_dirtyproxy
              ActiveFTPData:             --->  -ftp_active/-ftp_passive
              ActiveFTPPortRange:        --->  -active_ftp_port_range
              AlwaysMDTM:                --->  -always_mdtm/-noalways_mdtm
              RemoveBeforeStore:         --->  -(no)remove_before_store
              ShowDownloadTime:          --->  -stime
              NLSMessageCatalogDir:      --->  -msgcat
              Quiet:                     --->  -quiet/-verbose
              NewerThan:                 --->  -newer_than
              OlderThan:                 --->  -older_than
              Reschedule:                --->  -reschedule
              DontLeaveSite:             --->  -dont_leave_site/-leave_site
              DontLeaveDir:              --->  -dont_leave_dir/-leave_dir
              PreserveTime:              --->  -preserve_time/-nopreserve_time
              LeaveLevel:                --->  -leave_level
              GUIFont:                   --->  -gui_font
              UserCondition:             --->  -user_condition
              CookieFile:                --->  -cookie_file
              CookieSend:                --->  -cookie_send/-nocookie_send
              CookieRecv:                --->  -cookie_recv/-nocookie_recv
              CookieUpdate:              --->  -cookie_update/-nocookie_update
              CookiesMax:                --->  -cookies_max
              CookieCheckDomain:         --->  -cookie_check/-nocookie_check
              DisabledCookieDomains:     --->  -disabled_cookie_domains
              DisableHTMLTag:            --->  -disable_html_tag
              EnableHTMLTag:             --->  -enable_html_tag
              TrDeleteChar:              --->  -tr_del_chr
              TrStrToStr:                --->  -tr_str_str
              TrChrToChr:                --->  -tr_chr_chr
              IndexName:                 --->  -index_name
              StoreName:                 --->  -store_name
              PreservePermisions:        --->  -preserve_perm/-nopreserve_perm
              PreserveAbsoluteSymlinks:  --->  -preserve_slinks/-nopreserve_slinks
              FTPListCMD:                --->  -FTPlist/-noFTPlist
              MaxRate:                   --->  -maxrate
              MinRate:                   --->  -minrate
              ReadBufferSize:            --->  -bufsize
              BgMode:                    --->  -bg/-nobg
              CheckSize:                 --->  -check_size/-nocheck_size
              SLogFile:                  --->  -slogfile
              Identity:                  --->  -identity
              SendFromHeader:            --->  -send_from/-nosend_from
              RunX:                      --->  -runX
              FnameRules:                --->  -fnrules
              StoreDocInfoFiles:         --->  -store_info/-nostore_info
              AllLinksToLocal:           --->  -all_to_local/-noall_to_local
              AllLinksToRemote:          --->  -all_to_remote/-noall_to_remote
              SelectedLinksToLocal:      --->  -sel_to_local/-nosel_to_local
              ReminderCMD:               --->  -remind_cmd
              AutoReferer:               --->  -auto_referer/-noauto_referer
              URLsFile:                  --->  -urls_file
              UsePreferences:            --->  -prefs/-noprefs
              FTPhtml:                   --->  -FTPhtml/-noFTPhtml
              StoreDirIndexFile:         --->  -store_index/-nostore_index
              Language:                  --->  -language
              FileSizeQuota:             --->  -file_quota
              TransferQuota:             --->  -trans_quota
              FSQuota:                   --->  -fs_quota
              EnableJS:                  --->  -enable_js/-disable_js
              UrlSchedulingStrategy:     --->  -url_strategy
              NetscapeCacheDir:          --->  -nscache_dir
              RemoveAdvertisement:       --->  -remove_adv/-noremove_adv
              AdvBannerRE:               --->  -adv_re
              CheckIfRunnigAtBackground: --->  -check_bg/-nocheck_bg
              SendIfRange:               --->  -send_if_range/-nosend_if_range
              SchedulingCommand:         --->  -sched_cmd
              UniqueLogName:             --->  -unique_log/-nounique_log
              PostCommand:               --->  -post_cmd
              SSLVersion:                --->  -ssl_version
              UniqueSSLID:               --->  -unique_sslid/-nounique_sslid
              AddHTTPHeader:             --->  -httpad
              StatisticsFile:            --->  -statfile
              WaitOnExit:                --->  -ewait
              AllowedIPAdrressPattern:   --->  -aip_pattern
              DisallowedIPAdrressPattern:--->  -dip_pattern
              SiteLevel:                 --->  -site_level
              UseHTTP11:                 --->  -use_http11
              MaxRunTime:                --->  -max_time
              LocalIP:                   --->  -local_ip
              RequestInfo:               --->  -request
              HashSize:                  --->  -hash_size
              NumberOfThreads:           --->  -nthreads
              ImmediateMessages:         --->  -immesg/-noimmesg
              HTMLFormData:              --->  -formdata
              DumpFD:                    --->  -dumpfd
              DumpUrlFD:                 --->  -dump_urlfd
              DeleteAfterTransfer:       --->  -del_after/-nodel_after
              UniqueDocName:             --->  -unique_name/-nounique_name
              LeaveSiteEnterDirectory:   --->  -leave_site_enter_dir/-dont_leave_site_enter_dir
              SinglePage:                --->  -singlepage/-nosinglepage
              NTLMAuthorizationDomain:   --->  -auth_ntlm_domain
              NTLMProxyAuthorizationDomain:
                                         --->  -auth_proxy_ntlm_domain
              JavascriptPattern:         --->  -js_pattern
              FollowCommand:             --->  -follow_cmd
              RetrieveSymlinks:          --->  -retrieve_symlink/-noretrieve_symlink
              JSTransform:               --->  -js_transform
              JSTransform2:              --->  -js_transform2
              FTPProxyUser:              --->  -ftp_proxy_user
              FTPProxyPassword:          --->  -ftp_proxy_pass
              LimitInlineObjects:        --->  -limit_inlines/-dont_limit_inlines
              FTPListOptions:            --->  -ftp_list_options
              FixWuFTPDBrokenLISTcmd:    --->  -fix_wuftpd_list/-nofix_wuftpd_list
              PostUpdate:                --->  -post_update/-nopost_update
              SeparateInfoDir:           --->  -info_dir
              MozillaCacheDir:           --->  -mozcache_dir
              AllowedPorts:              --->  -aport
              DisallowedPorts:           --->  -dport
              HackAddIndex:              --->  -hack_add_index/-nohack_add_index
              JavaScriptFile:            --->  -js_script_file
              FtpLoginHandshake:         --->  -ftp_login_handshake
              NSSCertDir:                --->  -nss_cert_dir
              NSSAcceptUnknownCert:      --->  -nss_accept_unknown_cert/-nonss_accept_unknown_cert
              NSSDomesticPolicy:         --->  -nss_domestic_policy/-nss_export_policy
              DontTouchUrlREPattern:     --->  -dont_touch_url_rpattern
              DontTouchUrlPattern:       --->  -dont_touch_url_pattern
              DontTouchTagREPattern:     --->  -dont_touch_tag_rpattern
              HTMLTagPattern:            --->  -tag_pattern
              HTMLTagREPattern:          --->  -tag_rpattern
              URL:                       --->  one URL (more lines with URL:
                                               ... means more URL's)

       A line which begins with '#' is a comment.
       TrStrToStr: and TrChrToChr: must contain two quoted strings.  All parameter names are case insensitive.
       If any option is missing here, try looking inside the config.c source file.

       See pavukrc.sample file for example

       .pavuk_authinfo

              The file should contain as many authentication records as you need.  Records are separated by
              any number of empty lines.  Parameter names are case insensitive.

              Structure of record:

              Proto: <proto ID>    ---> identification of protocol
                                        (ftp/http/https/..)
                                   - required field
              Host: <host[:port]>  ---> host name
                                   - required field
              User: <user>         ---> name of user
                                   - optional
              Pass: <password>     ---> password for user
                                   - optional
              Base: <path>         ---> base prefix of document path
                                   - optional
              Realm: <name>        ---> realm for HTTP authorization
                                   - optional
              NTLMDomain: <domain> ---> NT/LM domain for NTLM authorization
                                   - optional
              Type: <type>         ---> HTTP authentication scheme
                                             - 1/user   - user auth scheme
                                             - 2/Basic  - Basic auth scheme (default)
                                             - 3/Digest - Digest auth scheme
                                             - 4/NTLM   - NTLM auth scheme
                                   - optional

       see pavuk_authinfo.sample file for example

       ~/.pavuk_keys
              This is the file where information about configurable menu option shortcuts is stored.  It is
              available only when pavuk is compiled with Gtk+ 1.2 or higher.

       ~/.pavuk_remind_db
              This file contains information about URLs for running in reminder mode. The structure of this
              file is very simple. Each line contains information about one URL. The first entry in a line
              is the last known modification time of the URL (stored in time_t format - number of seconds
              since 1.1.1970 GMT), and the second entry is the URL.

EXAMPLE COMMAND LINE

       pavuk -mode mirror -nobg -store_info -info_dir
       /mirror/info -nthreads 1 -cdir /mirror/incoming -subdir
       /mirror/incoming -preserve_time -nopreserve_perm
       -nopreserve_slinks -noretrieve_symlink -force_reget
       -noRobots -trans_quota 16384 -maxsize 16777216
       -max_time 28 -nodel_after -remove_before_store -ftpdir
       -ftplist -ftp_list_options -a -dont_leave_site
       -dont_leave_dir -all_to_local -remove_old -nostore_index
       -active_ftp_port_range 57344:65535 -always_mdtm
       -ftp_passive -base_level 2 http://<my_host>/doc/

SEE ALSO

       Look into the ChangeLog file for more information about new features in particular versions of pavuk.

AUTHOR

       Main development: Ondrejicka Stefan
       Look into CREDITS file of sources for additional information.

AVAILABILITY

       pavuk is available from http://pavuk.sourceforge.net/

0.9.35                                          2015-12-01T17:14                                        pavuk(1)