xenial (1) pavuk.1.gz

Provided by: pavuk_0.9.35-4_amd64 bug

NAME

       pavuk - HTTP, HTTP over SSL, FTP, FTP over SSL and Gopher recursive document retrieval program

SYNOPSIS

       pavuk  [-mode  {normal  |  resumeregets | singlepage | singlereget | sync | dontstore | ftpdir | mirror}]
       [-X] [-runX] [-bg/-nobg] [-prefs/-noprefs] [-h] [-v] [-progress/-noprogress]  [-stime/-nostime]  [-xmaxlog
       $nr]  [-logfile  $file]  [-slogfile  $file] [-auth_file $file] [-msgcat $dir] [-language $str] [-gui_font
       $font] [-quiet/-verbose] [-read_css/-noread_css] [-cdir $dir] [-scndir $dir]  [-scenario  $str]  [-dumpscn
       $filename]  [-lmax  $nr]  [-dmax  $nr]  [-leave_level  $nr]  [-maxsize $nr] [-minsize $nr] [-asite $list]
       [-dsite $list] [-adomain $list] [-ddomain $list] [-asfx $list] [-dsfx $list] [-aprefix  $list]  [-dprefix
       $list]  [-amimet  $list]  [-dmimet  $list] [-pattern $pattern] [-url_pattern $pattern] [-rpattern $regexp]
       [-url_rpattern $regexp] [-skip_pattern $pattern] [-skip_url_pattern  $pattern]  [-skip_rpattern  $regexp]
       [-skip_url_rpattern  $regexp] [-newer_than $time] [-older_than $time] [-schedule $time] [-reschedule $nr]
       [-dont_leave_site/-leave_site]  [-dont_leave_dir/-leave_dir]  [-http_proxy   $site[:$port]]   [-ftp_proxy
       $site[:$port]]   [-ssl_proxy  $site[:$port]]  [-gopher_proxy  $site[:$port]]  [-ftp_httpgw/-noftp_httpgw]
       [-ftp_dirtyproxy/-noftp_dirtyproxy]   [-gopher_httpgw/-nogopher_httpgw]   [-noFTP/-FTP]   [-noHTTP/-HTTP]
       [-noSSL/-SSL]     [-noGopher/-Gopher]     [-FTPdir/-noFTPdir]     [-noCGI/-CGI]     [-FTPlist/-noFTPlist]
       [-FTPhtml/-noFTPhtml]     [-noRelocate/-Relocate]     [-force_reget/-noforce_reget]     [-nocache/-cache]
       [-check_size/-nocheck_size]  [-noRobots/-Robots]  [-noEnc/-Enc]  [-auth_name  $user] [-auth_passwd $pass]
       [-auth_scheme 1/2/3/4/user/Basic/Digest/NTLM] [-auth_reuse_nonce/-no_auth_reuse_nonce]  [-http_proxy_user
       $user]       [-http_proxy_pass       $pass]       [-http_proxy_auth       1/2/3/4/user/Basic/Digest/NTLM]
       [-auth_reuse_proxy_nonce/-no_auth_reuse_proxy_nonce]   [-ssl_key_file   $file]   [-ssl_cert_file   $file]
       [-ssl_cert_passwd     $pass]     [-from     $email]     [-send_from/-nosend_from]     [-identity    $str]
       [-auto_referer/-noauto_referer] [-referer/-noreferer]  [-alang  $list]  [-acharset  $list]  [-retry  $nr]
       [-nregets     $nr]     [-nredirs     $nr]     [-rollback     $nr]    [-sleep    $nr]    [-timeout    $nr]
       [-preserve_time/-nopreserve_time] [-preserve_perm/-nopreserve_perm] [-preserve_slinks/-nopreserve_slinks]
       [-bufsize   $nr]   [-maxrate   $nr]   [-minrate   $nr]   [-user_condition   $str]   [-cookie_file  $file]
       [-cookie_send/-nocookie_send]       [-cookie_recv/-nocookie_recv]       [-cookie_update/-nocookie_update]
       [-cookies_max    $nr]    [-disabled_cookie_domains    $list]   [-disable_html_tag   $TAG,[$ATTRIB][;...]]
       [-enable_html_tag  $TAG,[$ATTRIB][;...]]   [-tr_del_chr  $str]  [-tr_str_str  $str1  $str2]  [-tr_chr_chr
       $chrset1  $chrset2] [-index_name $str] [-store_index/-nostore_index] [-store_name $str] [-debug/-nodebug]
       [-debug_level $level] [-browser $str] [-urls_file $file] [-file_quota $nr] [-trans_quota $nr]  [-fs_quota
       $nr]      [-enable_js/-disable_js]      [-fnrules      $t      $m     $r]     [-store_info/-nostore_info]
       [-all_to_local/-noall_to_local]     [-sel_to_local/-nosel_to_local]     [-all_to_remote/-noall_to_remote]
       [-url_strategie    $strategie]    [-remove_adv/-noremove_adv]   [-adv_re   $RE]   [-check_bg/-nocheck_bg]
       [-send_if_range/-nosend_if_range]  [-sched_cmd   $str]   [-unique_log/-nounique_log]   [-post_cmd   $str]
       [-ssl_version    $v]    [-unique_sslid/-nounique_sslid]    [-aip_pattern    $re]    [-dip_pattern    $re]
       [-use_http11/-nouse_http11] [-local_ip $addr] [-request $req] [-formdata $req] [-httpad $str]  [-nthreads
       $nr]     [-immesg/-noimmesg]    [-dumpfd    $nr]    [-dump_urlfd    $nr]    [-unique_name/-nounique_name]
       [-leave_site_enter_dir/-dont_leave_site_enter_dir]     [-max_time     $nr]      [-del_after/-nodel_after]
       [-singlepage/-nosinglepage]         [-dump_after/-nodump_after]         [-dump_response/-nodump_response]
       [-auth_ntlm_domain  $str]   [-auth_proxy_ntlm_domain   $str]   [-js_pattern   $re]   [-follow_cmd   $str]
       [-retrieve_symlink/-noretrieve_symlink]  [-js_transform  $p  $t  $h  $a]  [-js_transform2  $p  $t  $h $a]
       [-ftp_proxy_user $str]  [-ftp_proxy_pass  $str]  [-limit_inlines/-dont_limit_inlines]  [-ftp_list_options
       $str] [-fix_wuftpd_list/-nofix_wuftpd_list] [-post_update/-nopost_update] [-info_dir $dir] [-mozcache_dir
       $dir]  [-aport  $list]  [-dport   $list]   [-hack_add_index/-nohack_add_index]   [-default_prefix   $str]
       [-rsleep/-norsleep]      [-ftp_login_handshake      $host     $handshake]     [-js_script_file     $file]
       [-dont_touch_url_pattern   $pat]   [-dont_touch_url_rpattern   $pat]   [-dont_touch_tag_rpattern    $pat]
       [-tag_pattern   $tag   $attrib   $url]   [-tag_rpattern   $tag   $attrib   $url]   [-nss_cert_dir   $dir]
       [-nss_accept_unknown_cert/-nonss_accept_unknown_cert]           [-nss_domestic_policy/-nss_export_policy]
       [-[no]verify]   [-tlogfile  $file]  [-trelative  {object  |  program}]  [-transparent_proxy  FQDN[:port]]
       [-transparent_ssl_proxy FQDN[:port]] [-sdemo] [-noencode] [URLs]

       pavuk -mode {normal | singlepage | singlereget} [-base_level $nr]

       pavuk -mode sync [-ddays $nr] [-subdir $dir] [-remove_old/-noremove_old]

       pavuk -mode resumeregets [-subdir $dir]

       pavuk -mode linkupdate [-X] [-h] [-v] [-cdir $dir] [-subdir $dir] [-scndir $dir] [-scenario $str]

       pavuk -mode reminder [-remind_cmd $str]

       pavuk         -mode          mirror          [-subdir          $dir]          [-remove_old/-noremove_old]
       [-remove_before_store/-noremove_before_store] [-always_mdtm/-noalways_mdtm]

DESCRIPTION

       This  manual  page  describes how to use pavuk. Pavuk can be used to mirror contents of internet/intranet
       servers and to maintain copies in a local tree of documents.  Pavuk stores retrieved documents in locally
       mapped  disk  space.  The  structure  of the local tree is the same as the one on the remote server. Each
       supported service (protocol) has its own subdirectory in the local tree.  Each referenced server has  its
       own  subdirectory  in  these  protocol  subdirectories; its name is followed by the port number on which
       the service resides, delimited by the _ character (this can be changed). With the option -fnrules you can  change  the  default
       layout of the local document tree, without losing link consistency.
       With pavuk it is possible to have up-to-date copies of remote documents in the local disk space.
       As  of version 0.3pl2, pavuk can automatically restart broken connections, and reget partial content from
       an FTP server (which must support the REST command), from a properly configured HTTP/1.1 server, or  from
       a HTTP/1.0 server which supports Ranges.
       As  of  version  0.6  it  is  possible to handle configurations via so called scenarios.  The best way to
       create such a configuration file  is  to  use  the  X  Window  interface  and  simply  save  the  created
       configuration. The other way is to use the -dumpscn switch.
       As  of  version  0.7pl1 it is possible to store authentification information into an authinfo file, which
       pavuk can then parse and use.
       As of version 0.8pl4 pavuk can fetch documents for use in a local proxy/cache server without storing them
       to local documents tree.
       As of version 0.9pl4 pavuk supports SOCKS (4/5) proxies if you have the required libraries.
       As  of  version  0.9pl12  pavuk can preserve permissions of remote files and symbolic links, so it can be
       used for powerful FTP mirroring.
       Pavuk supports SSL connections to FTP servers, if you specify ftps:// URL instead of ftp://.
       Pavuk can automatically handle file names with characters that are unsafe for the filesystem.  This is
       currently implemented only for the Win32 platform, and it is hard coded.
       Pavuk  can  now  use  HTTP/1.1  protocol  for  communication  with  HTTP  servers.  It can use persistent
       connections, so one TCP connection should be used to transfer several documents without closing it.  This
       feature saves network bandwidth and also speeds up network communication.
       Pavuk  can  do  configurable  POST requests to HTTP servers and also supports file uploading via HTTP POST
       requests.
       Pavuk can automatically fill in HTML forms it finds, if the user supplies data for their fields beforehand
       with the -formdata option.
       Pavuk  can  run  configurable  number  of  concurrently  running  downloading  threads when compiled with
       multithreading support.

Format of supported URLs

       HTTP
       http://[[user][:password]@]host[:port][/document]
       [[user][:password]@]host[:port][/document]

       HTTPS
       https://[[user][:password]@]host[:port][/document]
       ssl[.domain][:port][/document]

       FTP
       ftp://[[user][:password]@]host[:port][/relative_path][;type=x]
       ftp://[[user][:password]@]host[:port][//absolute_path][;type=x]
       ftp[.domain][:port][/document][;type=x]

       FTPS
       ftps://[[user][:password]@]host[:port][/relative_path][;type=x]
       ftps://[[user][:password]@]host[:port][//absolute_path][;type=x]
       ftps[.domain][:port][/document][;type=x]

       Gopher
       gopher://host[:port][/type[document]]
       gopher[.domain][:port][/type[document]]

Default mapping of URLs to local filenames

       HTTP
       http://[[user][:password]@]host[:port][/document][?query]
       to
       http/host_port/[document][?query]

       HTTPS
       https://[[user][:password]@]host[:port][/document][?query]
       to
       https/host_port/[document][?query]

       FTP
       ftp://[[user][:password]@]host[:port][/path]
       to
       ftp/host_port/[path]

       FTPS
       ftps://[[user][:password]@]host[:port][/path]
       to
       ftps/host_port/[path]

       Gopher
       gopher://host[:port][/type[document]]
       to
       gopher/host_port/[type[document]]

       NOTE: Pavuk will use the string with which it queries the target server as the name of the results  file.
       This  file  name may, in some cases, contain punctuations such as $,?,=,& etc. Such punctuation can cause
       problems when you are trying to browse downloaded files with your browser or you are  trying  to  process
       downloaded files with shell scripts or view files with file management utilities which reference the name
       of the results file.  If you believe that this may be causing problems for you, then you  can  remove  all
       punctuation  from  the  result file name with the option: -tr_del_chr [:punct:] or with other options for
       adjusting filenames.

OPTIONS

        All options are case insensitive.

List of options chapters

       Mode
       Help
       Indicate/Logging/Interface options
       Netli options
       Special start
       Scenario/Task options
       Directory options
       Preserve options
       Proxy options
       Proxy Authentification
       Protocol/Download Options
       Authentification
       Site/Domain/Port Limitation Options
       Limitation Document properties
       Limitation Document name
       Limitation Protocol Option
       Other Limitation Options
       Javascript support
       Cookie
       HTML rewriting engine tuning options
       Filename/URL Conversion Option
       Other Options

Mode

       -mode {normal, linkupdate, sync, singlepage, singlereget, resumeregets, dontstore, reminder, ftpdir, mirror}
              Set operation mode.
              normal - retrieves recursive documents
              linkupdate - update remote URLs in local HTML documents to local URLs if these URLs exist  in  the
              local tree
              sync  -  synchronize remote documents with local tree (if a local copy of a document is older than
              remote, the document is retrieved again, otherwise nothing happens)
              singlepage - URL is retrieved as one page with all inline objects (picture, sound ...)  this  mode
              is now obsoleted by -singlepage option.
              resumeregets  -  pavuk  scans the local tree for files that were not retrieved fully and retrieves
              them again (uses partial get if possible)
              singlereget - get URL until it is retrieved in full
              dontstore - transfer page from server, but don't store  it  to  the  local  tree.   This  mode  is
              suitable for fetching pages that are held in a local proxy/cache server.
              reminder - used to inform the user about changed documents
              ftpdir - used to list the contents of FTP directories

              default operation mode is normal mode.

Help

       -h     Print long verbose help message

       -v     Show version information and configuration at compilation time.

Indicate/Logging/Interface options

       -quiet Don't show any messages on the screen.

       -verbose
              Force to show output messages on the screen (default)

       -progress/-noprogress
              Show retrieving progress while running in the terminal (default is progress off)

       -stime/-nostime
              Show start and end time of transfer. (by default this information is not shown)

       -xmaxlog $nr
              Maximum  number  of log lines in the Log widget. 0 means unlimited.  This option is available only
              when compiled with the GTK+ GUI. (default value is 0)

       -logfile $file
              File where all produced messages are stored.

       -unique_log/-nounique_log
              When the logfile specified with the option -logfile is already used  by  another  process,  try  to
              generate a new unique name for the log file. (this option is turned off by default)

       -slogfile $file
              File  to  store short logs in. This file contains one line of information per processed document.
              This is meant to be used in connection with any sort of script to  produce  some  statistics,  for
              validating links on your website, or for generating simple sitemaps.  Multiple pavuk processes can
              use this file concurrently, without overwriting each other's entries.  Record structure:

              - PID of pavuk process
              - TIME current time
              - COUNTER in the format current/total number of URLs
              - STATUS contains the type of the error: FATAL, ERR,
                WARN or OK
              - ERRCODE is the number code of the error
                (see errcode.h in pavuk sources)
              - URL of the document
              - PARENTURL first parent document of this URL
                (when it doesn't have parent - [none])
              - FILENAME is the name of the local file the
                document is saved under
              - SIZE size of requested document if known
              - DOWNLOAD_TIME time taken to download this
                document, in the format seconds.milliseconds
              - HTTPRESP contains the first line of the HTTP server
                response

       -language $str
              Native language that pavuk should use for communication with its user (works only when there is  a
              message  catalog  for  that  language) GNU gettext support (for message internationalization) must
              also be compiled in. Default language is taken from your NLS environment variables.

       -gui_font $font
              Font used in the GUI interface. To list available X fonts use the xlsfonts command.   This  option
              is available only when compiled with GTK+ GUI support.

Netli options

       -[no]read_css
              Enable or disable fetching objects mentioned in style sheets.

       -[no]verify
              Enable or disable verifying server CERTS in SSL mode.

       -tlogfile $file
              Turn on Netli logging with output to specified file.

       -trelative {object | program}
              Make Netli timings relative to the start of the first object or the program.

       -transparent_proxy FQDN[:port]
              When processing URL, send the original, but send it to the IP address at FQDN

       -transparent_ssl_proxy FQDN[:port]
              When processing HTTPS URL, send the original, but send it to the IP address at FQDN

       -sdemo Output  in  sdemo  compatible  format. This is only used by sdemo. (For now it simply means output
              '-1' rather than '*'  when measurements are invalid.)

       -noencode
              Do not escape characters that are "unsafe" in URLS.

Special start

       -X     Start program with X Window interface (if compiled with  support  for  GTK+).   Pavuk  as  default
              starts without GUI, and behaves as regular commandline tool.

       -runX  When  used  together with the -X option, pavuk starts processing of URLs immediately after the GUI
              window is launched. Without the -X given, this option doesn't have  any  effect.   Only  available
              when compiled with GTK+ support.

       -bg/-nobg
              This  option  allows pavuk to detach from its terminal and run in background mode.  Pavuk will not
              output any messages to the terminal then. If you want  to  see  messages,  you  have  to  use  the
              -logfile  option  to  specify  a  file where messages will be written.  By default pavuk runs in the
              foreground.

       -check_bg/-nocheck_bg
              Normally, programs sent into the background after being  run  in  foreground  continue  to  output
              messages  to  the  terminal.   If  this  option  is  activated,  pavuk  checks if it is running as
              background job and will not write any messages to the terminal in this case. After  it  becomes  a
              foreground  job  again, it will start writing messages to terminal in the normal way.  This option
              is available only when your system supports retrieving of terminal info via tc*() functions.

       -prefs/-noprefs
              When you turn this option on, pavuk will preserve all settings when  exiting,  and  when  you  run
              pavuk with GUI interface again, all settings will be restored.  The settings will be stored in the
              ~/.pavuk_prefs file. By default pavuk won't restore its options when started.  This option is available
              only when compiled with GTK+.

       -schedule $time
              Execute  pavuk  at  the  time  specified  as  parameter.  The  Format  of  the  $time parameter is
              YYYY.MM.DD.hh.mm.  You need a properly configured scheduling with the at command  on  your  system
              for  using  this  option.   If  default configuration (at -f %f %t %d.%m.%Y) of scheduling command
              won't work on your system, try to adjust it with -sched_cmd option.

       -reschedule $nr
              Execute pavuk periodically with $nr hours period.  You need properly  configured  scheduling  with
              the at command on your system for using this option.

       -sched_cmd $str
              Command  to  use  for scheduling. Pavuk explicitly supports scheduling with at. $str should contain
              regular characters and macros, escaped by the % character.  Supported macros are:
                 %f
                  - for script filename
                 %t
                  - for time (in format HH:MM)
                  - all macros as supported by the strftime() function

       -urls_file $file
              If you use this option, pavuk will read URLs from $file before  it  starts  processing.   In  this
              file, each URL needs to be on a separate line. After the last URL, a single dot . followed by a LF
              (line-feed) character denotes the end.  Pavuk will start processing right after all URLs have been
              read.  If $file is given as the - character, standard input will be read.

       -store_info/-nostore_info
              This  option  causes  pavuk  to  store information about each document into a separate file in the
              .pavuk_info directory. This file is used to store the original URL from  which  the  document  was
              downloaded.  For  files  that  are downloaded via HTTP or HTTPS protocols, the whole HTTP response
              header is stored there. I recommend to use this option when you are using options that change  the
              default  layout  of  the  local document tree, because this info file helps pavuk to map the local
              filename to the URL. This option is also very useful when different URLs have the same filename in
              the  local  tree.  When  this  occurs, pavuk detects this using info files, and it will prefix the
              local name with numbers. By default, storing of this extra information is disabled.

       -info_dir $dir
              With this option you can set the location of a separate directory for storing the info files created
              when the -store_info option is used. This is useful when you don't want to mix the info files with
              regular document files in the destination directory. The structure of the info files is preserved;
              they are just stored in a different directory.

       -request $req
              With  this  option  you can specify extended information for starting URLs.  With this option you
              can also specify query data for POST or GET.  The current  syntax  of  this  option  is:  URL:["]$url["]
              [METHOD:["]{GET|POST}["]]            [ENCODING:["]{u|m}["]]           [FIELD:["]variable=value["]]
              [FILE:["]variable=filename["]] [LNAME:["]local_filename["]]

              - URL: specifies request URL
              - METHOD: specifies request method for URL and is
                one of GET or POST.
              - ENCODING: specifies encoding for request body data.
                  m is for multipart/form-data encoding
                  u is for application/x-www-form-urlencoded
                  encoding
              - FIELD: specifies field of request data in format
                  variable=value. For encoding of special characters
                  in variable and value you can use same encoding
                  as is used in application/x-www-form-urlencoded
                  encoding.
              - FILE: specifies special field of query, which is
                  used to specify file for POST based file upload.
              - LNAME: specifies localname for this request
       When you need to use inside the FIELD: and FILE: fields of request specification special characters,  you
       should   use  the  application/x-www-form-urlencoded  encoding  of  characters.  It  means  all  nonASCII
       characters, quote character ("), space character ( ), ampersand character (&), percent character (%)  and
       equal  character  (=) should be encoded in form %xx where xx is hexadecimal representation of ASCII value
       of character. So for example % character should be encoded like %25.

       -formdata $req
              This option gives you chance to specify contents for HTML forms found during  traversing  document
              tree.
               Current  syntax  of  this  option  is  same as for -request option, but ENCODING: and METHOD: are
              meaningless in this option semantics.
               In URL: you have to specify HTML form action URL, which will be matched against action URLs found
              in  processed  HTML  documents. If pavuk finds action URL which matches that supplied in -formdata
              option, pavuk will construct GET or POST request from  data  supplied  in  this  option  and  from
              default  form  field  values  supplied  in  the HTML  document.  Values  supplied  on the command line take
              precedence over those supplied in the HTML file.

       -nthreads $nr
              By means of this option you can specify how  many  concurrent  threads  will  download  documents.
              By default pavuk executes 3 concurrent downloading threads.  This option is available only when pavuk
              is compiled to support multithreading.

       -immesg/-noimmesg
              Pavuk's default behavior when running multiple downloading threads is to buffer all output messages
              in  a memory  buffer  and  flush  that  buffered  data  only  when a thread finishes processing of one
              document. With this option you can change this behavior to see the messages immediately when they are
              produced.  It  is  only useful when you want to debug some special cases in a multithreading environment.
              This option is available only when pavuk is compiled to support multithreading.

       -dumpfd $nr
              For scripting it is sometimes useful to be able to download a document directly  to  a pipe  or  variable
              instead  of  storing  it  to  regular  file. In such case you can use this option to dump data for
              example to stdout ($nr = 1).

       -dump_after/-nodump_after
              While using -dumpfd option in multithreaded pavuk, it is required to dump document in  one  moment
              because  documents downloaded in multiple threads can overlap. This option is also useful when you
              want to dump document after pavuk adjusts links inside HTML documents.

       -dump_response/-nodump_response
              This option has an effect only when used with the -dumpfd option. It  is  used  to  dump  HTTP  response
              headers.

       -dump_urlfd $nr
              When  you  use  this  option,  pavuk  will  output  all URLs found in HTML documents to file
              descriptor $nr. You can use this option to extract and convert all URLs to absolute.

Scenario/Task options

       -scenario $str
              Name of scenario to load and/or run. Scenarios are files with a structure similar to the  .pavukrc
              file.  Scenarios contain saved configurations. You can use it for periodical mirroring. Parameters
              from scenarios specified at the command line can be overwritten by command line parameters.  To be
              able to use this option, you need to specify scenario base directory with option -scndir.

       -dumpscn $filename
              Store  actual  configuration  into  scenario  file with name $filename.  This is useful to quickly
              create pre-configured scenarios for manual editing.

Directory options

       -msgcat $dir
              Directory which contains the message catalog for pavuk.  If you do not have permission to store  a
              pavuk  message  catalog  in  the  system  directory, you should simply create similar structure of
              directories in your home directory as it is on your system.

              For example:

              Your native language is German, and your home directory is /home/jano.

              You should at first create the directory /home/jano/locales/de/LC_MESSAGES/, then put  the  German
              pavuk.mo  there  and  set  -msgcat  to  /home/jano/locales/.   If  you  have  properly  set locale
              environment values, you will see pavuk speaking German.  This option is available  only  when  you
              compiled in support for GNU gettext messages internationalization.

       -cdir $dir
              Directory where all retrieved documents are stored. If not specified, the current directory is
              used. If the specified directory doesn't exist, it will be created.

       -scndir $dir
              Directory in which your scenarios are stored.  You must use this option when you  are  loading  or
              storing scenario files.

Preserve options

       -preserve_time/-nopreserve_time
              Store  downloaded  document  with  same modification time as on the remote site. Modification time
              will be set only when such information is available (some FTP servers  do  not  support  the  MDTM
              command,  and  some  documents  on  HTTP  servers  are  created online so pavuk can't retrieve the
              modification time of this document).  By default the modification time of documents isn't preserved.

       -preserve_perm/-nopreserve_perm
              Store downloaded document with the same permissions as on the remote site.  This option has effect
              only when downloading a file through FTP protocol and assumes that the -ftplist option is used. At
              default permissions are not preserved.

       -preserve_slinks/-nopreserve_slinks
              Set symbolic links to point exactly to same location  as  on  the  remote  server;  don't  do  any
              relocations.   This  option has effect only when downloading file through FTP protocol and assumes
              that the -ftplist option is used.  Default symbolic links are not preserved, and are retrieved  as
              regular documents with full contents of linked file.

              For  example,  assume that on the FTP server ftp.xx.org there is a symbolic link /pub/pavuk/pavuk-
              current.tgz,  which  points  to  /tmp/pub/pavuk-0.9pl11.tgz.   Pavuk  will  create  symbolic  link
              ftp/ftp.xx.org_21/pub/pavuk/pavuk-current.tgz
              if the -preserve_slinks option is used, this symbolic link will point to
              /tmp/pub/pavuk-0.9pl11.tgz
              if the -preserve_slinks option is not used, this symbolic link will point to
               ../../tmp/pub/pavuk-0.9pl11.tgz

       -retrieve_symlink/-noretrieve_symlink
              Retrieve files behind symbolic links instead of replicating symlinks in local tree.

Proxy options

       -http_proxy $site[:$port]
              If this parameter is used, then all HTTP requests are going through this  proxy  server.  This  is
              useful  if  your  site resides behind a firewall, or if you want to use a HTTP proxy cache server.
              The default port number is 8080.  Pavuk  allows  you  to  specify  multiple  HTTP  proxies  (using
              multiple  -http_proxy  options)  and  it  will  rotate  proxies with roundrobin priority disabling
              proxies with errors.

       -nocache/-cache
              Use this option whenever you want to get the document directly from the site  and  not  from  your
              HTTP proxy cache server. Default pavuk allows transfer of document copies from cache.

       -ftp_proxy $site[:$port]
              If  this  parameter  is  used, then all FTP requests are going through this proxy server.  This is
              useful when your site resides behind a firewall, or if you want to use  FTP  proxy  cache  server.
              The  default  port number is 22.  Pavuk supports three different types of proxies for FTP, see the
              options -ftp_httpgw, -ftp_dirtyproxy.  If none of  the  mentioned  options  is  used,  then  pavuk
              assumes a regular FTP proxy with USER user@host connecting to remote FTP server.

       -ftp_httpgw/-noftp_httpgw
              The  specified  FTP proxy is a HTTP gateway for the FTP protocol. Default FTP proxy is regular FTP
              proxy.

       -ftp_dirtyproxy/-noftp_dirtyproxy
              The specified FTP proxy is a HTTP proxy which supports a CONNECT request (pavuk  should  use  full
              FTP  protocol,  except  for  active data connections).  Default FTP proxy is regular FTP proxy.  If
              both -ftp_dirtyproxy and -ftp_httpgw are specified, -ftp_dirtyproxy is preferred.

       -gopher_proxy $site[:$port]
              Gopher gateway or proxy/cache server.

       -gopher_httpgw/-nogopher_httpgw
              The specified Gopher proxy server is a HTTP gateway for Gopher protocol.   When  -gopher_proxy  is
              set  and  this  -gopher_httpgw option isn't used, pavuk is using proxy as HTTP tunnel with CONNECT
              request to open connections to Gopher servers.

       -ssl_proxy $site[:$port]
              SSL proxy (tunneling) server [as that in CERN httpd + patch or  in  Squid]  with  enabled  CONNECT
              request  (at least on port 443). This option is available only when compiled with SSL support (you
              need the SSLeay or OpenSSL libraries with development headers).

Proxy Authentification

       -http_proxy_user $user
              Username for HTTP proxy authentification.

       -http_proxy_pass $pass
              Password for HTTP proxy authentification.

       -http_proxy_auth {1/2/3/4/user/Basic/Digest/NTLM}
              Authentification scheme for proxy access. Similar meaning as the -auth_scheme option (see help for
              this option for more details).  Default is 2 (Basic scheme).

       -auth_proxy_ntlm_domain $str
              NT or LM domain used for authorization against HTTP proxy server when NTLM authentification scheme
              is required. This option is available only when compiled with OpenSSL or libdes libraries.

       -auth_reuse_proxy_nonce/-noauth_reuse_proxy_nonce
              When using HTTP Proxy Digest access authentification scheme use  first  received  nonce  value  in
              multiple following requests.

       -ftp_proxy_user $user
              Username for FTP proxy authentification.

       -ftp_proxy_pass $pass
              Password for FTP proxy authentification.

Protocol/Download Options

       -ftp_passive
              Uses passive ftp when downloading via ftp.

       -ftp_active
              Uses active ftp when downloading via ftp.

       -active_ftp_port_range $min:$max
              This  option  permits  to  specify  the  ports  used  for active ftp. This permits easier firewall
              configuration since the range of ports can be restricted.

              Pavuk will randomly choose a number from within the specified range until an open port  is  found.
              Should  no  open  ports  be  found  within the given range, pavuk will default to a normal kernel-
              assigned port, and a message (debug level net) is output.

              The port range selected must be in the non-privileged range (eg. greater than or equal  to  1024);
              it  is  STRONGLY  RECOMMENDED  that  the  chosen range be large enough to handle many simultaneous
              active connections (for example, 49152-65534, the IANA-registered ephemeral port range).

       -always_mdtm/-noalways_mdtm
              Force pavuk to always use "MDTM" to determine the file modification time  and  never  use  cached
              times determined when listing the remote files.

       -remove_before_store/-noremove_before_store
              Force  unlink'ing  of  files  before new content is stored to a file. This is helpful if the local
              files are hardlinked to some other directory and after mirroring the hardlinks  are  checked.  All
              "broken" hardlinks indicate a file update.

       -retry $nr
              Set the number of attempts to transfer processed document. Default set to 1, this means pavuk will
              retry once to get documents which failed on first attempt.

       -nregets $nr
              Set the number of allowed regets on a single document, after a broken transfer.  Default value for
              this option is 2.

       -nredirs $nr
              Set the number of allowed HTTP redirects (use this to prevent redirect loops).  Default value for
              this option is 5, which conforms to the HTTP specification.

       -force_reget/-noforce_reget
              Force reget'ing of the whole document after a broken transfer  when  the  server  doesn't  support
              retrieving  of  partial  content.  Pavuk default behavior is to stop getting documents which don't
              allow restarting of transfer from specified position.

       -timeout $nr
              Timeout for stalled connections in minutes. This value is also used for connection  timeouts.  For
              sub-minute  timeouts  you  can  use  floating point numbers.  Default timeout is 0, and that means
              timeout checking is disabled.

       -noRobots/-Robots
              The -noRobots switch suppresses the use of the robots.txt standard, which is used to restrict
              access of Web robots to some locations on the web server. By default, checking of robots.txt files
              on HTTP servers is enabled. Always keep this checking enabled (-Robots) when you are downloading
              huge sets of pages with unpredictable layout.  This prevents you from upsetting server
              administrators :-).

       -noEnc/-Enc
              This  switch suppresses using of gzip or compress or deflate encoding in transfer. I don't know if
              some servers are broken or what, but they are  propagating  that  MIME  type  application/gzip  or
              application/compress as encoded. Turn this option off when you don't have libz support compiled in
              and also lack the gzip program which is used to decode documents encoded this way.  By default,
              decoding of downloaded documents is disabled.

       -check_size/-nocheck_size
              The  option  -nocheck_size  should  be used if you are trying to download pages from a HTTP server
              which sends a wrong Content-Length: field in the MIME header of response.  Default pavuk  behavior
              is to check this field and complain when something is wrong.

       -maxrate $nr
              If  you  don't  want  to give all your transfer bandwidth to pavuk, use this option to set pavuk's
              maximum transfer rate. This option accepts a floating point number to specify the transfer rate in
              kB/s.  If you want to get optimal settings, you also have to play with the size of the read buffer
              (option -bufsize) because pavuk is doing flow control only at application level.  By default pavuk
              uses the full bandwidth.

       -minrate $nr
              If  you  hate  slow transfer rates, this option allows you to break transfers with slow speed. You
              can set the minimum transfer rate, and if the connection gets slower  than  the  given  rate,  the
              transfer  will  be  stopped. The minimum transfer rate is given in kB/s.  At default pavuk doesn't
              check this limit.

       -bufsize $nr
              This option is used to specify the size of the read buffer (default size: 32kB).  If  you  have  a
              very fast connection, you may increase the size of the buffer to get a better read performance. If
              you need to decrease the transfer rate, you may need to decrease the size of the  buffer  and  set
              the  maximum transfer rate with the -maxrate option. This option accepts the size of the buffer in
              kB.

       -fs_quota $nr
              If you are running pavuk on a multiuser system, you may need to avoid filling up your file system.
              This option lets you specify how much space must remain free. If pavuk detects an underrun of the
              free space, it will stop downloading files. Specify this quota in kB. Default value is 0, and that
              means no checking of this quota.

       -file_quota $nr
              This  option  is  useful  when you want to limit downloading of big files, but want to download at
              least $nr kilobytes from big files.  A big file will be  transferred,  and  when  it  reaches  the
              specified size, transfer will break. Such document will be processed as properly downloaded, so be
              careful when using this option.  At default pavuk is transferring full size of documents.

       -trans_quota $nr
              If you are aware that your selection should address a big amount of data, you can use this  option
              to limit the amount of transferred data.  By default the transfer size is unlimited.

       -max_time $nr
              Set  maximum  amount of time for program run. After time is exceeded, pavuk will stop downloading.
              Time is specified in minutes. Default value is 0, and it means downloading time is not limited.

       -url_strategy $strategy
              This option allows you to specify a downloading order for URLs  in  document  tree.   This  option
              accepts the following strings as parameters :

              level - will order URLs as it loads it from HTML files (default)
              leveli - as previous, but inline objects URLs come first
              pre - will insert URLs from actual HTML document at start, before other
              prei - as previous, but inline objects URLs come first

       -send_if_range/-nosend_if_range
              Send If-Range: header in HTTP request. I found out, that some HTTP servers (greetings, MS :-)) are
              sending different ETag: fields in different responses  for  the  same,  unchanged  document.  This
              causes  problems  when  pavuk attempts to reget a document from such a server: pavuk will remember
              the old ETag value and use it in following requests for this document.  If the server  checks  it
              with  the  new  ETag  value  and it differs, it will refuse to send only part of the document, and
              start the download from scratch.

       -ssl_version $v
              Set required SSL protocol version for SSL communication.  $v is one of ssl2, ssl23, ssl3 or  tls1.
              This option is available only when compiled with SSL support.  Default is ssl23.

       -unique_sslid/-nounique_sslid
              This  option  can  be  used if you want to use a unique SSL ID for all SSL sessions. Default pavuk
              behavior is to negotiate each time new session ID for each connection.  This option  is  available
              only when compiled with SSL support.

       -use_http11/-nouse_http11
              This option is used to switch between the HTTP/1.0 and HTTP/1.1 protocols used with HTTP servers.
              Currently HTTP/1.1 is not the default because its implementation is very fresh and not 100% tested.
              Nevertheless, using HTTP/1.1 is highly recommended, because it is faster than HTTP/1.0 and uses
              less network bandwidth for initiating connections. In a future version I will make HTTP/1.1 the
              default.

       -local_ip $addr
              You  can  use  this option when you want to use specified network interface for communication with
              other hosts. This option is suitable for multihomed hosts with several network interfaces. Address
              should be entered as regular IP address or as host name.

       -identity $str
              This  option  allows you to specify content of User-Agent: field of HTTP request.  This is usable,
              when scripts on remote server returns different document on same URL for different browsers, or if
              some  HTTP server refuse to serve document for Web robots like pavuk. Default pavuk sends in User-
              Agent: field pavuk/$VERSION string.

       -auto_referer/-noauto_referer
              This option forces pavuk to send HTTP Referer: header field with starting URLs.  Content  of  this
              field  will  be  self  URL.  Using this option is required, when remote server checks the Referer:
              field.  By default pavuk won't send Referer: field with starting URLs.

       -referer/-noreferer
              This option allows to enable and disable the  transmission  of  HTTP  Referer:  header  field.  At
              default pavuk sends Referer: field.

       -httpad $str
              In  some  cases  you  may  want to add user defined fields to HTTP/HTTPS requests.  This option is
              exactly for this purpose. In $str you can directly specify content of additional  header.  If  you
              specify  only  raw  header,  it will be used only for starting requests. When you want to use this
              header with each request while crawling, prefix the header with + character.

       -del_after/-nodel_after
              This option allows you to delete FILES from REMOTE server, when download is properly finished.  By
              default this option is off.

       -FTPlist/-noFTPlist
              When option -FTPlist will be used, pavuk will retrieve content of FTP directories with FTP command
              LIST instead of NLST. So the same listing will be retrieved as with "ls -l"  UNIX  command.   This
              option  is  required  if  you need to preserve permissions of remote files or you need to preserve
              symbolic links.  Pavuk supports wide listing on FTP servers with regular BSD or SYSV style "ls -l"
              directory  listing,  on FTP servers with EPFL listing format, VMS style listing, DOS/Windows style
              listing and Novell listing format.  Default pavuk behavior  is  to  use  NLST  for  FTP  directory
              listings.

       -ftp_list_options $str
              Some  FTP  servers  require to supply extra options to LIST or NLST FTP commands to show all files
              and directories properly. But be sure not to use any extra options which can  reformat  output  of
              the listing.  Especially useful is the -a option which forces FTP server to show also dot files and
              directories and with broken WuFTP servers it also helps to produce  full  directory  listings  not
              just files.

       -fix_wuftpd/-nofix_wuftpd
              This option is the result of several attempts to get the -remove_old option working properly with
              WuFTPd server when -FTPlist option is used. The problem is that FTP command LIST on WuFTPd doesn't
              mind  when trying to list nonexisting directory, and indicates success in FTP response code.  When
              you activate this option, pavuk uses extra  FTP  command  (STAT  -d  dir)  to  check  whether  the
              directory really exists. Don't use this option until you are sure that you really need it!

Authentification

       -auth_file $file
              File  where  you  have  stored  authentification  information for access to some service. For file
              structure see below in FILES section.

       -auth_name $user
              If you are using this parameter, program is  doing  authentification  with  each  HTTP  access  to
              document.  Use  this  only  if  you know that only one HTTP server could be accessed or use -asite
              option to specify site to which you use authentification. Else your auth parameters will  be  sent
              to each accessed HTTP server.

       -auth_passwd $passwd
              Value of this parameter is used as password for authentification

       -auth_scheme {1/2/3/4/user/Basic/Digest/NTLM}
              This parameter specifies used authentification scheme.
              1 or user means user authentification scheme is used as defined in HTTP/1.0 or HTTP/1.1.  Password
              and user name are sent unencoded.
              2 or Basic means Basic authentification scheme is used as defined in HTTP/1.0.  Password and  user
              name are sent BASE64 encoded.
              3  or  Digest  means  Digest  access  authentification scheme based on MD5 checksums as defined in
              RFC2069.
              4 or NTLM means NTLM proprietary access authentification scheme used by  Microsoft  IIS  or  Proxy
              servers.   When  you  use  this  scheme,  you  must  also  specify  NT  or  LM  domain with option
              -auth_ntlm_domain. This scheme is supported only when compiled with OpenSSL or libdes libraries.

       -auth_ntlm_domain $str
              NT or LM domain used for authorization against HTTP server when  NTLM  authentification  scheme  is
              required. This option is available only when compiled with OpenSSL or libdes libraries.

       -auth_reuse_nonce/-noauth_reuse_nonce
              While  using  HTTP  Digest  access  authentification scheme use first received nonce value in more
              following requests.  Default pavuk negotiates nonce for each request.

       -ssl_key_file $file
              File with public key for SSL certificate (learn more from SSLeay or OpenSSL documentation).  This
              option  is available only when compiled with SSL support (you need SSLeay or OpenSSL libraries and
              development headers).

       -ssl_cert_file $file
              Certificate file in PEM format (learn more from SSLeay or OpenSSL documentation).  This option  is
              available  only  when  compiled  with  SSL  support  (you  need  SSLeay  or  OpenSSL libraries and
              development headers).

       -ssl_cer_passwd $str
              Password used to generate certificate (learn more from  SSLeay  or  OpenSSL  documentation).  This
              option  is available only when compiled with SSL support (you need SSLeay or OpenSSL libraries and
              development headers).

       -nss_cert_dir $dir
              Config directory for NSS (Netscape SSL implementation) certificates. Usually ~/.netscape  (created
              by  Netscape  communicator/navigator)  or  profile  directory below ~/.mozilla (created by Mozilla
              browser). The directory should contain cert7.db and key3.db files. If you don't  use  Mozilla  nor
              Netscape,  you  must  create  these files by utilities distributed with NSS libraries. Pavuk opens
              certificate database only readonly.  This option is available only when pavuk is compiled with SSL
              support provided by Netscape NSS SSL implementation.

       [-nss_accept_unknown_cert/-nonss_accept_unknown_cert]
              By default pavuk will reject connection to an SSL server whose certificate is not stored in local
              certificate database (set by -nss_cert_dir option).  You must  explicitly  force  pavuk  to  allow
              connection  to  servers  with  unknown  certificates.  This option is available only when pavuk is
              compiled with SSL support provided by Netscape NSS SSL implementation.

       [-nss_domestic_policy/-nss_export_policy]
              Selects sets of ciphers allowed/disabled by USA export rules.  This option is available only  when
              pavuk is compiled with SSL support provided by Netscape NSS SSL implementation.

       -from $email
              This  parameter  is used when accessing anonymous FTP server as password or is optionally inserted
              into From field in HTTP request. If not specified  pavuk  discovers  this  from  USER  environment
              variable and from site hostname.

       -send_from/-nosend_from
              This  option  is  used  for enabling or disabling sending of user identification, entered in -from
              option, as FTP anonymous user password and From: field of HTTP request.  By default this option is
              off.

       -ftp_login_handshake $host $handshake
              When  you need to use nonstandard login procedure for some of FTP servers, you can use this option
              to change default pavuk login procedure. To allow more  flexibility,  you  can  assign  the  login
              procedure to some server or to all. When $host is specified as empty string (""), then attached
              login procedure is assigned to all FTP servers besides those having assigned own login procedures.
              In  the  $handshake  parameter  you  can  specify  exact login procedure specified by FTP commands
              followed by expected FTP response codes delimited with backslash (\) characters.
              For example this is default login procedure when logging  in  regular  ftp  server  without  going
              through  proxy  server  : USER %u\331\PASS %p\230. There are two commands followed by two response
              codes. After USER command pavuk expects FTP response code 331 and after PASS command pavuk expects
              from  server  FTP  response  code  230. In ftp commands you can use following macros which will be
              replaced by respective values:

               %u - user name used to access FTP server
               %p - password used to access FTP server
               %U - user name used to access FTP proxy server
               %P - password used to access FTP proxy server
               %h - hostname of FTP server
               %s - port number on which FTP server listens

Site/Domain/Port Limitation Options

       -asite $list
              Specify comma separated list of allowed sites on which referenced documents are stored.

       -dsite $list
              Specify comma separated list of disallowed sites.  This is the opposite of the previous parameter.
              If both are used, the last occurrence of them takes effect.

       -adomain $list
              Specify comma separated list of allowed domains on which referenced documents are stored.

       -ddomain $list
              Specify comma separated list of disallowed domains. This is the opposite of the previous
              parameter.  If both are used, the last occurrence of them takes effect.

       -aport $list
              In $list, you can write comma separated list of ports from which you allow to download documents.

       -dport $list
              This option is the opposite of the previous option. It is used to specify denied ports.  If  both
              -aport and -dport options are used, the last occurrence of them takes effect and all other
              occurrences will be ignored.

Limitation Document properties

       -amimet $list
              List of comma separated allowed MIME types. You can use with this option also wildcard patterns.

       -dmimet $list
              List of comma separated disallowed MIME  types.  You  can  use  with  this  option  also  wildcard
              patterns.  This is the opposite of the previous parameter. If both are used, the last occurrence
              of them takes effect.

       -maxsize $nr
              Maximum allowed size of document.  This option is applied only when pavuk is able  to  detect  the
              size of the document before starting the transfer.  Default value is 0, and it means this limit
              isn't applied.

       -minsize $nr
              Minimal allowed size of document.  This option is applied only when pavuk is able to detect the
              size of the document before starting the transfer.  Default value is 0, and it means this limit
              isn't applied.

       -newer_than $time
              Allow only transfer of documents with modification time newer than specified in  parameter  $time.
              Format  of  $time  is:  YYYY.MM.DD.hh:mm.   To  apply  this  option  pavuk  must be able to detect
              modification time of document.

       -older_than $time
              Allow only transfer of documents with modification time older than specified in  parameter  $time.
              Format  of  $time  is:  YYYY.MM.DD.hh:mm.   To  apply  this  option  pavuk  must be able to detect
              modification time of document.

       -noCGI/-CGI
              This switch prevents transfer of dynamically generated parametric documents through CGI interface.
              This  is  detected  with occurrence of ? character inside URL.  Default pavuk behavior is to allow
              transfer of URLs with query strings.

       -alang $list
              This allows you to specify ordered comma separated  list  of  preferred  natural  languages.  This
              option works only with HTTP and HTTPS protocol using Accept-Language: MIME field.

       -acharset $list
              This option  allows  you  to  enter  comma  separated  list  of preferred encodings of transferred
              documents. This works only with HTTP and HTTPS urls  and  only  if  such  document  encodings  are
              located on destination server.
              example: -acharset iso-8859-2,windows-1250,utf8

Limitation Document name

       -asfx $list
              This parameter allows you to specify set of suffixes used to restrict selection of documents which
              will be processed.

       -dsfx $list
              Set of suffixes that are used to specify restriction on  selection  of  documents.   This  one  is
              inverse to previous option. The two options are mutually exclusive.

       -aprefix $list, -dprefix $list
              These two options allow you to specify set of allowed or disallowed prefixes of documents. They
              are mutually exclusive.

       -pattern $pattern
              This option allows you to specify wildcard pattern for documents. All documents are tested if they
              match this pattern.

       -rpattern $reg_exp
              This  is equal option as previous, but this uses regular expressions.  Available only on platforms
              which have any supported RE implementation.

       -skip_pattern $pattern
              This option allows you to specify wildcard pattern for documents  that  should  be  skipped.   All
              documents are tested if they match this pattern.

       -skip_rpattern $reg_exp
              This  is equal option as previous, but this uses regular expressions.  Available only on platforms
              which have any supported RE implementation.

       -url_pattern $pattern
              This option allows you to specify wildcard pattern for URLs. All URLs are  tested  if  they  match
              this pattern.
              Example:
              -url_pattern  http://\*.idata.sk:\*/~ondrej/\*  .  this  option  enables all HTTP URLs from domain
              .idata.sk on all ports which are located under /~ondrej/.

       -url_rpattern $reg_exp
              This is equal option as previous, but this uses regular expressions.  Available only on  platforms
              which have any supported RE implementation.

       -skip_url_pattern $pattern
              This  option allows you to specify wildcard pattern for URLs that should be skipped.  All URLs are
              tested if they match this pattern.

       -skip_url_rpattern $reg_exp
              This is equal option as previous, but this uses regular expressions.  Available only on  platforms
              which have any supported RE implementation.

       -aip_pattern $re
              This option allows you to limit set of transferred documents by server IP address.  IP address can
              be specified as regular expressions, so it is possible to specify  set  of  IP  addresses  by  one
              expression.  Available only on platforms which have any supported RE implementation.

       -dip_pattern $re
              This option is similar to previous option, but is used to specify set of disallowed IP addresses.
              Available only on platforms which have any supported RE implementation.

       -tag_pattern $tag $attrib $url
              More powerful version of -url_pattern option for more precise matching of allowed  URLs  based  on
              HTML  tag  name  pattern,  HTML  tag attribute name pattern and on URL pattern. You can use in all
              three parameters of this option wildcard  patterns,  thus  something  like  -tag_pattern  '*'  '*'
              url_pattern  is  equal  to  -url_pattern  url_pattern.  The $tag and $attrib parameters are always
              matched against uppercase strings. For example, if you want pavuk to follow only regular links
              ignoring any stylesheets, images, etc., use option -tag_pattern A HREF '*'.

       -tag_rpattern $tag $attrib $url
              This  is  variation on the -tag_pattern. It uses regular expression patterns in parameters instead
              of wildcard patterns used in the previous option.

Limitation Protocol Option

       -noHTTP/-HTTP
              This switch suppresses all transfers through HTTP protocol.  By default transfer through HTTP is
              enabled.

       -noSSL/-SSL
              This switch suppresses all transfers through HTTPS protocol (HTTP protocol over SSL).  By default
              transfer through HTTPS is enabled.  This option is available only when compiled with SSL support
              (you need SSLeay or OpenSSL libraries and development headers).

       -noGopher/-Gopher
              Suppress all transfers through Gopher Internet protocol.  By default transfer through Gopher is
              enabled.

       -noFTP/-FTP
              This switch prevents processing documents allocated on all FTP servers.  By default transfer
              through FTP is enabled.

       -noFTPS/-FTPS
              This  switch  prevents  processing  documents  allocated  on all FTP servers accessed through SSL.
              By default transfer through FTPS is enabled.  This option is available only when compiled with SSL
              support (you need SSLeay or OpenSSL libraries and development headers).

       -FTPhtml/-noFTPhtml
              By  using  of  option  -FTPhtml  you  can  force  pavuk  to process HTML files downloaded with FTP
              protocol.  At default pavuk won't parse HTML files from FTP servers.

       -FTPdir/-noFTPdir
              Force recursive processing of FTP directories too.  By default recursive downloading from FTP
              servers is disabled.

       -disable_html_tag $TAG,[$ATTRIB][;...]
       -enable_html_tag $TAG,[$ATTRIB][;...]
              Enable or disable processing of particular HTML tags or
              attributes.  At default all supported HTML tags are enabled.

              For example if you don't want to process  all  images  you  should  use  option  -disable_html_tag
              'IMG,SRC;INPUT,SRC;BODY,BACKGROUND' .

Other Limitation Options

       -subdir $dir
              Subdirectory  of local tree directory, to limit some of the modes {sync, resumeregets, linkupdate}
              in its tree scan.

       -dont_leave_site/-leave_site
              (Don't) leave starting site. By default pavuk can span hosts when recursing through WWW tree.

       -dont_leave_dir/-leave_dir
              (Don't) leave starting directory. If -dont_leave_dir option  is  used  pavuk  will  stay  only  in
              starting  directory  (including  its  own  subdirectories).   At  default pavuk can leave starting
              directories.

       -leave_site_enter_dir/-dont_leave_site_enter_dir
              If you are downloading WWW tree which spans multiple hosts with huge trees, you may want to  allow
              downloading of document which are in directory hierarchy below directory which we visited as first
              on each site. To obtain this, use option -dont_leave_site_enter_dir. As default pavuk will go also
              to higher directory levels on that site.

       -lmax $nr
              Set maximum allowed level of tree traverse.  Default is set to 0, which means that pavuk can
              traverse ad infinitum.  As of version 0.8pl1 inline objects of HTML pages are placed at same level
              as parent HTML page.

       -leave_level $nr
              Maximum  level  of  documents outside from site of starting URL.  Default is set to 0, and 0 means
              that checking is not applied.

       -site_level $nr
              Maximum level of sites outside from site of starting URL.  Default is set to 0, and 0  means  that
              checking is not applied.

       -dmax $nr
              Set  maximum  allowed  number of documents that are processed.  Default value is 0.  That means no
              restrictions are used in number of processed documents.

       -singlepage/-nosinglepage
              Using option -singlepage allows you to transfer just HTML  pages  with  all  its  inlined  objects
              (pictures,  sounds,  frame  documents,  ...).   As  default is disabled single page transfer. This
              option makes -mode singlepage option obsolete.

       -limit_inlines/-dont_limit_inlines
              With this option you can control whether limiting options apply also to inline objects  (pictures,
              sounds, ...). This is useful when you want to download specified set of HTML pages with all inline
              objects without any restrictions.

       -user_condition $str
              Script or program name for users own conditions.  You can write any script which should with  exit
              value  decide  if  download  URL  or not.  Script gets from pavuk any number of options, with this
              meaning :

                 -url $url - processed URL
                 -parent $url - any number of parent URLs
                 -level $nr - level of this URL from starting URL
                 -size $nr - size of requested URL
                 -date $datenr - modification time of requested URL in format YYYYMMDDhhmmss

              The exit status 0 of script or program means that current URL should be rejected and nonzero  exit
              status means that URL should be accepted.
              Warning  : use user conditions only if required because of big slowdowns caused by forking scripts
              for each checked URL.

       -follow_cmd $str
              This option allows you to specify script or program which can by its exit status decide whether to
              follow  URLs  from  current  HTML document. This script will be called after download of each HTML
              document.  The script will get following options as its parameters:

                 -url $url - URL of current HTML document
                 -infile $file - local file where is stored HTML document

              The exit status 0 of script or program means that URLs from current document will  be  disallowed,
              other exit status means, that pavuk can follow links from current HTML document.

Javascript support

       Support  for  scripting languages like JavaScript or VBScript in pavuk is done in a bit hacky way. There
       is no interpreter for these languages, so not all things will work. Whole support which pavuk has for these
       scripting  languages  is  based  on  regular expression patterns specified by user. Pavuk searches for these
       patterns in DOM event attributes of HTML  tags,  in  javascript:...  URLs,  in  inline  scripts  in  HTML
       documents  enclosed  between  <script></script>  tags  and  in  separate  javascript  files.  Support for
       scripting languages is only available when pavuk is  compiled  with  proper  regular  expression  library
       (POSIX/GNU/PCRE).

       -enable_js/-disable_js
              These options  are used to enable or disable processing of Javascript parts of HTML documents. You
              must enable this option to be able to use processing of javascript patterns.

       -js_pattern $re
              With this option you are specifying  what  patterns  match  interested  parts  of  Javascript  for
              extracting  URLs. The parameter must be RE pattern with exactly one subpattern which match exactly
              the URL part. For example to match URL in following type of javascript expressions :
                document.b1.src='pics/button1_pre.jpg'
              you can use this pattern
                "^document.[a-zA-Z0-9_]*.src[ ]*=[ ]*'(.*)'$"

       -js_transform $p $t $h $a
              This option is similar to previous, but you can use custom transform rules for the  URL  parts  of
              patterns  and also specify the exact HTML tag and attribute where to look for this pattern. The $p
              is the pattern to match the interested part of script. The $t is transform rule for  the  URL,  in
              this  parameter  the  $x  parts  will  be  replaced  by  x-th subpattern of the $p pattern. The $h
              parameter is exact HTML tag or "*" when this apply to javascript: URLs or DOM event attribs or  ""
              (empty  string)  when  this  apply to javascript body of HTML document or separate JS file. The $a
              parameter is exact HTML attrib of tag or "" (empty string) when  this  rule  apply  to  javascript
              body.

       -js_transform2 $p $t $h $a
              This  option  is very similar to previous. The meaning of all parameters is same, just the pattern
              $p can have only one substring which will be used in the transform rule $t. This  is  required  to
              allow  rewriting of URL parts of the tags and scripts. This option can also be used to force pavuk
              to recognize HTML tag/attribute pairs which pavuk does not support.

       -cookie_file $file
              File where are stored cookie infos. This file must be in Netscape cookie  file  format  (generated
              with Netscape Navigator or Communicator ...).

       -cookie_send/-nocookie_send
              Use collected cookies in HTTP/HTTPS requests.  Pavuk will not send at default cookies.

       -cookie_recv/-nocookie_recv
              Store  received cookies from HTTP/HTTPS responses into memory cookie cache.  At default pavuk will
              not remember received cookies.

       -cookie_update/-nocookie_update
              Update cookie file on disk and synchronize it with changes made by any concurrent  processes.   At
              default pavuk will not update cookie file on disk.

       -cookies_max $nr
              Maximum  number  of  cookies  in  memory  cookie  cache.   Default  value  is 0, and that means no
              restrictions for cookies number.

       -disabled_cookie_domains $list
              Comma-separated list of cookie domains which are not permitted to send  cookies  stored  into  cookie
              cache.

       -cookie_check/-nocookie_check
              Check  when  receiving  cookie,  if  cookie  domain  is equal to domain of server which sends this
              cookie. At default pavuk checks if server is setting cookies for its domain, and if it tries to set
              cookie for foreign domain pavuk will complain about that and will reject such cookie.

HTML rewriting engine tuning options

       -noRelocate/-Relocate
              This  switch  prevents  the  program  to rewrite relative URLs to absolute, after HTML document is
              transferred. Default pavuk behavior is to maintain link consistency of HTML  documents.  So  always
              when  HTML  document is downloaded pavuk will rewrite all URLs to point to local document if it is
              available and if it is not available it will point to remote document. After document is  properly
              downloaded, pavuk will update links in HTML documents, which point to this one.

       -all_to_local/-noall_to_local
              This  option  forces pavuk to change all URLs inside HTML document to local URLs immediately after
              download of document. Default is this option disabled.

       -sel_to_local/-nosel_to_local
              This option forces pavuk to change all URLs, which accomplish conditions for  download,  to  local
              inside HTML document immediately after download of document.  I recommend to use this option, when
              you are sure, that transfer will be without any problems. This option can save a lot of  processor
              time.  Default is this option disabled.

       -all_to_remote/-noall_to_remote
              This  option forces pavuk to change all URLs inside HTML document to remote URLs immediately after
              download of document.  Default is this option disabled.

       -post_update/-nopost_update
              This option is especially designed to allow in -fnrules option doing rules based on MIME  type  of
              document.  This  option  forces pavuk to generate local names for documents just after pavuk knows
              what is the MIME type of document. This have big impact on the rewriting engine  of  links  inside
              HTML documents. This option causes dysfunction of other options for controlling the link rewriting
              engine. Use this option only when you know what you are doing :-)

       -dont_touch_url_pattern $pat
              This option serves to deny rewriting and processing of particular URLs in HTML documents by  pavuk
              HTML  rewriting  engine.  This  option accepts wildcard patterns to specify such URLs. Matching is
              done against untouched URLs so when the URL is relative, you must use pattern  which  matches  the
              relative URL, when it is absolute, you must use absolute URL.

       -dont_touch_url_rpattern $pat
              This  option  is variation on previous option. This one uses regular patterns for matching of URLs
              instead of wildcard patterns used by -dont_touch_url_pattern option. This option is available only
              when pavuk is compiled with support for regular expression patterns.

       -dont_touch_tag_rpattern $pat
              This  option is variation on previous option, just matching is made on full HTML tag with included
              <>. This option accepts regular expression patterns. It is available only when pavuk  is  compiled
              with support for regular expression patterns.

Filename/URL Conversion Option

       -tr_del_chr $str
              All  characters  found  in  $str will be deleted from local name of document.  $str should contain
              escape sequences similar like in tr command:
              \n - newline
              \r - carriage return
              \t - horizontal tab space
              \0xXX - hexadecimal  ASCII value
              [:upper:] - all uppercase letters
              [:lower:] - all lowercase letters
              [:alpha:] - all letters
              [:alnum:] - all letters and digits
              [:digit:] - all digits
              [:xdigit:] - all hexadecimal digits
              [:space:] - all horizontal and vertical whitespace
              [:blank:] - all horizontal whitespace
              [:cntrl:] - all control characters
              [:print:] - all printable characters including space
              [:nprint:] - all non printable characters
              [:punct:] - all punctuation characters
              [:graph:] - all printable characters excluding space

       -tr_str_str $str1 $str2
              String $str1 from local name of document will be replaced with $str2.

       -tr_chr_chr $chrset1 $chrset2
              Characters from $chrset1 from local name of document will be replaced with corresponding character
              from $chrset2. $chrset1 and $chrset2 should have same syntax as $str in -tr_del_chr option.

       -store_name $str
              When  you  want to change local filename of first file downloaded with singlepage mode, you should
              use this option.

       -index_name $str
              With this option you can change directory index name. As default is used _._.html .

       -store_index/-nostore_index
              With option -nostore_index you should deny storing of directory indexes into HTML files.

       -fnrules $t $m $r
              This is a very powerful option! This option is used to flexible change layout  of  local  document
              tree.  It  accepts  three  parameters.  First  parameter  $t is used to say what type is following
              pattern.  F is used for wildcard pattern (uses fnmatch()) and R is  used  for  regular  expression
              pattern  (using  any  supported  RE implementation).  Second parameter is matching pattern used to
              select URLs for this rule.  If URL match this pattern, then local name for this  URL  is  computed
              following  rules  of  third parameter.  And third parameter is local name building rule. Pavuk now
              supports two kinds of local name building rules. One is simple based only  on  simple  macros  and
              other   more  complicated  extended  rule,  which  also  enables  to  perform  several  functions.
              Recognition between those two kinds of rules is done by looking at first character  of  rule.   In
              case when first character is '(', rule is extended and in all other cases it is the simple kind of
              rule.

              Simple rule should contain literals or escaped macros.  Macros are escaped by % character or by  $
              character.

              Here is list of recognized macros:

              $x  -  where  x  is  any positive number. This macro is replaced with x-th substring matched by RE
              pattern. (If you use this you need to understand RE !)
              %i - is replaced with protocol id (http, https, ftp, gopher)
              %p - is replaced with password. (use this only when usable)
              %u - is replaced with username.
              %h - is replaced with host name.
              %m - is replaced with domain name.
              %r - is replaced with port number.
              %d - is replaced with path to document.
              %n - is replaced with document name.
              %b - is replaced with basename of document (without extension).
              %e - is replaced with extension.
              %s - is replaced with searchstring.
              %M - is replaced with MIME type of document. When you are using this macro, you  *must*  use  also
              -post_update option else it won't work.
              %E - is replaced with default extension assigned to MIME type of document. When you are using this
              macro, you *must* use also -post_update option else it won't work.
              %x - where x is positive number. This macro is replaced with x-th directory from path to  document
              from beginning.
              %-x - where x is positive number. This macro is replaced with x-th directory from path to document
              from end.

              Here is example. If you want place document into single directories by extension, you  should  use
              following fnrules option:
              -fnrules F '*' '/%e/%n'

              Extended rule always begins with character '('. It uses some kind of LISP like syntax.

              Here  are  base  rules for writing extended rules :
              - the local filename for a rule of this kind is the return value of the function
              - each function is enclosed inside round braces ()
              - first token right after opening brace is function name
              - each function have nonzero fixed number of parameters
              - each function returns numeric or string value
              - function parameters are separated by any number of space characters
              - parameter of function should be string, number, macro or other function
              - string is ever quoted with "
              - each numeric parameter can be in any encoding supported by strtod()  function  (octal,  decimal,
              hexadecimal, ...)
              - there is no implicit conversion from number to string
              - each macro is prefixed by % character and is one character long
              - each macro is replaced by its string representation from current URL
              - function parameters are typed strictly
              - toplevel function must return string value

              Extended  rule  supports  full  set  of  %  escaped  macros  supported with simple rules, plus two
              following addition macros :
              %U - URL string
              %o - default localname for URL

              Here is description of all supported functions

              sc - concat two string parameters
                 - accepts two string parameters
                 - returns string value
              ss - substring from string
                 - accepts three parameters.
                   - first is string from which we want to cut subpart
                   - second is number which represents starting position in string
                   - third is number which represents ending position in string
                 - returns string value
              hsh - compute modulo hash value from string with specified base
                 - accepts two parameters
                   - first is string for which we are computing the hash value
                   - second is numeric value for base of modulo hash
                 - returns numeric value
              md5 - compute MD5 checksum for string
                 - accepts one string value
                 - returns string which represents MD5 checksum
              lo - convert all characters inside string to lower case
                 - accepts one string value
                 - returns string value
              up - convert all characters inside string to upper case
                 - accepts one string value
                 - returns string value
              ue - encode unsafe characters in string with same encoding  which  is  used  for  encoding  unsafe
              characters inside URL (%xx). As default are encoded all nonascii values when this function is used.
                 - accepts two string values
                   - first is string which we want to encode
                   - second is string which contains unsafe characters
                 - return string value
              dc - delete unwanted characters from string (have similar functionality as -tr_del_chr option)
                 - accepts two string values
                   - first is string from which we want delete
                   - second is string which contains characters we want to delete.
                 - returns string value
              tc  -  replace character with other character in string (have similar functionality as -tr_chr_chr
              option)
                 - accepts three string values
                   - first is string inside which we want to replace characters
                   - second is set of characters which we want to replace
                   - third is set of characters with which we are replacing
                 - returns string value
              ts - replace some string inside string with  any  other  string  (have  similar  functionality  as
              -tr_str_str option)
                 - accepts three string values
                   - first is string inside which we want to replace string
                   - second is the from string
                   - third is to string
                 - returns string value
              spn  -  calculate initial length of string which contains only specified set of characters.  (have
              same functionality as strspn() libc function)
                 - accepts two string values
                   - first is input string
                   - second is set of acceptable characters
                 - returns numeric value
              cspn - calculate initial length of string which  doesn't  contain  specified  set  of  characters.
              (have same functionality as strcspn() libc function)
                 - accepts two string values
                   - first is input string
                   - second is set of unacceptable characters
                 - returns numeric value
              sl - calculate length of string
                 - accepts one string value
                 - returns numeric value
              ns - convert number to string by format
                 - accepts two parameters
                   - first parameter is format string same as for printf() function
                   - second is number which we want to convert
                 - returns string value
              lc - return position of last occurrence of specified character inside string
                 - accepts two string parameters
                   - first string which we are searching in
                   - second string contains character for which we are looking for
                 - returns numeric value
              + - add two numeric values
                 - accepts two numeric values
                 - returns numeric value
              - - subtract two numeric values
                 - accepts two numeric values
                 - returns numeric value
              % - modulo addition
                 - accepts two numeric values
                 - returns numeric value
              * - multiply two numeric values
                 - accepts two numeric values
                 - returns numeric value
              / - divide two numeric values
                 - accepts two numeric values
                 - returns numeric value
              rmpar - remove parameter from query string
                - accepts two string
                  - first string is string which we are adjusting
                  - second parameter is name of parameter which should be removed
                - returns adjusted string
              getval - get query string parameter value
                - accepts two string
                  - first string is query string from which to get the parameter
                    value (usually %s)
                  - second string is name of parameter for which we want to get
                    the value
                - returns value of the parameter or empty string when the parameter
                  doesn't exists
              sif - logical decision
                - accepts three parameters
                  - first is numeric and when is zero then result of this decision
                    is result of second parameter, else result is result of third
                    parameter
                  - second parameter is string
                  - third parameter is string
                - returns string result of decision
              ! - logical not
                - accepts one numeric parameter
                - returns negation of parameter
              & - logical and
                - accept two numeric parameters
                - returns logical and of parameters
              | - logical or
                - accept two numeric parameters
                - returns logical or of parameters
              getext - get file extension
                - accept one string (filename or path)
                - return string containing extension of parameter
              seq - compare two strings
                - accepts two strings for comparison
                - returns numeric value 0 - if different 1 - if equal
              jsf - execute JavaScript function
                - accepts one string parameter which holds name of
                  JavaScript function specified in script loaded with
                  -js_script_file option.
                - returns string value equal to return value of
                  JavaScript function
                - this function is available only when pavuk is compiled
                  with support for JavaScript bindings

              For  example,  if  you are mirroring very huge number of internet sites into same local directory,
              too much entries in one directory, should cause performance problems. You may use for example  hsh
              or  md5  functions to generate one additional level of hash directories based on hostname with one
              of following options :

              -fnrules F '*' '(sc (ns "%02d/" (hsh %h 100)) %o)'
              -fnrules F '*' '(sc (ss (md5 %h) 0 2) %o)'

       -base_level $nr
              Number of directory levels to omit in local tree.

              For example when downloading URL  ftp://ftp.idata.sk/pub/unix/www/pavuk-0.7pl1.tgz  you  enter  at
              command   line   -base_level   4   in   local   tree  will  be  created  www/pavuk-0.7pl1.tgz  not
              ftp/ftp.idata.sk_21/pub/unix/www/pavuk-0.7pl1.tgz as normally.

       -default_prefix $str
              Default prefix of mirrored directory. This option is used only when you are trying to  synchronize
              content  of  remote  directory  which  was  downloaded using -base_level option. Also you must use
              directory based synchronization method, not URL based synchronization method. This  is  especially
              useful, when used in conjunction with -remove_old option.

       -remove_adv/-noremove_adv
              This  option  is  used for turn on/off of removing HTML tags which contains advertisement banners.
              The banners are not removed from HTML file, but are commented out.  Such URLs  also  will  not  be
              downloaded.   This  option have effect only when used with option -adv_re.  Default is turned off.
              This option is available only  when  your  system  have  support  for  one  of  supported  regular
              expressions implementation.

       -adv_re $RE
              This  option  is  used  to specify regular expressions for matching URLs of advertisement banners.
              For example : -adv_re http://ad.doubleclick.net/.*   is  used  to  match  all  files  from  server
              ad.doubleclick.net.   This  option  is  available only when your system have any supported regular
              expressions implementation.

       -unique_name/-nounique_name
              Pavuk as default always attempts to assign to unique URL unique local filename.  If this  behavior
              is not wanted, you can use option -nounique_name to disable this.

Other Options

       -sleep $nr
              This  option  allows  you  to  specify number of seconds during that the program will be suspended
              between two transfers. Useful to deny server overload.  Default value for this option is 0.

       -rsleep/-norsleep
              When this option is active, pavuk randomizes the sleep  time  between  transfers  in  interval
              between zero and value specified with -sleep option. Default is this option inactive.

       -ddays $nr
              If  document has modification time later as $nr days, then in sync mode pavuk attempts to retrieve
              newer copy of document from remote server. Default value is 0.

       -remove_old/-noremove_old
              Remove improper documents (that, which doesn't exist on remote site).   This  option  have  effect
              only  when  used in directory based sync mode.  When used with URL based sync mode, pavuk will not
              remove any old files which were excluded from document tree and are not  referenced  in  any  HTML
              document.   You  must  also  use  option -subdir, to let pavuk find files which belongs to current
              mirror.  As default pavuk won't remove any old files.

       -browser $str
              is used to set your browser command (in URL tree dialog you can use right  click  to  raise  menu,
              from  which  you  can start browser on actually selected URL).  This option is available only when
              compiled with GTK GUI and with support for URL tree preview.

       -debug/-nodebug
              turns on displaying of debug messages. This option is available only when compiled  with  -DDEBUG.
              If  -debug  option  is  used pavuk will output verbose information about documents, whole protocol
              level information, locking informations and more (depends on -debug_level setup). This option   is
              used  just  like  trigger  to  enable  output  of  debug messages selected by -debug_level option.
              Default is debug mode turned off.

       -debug_level $level
              Set level of required debug informations. $level can be numeric value which represent binary  mask
              for  requested  debug  levels, or comma separated list of supported debug levels.  Currently pavuk
              supports following debug levels :
              html - for HTML parser debugging
              protos - to see server side protocol messages
              protoc - to see client side protocol messages
              procs - to see some special procedure calls
              locks - for debugging of documents locking
              net - for debugging some low level network stuff
              misc - for miscellaneous unsorted debug messages
              user - for verbose user level messages
              all - request all currently supported debug levels
              mtlock - locking of resources in multithreading environment
              mtthr - launching/waking/sleeping/stopping of threads in multithreaded environment
              protod - for DEBUGGING of POST requests
              limits - for debugging limiting options, you will see the reason why particular URLs are  rejected
              by pavuk and which option caused this.
              ssl - to enable verbose reporting about SSL related things.

       -remind_cmd $str
              This  option  have effect only when running pavuk in reminder mode. To command specified with this
              option pavuk sends result of running reminder mode.  There are listed URLs which are  changed  and
              URLs  which  have  any  errors.   Default remind command is "mailx user@server -s \"pavuk reminder
              result\"" .

       -nscache_dir $dir
              Path to Netscape browser cache directory. If you specify this path, pavuk attempts to find out  if
              you  have  URL in this cache.  If URL is there it will be fetched else pavuk will download it from
              net. The cache directory index file must be named index.db  and  must  be  located  in  the  cache
              directory.  To support this feature, pavuk have to be linked with BerkeleyDB 1.8x .

       -mozcache_dir $dir
              Path  to  Mozilla  browser  cache  directory. Same functionality as with previous option, just for
              different browser with different cache formats.  Pavuk supports both formats  of  Mozilla  browser
              disk  cache  (old  for  versions <0.9 and new used in 0.9=<).  The old format cache directory must
              contain cache directory index database with name cache.db. The new format  cache  directory  must
              contain  map  file  _CACHE_MAP_,  and three block files _CACHE_001_, _CACHE_002_, _CACHE_003_.  To
              support old Mozilla cache format, pavuk have to be linked with BerkeleyDB 1.8x. New Mozilla  cache
              format doesn't require any external library.

       -post_cmd $str
              Post-processing  command,  which  will  be  executed  after successful download of document.  This
              command may somehow handle with document. During time of running this command, pavuk leaves actual
              document  locked,  so there isn't chance that some other pavuk process will modify document.  This
              postprocessing command will get three additional parameters from pavuk.
                 - local name of document
                 - 1/0 1 if document is HTML document, 0 if not
                 - original URL of this document

       -hack_add_index/-nohack_add_index
              This is a somewhat hacky option. It forces pavuk to also add the directory indexes of all queued
              documents to the URL queue. This allows pavuk to download more documents from a site than it
              would be able to reach by normal traversal of HTML documents. A bit dirty, but useful in some
              cases.

       -js_script_file $file
              Pavuk optionally has a built-in JavaScript interpreter to allow high-level customization of some
              internal procedures. Currently you can customize two things with your own JavaScript functions:
              you can use them to set precise limiting options, or you can write your own functions which can
              be used inside rules of the -fnrules option. With this option you can load a JavaScript script
              with functions into pavuk's internal JavaScript interpreter. To learn more about these
              capabilities, read the separate document jsbind.txt, which comes with the pavuk sources in the
              toplevel directory. This option is available only when you have compiled pavuk with support for
              JavaScript bindings.

EXIT STATUS

       As of version 0.9pl29 pavuk has changed the way it indicates status by exit codes. In earlier versions
       exit status 0 meant no error, and a nonzero exit status was something like the count of failed
       documents. In all versions from 0.9pl29 onwards the following exit codes are defined:

           0 - no error, everything is OK
           1 - error in configuration of pavuk options or
               error in config files
           2 - some error occurred while downloading documents

ENVIRONMENTAL VARIABLES

       USER   variable is used to construct email address from user and hostname

       LC_* or LANG
              used to set internationalized environment

       PAVUKRC_FILE
              with this variable you can specify alternative location for your pavukrc configuration file.

REQUIRED EXTERNAL PROGRAMS

       at     is used for scheduling.

       gunzip is used to decode gzip or compress encoded documents.

BUGS

       If you find any, please let me know.

FILES

       @SYSCONFDIR@/pavukrc

       ~/.pavukrc

       ~/.pavuk_prefs

              These files are used as default configuration files.  You may specify there some  constant  values
              like  your  proxy server or your preferred WWW browser. Configuration options reflect command line
              options.  Not all parameters are suitable for use  in  default  configuration  file.   You  should
              select only some of them, which you really need.

              The file ~/.pavuk_prefs is a special file which contains automatically stored configuration.
              This file is used only when running the GUI interface of pavuk and the option -prefs is active.

              The first file parsed is @SYSCONFDIR@/pavukrc (if present), then ~/.pavukrc (if present), then
              ~/.pavuk_prefs (if present). Finally, the command line is parsed. The precedence is as follows:

              - highest -
              Entered in user interface
              Entered in command line
              ~/.pavuk_prefs
              ~/.pavukrc
              @SYSCONFDIR@/pavukrc
              - lowest -

              Here is a table of config file keywords and their corresponding command line options.

              MaxLevel:                  --->  -lmax
              MaxDocs:                   --->  -dmax
              MaxSize:                   --->  -maxsize
              MinSize:                   --->  -minsize
              SleepBetween:              --->  -sleep
              MaxRetry:                  --->  -retry
              MaxRegets:                 --->  -nregets
              MaxRedirections:           --->  -nredirs
              CommTimeout:               --->  -timeout
              RegetRollbackAmount:       --->  -rollback
              DocExpiration:             --->  -ddays
              UseCache:                  --->  -nocache
              UseRobots:                 --->  -noRobots
              AllowFTP:                  --->  -noFTP
              AllowHTTP:                 --->  -noHTTP
              AllowSSL:                  --->  -noSSL
              AllowGopher:               --->  -noGopher
              AllowCGI:                  --->  -noCGI
              AllowGZEncoding:           --->  -noEnc
              AllowFTPRecursion:         --->  -FTPdir
              ForceReget:                --->  -force_reget
              Debug:                     --->  -debug
              AllowedSites:              --->  -asite
              DisallowedSites:           --->  -dsite
              AllowedDomains:            --->  -adomain
              DisallowedDomains:         --->  -ddomain
              AllowedPrefixes:           --->  -aprefix
              DisallowedPrefixes:        --->  -dprefix
              AllowedSuffixes:           --->  -asfx
              DisallowedSuffixes:        --->  -dsfx
              AllowedMIMETypes:          --->  -amimet
              DisallowedMIMETypes:       --->  -dmimet
              PreferredLanguages:        --->  -alang
              PreferredCharset:          --->  -acharset
              WorkingDir:                --->  -cdir
              WorkingSubDir:             --->  -subdir
              HTTPAuthorizationScheme:   --->  -auth_scheme
              HTTPAuthorizationName:     --->  -auth_name
              HTTPAuthorizationPassword: --->  -auth_passwd
              AuthReuseDigestNonce:      --->  -auth_reuse_nonce
              SSLCertPassword:           --->  -ssl_cert_passwd
              SSLCertFile:               --->  -ssl_cert_file
              SSLKeyFile:                --->  -ssl_key_file
              EmailAddress:              --->  -from
              MatchPattern:              --->  -pattern
              REMatchPattern:            --->  -rpattern
              SkipMatchPattern:          --->  -skip_pattern
              SkipREMatchPattern:        --->  -skip_rpattern
              URLMatchPattern:           --->  -url_pattern
              URLREMatchPattern:         --->  -url_rpattern
              SkipURLMatchPattern:       --->  -skip_url_pattern
              SkipURLREMatchPattern:     --->  -skip_url_rpattern
              DefaultMode:               --->  -mode
              FTPProxy:                  --->  -ftp_proxy
              HTTPProxy:                 --->  -http_proxy
              SSLProxy:                  --->  -ssl_proxy
              GopherProxy:               --->  -gopher_proxy
              FTPViaHTTPProxy:           --->  -ftp_httpgw
              GopherViaHTTPProxy:        --->  -gopher_httpgw
              HTTPProxyUser:             --->  -http_proxy_user
              HTTPProxyPass:             --->  -http_proxy_pass
              HTTPProxyAuth:             --->  -http_proxy_auth
              AuthReuseProxyDigestNonce: --->  -auth_reuse_proxy_nonce
              Browser:                   --->  -browser
              ScenarioDir:               --->  -scndir
              ShowProgress:              --->  -progress
              XMaxLogSize:               --->  -xmaxlog
              LogFile:                   --->  -logfile
              RemoveOldDocuments:        --->  -remove_old
              AuthFile:                  --->  -auth_file
              BaseLevel:                 --->  -base_level
              FTPDirtyProxy:             --->  -ftp_dirtyproxy
              ActiveFTPData:             --->  -ftp_active/-ftp_passive
              ActiveFTPPortRange:        --->  -active_ftp_port_range
              AlwaysMDTM:                --->  -always_mdtm/-noalways_mdtm
              RemoveBeforeStore:         --->  -(no)remove_before_store
              ShowDownloadTime:          --->  -stime
              NLSMessageCatalogDir:      --->  -msgcat
              Quiet:                     --->  -quiet/-verbose
              NewerThan:                 --->  -newer_than
              OlderThan:                 --->  -older_than
              Reschedule:                --->  -reschedule
              DontLeaveSite:             --->  -dont_leave_site/-leave_site
              DontLeaveDir:              --->  -dont_leave_dir/-leave_dir
              PreserveTime:              --->  -preserve_time/-nopreserve_time
              LeaveLevel:                --->  -leave_level
              GUIFont:                   --->  -gui_font
              UserCondition:             --->  -user_condition
              CookieFile:                --->  -cookie_file
              CookieSend:                --->  -cookie_send/-nocookie_send
              CookieRecv:                --->  -cookie_recv/-nocookie_recv
              CookieUpdate:              --->  -cookie_update/-nocookie_update
              CookiesMax:                --->  -cookies_max
              CookieCheckDomain:         --->  -cookie_check/-nocookie_check
              DisabledCookieDomains:     --->  -disabled_cookie_domains
              DisableHTMLTag:            --->  -disable_html_tag
              EnableHTMLTag:             --->  -enable_html_tag
              TrDeleteChar:              --->  -tr_del_chr
              TrStrToStr:                --->  -tr_str_str
              TrChrToChr:                --->  -tr_chr_chr
              IndexName:                 --->  -index_name
              StoreName:                 --->  -store_name
              PreservePermisions:        --->  -preserve_perm/-nopreserve_perm
              PreserveAbsoluteSymlinks:  --->  -preserve_slinks/-nopreserve_slinks
              FTPListCMD:                --->  -FTPlist/-noFTPlist
              MaxRate:                   --->  -maxrate
              MinRate:                   --->  -minrate
              ReadBufferSize:            --->  -bufsize
              BgMode:                    --->  -bg/-nobg
              CheckSize:                 --->  -check_size/-nocheck_size
              SLogFile:                  --->  -slogfile
              Identity:                  --->  -identity
              SendFromHeader:            --->  -send_from/-nosend_from
              RunX:                      --->  -runX
              FnameRules:                --->  -fnrules
              StoreDocInfoFiles:         --->  -store_info/-nostore_info
              AllLinksToLocal:           --->  -all_to_local/-noall_to_local
              AllLinksToRemote:          --->  -all_to_remote/-noall_to_remote
              SelectedLinksToLocal:      --->  -sel_to_local/-nosel_to_local
              ReminderCMD:               --->  -remind_cmd
              AutoReferer:               --->  -auto_referer/-noauto_referer
              URLsFile:                  --->  -urls_file
              UsePreferences:            --->  -prefs/-noprefs
              FTPhtml:                   --->  -FTPhtml/-noFTPhtml
              StoreDirIndexFile:         --->  -store_index/-nostore_index
              Language:                  --->  -language
              FileSizeQuota:             --->  -file_quota
              TransferQuota:             --->  -trans_quota
              FSQuota:                   --->  -fs_quota
              EnableJS:                  --->  -enable_js/-disable_js
              UrlSchedulingStrategy:     --->  -url_strategy
              NetscapeCacheDir:          --->  -nscache_dir
              RemoveAdvertisement:       --->  -remove_adv/-noremove_adv
              AdvBannerRE:               --->  -adv_re
              CheckIfRunnigAtBackground: --->  -check_bg/-nocheck_bg
              SendIfRange:               --->  -send_if_range/-nosend_if_range
              SchedulingCommand:         --->  -sched_cmd
              UniqueLogName:             --->  -unique_log/-nounique_log
              PostCommand:               --->  -post_cmd
              SSLVersion:                --->  -ssl_version
              UniqueSSLID:               --->  -unique_sslid/-nounique_sslid
              AddHTTPHeader:             --->  -httpad
              StatisticsFile:            --->  -statfile
              WaitOnExit:                --->  -ewait
              AllowedIPAdrressPattern:   --->  -aip_pattern
              DisallowedIPAdrressPattern:--->  -dip_pattern
              SiteLevel:                 --->  -site_level
              UseHTTP11:                 --->  -use_http11
              MaxRunTime:                --->  -max_time
              LocalIP:                   --->  -local_ip
              RequestInfo:               --->  -request
              HashSize:                  --->  -hash_size
              NumberOfThreads:           --->  -nthreads
              ImmediateMessages:         --->  -immesg/-noimmesg
              HTMLFormData:              --->  -formdata
              DumpFD:                    --->  -dumpfd
              DumpUrlFD:                 --->  -dump_urlfd
              DeleteAfterTransfer:       --->  -del_after/-nodel_after
              UniqueDocName:             --->  -unique_name/-nounique_name
              LeaveSiteEnterDirectory:   --->  -leave_site_enter_dir/-dont_leave_site_enter_dir
              SinglePage:                --->  -singlepage/-nosinglepage
              NTLMAuthorizationDomain:   --->  -auth_ntlm_domain
              NTLMProxyAuthorizationDomain:
                                         --->  -auth_proxy_ntlm_domain
              JavascriptPattern:         --->  -js_pattern
              FollowCommand:             --->  -follow_cmd
              RetrieveSymlinks:          --->  -retrieve_symlink/-noretrieve_symlink
              JSTransform:               --->  -js_transform
              JSTransform2:              --->  -js_transform2
              FTPProxyUser:              --->  -ftp_proxy_user
              FTPProxyPassword:          --->  -ftp_proxy_pass
              LimitInlineObjects:        --->  -limit_inlines/-dont_limit_inlines
              FTPListOptions:            --->  -ftp_list_options
              FixWuFTPDBrokenLISTcmd:    --->  -fix_wuftpd_list/-nofix_wuftpd_list
              PostUpdate:                --->  -post_update/-nopost_update
              SeparateInfoDir:           --->  -info_dir
              MozillaCacheDir:           --->  -mozcache_dir
              AllowedPorts:              --->  -aport
              DisallowedPorts:           --->  -dport
              HackAddIndex:              --->  -hack_add_index/-nohack_add_index
              JavaScriptFile:            --->  -js_script_file
              FtpLoginHandshake:         --->  -ftp_login_handshake
              NSSCertDir:                --->  -nss_cert_dir
              NSSAcceptUnknownCert:      --->  -nss_accept_unknown_cert/-nonss_accept_unknown_cert
              NSSDomesticPolicy:         --->  -nss_domestic_policy/-nss_export_policy
              DontTouchUrlREPattern:     --->  -dont_touch_url_rpattern
              DontTouchUrlPattern:       --->  -dont_touch_url_pattern
              DontTouchTagREPattern:     --->  -dont_touch_tag_rpattern
              HTMLTagPattern:            --->  -tag_pattern
              HTMLTagREPattern:          --->  -tag_rpattern
              URL:                       --->  one URL (more lines with URL:
                                               ... means more URL's)

       A line which begins with '#' is a comment.
       TrStrToStr: and TrChrToChr: must contain two quoted strings.  All parameter names are case insensitive.
       If any option is missing here, try looking inside the config.c source file.

       See pavukrc.sample file for example

       .pavuk_authinfo

              The file should contain as many authentication records as you need.  Records are separated by
              any number of empty lines.  Parameter names are case insensitive.

              Structure of record:

              Proto: <proto ID>    ---> identification of protocol
                                        (ftp/http/https/..)
                                   - required field
              Host: <host[:port]>  ---> host name
                                   - required field
              User: <user>         ---> name of user
                                   - optional
              Pass: <password>     ---> password for user
                                   - optional
              Base: <path>         ---> base prefix of document path
                                   - optional
              Realm: <name>        ---> realm for HTTP authorization
                                   - optional
              NTLMDomain: <domain> ---> NT/LM domain for NTLM authorization
                                   - optional
              Type: <type>         ---> HTTP authentication scheme
                                             - 1/user   - user auth scheme
                                             - 2/Basic  - Basic auth scheme (default)
                                             - 3/Digest - Digest auth scheme
                                             - 4/NTLM   - NTLM auth scheme
                                   - optional

       see pavuk_authinfo.sample file for example

       ~/.pavuk_keys
              This is the file where information about configurable menu option shortcuts is stored.  It is
              available only when pavuk is compiled with Gtk+ 1.2 or higher.

       ~/.pavuk_remind_db
              This file contains information about URLs for running in reminder mode. The structure of this
              file is very simple. Each line contains information about one URL. The first entry on the line
              is the last known modification time of the URL (stored in time_t format - number of seconds
              since 1.1.1970 GMT), and the second entry is the URL.

EXAMPLE COMMAND LINE

       pavuk -mode mirror -nobg -store_info -info_dir
       /mirror/info -nthreads 1 -cdir /mirror/incoming -subdir
       /mirror/incoming -preserve_time -nopreserve_perm
       -nopreserve_slinks -noretrieve_symlink -force_reget
       -noRobots -trans_quota 16384 -maxsize 16777216
       -max_time 28 -nodel_after -remove_before_store -ftpdir
       -ftplist -ftp_list_options -a -dont_leave_site
       -dont_leave_dir -all_to_local -remove_old -nostore_index
       -active_ftp_port_range 57344:65535 -always_mdtm
       -ftp_passive -base_level 2 http://<my_host>/doc/

SEE ALSO

       Look into the ChangeLog file for more information about new features in particular versions of pavuk.

AUTHOR

       Main development Ondrejicka Stefan
       Look into CREDITS file of sources for additional information.

AVAILABILITY

       pavuk is available from http://pavuk.sourceforge.net/