libdap  Updated for version 3.20.3
libdap4 is an implementation of OPeNDAP's DAP protocol.
HTTPConnect.cc
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2002,2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 
27 #include "config.h"
28 
29 #ifdef HAVE_UNISTD_H
30 #include <unistd.h>
31 #endif
32 
33 #include <sys/stat.h>
34 
35 #ifdef WIN32
36 #include <io.h>
37 #endif
38 
39 #include <string>
40 #include <vector>
41 #include <functional>
42 #include <algorithm>
43 #include <sstream>
44 #include <fstream>
45 #include <iterator>
46 #include <cstdlib>
47 #include <cstring>
48 #include <cerrno>
49 
50 //#define DODS_DEBUG2
51 //#define HTTP_TRACE
52 //#define DODS_DEBUG
53 
54 #undef USE_GETENV
55 
56 
57 #include "debug.h"
58 #include "mime_util.h"
59 #include "media_types.h"
60 #include "GNURegex.h"
61 #include "HTTPCache.h"
62 #include "HTTPConnect.h"
63 #include "RCReader.h"
64 #include "HTTPResponse.h"
65 #include "HTTPCacheResponse.h"
66 
67 using namespace std;
68 
69 namespace libdap {
70 
71 // These global variables are not MT-Safe, but I'm leaving them as is because
72 // they are used only for debugging (set them in a debugger like gdb or ddd).
73 // They are not static because I think that many debuggers cannot access
74 // static variables. 08/07/02 jhrg
75 
76 // Set this to 1 to turn on libcurl's verbose mode (for debugging).
77 int www_trace = 0;
78 
79 // Keep the temporary files; useful for debugging.
80 int dods_keep_temps = 0;
81 
#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417
// Messages for HTTP client errors, indexed by (status - CLIENT_ERR_MIN).
// Multi-line messages are built from adjacent string literals so that no
// source indentation leaks into the text shown to the user.
static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1] =
    {
        "Bad Request:",
        "Unauthorized: Contact the server administrator.",
        "Payment Required.",
        "Forbidden: Contact the server administrator.",
        "Not Found: The data source or server could not be found.\n"
        "Often this means that the OPeNDAP server is missing or needs attention.\n"
        "Please contact the server administrator.",
        "Method Not Allowed.",
        "Not Acceptable.",
        "Proxy Authentication Required.",
        "Request Time-out.",
        "Conflict.",
        "Gone.",
        "Length Required.",
        "Precondition Failed.",
        "Request Entity Too Large.",
        "Request URI Too Large.",
        "Unsupported Media Type.",
        "Requested Range Not Satisfiable.",
        "Expectation Failed."
    };

#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505
// Messages for HTTP server errors, indexed by (status - SERVER_ERR_MIN).
static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] =
    {
        "Internal Server Error.",
        "Not Implemented.",
        "Bad Gateway.",
        "Service Unavailable.",
        "Gateway Time-out.",
        "HTTP Version Not Supported."
    };

/** Translate an HTTP status code into a human readable message.

    @param status The HTTP status code.
    @return The message from the client (400-417) or server (500-505)
    table; any other value yields the generic "Unknown Error" text. */
static string
http_status_to_string(int status)
{
    if (status >= CLIENT_ERR_MIN && status <= CLIENT_ERR_MAX)
        return string(http_client_errors[status - CLIENT_ERR_MIN]);

    if (status >= SERVER_ERR_MIN && status <= SERVER_ERR_MAX)
        return string(http_server_errors[status - SERVER_ERR_MIN]);

    return string("Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.");
}
132 
133 static ObjectType
134 determine_object_type(const string &header_value)
135 {
136  // DAP4 Data: application/vnd.opendap.dap4.data
137  // DAP4 DMR: application/vnd.opendap.dap4.dataset-metadata+xml
138 
139  string::size_type plus = header_value.find('+');
140  string base_type;
141  string type_extension = "";
142  if (plus != string::npos) {
143  base_type= header_value.substr(0, plus);
144  type_extension = header_value.substr(plus+1);
145  }
146  else
147  base_type = header_value;
148 
149  if (base_type == DMR_Content_Type
150  || (base_type.find("application/") != string::npos
151  && base_type.find("dap4.dataset-metadata") != string::npos)) {
152  if (type_extension == "xml")
153  return dap4_dmr;
154  else
155  return unknown_type;
156  }
157  else if (base_type == DAP4_DATA_Content_Type
158  || (base_type.find("application/") != string::npos
159  && base_type.find("dap4.data") != string::npos)) {
160  return dap4_data;
161  }
162  else if (header_value.find("text/html") != string::npos) {
163  return web_error;
164  }
165  else
166  return unknown_type;
167 }
168 
173 class ParseHeader : public unary_function<const string &, void>
174 {
175  ObjectType type; // What type of object is in the stream?
176  string server; // Server's version string.
177  string protocol; // Server's protocol version.
178  string location; // Url returned by server
179 
180 public:
181  ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0")
182  { }
183 
184  void operator()(const string &line)
185  {
186  string name, value;
187  parse_mime_header(line, name, value);
188 
189  DBG2(cerr << name << ": " << value << endl);
190 
191  // Content-Type is used to determine the content of DAP4 responses, but allow the
192  // Content-Description header to override CT o preserve operation with DAP2 servers.
193  // jhrg 11/12/13
194  if (type == unknown_type && name == "content-type") {
195  type = determine_object_type(value); // see above
196  }
197  if (name == "content-description" && !(type == dap4_dmr || type == dap4_data || type == dap4_error)) {
198  type = get_description_type(value); // defined in mime_util.cc
199  }
200  // The second test (== "dods/0.0") tests if xopendap-server has already
201  // been seen. If so, use that header in preference to the old
202  // XDODS-Server header. jhrg 2/7/06
203  else if (name == "xdods-server" && server == "dods/0.0") {
204  server = value;
205  }
206  else if (name == "xopendap-server") {
207  server = value;
208  }
209  else if (name == "xdap") {
210  protocol = value;
211  }
212  else if (server == "dods/0.0" && name == "server") {
213  server = value;
214  }
215  else if (name == "location") {
216  location = value;
217  }
218  }
219 
220  ObjectType get_object_type()
221  {
222  return type;
223  }
224 
225  string get_server()
226  {
227  return server;
228  }
229 
230  string get_protocol()
231  {
232  return protocol;
233  }
234 
235  string get_location() {
236  return location;
237  }
238 };
239 
255 static size_t
256 save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs)
257 {
258  DBG2(cerr << "Inside the header parser." << endl);
259  vector<string> *hdrs = static_cast<vector<string> * >(resp_hdrs);
260 
261  // Grab the header, minus the trailing newline. Or \r\n pair.
262  string complete_line;
263  if (nmemb > 1 && *(static_cast<char*>(ptr) + size * (nmemb - 2)) == '\r')
264  complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 2));
265  else
266  complete_line.assign(static_cast<char *>(ptr), size * (nmemb - 1));
267 
268  // Store all non-empty headers that are not HTTP status codes
269  if (complete_line != "" && complete_line.find("HTTP") == string::npos) {
270  DBG(cerr << "Header line: " << complete_line << endl);
271  hdrs->push_back(complete_line);
272  }
273 
274  return size * nmemb;
275 }
276 
278 static int
279 curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *)
280 {
281  string message(msg, size);
282 
283  switch (info) {
284  case CURLINFO_TEXT:
285  cerr << "Text: " << message; break;
286  case CURLINFO_HEADER_IN:
287  cerr << "Header in: " << message; break;
288  case CURLINFO_HEADER_OUT:
289  cerr << "Header out: " << message; break;
290  case CURLINFO_DATA_IN:
291  cerr << "Data in: " << message; break;
292  case CURLINFO_DATA_OUT:
293  cerr << "Data out: " << message; break;
294  case CURLINFO_END:
295  cerr << "End: " << message; break;
296 #ifdef CURLINFO_SSL_DATA_IN
297  case CURLINFO_SSL_DATA_IN:
298  cerr << "SSL Data in: " << message; break;
299 #endif
300 #ifdef CURLINFO_SSL_DATA_OUT
301  case CURLINFO_SSL_DATA_OUT:
302  cerr << "SSL Data out: " << message; break;
303 #endif
304  default:
305  cerr << "Curl info: " << message; break;
306  }
307  return 0;
308 }
309 
313 void
314 HTTPConnect::www_lib_init()
315 {
316  d_curl = curl_easy_init();
317  if (!d_curl)
318  throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl.");
319 
320  // Now set options that will remain constant for the duration of this
321  // CURL object.
322 
323  // Set the proxy host.
324  if (!d_rcr->get_proxy_server_host().empty()) {
325  DBG(cerr << "Setting up a proxy server." << endl);
326  DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host()
327  << endl);
328  DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port()
329  << endl);
330  DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw()
331  << endl);
332  curl_easy_setopt(d_curl, CURLOPT_PROXY,
333  d_rcr->get_proxy_server_host().c_str());
334  curl_easy_setopt(d_curl, CURLOPT_PROXYPORT,
335  d_rcr->get_proxy_server_port());
336 
337  // As of 4/21/08 only NTLM, Digest and Basic work.
338 #ifdef CURLOPT_PROXYAUTH
339  curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (long)CURLAUTH_ANY);
340 #endif
341 
342  // Password might not be required. 06/21/04 jhrg
343  if (!d_rcr->get_proxy_server_userpw().empty())
344  curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD,
345  d_rcr->get_proxy_server_userpw().c_str());
346  }
347 
348  curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer);
349  // We have to set FailOnError to false for any of the non-Basic
350  // authentication schemes to work. 07/28/03 jhrg
351  curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0);
352 
353  // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
354  // choosing the the 'safest' one supported by the server.
355  // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
356  curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
357 
358  curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1);
359  curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1);
360  curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
361  // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth
362  // param of save_raw_http_headers to a vector<string> object.
363 
364  // Follow 302 (redirect) responses
365  curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1);
366  curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5);
367 
368  // If the user turns off SSL validation...
369  if (d_rcr->get_validate_ssl() == 0) {
370  curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0);
371  curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0);
372  }
373 
374  // Look to see if cookies are turned on in the .dodsrc file. If so,
375  // activate here. We honor 'session cookies' (cookies without an
376  // expiration date) here so that session-based SSO systems will work as
377  // expected.
378  if (!d_cookie_jar.empty()) {
379  DBG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl);
380  curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str());
381  curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1);
382  }
383 
384  if (www_trace) {
385  cerr << "Curl version: " << curl_version() << endl;
386  curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1);
387  curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug);
388  }
389 }
390 
394 class BuildHeaders : public unary_function<const string &, void>
395 {
396  struct curl_slist *d_cl;
397 
398 public:
399  BuildHeaders() : d_cl(0)
400  {}
401 
402  void operator()(const string &header)
403  {
404  DBG(cerr << "Adding '" << header.c_str() << "' to the header list."
405  << endl);
406  d_cl = curl_slist_append(d_cl, header.c_str());
407  }
408 
409  struct curl_slist *get_headers()
410  {
411  return d_cl;
412  }
413 };
414 
429 long
430 HTTPConnect::read_url(const string &url, FILE *stream, vector<string> *resp_hdrs, const vector<string> *headers)
431 {
432  curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str());
433 
434 #ifdef WIN32
435  // See the curl documentation for CURLOPT_FILE (aka CURLOPT_WRITEDATA)
436  // and the CURLOPT_WRITEFUNCTION option. Quote: "If you are using libcurl as
437  // a win32 DLL, you MUST use the CURLOPT_WRITEFUNCTION option if you set the
438  // CURLOPT_WRITEDATA option or you will experience crashes". At the root of
439  // this issue is that one should not pass a FILE * to a windows DLL. Close
440  // inspection of libcurl yields that their default write function when using
441  // the CURLOPT_WRITEDATA is just "fwrite".
442  curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
443  curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite);
444 #else
445  curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
446 #endif
447 
448  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
449  ostream_iterator<string>(cerr, "\n")));
450 
451  BuildHeaders req_hdrs;
452  req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(),
453  req_hdrs);
454  if (headers)
455  req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs);
456 
457  curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
458 
459  // Turn off the proxy for this URL?
460  bool temporary_proxy = false;
461  if ((temporary_proxy = url_uses_no_proxy_for(url))) {
462  DBG(cerr << "Suppress proxy for url: " << url << endl);
463  curl_easy_setopt(d_curl, CURLOPT_PROXY, 0);
464  }
465 
466  string::size_type at_sign = url.find('@');
467  // Assume username:password present *and* assume it's an HTTP URL; it *is*
468  // HTTPConnect, after all. 7 is position after "http://"; the second arg
469  // to substr() is the sub string length.
470  if (at_sign != url.npos)
471  d_upstring = url.substr(7, at_sign - 7);
472 
473  if (!d_upstring.empty())
474  curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str());
475 
476  // Pass save_raw_http_headers() a pointer to the vector<string> where the
477  // response headers may be stored. Callers can use the resp_hdrs
478  // value/result parameter to get the raw response header information .
479  curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs);
480 
481  // This is the call that causes curl to go and get the remote resource and "write it down"
482  // utilizing the configuration state that has been previously conditioned by various perturbations
483  // of calls to curl_easy_setopt().
484  CURLcode res = curl_easy_perform(d_curl);
485 
486  // Free the header list and null the value in d_curl.
487  curl_slist_free_all(req_hdrs.get_headers());
488  curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0);
489 
490  // Reset the proxy?
491  if (temporary_proxy && !d_rcr->get_proxy_server_host().empty())
492  curl_easy_setopt(d_curl, CURLOPT_PROXY,
493  d_rcr->get_proxy_server_host().c_str());
494 
495  if (res != 0)
496  throw Error(d_error_buffer);
497 
498  long status;
499  res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status);
500  if (res != 0)
501  throw Error(d_error_buffer);
502 
503  char *ct_ptr = 0;
504  res = curl_easy_getinfo(d_curl, CURLINFO_CONTENT_TYPE, &ct_ptr);
505  if (res == CURLE_OK && ct_ptr)
506  d_content_type = ct_ptr;
507  else
508  d_content_type = "";
509 
510  return status;
511 }
512 
516 bool
517 HTTPConnect::url_uses_proxy_for(const string &url)
518 {
519  if (d_rcr->is_proxy_for_used()) {
520  Regex host_regex(d_rcr->get_proxy_for_regexp().c_str());
521  int index = 0, matchlen;
522  return host_regex.search(url.c_str(), url.size(), matchlen, index) != -1;
523  }
524 
525  return false;
526 }
527 
531 bool
532 HTTPConnect::url_uses_no_proxy_for(const string &url) throw()
533 {
534  return d_rcr->is_no_proxy_for_used()
535  && url.find(d_rcr->get_no_proxy_for_host()) != string::npos;
536 }
537 
538 // Public methods. Mostly...
539 
546 HTTPConnect::HTTPConnect(RCReader *rcr, bool use_cpp) : d_username(""), d_password(""), d_cookie_jar(""),
547  d_dap_client_protocol_major(2), d_dap_client_protocol_minor(0), d_use_cpp_streams(use_cpp)
548 
549 {
550  d_accept_deflate = rcr->get_deflate();
551  d_rcr = rcr;
552 
553  // Load in the default headers to send with a request. The empty Pragma
554  // headers overrides libcurl's default Pragma: no-cache header (which
555  // will disable caching by Squid, et c.). The User-Agent header helps
556  // make server logs more readable. 05/05/03 jhrg
557  d_request_headers.push_back(string("Pragma:"));
558  string user_agent = string("User-Agent: ") + string(CNAME)
559  + string("/") + string(CVER);
560  d_request_headers.push_back(user_agent);
561  if (d_accept_deflate)
562  d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
563 
564  // HTTPCache::instance returns a valid ptr or 0.
565  if (d_rcr->get_use_cache())
566  d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(),true);
567  else
568  d_http_cache = 0;
569 
570  DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec
571  << ")" << endl);
572 
573  if (d_http_cache) {
574  d_http_cache->set_cache_enabled(d_rcr->get_use_cache());
575  d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0);
576  d_http_cache->set_max_size(d_rcr->get_max_cache_size());
577  d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj());
578  d_http_cache->set_default_expiration(d_rcr->get_default_expires());
579  d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0);
580  }
581 
582  d_cookie_jar = rcr->get_cookie_jar();
583 
584  www_lib_init(); // This may throw either Error or InternalErr
585 }
586 
587 HTTPConnect::~HTTPConnect()
588 {
589  DBG2(cerr << "Entering the HTTPConnect dtor" << endl);
590 
591  curl_easy_cleanup(d_curl);
592 
593  DBG2(cerr << "Leaving the HTTPConnect dtor" << endl);
594 }
595 
/** Predicate that is true for strings that begin with a given header
    prefix (e.g. "Content-Type:"). Intended for use with find_if().

    The comparison ignores ASCII case because HTTP header field names are
    case-insensitive. The prefix is stored by value so the predicate stays
    valid when it is built from a temporary string.

    The std::unary_function base was dropped (deprecated in C++11, removed
    in C++17); the equivalent typedefs are supplied directly. */
class HeaderMatch {
    string d_header;

    // Locale-independent lower-casing of one ASCII character.
    static char ascii_lower(char c)
    {
        return (c >= 'A' && c <= 'Z') ? static_cast<char>(c - 'A' + 'a') : c;
    }

    public:
        typedef const string &argument_type;
        typedef bool result_type;

        HeaderMatch(const string &header) : d_header(header) {}

        /** @param arg A complete header line.
            @return True if arg starts with the stored prefix. */
        bool operator()(const string &arg) const
        {
            if (arg.size() < d_header.size())
                return false;

            for (string::size_type i = 0; i < d_header.size(); ++i) {
                if (ascii_lower(arg[i]) != ascii_lower(d_header[i]))
                    return false;
            }

            return true;
        }
};
603 
616 HTTPResponse *
617 HTTPConnect::fetch_url(const string &url)
618 {
619 #ifdef HTTP_TRACE
620  cout << "GET " << url << " HTTP/1.0" << endl;
621 #endif
622 
623  HTTPResponse *stream;
624 
625  if (/*d_http_cache && d_http_cache->*/is_cache_enabled()) {
626  stream = caching_fetch_url(url);
627  }
628  else {
629  stream = plain_fetch_url(url);
630  }
631 
632 #ifdef HTTP_TRACE
633  stringstream ss;
634  ss << "HTTP/1.0 " << stream->get_status() << " -" << endl;
635  for (size_t i = 0; i < stream->get_headers()->size(); i++) {
636  ss << stream->get_headers()->at(i) << endl;
637  }
638  cout << ss.str();
639 #endif
640 
641  ParseHeader parser;
642 
643  // An apparent quirk of libcurl is that it does not pass the Content-type
644  // header to the callback used to save them, but check and add it from the
645  // saved state variable only if it's not there (without this a test failed
646  // in HTTPCacheTest). jhrg 11/12/13
647  if (!d_content_type.empty() && find_if(stream->get_headers()->begin(), stream->get_headers()->end(),
648  HeaderMatch("Content-Type:")) == stream->get_headers()->end())
649  stream->get_headers()->push_back("Content-Type: " + d_content_type);
650 
651  parser = for_each(stream->get_headers()->begin(), stream->get_headers()->end(), ParseHeader());
652 
653 #ifdef HTTP_TRACE
654  cout << endl << endl;
655 #endif
656 
657  // handle redirection case (2007-04-27, gaffigan@sfos.uaf.edu)
658  if (parser.get_location() != "" &&
659  url.substr(0,url.find("?",0)).compare(parser.get_location().substr(0,url.find("?",0))) != 0) {
660  delete stream;
661  return fetch_url(parser.get_location());
662  }
663 
664  stream->set_type(parser.get_object_type()); // uses the value of content-description
665 
666  stream->set_version(parser.get_server());
667  stream->set_protocol(parser.get_protocol());
668 
669  if (d_use_cpp_streams) {
670  stream->transform_to_cpp();
671  }
672 
673  return stream;
674 }
675 
676 // Look around for a reasonable place to put a temporary file. Check first
677 // the value of the TMPDIR env var. If that does not yield a path that's
678 // writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as
679 // defined in stdio.h). If both come up empty, then use `./'.
680 
681 // Change this to a version that either returns a string or an open file
682 // descriptor. Use information from https://buildsecurityin.us-cert.gov/
683 // (see open()) to make it more secure. Ideal solution: get deserialize()
684 // methods to read from a stream returned by libcurl, not from a temporary
685 // file. 9/21/07 jhrg Updated to use strings, other misc changes. 3/22/11
686 static string
687 get_tempfile_template(const string &file_template)
688 {
689  string c;
690 
691  // Windows has one idea of the standard name(s) for a temporary files dir
692 #ifdef WIN32
693  // white list for a WIN32 directory
694  Regex directory("[-a-zA-Z0-9_:\\]*");
695 
696  // If we're OK to use getenv(), try it.
697 #ifdef USE_GETENV
698  c = getenv("TEMP");
699  if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
700  goto valid_temp_directory;
701 
702  c= getenv("TMP");
703  if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
704  goto valid_temp_directory;
705 #endif // USE_GETENV
706 
707  // The windows default
708  c = "c:\tmp";
709  if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
710  goto valid_temp_directory;
711 
712 #else // Unix/Linux/OSX has another...
713  // white list for a directory
714  Regex directory("[-a-zA-Z0-9_/]*");
715 #ifdef USE_GETENV
716  c = getenv("TMPDIR");
717  if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
718  goto valid_temp_directory;
719 #endif // USE_GETENV
720 
721  // Unix defines this sometimes - if present, use it.
722 #ifdef P_tmpdir
723  if (access(P_tmpdir, W_OK | R_OK) == 0) {
724  c = P_tmpdir;
725  goto valid_temp_directory;
726  }
727 #endif
728 
729  // The Unix default
730  c = "/tmp";
731  if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
732  goto valid_temp_directory;
733 
734 #endif // WIN32
735 
736  // If we found nothing useful, use the current directory
737  c = ".";
738 
739 valid_temp_directory:
740 
741 #ifdef WIN32
742  c += "\\" + file_template;
743 #else
744  c += "/" + file_template;
745 #endif
746 
747  return c;
748 }
749 
768 string
769 get_temp_file(FILE *&stream) throw(Error)
770 {
771  string dods_temp = get_tempfile_template((string)"dodsXXXXXX");
772 
773  vector<char> pathname(dods_temp.length() + 1);
774 
775  strncpy(&pathname[0], dods_temp.c_str(), dods_temp.length());
776 
777  DBG(cerr << "pathanme: " << &pathname[0] << " (" << dods_temp.length() + 1 << ")" << endl);
778 
779  // Open truncated for update. NB: mkstemp() returns a file descriptor.
780 #if defined(WIN32) || defined(TEST_WIN32_TEMPS)
781  stream = fopen(_mktemp(&pathname[0]), "w+b");
782 #else
783  // Make sure that temp files are accessible only by the owner.
784  int mask = umask(077);
785  if (mask < 0)
786  throw Error("Could not set the file creation mask: " + string(strerror(errno)));
787  int fd = mkstemp(&pathname[0]);
788  if (fd < 0)
789  throw Error("Could not create a temporary file to store the response: " + string(strerror(errno)));
790 
791  stream = fdopen(fd, "w+");
792  umask(mask);
793 #endif
794 
795  if (!stream)
796  throw Error("Failed to open a temporary file for the data values (" + dods_temp + ")");
797 
798  dods_temp = &pathname[0];
799  return dods_temp;
800 }
801 
802 
808 void
809 close_temp(FILE *s, const string &name)
810 {
811  int res = fclose(s);
812  if (res)
813  throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
814 
815  res = unlink(name.c_str());
816  if (res != 0)
817  throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
818 }
819 
841 HTTPResponse *
842 HTTPConnect::caching_fetch_url(const string &url)
843 {
844  DBG(cerr << "Is this URL (" << url << ") in the cache?... ");
845 
846  vector<string> *headers = new vector<string>;
847  string file_name;
848  FILE *s = d_http_cache->get_cached_response(url, *headers, file_name);
849  if (!s) {
850  // url not in cache; get it and cache it
851  DBGN(cerr << "no; getting response and caching." << endl);
852  delete headers; headers = 0;
853  time_t now = time(0);
854  HTTPResponse *rs = plain_fetch_url(url);
855  d_http_cache->cache_response(url, now, *(rs->get_headers()), rs->get_stream());
856 
857  return rs;
858  }
859  else { // url in cache
860  DBGN(cerr << "yes... ");
861 
862  if (d_http_cache->is_url_valid(url)) { // url in cache and valid
863  DBGN(cerr << "and it's valid; using cached response." << endl);
864  HTTPCacheResponse *crs = new HTTPCacheResponse(s, 200, headers, file_name, d_http_cache);
865  return crs;
866  }
867  else { // url in cache but not valid; validate
868  DBGN(cerr << "but it's not valid; validating... ");
869 
870  d_http_cache->release_cached_response(s); // This closes 's'
871  headers->clear();
872  vector<string> cond_hdrs = d_http_cache->get_conditional_request_headers(url);
873  FILE *body = 0;
874  string dods_temp = get_temp_file(body);
875  time_t now = time(0); // When was the request made (now).
876  long http_status;
877 
878  try {
879  http_status = read_url(url, body, /*resp_hdrs*/headers, &cond_hdrs);
880  rewind(body);
881  }
882  catch (Error &e) {
883  close_temp(body, dods_temp);
884  delete headers;
885  throw ;
886  }
887 
888  switch (http_status) {
889  case 200: { // New headers and new body
890  DBGN(cerr << "read a new response; caching." << endl);
891 
892  d_http_cache->cache_response(url, now, /* *resp_hdrs*/*headers, body);
893  HTTPResponse *rs = new HTTPResponse(body, http_status, /*resp_hdrs*/headers, dods_temp);
894 
895  return rs;
896  }
897 
898  case 304: { // Just new headers, use cached body
899  DBGN(cerr << "cached response valid; updating." << endl);
900 
901  close_temp(body, dods_temp);
902  d_http_cache->update_response(url, now, /* *resp_hdrs*/ *headers);
903  string file_name;
904  FILE *hs = d_http_cache->get_cached_response(url, *headers, file_name);
905  HTTPCacheResponse *crs = new HTTPCacheResponse(hs, 304, headers, file_name, d_http_cache);
906  return crs;
907  }
908 
909  default: { // Oops.
910  close_temp(body, dods_temp);
911  if (http_status >= 400) {
912  delete headers; headers = 0;
913  string msg = "Error while reading the URL: ";
914  msg += url;
915  msg
916  += ".\nThe OPeNDAP server returned the following message:\n";
917  msg += http_status_to_string(http_status);
918  throw Error(msg);
919  }
920  else {
921  delete headers; headers = 0;
922  throw InternalErr(__FILE__, __LINE__,
923  "Bad response from the HTTP server: " + long_to_string(http_status));
924  }
925  }
926  }
927  }
928  }
929 
930  throw InternalErr(__FILE__, __LINE__, "Should never get here");
931 }
932 
944 HTTPResponse *
945 HTTPConnect::plain_fetch_url(const string &url)
946 {
947  DBG(cerr << "Getting URL: " << url << endl);
948  FILE *stream = 0;
949  string dods_temp = get_temp_file(stream);
950  vector<string> *resp_hdrs = new vector<string>;
951 
952  int status = -1;
953  try {
954  status = read_url(url, stream, resp_hdrs); // Throws Error.
955  if (status >= 400) {
956  // delete resp_hdrs; resp_hdrs = 0;
957  string msg = "Error while reading the URL: ";
958  msg += url;
959  msg += ".\nThe OPeNDAP server returned the following message:\n";
960  msg += http_status_to_string(status);
961  throw Error(msg);
962  }
963  }
964 
965  catch (Error &e) {
966  delete resp_hdrs;
967  close_temp(stream, dods_temp);
968  throw;
969  }
970 
971 #if 0
972  if (d_use_cpp_streams) {
973  fclose(stream);
974  fstream *in = new fstream(dods_temp.c_str(), ios::in|ios::binary);
975  return new HTTPResponse(in, status, resp_hdrs, dods_temp);
976  }
977  else {
978 #endif
979  rewind(stream);
980  return new HTTPResponse(stream, status, resp_hdrs, dods_temp);
981 #if 0
982 }
983 #endif
984 }
985 
997 void
999 {
1000  d_accept_deflate = deflate;
1001 
1002  if (d_accept_deflate) {
1003  if (find(d_request_headers.begin(), d_request_headers.end(),
1004  "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end())
1005  d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
1006  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
1007  ostream_iterator<string>(cerr, "\n")));
1008  }
1009  else {
1010  vector<string>::iterator i;
1011  i = remove_if(d_request_headers.begin(), d_request_headers.end(),
1012  bind2nd(equal_to<string>(),
1013  string("Accept-Encoding: deflate, gzip, compress")));
1014  d_request_headers.erase(i, d_request_headers.end());
1015  }
1016 }
1017 
1026 void
1027 HTTPConnect::set_xdap_protocol(int major, int minor)
1028 {
1029  // Look for, and remove if one exists, an XDAP-Accept header
1030  vector<string>::iterator i;
1031  i = find_if(d_request_headers.begin(), d_request_headers.end(),
1032  HeaderMatch("XDAP-Accept:"));
1033  if (i != d_request_headers.end())
1034  d_request_headers.erase(i);
1035 
1036  // Record and add the new header value
1037  d_dap_client_protocol_major = major;
1038  d_dap_client_protocol_minor = minor;
1039  ostringstream xdap_accept;
1040  xdap_accept << "XDAP-Accept: " << major << "." << minor;
1041 
1042  d_request_headers.push_back(xdap_accept.str());
1043 
1044  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
1045  ostream_iterator<string>(cerr, "\n")));
1046 }
1047 
1063 void
1064 HTTPConnect::set_credentials(const string &u, const string &p)
1065 {
1066  if (u.empty())
1067  return;
1068 
1069  // Store the credentials locally.
1070  d_username = u;
1071  d_password = p;
1072 
1073  d_upstring = u + ":" + p;
1074 }
1075 
1076 } // namespace libdap
libdap::HTTPCache::set_cache_enabled
void set_cache_enabled(bool mode)
Definition: HTTPCache.cc:635
libdap::HTTPCache::instance
static HTTPCache * instance(const string &cache_root, bool force=false)
Definition: HTTPCache.cc:129
libdap::HTTPCache::set_max_size
void set_max_size(unsigned long size)
Definition: HTTPCache.cc:724
libdap::ObjectType
ObjectType
The type of object in the stream coming from the data server.
Definition: ObjectType.h:58
libdap::Error
A class for error processing.
Definition: Error.h:93
libdap::HTTPCache::set_always_validate
void set_always_validate(bool validate)
Definition: HTTPCache.cc:841
libdap::InternalErr
A class for software fault reporting.
Definition: InternalErr.h:65
libdap::HTTPConnect::set_credentials
void set_credentials(const string &u, const string &p)
Definition: HTTPConnect.cc:1064
libdap::HTTPResponse
Definition: HTTPResponse.h:57
libdap::HTTPCache::release_cached_response
void release_cached_response(FILE *response)
Definition: HTTPCache.cc:1571
libdap::get_description_type
ObjectType get_description_type(const string &value)
Definition: mime_util.cc:339
libdap::HTTPConnect::is_cache_enabled
bool is_cache_enabled()
Definition: HTTPConnect.h:156
libdap::HTTPCache::is_url_valid
bool is_url_valid(const string &url)
Definition: HTTPCache.cc:1388
libdap::HTTPCache::update_response
void update_response(const string &url, time_t request_time, const vector< string > &headers)
Definition: HTTPCache.cc:1319
libdap::HTTPConnect::fetch_url
HTTPResponse * fetch_url(const string &url)
Definition: HTTPConnect.cc:617
libdap
top level DAP object to house generic methods
Definition: AlarmHandler.h:36
libdap::RCReader
Definition: RCReader.h:55
libdap::parse_mime_header
void parse_mime_header(const string &header, string &name, string &value)
Definition: mime_util.cc:912
libdap::HTTPConnect::set_xdap_protocol
void set_xdap_protocol(int major, int minor)
Definition: HTTPConnect.cc:1027
libdap::Regex
Definition: GNURegex.h:36
libdap::HTTPCache::get_conditional_request_headers
vector< string > get_conditional_request_headers(const string &url)
Definition: HTTPCache.cc:1249
libdap::HTTPCache::cache_response
bool cache_response(const string &url, time_t request_time, const vector< string > &headers, const FILE *body)
Definition: HTTPCache.cc:1156
libdap::get_temp_file
string get_temp_file(FILE *&stream)
Definition: HTTPConnect.cc:769
libdap::HTTPConnect::set_accept_deflate
void set_accept_deflate(bool defalte)
Definition: HTTPConnect.cc:998
libdap::close_temp
void close_temp(FILE *s, const string &name)
Definition: HTTPConnect.cc:809
libdap::HTTPResponse::transform_to_cpp
void transform_to_cpp()
Definition: HTTPResponse.h:151
libdap::HTTPCache::set_max_entry_size
void set_max_entry_size(unsigned long size)
Definition: HTTPCache.cc:772
libdap::HTTPCache::set_default_expiration
void set_default_expiration(int exp_time)
Definition: HTTPCache.cc:819
libdap::HTTPCache::set_expire_ignored
void set_expire_ignored(bool mode)
Definition: HTTPCache.cc:690
libdap::HTTPCache::get_cached_response
FILE * get_cached_response(const string &url, vector< string > &headers, string &cacheName)
Definition: HTTPCache.cc:1480