libdap++  Updated for version 3.8.2
HTTPConnect.cc
Go to the documentation of this file.
1 
2 // -*- mode: c++; c-basic-offset:4 -*-
3 
4 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
5 // Access Protocol.
6 
7 // Copyright (c) 2002,2003 OPeNDAP, Inc.
8 // Author: James Gallagher <jgallagher@opendap.org>
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 //
24 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25 
26 
27 #include "config.h"
28 
29 static char rcsid[] not_used =
30  { "$Id: HTTPConnect.cc 24380 2011-03-28 21:47:15Z jimg $"
31  };
32 
33 #ifdef HAVE_UNISTD_H
34 #include <unistd.h>
35 #endif
36 
37 #include <sys/stat.h>
38 
39 #ifdef WIN32
40 #include <io.h>
41 #endif
42 
43 #include <string>
44 #include <vector>
45 #include <functional>
46 #include <algorithm>
47 #include <sstream>
48 #include <iterator>
49 #include <cstdlib>
50 #include <cstring>
51 
52 // #define DODS_DEBUG
53 //#define DODS_DEBUG2
54 //#define HTTP_TRACE
55 //#define DODS_DEBUG
56 
57 #undef USE_GETENV
58 
59 
60 #include "debug.h"
61 #include "mime_util.h"
62 #include "GNURegex.h"
63 #include "HTTPCache.h"
64 #include "HTTPConnect.h"
65 #include "RCReader.h"
66 #include "HTTPResponse.h"
67 #include "HTTPCacheResponse.h"
68 
69 using namespace std;
70 
71 namespace libdap {
72 
// These global variables are not MT-Safe, but they are left as is because
// they are used only for debugging (set them in a debugger like gdb or ddd).
// They are deliberately not static because many debuggers cannot access
// static variables. 08/07/02 jhrg

// Set this to 1 to turn on libcurl's verbose mode (for debugging).
int www_trace = 0;

// Keep the temporary files; useful for debugging.
// NOTE(review): the definition that belongs to this comment was missing from
// this copy of the file; the name below follows upstream libdap - confirm.
int dods_keep_temps = 0;
83 
#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417
// Messages for the HTTP client errors, indexed by (status - CLIENT_ERR_MIN).
// The multi-line 404 text is built from adjacent string literals so that the
// indentation of this source file never ends up inside the message.
static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1] =
    {
        "Bad Request:",
        "Unauthorized: Contact the server administrator.",
        "Payment Required.",
        "Forbidden: Contact the server administrator.",
        "Not Found: The data source or server could not be found.\n"
        "Often this means that the OPeNDAP server is missing or needs attention;\n"
        "Please contact the server administrator.",
        "Method Not Allowed.",
        "Not Acceptable.",
        "Proxy Authentication Required.",
        "Request Time-out.",
        "Conflict.",
        "Gone.",
        "Length Required.",
        "Precondition Failed.",
        "Request Entity Too Large.",
        "Request URI Too Large.",
        "Unsupported Media Type.",
        "Requested Range Not Satisfiable.",
        "Expectation Failed."
    };

#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505
// Messages for the HTTP server errors, indexed by (status - SERVER_ERR_MIN).
static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] =
    {
        "Internal Server Error.",
        "Not Implemented.",
        "Bad Gateway.",
        "Service Unavailable.",
        "Gateway Time-out.",
        "HTTP Version Not Supported."
    };

/** Translate an HTTP status code into a message suitable for an error report.

    @param status An HTTP status code.
    @return The text from one of the two tables above when \c status lies in
    [CLIENT_ERR_MIN, CLIENT_ERR_MAX] or [SERVER_ERR_MIN, SERVER_ERR_MAX];
    otherwise a generic "Unknown Error" message. */
static std::string
http_status_to_string(int status)
{
    if (status >= CLIENT_ERR_MIN && status <= CLIENT_ERR_MAX)
        return std::string(http_client_errors[status - CLIENT_ERR_MIN]);

    if (status >= SERVER_ERR_MIN && status <= SERVER_ERR_MAX)
        return std::string(http_server_errors[status - SERVER_ERR_MIN]);

    return std::string("Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.");
}
134 
139 class ParseHeader : public unary_function<const string &, void>
140 {
141  ObjectType type; // What type of object is in the stream?
142  string server; // Server's version string.
143  string protocol; // Server's protocol version.
144  string location; // Url returned by server
145 
146 public:
147  ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0")
148  { }
149 
150  void operator()(const string &line)
151  {
152  string name, value;
153  parse_mime_header(line, name, value);
154  if (name == "content-description") {
155  DBG2(cerr << name << ": " << value << endl);
156  type = get_description_type(value);
157  }
158  // The second test (== "dods/0.0") tests if xopendap-server has already
159  // been seen. If so, use that header in preference to the old
160  // XDODS-Server header. jhrg 2/7/06
161  else if (name == "xdods-server" && server == "dods/0.0") {
162  DBG2(cerr << name << ": " << value << endl);
163  server = value;
164  }
165  else if (name == "xopendap-server") {
166  DBG2(cerr << name << ": " << value << endl);
167  server = value;
168  }
169  else if (name == "xdap") {
170  DBG2(cerr << name << ": " << value << endl);
171  protocol = value;
172  }
173  else if (server == "dods/0.0" && name == "server") {
174  DBG2(cerr << name << ": " << value << endl);
175  server = value;
176  }
177  else if (name == "location") {
178  DBG2(cerr << name << ": " << value << endl);
179  location = value;
180  }
181  else if (type == unknown_type && name == "content-type"
182  && line.find("text/html") != string::npos) {
183  DBG2(cerr << name << ": text/html..." << endl);
184  type = web_error;
185  }
186  }
187 
188  ObjectType get_object_type()
189  {
190  return type;
191  }
192 
193  string get_server()
194  {
195  return server;
196  }
197 
198  string get_protocol()
199  {
200  return protocol;
201  }
202 
203  string get_location() {
204  return location;
205  }
206 };
207 
/** Header callback registered with libcurl (CURLOPT_HEADERFUNCTION). Strips
    the line terminator from the header it is handed and appends the result
    to a vector<string>, skipping empty lines and the HTTP status line.

    @param ptr Start of the header data; not NUL terminated.
    @param size Size of one data item.
    @param nmemb Number of data items.
    @param resp_hdrs Pointer to the vector<string> that receives the headers.
    @return size * nmemb, i.e. the whole buffer is always reported as
    consumed. */
static size_t
save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs)
{
    DBG2(std::cerr << "Inside the header parser." << std::endl);
    std::vector<std::string> *hdrs
        = static_cast<std::vector<std::string> *>(resp_hdrs);

    const char *data = static_cast<const char *>(ptr);
    const size_t total = size * nmemb;

    // Drop a trailing "\n" or "\r\n" only when it is really there. Working
    // from the total byte count keeps a zero-length call from underflowing
    // and keeps an unterminated line from losing its last character.
    size_t length = total;
    if (length > 0 && data[length - 1] == '\n')
        --length;
    if (length > 0 && data[length - 1] == '\r')
        --length;

    std::string complete_line;
    if (length > 0)
        complete_line.assign(data, length);

    // Store all non-empty headers that are not HTTP status lines. Only a
    // line that *starts* with "HTTP/" is a status line; a header that merely
    // mentions HTTP in its value (e.g. a Server header) must be kept.
    static const char status_prefix[] = "HTTP/";
    const bool is_status_line
        = complete_line.compare(0, sizeof(status_prefix) - 1, status_prefix) == 0;

    if (hdrs && !complete_line.empty() && !is_status_line) {
        DBG(std::cerr << "Header line: " << complete_line << std::endl);
        hdrs->push_back(complete_line);
    }

    return total;
}
245 
247 static int
248 curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *)
249 {
250  string message(msg, size);
251 
252  switch (info) {
253  case CURLINFO_TEXT:
254  cerr << "Text: " << message; break;
255  case CURLINFO_HEADER_IN:
256  cerr << "Header in: " << message; break;
257  case CURLINFO_HEADER_OUT:
258  cerr << "Header out: " << message; break;
259  case CURLINFO_DATA_IN:
260  cerr << "Data in: " << message; break;
261  case CURLINFO_DATA_OUT:
262  cerr << "Data out: " << message; break;
263  case CURLINFO_END:
264  cerr << "End: " << message; break;
265 #ifdef CURLINFO_SSL_DATA_IN
266  case CURLINFO_SSL_DATA_IN:
267  cerr << "SSL Data in: " << message; break;
268 #endif
269 #ifdef CURLINFO_SSL_DATA_OUT
270  case CURLINFO_SSL_DATA_OUT:
271  cerr << "SSL Data out: " << message; break;
272 #endif
273  default:
274  cerr << "Curl info: " << message; break;
275  }
276  return 0;
277 }
278 
282 void
283 HTTPConnect::www_lib_init()
284 {
285  d_curl = curl_easy_init();
286  if (!d_curl)
287  throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl.");
288 
289  // Now set options that will remain constant for the duration of this
290  // CURL object.
291 
292  // Set the proxy host.
293  if (!d_rcr->get_proxy_server_host().empty()) {
294  DBG(cerr << "Setting up a proxy server." << endl);
295  DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host()
296  << endl);
297  DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port()
298  << endl);
299  DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw()
300  << endl);
301  curl_easy_setopt(d_curl, CURLOPT_PROXY,
302  d_rcr->get_proxy_server_host().c_str());
303  curl_easy_setopt(d_curl, CURLOPT_PROXYPORT,
304  d_rcr->get_proxy_server_port());
305 
306  // As of 4/21/08 only NTLM, Digest and Basic work.
307 #ifdef CURLOPT_PROXYAUTH
308  curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (long)CURLAUTH_ANY);
309 #endif
310 
311  // Password might not be required. 06/21/04 jhrg
312  if (!d_rcr->get_proxy_server_userpw().empty())
313  curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD,
314  d_rcr->get_proxy_server_userpw().c_str());
315  }
316 
317  curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer);
318  // We have to set FailOnError to false for any of the non-Basic
319  // authentication schemes to work. 07/28/03 jhrg
320  curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0);
321 
322  // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
323  // choosing the the 'safest' one supported by the server.
324  // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
325  curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
326 
327  curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1);
328  curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1);
329  curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
330  // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth
331  // param of save_raw_http_headers to a vector<string> object.
332 
333  // Follow 302 (redirect) responses
334  curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1);
335  curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5);
336 
337  // If the user turns off SSL validation...
338  if (!d_rcr->get_validate_ssl() == 0) {
339  curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0);
340  curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0);
341  }
342 
343  // Look to see if cookies are turned on in the .dodsrc file. If so,
344  // activate here. We honor 'session cookies' (cookies without an
345  // expiration date) here so that session-base SSO systems will work as
346  // expected.
347  if (!d_cookie_jar.empty()) {
348  DBG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl);
349  curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str());
350  curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1);
351  }
352 
353  if (www_trace) {
354  cerr << "Curl version: " << curl_version() << endl;
355  curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1);
356  curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug);
357  }
358 }
359 
363 class BuildHeaders : public unary_function<const string &, void>
364 {
365  struct curl_slist *d_cl;
366 
367 public:
368  BuildHeaders() : d_cl(0)
369  {}
370 
371  void operator()(const string &header)
372  {
373  DBG(cerr << "Adding '" << header.c_str() << "' to the header list."
374  << endl);
375  d_cl = curl_slist_append(d_cl, header.c_str());
376  }
377 
378  struct curl_slist *get_headers()
379  {
380  return d_cl;
381  }
382 };
383 
398 long
399 HTTPConnect::read_url(const string &url, FILE *stream,
400  vector<string> *resp_hdrs,
401  const vector<string> *headers)
402 {
403  curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str());
404 
405 #ifdef WIN32
406  // See the curl documentation for CURLOPT_FILE (aka CURLOPT_WRITEDATA)
407  // and the CURLOPT_WRITEFUNCTION option. Quote: "If you are using libcurl as
408  // a win32 DLL, you MUST use the CURLOPT_WRITEFUNCTION option if you set the
409  // CURLOPT_WRITEDATA option or you will experience crashes". At the root of
410  // this issue is that one should not pass a FILE * to a windows DLL. Close
411  // inspection of libcurl yields that their default write function when using
412  // the CURLOPT_WRITEDATA is just "fwrite".
413  curl_easy_setopt(d_curl, CURLOPT_FILE, stream);
414  curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite);
415 #else
416  curl_easy_setopt(d_curl, CURLOPT_FILE, stream);
417 #endif
418 
419  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
420  ostream_iterator<string>(cerr, "\n")));
421 
422  BuildHeaders req_hdrs;
423  req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(),
424  req_hdrs);
425  if (headers)
426  req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs);
427  curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
428 
429  // Turn off the proxy for this URL?
430  bool temporary_proxy = false;
431  if ((temporary_proxy = url_uses_no_proxy_for(url))) {
432  DBG(cerr << "Suppress proxy for url: " << url << endl);
433  curl_easy_setopt(d_curl, CURLOPT_PROXY, 0);
434  }
435 
436  string::size_type at_sign = url.find('@');
437  // Assume username:password present *and* assume it's an HTTP URL; it *is*
438  // HTTPConnect, after all. 7 is position after "http://"; the second arg
439  // to substr() is the sub string length.
440  if (at_sign != url.npos)
441  d_upstring = url.substr(7, at_sign - 7);
442 
443  if (!d_upstring.empty())
444  curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str());
445 
446  // Pass save_raw_http_headers() a pointer to the vector<string> where the
447  // response headers may be stored. Callers can use the resp_hdrs
448  // value/result parameter to get the raw response header information .
449  curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs);
450 
451  CURLcode res = curl_easy_perform(d_curl);
452 
453  // Free the header list and null the value in d_curl.
454  curl_slist_free_all(req_hdrs.get_headers());
455  curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0);
456 
457  // Reset the proxy?
458  if (temporary_proxy && !d_rcr->get_proxy_server_host().empty())
459  curl_easy_setopt(d_curl, CURLOPT_PROXY,
460  d_rcr->get_proxy_server_host().c_str());
461 
462  if (res != 0)
463  throw Error(d_error_buffer);
464 
465  long status;
466  res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status);
467  if (res != 0)
468  throw Error(d_error_buffer);
469 
470  return status;
471 }
472 
476 bool
477 HTTPConnect::url_uses_proxy_for(const string &url) throw()
478 {
479  if (d_rcr->is_proxy_for_used()) {
480  Regex host_regex(d_rcr->get_proxy_for_regexp().c_str());
481  int index = 0, matchlen;
482  return host_regex.search(url.c_str(), url.size(), matchlen, index) != -1;
483  }
484 
485  return false;
486 }
487 
491 bool
492 HTTPConnect::url_uses_no_proxy_for(const string &url) throw()
493 {
494  return d_rcr->is_no_proxy_for_used()
495  && url.find(d_rcr->get_no_proxy_for_host()) != string::npos;
496 }
497 
498 // Public methods. Mostly...
499 
506 HTTPConnect::HTTPConnect(RCReader *rcr) : d_username(""), d_password(""),
507  d_cookie_jar(""),
508  d_dap_client_protocol_major(2),
509  d_dap_client_protocol_minor(0)
510 
511 {
512  d_accept_deflate = rcr->get_deflate();
513  d_rcr = rcr;
514 
515  // Load in the default headers to send with a request. The empty Pragma
516  // headers overrides libcurl's default Pragma: no-cache header (which
517  // will disable caching by Squid, et c.). The User-Agent header helps
518  // make server logs more readable. 05/05/03 jhrg
519  d_request_headers.push_back(string("Pragma:"));
520  string user_agent = string("User-Agent: ") + string(CNAME)
521  + string("/") + string(CVER);
522  d_request_headers.push_back(user_agent);
523  if (d_accept_deflate)
524  d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
525 
526  // HTTPCache::instance returns a valid ptr or 0.
527  if (d_rcr->get_use_cache())
528  d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(),
529  true);
530  else
531  d_http_cache = 0;
532 
533  DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec
534  << ")" << endl);
535 
536  if (d_http_cache) {
537  d_http_cache->set_cache_enabled(d_rcr->get_use_cache());
538  d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0);
539  d_http_cache->set_max_size(d_rcr->get_max_cache_size());
540  d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj());
541  d_http_cache->set_default_expiration(d_rcr->get_default_expires());
542  d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0);
543  }
544 
545  d_cookie_jar = rcr->get_cookie_jar();
546 
547  www_lib_init(); // This may throw either Error or InternalErr
548 }
549 
551 {
552  DBG2(cerr << "Entering the HTTPConnect dtor" << endl);
553 
554  curl_easy_cleanup(d_curl);
555 
556  DBG2(cerr << "Leaving the HTTPConnect dtor" << endl);
557 }
558 
571 HTTPResponse *
572 HTTPConnect::fetch_url(const string &url)
573 {
574 #ifdef HTTP_TRACE
575  cout << "GET " << url << " HTTP/1.0" << endl;
576 #endif
577 
578  HTTPResponse *stream;
579 
580  if (d_http_cache && d_http_cache->is_cache_enabled()) {
581  stream = caching_fetch_url(url);
582  }
583  else {
584  stream = plain_fetch_url(url);
585  }
586 
587 #ifdef HTTP_TRACE
588  stringstream ss;
589  ss << "HTTP/1.0 " << stream->get_status() << " -" << endl;
590  for (size_t i = 0; i < stream->get_headers()->size(); i++) {
591  ss << stream->get_headers()->at(i) << endl;
592  }
593  cout << ss.str();
594 #endif
595 
596  ParseHeader parser;
597 
598  parser = for_each(stream->get_headers()->begin(),
599  stream->get_headers()->end(), ParseHeader());
600 
601 #ifdef HTTP_TRACE
602  cout << endl << endl;
603 #endif
604 
605  // handle redirection case (2007-04-27, gaffigan@sfos.uaf.edu)
606  if (parser.get_location() != "" &&
607  url.substr(0,url.find("?",0)).compare(parser.get_location().substr(0,url.find("?",0))) != 0) {
608  delete stream;
609  return fetch_url(parser.get_location());
610  }
611 
612  stream->set_type(parser.get_object_type());
613  stream->set_version(parser.get_server());
614  stream->set_protocol(parser.get_protocol());
615 
616  return stream;
617 }
618 
// Look around for a reasonable place to put a temporary file. Check first
// the value of the TMPDIR env var. If that does not yield a path that's
// writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as
// defined in stdio.h). If both come up empty, then use `./'.
623 
624 // Change this to a version that either returns a string or an open file
625 // descriptor. Use information from https://buildsecurityin.us-cert.gov/
626 // (see open()) to make it more secure. Ideal solution: get deserialize()
627 // methods to read from a stream returned by libcurl, not from a temporary
628 // file. 9/21/07 jhrg Updated to use strings, so other misc changes. 3/22/11
629 static string
630 get_tempfile_template(const string &file_template)
631 {
632  string c;
633 
634  // Windows has one idea of the standard name(s) for a temporary files dir
635 #ifdef WIN32
636  // white list for a WIN32 directory
637  Regex directory("[-a-zA-Z0-9_:\\]*");
638 
639  // If we're OK to use getenv(), try it.
640 #ifdef USE_GETENV
641  c = getenv("TEMP");
642  if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
643  goto valid_temp_directory;
644 
645  c= getenv("TMP");
646  if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
647  goto valid_temp_directory;
648 #endif // USE_GETENV
649 
650  // The windows default
651  c = "c:\tmp";
652  if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
653  goto valid_temp_directory;
654 
655 #else // Unix/Linux/OSX has another...
656  // white list for a directory
657  Regex directory("[-a-zA-Z0-9_/]*");
658 #ifdef USE_GETENV
659  c = getenv("TMPDIR");
660  if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
661  goto valid_temp_directory;
662 #endif // USE_GETENV
663 
664  // Unix defines this sometimes - if present, use it.
665 #ifdef P_tmpdir
666  if (access(P_tmpdir, W_OK | R_OK) == 0) {
667  c = P_tmpdir;
668  goto valid_temp_directory;
669  }
670 #endif
671 
672  // The Unix default
673  c = "/tmp";
674  if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
675  goto valid_temp_directory;
676 
677 #endif // WIN32
678 
679  // If we found nothing useful, use the current directory
680  c = ".";
681 
682 valid_temp_directory:
683 
684 #ifdef WIN32
685  c += "\\" + file_template;
686 #else
687  c += "/" + file_template;
688 #endif
689 
690  return c;
691 }
692 
711 string
712 get_temp_file(FILE *&stream) throw(InternalErr)
713 {
714  string dods_temp = get_tempfile_template((string)"dodsXXXXXX");
715 
716  vector<char> pathname(dods_temp.length() + 1);
717 
718  strncpy(&pathname[0], dods_temp.c_str(), dods_temp.length());
719 
720  DBG(cerr << "pathanme: " << &pathname[0] << " (" << dods_temp.length() + 1 << ")" << endl);
721 
722  // Open truncated for update. NB: mkstemp() returns a file descriptor.
723 #if defined(WIN32) || defined(TEST_WIN32_TEMPS)
724  stream = fopen(_mktemp(&pathname[0]), "w+b");
725 #else
726  // Make sure that temp files are accessible only by the owner.
727  umask(077);
728  stream = fdopen(mkstemp(&pathname[0]), "w+");
729 #endif
730 
731  if (!stream) {
732  throw InternalErr(__FILE__, __LINE__,
733  "Failed to open a temporary file for the data values ("
734  + dods_temp + ")");
735  }
736 
737  dods_temp = &pathname[0];
738  return dods_temp;
739 }
740 
742 void
743 close_temp(FILE *s, const string &name)
744 {
745  int res = fclose(s);
746  if (res)
747  throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
748 
749  res = unlink(name.c_str());
750  if (res != 0)
751  throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
752 }
753 
775 HTTPResponse *
776 HTTPConnect::caching_fetch_url(const string &url)
777 {
778  DBG(cerr << "Is this URL (" << url << ") in the cache?... ");
779 
780  vector<string> *headers = new vector<string>;
781  string file_name;
782  FILE *s = d_http_cache->get_cached_response(url, *headers, file_name);
783  if (!s) {
784  // url not in cache; get it and cache it
785  DBGN(cerr << "no; getting response and caching." << endl);
786  delete headers; headers = 0;
787  time_t now = time(0);
788  HTTPResponse *rs = plain_fetch_url(url);
789  d_http_cache->cache_response(url, now, *(rs->get_headers()), rs->get_stream());
790 
791  return rs;
792  }
793  else { // url in cache
794  DBGN(cerr << "yes... ");
795 
796  if (d_http_cache->is_url_valid(url)) { // url in cache and valid
797  DBGN(cerr << "and it's valid; using cached response." << endl);
798  HTTPCacheResponse *crs = new HTTPCacheResponse(s, 200, headers, file_name, d_http_cache);
799  return crs;
800  }
801  else { // url in cache but not valid; validate
802  DBGN(cerr << "but it's not valid; validating... ");
803 
804  d_http_cache->release_cached_response(s); // This closes 's'
805  headers->clear();
806  vector<string> cond_hdrs = d_http_cache->get_conditional_request_headers(url);
807  FILE *body = 0;
808  string dods_temp = get_temp_file(body);
809  time_t now = time(0); // When was the request made (now).
810  long http_status;
811 
812  try {
813  http_status = read_url(url, body, /*resp_hdrs*/headers, &cond_hdrs);
814  rewind(body);
815  }
816  catch (Error &e) {
817  close_temp(body, dods_temp);
818  delete headers;
819  throw ;
820  }
821 
822  switch (http_status) {
823  case 200: { // New headers and new body
824  DBGN(cerr << "read a new response; caching." << endl);
825 
826  d_http_cache->cache_response(url, now, /* *resp_hdrs*/*headers, body);
827  HTTPResponse *rs = new HTTPResponse(body, http_status, /*resp_hdrs*/headers, dods_temp);
828 
829  return rs;
830  }
831 
832  case 304: { // Just new headers, use cached body
833  DBGN(cerr << "cached response valid; updating." << endl);
834 
835  close_temp(body, dods_temp);
836  d_http_cache->update_response(url, now, /* *resp_hdrs*/ *headers);
837  string file_name;
838  FILE *hs = d_http_cache->get_cached_response(url, *headers, file_name);
839  HTTPCacheResponse *crs = new HTTPCacheResponse(hs, 304, headers, file_name, d_http_cache);
840  return crs;
841  }
842 
843  default: { // Oops.
844  close_temp(body, dods_temp);
845  if (http_status >= 400) {
846  delete headers; headers = 0;
847  string msg = "Error while reading the URL: ";
848  msg += url;
849  msg
850  += ".\nThe OPeNDAP server returned the following message:\n";
851  msg += http_status_to_string(http_status);
852  throw Error(msg);
853  }
854  else {
855  delete headers; headers = 0;
856  throw InternalErr(__FILE__, __LINE__,
857  "Bad response from the HTTP server: " + long_to_string(http_status));
858  }
859  }
860  }
861  }
862  }
863 
864  throw InternalErr(__FILE__, __LINE__, "Should never get here");
865 }
866 
878 HTTPResponse *
879 HTTPConnect::plain_fetch_url(const string &url)
880 {
881  DBG(cerr << "Getting URL: " << url << endl);
882  FILE *stream = 0;
883  string dods_temp = get_temp_file(stream);
884  vector<string> *resp_hdrs = new vector<string>;
885 
886  int status = -1;
887  try {
888  status = read_url(url, stream, resp_hdrs); // Throws Error.
889  if (status >= 400) {
890  delete resp_hdrs;
891  string msg = "Error while reading the URL: ";
892  msg += url;
893  msg += ".\nThe OPeNDAP server returned the following message:\n";
894  msg += http_status_to_string(status);
895  throw Error(msg);
896  }
897  }
898 
899  catch (Error &e) {
900  delete resp_hdrs;
901  close_temp(stream, dods_temp);
902  throw;
903  }
904 
905  rewind(stream);
906 
907  return new HTTPResponse(stream, status, resp_hdrs, dods_temp);
908 }
909 
921 void
923 {
924  d_accept_deflate = deflate;
925 
926  if (d_accept_deflate) {
927  if (find(d_request_headers.begin(), d_request_headers.end(),
928  "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end())
929  d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
930  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
931  ostream_iterator<string>(cerr, "\n")));
932  }
933  else {
934  vector<string>::iterator i;
935  i = remove_if(d_request_headers.begin(), d_request_headers.end(),
936  bind2nd(equal_to<string>(),
937  string("Accept-Encoding: deflate, gzip, compress")));
938  d_request_headers.erase(i, d_request_headers.end());
939  }
940 }
941 
/** Predicate for use with find_if(): true for a string that starts with the
    header text given to the constructor. */
class HeaderMatch
{
    // Held by value. A reference member would dangle whenever the predicate
    // outlives the (possibly temporary) string it was built from.
    std::string d_header;

public:
    // std::unary_function was deprecated in C++11 and removed in C++17; the
    // typedefs it used to supply are spelled out instead.
    typedef const std::string &argument_type;
    typedef bool result_type;

    HeaderMatch(const std::string &header) : d_header(header) {}

    /** @param arg The string to test.
        @return True if \c arg begins with the stored header text. */
    bool operator()(const std::string &arg) const
    {
        // compare() looks only at the leading characters; find() == 0 would
        // scan the whole string before reporting a mismatch.
        return arg.compare(0, d_header.size(), d_header) == 0;
    }
};
949 
958 void
959 HTTPConnect::set_xdap_protocol(int major, int minor)
960 {
961  // Look for, and remove if one exists, an XDAP-Accept header
962  vector<string>::iterator i;
963  i = find_if(d_request_headers.begin(), d_request_headers.end(),
964  HeaderMatch("XDAP-Accept:"));
965  if (i != d_request_headers.end())
966  d_request_headers.erase(i);
967 
968  // Record and add the new header value
969  d_dap_client_protocol_major = major;
970  d_dap_client_protocol_minor = minor;
971  ostringstream xdap_accept;
972  xdap_accept << "XDAP-Accept: " << major << "." << minor;
973 
974  d_request_headers.push_back(xdap_accept.str());
975 
976  DBG(copy(d_request_headers.begin(), d_request_headers.end(),
977  ostream_iterator<string>(cerr, "\n")));
978 }
979 
995 void
996 HTTPConnect::set_credentials(const string &u, const string &p)
997 {
998  if (u.empty())
999  return;
1000 
1001  // Store the credentials locally.
1002  d_username = u;
1003  d_password = p;
1004 
1005  d_upstring = u + ":" + p;
1006 }
1007 
1008 } // namespace libdap