// $Id: $ #include "Options.h" #include "Iterators.h" URL_Iterator::~URL_Iterator (void) { } int URL_Iterator::destroy (void) { // Commit suicide. delete this; return 0; } HTML_Body_Iterator::HTML_Body_Iterator (URL &url) : url_ (url) { } int HTML_Body_Iterator::next (ACE_CString &url) { if (this->url_.stream ().eof () == 0) { // Initialize this to the NUL-string. url = ACE_CString (""); char *s; size_t href_begin_len = ACE_OS::strlen ("A HREF=\""); size_t http_len = ACE_OS::strlen ("http://"); size_t mailto_len = ACE_OS::strlen ("mailto:"); size_t href_end_len = ACE_OS::strlen ("/A>"); for (char c; (c = this->url_.stream ().get_char ()) != EOF; ) { // We need to skip over tokens like "mailto". int skip = 0; const char *s; if (c == '<' && (s = this->url_.stream ().peek_str (0, href_begin_len)) != 0 && ACE_OS::strncasecmp (s, "A HREF=\"", href_begin_len) == 0) { this->url_.stream ().seek (href_begin_len); if ((s = this->url_.stream ().peek_str (0, mailto_len)) == 0) return 0; else if (ACE_OS::strncasecmp (s, "mailto:", mailto_len) == 0) { this->url_.stream ().seek (mailto_len); skip = 1; } else { if ((s = this->url_.stream ().peek_str (0, http_len)) == 0) return 0; else if (ACE_OS::strncasecmp (s, "http://", http_len) == 0) // Skip over the "http://". this->url_.stream ().seek (http_len); else { // If we're not prefixed with "http://" assume // that we're a "relative" URL, so let's prepend // our current server's hostname and the dirname // portion of our URI onto this. url += ACE_CString (this->url_.url_addr ().get_host_name ()); if (this->url_.stream ().peek_char (0) != '/') { const char *dir = ACE::dirname (this->url_.url_addr ().get_path_name ()); if (ACE_OS::strncmp (s, "../", 3) == 0) { // Remove a level of the dir. dir = ACE::dirname (dir); this->url_.stream ().seek (3); } if (dir[0] != '/') url += ACE_CString ("/"); if (ACE_OS::strcmp (dir, "/") == 0) url += ACE_CString ("/"); else { url += ACE_CString (dir); if (url[url.length ()] != '/') url += ACE_CString ("/"); } } } } // Find the end of the URL. while ((c = this->url_.stream ().get_char ()) != EOF && c != '\"') continue; if (c == EOF) ACE_ERROR_RETURN ((LM_ERROR, "unexpected EOF\n"), 0); if (skip == 0 && this->url_.stream ().recv_len () > 0) url += ACE_CString (this->url_.stream ().recv (), this->url_.stream ().recv_len () - 1); // Skip to the end of this "" terminator. while ((c = this->url_.stream ().get_char ()) != EOF) { if (c == '<' && (s = this->url_.stream ().peek_str (0, href_end_len)) != 0 && ACE_OS::strncmp (s, "/A>", href_end_len) == 0) { this->url_.stream ().seek (href_end_len); break; } } if (skip) continue; else return 1; } } return 0; } else return 0; } HTTP_Header_Iterator::HTTP_Header_Iterator (URL &url) : url_ (url), end_of_header_ (0) { } int HTTP_Header_Iterator::next (ACE_CString &line) { if (this->end_of_header_) return 0; else { for (char c; (c = this->url_.stream ().get_char ()) != EOF; ) { // Check to see if we're at the end of the header line. if (c == '\r' && this->url_.stream ().peek_char (0) == '\n') { line.set (this->url_.stream ().recv (), this->url_.stream ().recv_len () - 1, 1); // Check to see if we're at the end of the header. if (this->url_.stream ().peek_char (1) == '\r' && this->url_.stream ().peek_char (2) == '\n') { this->end_of_header_ = 1; // We're at the end of the header section. this->url_.stream ().seek (3); } else // We're at the end of the line. this->url_.stream ().seek (1); return 1; } // Handle broken Web servers that use '\n' instead of // '\r\n'. else if (c == '\n') { line.set (this->url_.stream ().recv (), (this->url_.stream ().recv_len ()), 1); // Check to see if we're at the end of the header. if (this->url_.stream ().peek_char (0) == '\n') { // We're at the end of the header section. this->url_.stream ().seek (1); this->end_of_header_ = 1; } return 1; } } ACE_ASSERT (!"yikes, found EOF while in header!"); } return 0; } URL_Download_Iterator::URL_Download_Iterator (URL &url) : url_ (url) { } int URL_Download_Iterator::next (ACE_CString &buffer) { size_t len = BUFSIZ; const char *buf = this->url_.stream ().recv (len); if (buf == 0) return 0; else { buffer.set (buf, len, 1); return 1; } }