Main Page | Class List | File List | Class Members | File Members

fetch.c

Go to the documentation of this file.
00001 /*
00002  *  $Id: fetch.c,v 1.19 2006/04/17 22:45:19 ghost666 Exp $
00003  *
00004  *  rssbgr - RSS background reader
00005  *  Copyright (C) 2005, 2006 Piotr 'GhosT' Wydrych
00006  *
00007  *  This program is free software; you can redistribute it and/or modify
00008  *  it under the terms of the GNU General Public License as published by
00009  *  the Free Software Foundation; either version 2 of the License, or
00010  *  (at your option) any later version.
00011  *
00012  *  This program is distributed in the hope that it will be useful,
00013  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015  *  GNU General Public License for more details.
00016  *
00017  *  You should have received a copy of the GNU General Public License
00018  *  along with this program; if not, write to the Free Software
00019  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
00020  */
00021 
00026 #include "fetch.h"
00027 
/**
 * Search an open stream for a whitespace-delimited token equal to @p string.
 *
 * The stream is rewound first, so every call scans the file from the start.
 * Tokens are read at most 32 characters at a time (the callers in this file
 * store one 32-character digest per line); a longer token is therefore seen
 * as several pieces and never matches as a whole.
 *
 * @param fd     stream to search; NULL is accepted and means "not found"
 * @param string token to look for
 * @return 1 if a token equal to @p string was read, 0 otherwise
 */
int find_in_file(FILE *fd, char *string)
{
 char line[33]; /* 32 characters plus the terminating NUL */

 DEBUG3("find_in_file: %p, %s", fd, string);
 if (fd == NULL) return 0;
 rewind(fd);
 /* The field width keeps fscanf() inside the buffer, and testing its result
    (rather than feof()) stops the loop before a stale or uninitialised
    buffer could be compared at end of file. */
 while (fscanf(fd, "%32s", line) == 1)
 {
  if (!strcmp(line, string))
   return 1;
 }
 return 0;
}
00048 
00049 void process_rss(xmlNode *root_node, int config_item_i)
00050 {
00051  xmlNode *cur_node, *cur_sub_node, *cur_sub_sub_node;
00052  char *channel_link = NULL, *channel_title = NULL, *channel_desc = NULL; /* NULL the pointers, because we won't malloc */
00053  char *item_link = NULL, *item_title = NULL, *item_desc = NULL;
00054  char *filename, *filename_new;
00055  char *digest = (char *)malloc(33);
00056  char *command;
00057  FILE *fd, *fd_new;
00058  
00059  filename = (char *)malloc(PATH_MAX);
00060  md5(digest, config[config_item_i].url);
00061  sprintf(filename, "%s/%s", directory, digest);
00062  filename_new = (char *)malloc(PATH_MAX);
00063  sprintf(filename_new, "%s.new", filename);
00064  fd = fopen(filename, "r"); /* don't bother checkig if not NULL, do it in find_in_file() */
00065  fd_new = fopen(filename_new, "w");
00066  if (!strcmp(root_node->name, "rss"))
00067   for (cur_node = root_node->children; cur_node != NULL; cur_node = cur_node->next)
00068    if (!strcmp(cur_node->name, "channel"))
00069     for (cur_sub_node = cur_node->children; cur_sub_node != NULL; cur_sub_node = cur_sub_node->next)
00070     {
00071      if (!strcmp(cur_sub_node->name, "item"))
00072      {
00073       for (cur_sub_sub_node = cur_sub_node->children; cur_sub_sub_node != NULL; cur_sub_sub_node = cur_sub_sub_node->next)
00074       {
00075        if (!strcmp(cur_sub_sub_node->name, "link"))
00076         item_link = xmlNodeListGetString(root_node->doc, cur_sub_sub_node->xmlChildrenNode, 1);
00077        else if (!strcmp(cur_sub_sub_node->name, "title") && strstr(config[config_item_i].action, "{t}"))
00078         item_title = xmlNodeListGetString(root_node->doc, cur_sub_sub_node->xmlChildrenNode, 1);
00079        else if (!strcmp(cur_sub_sub_node->name, "description") && strstr(config[config_item_i].action, "{d}"))
00080         item_desc = xmlNodeListGetString(root_node->doc, cur_sub_sub_node->xmlChildrenNode, 1);
00081         }
00082       md5(digest, item_link);
00083       if (!find_in_file(fd, digest))
00084       {
00085        command=replace_fields(config[config_item_i].action, channel_link, channel_title, channel_desc, item_link, item_title, item_desc);
00086        execute(command);
00087        free(command);
00088       }
00089       if (fd_new != NULL) fprintf(fd_new, "%s\n", digest);
00090       free(item_link); free(item_title); free(item_desc); item_link = item_title = item_desc = NULL;
00091      }
00092      else if (!strcmp(cur_sub_node->name, "link") && strstr(config[config_item_i].action, "{L}"))
00093       channel_link = xmlNodeListGetString(root_node->doc, cur_sub_node->xmlChildrenNode, 1);
00094      else if (!strcmp(cur_sub_node->name, "title") && strstr(config[config_item_i].action, "{T}"))
00095       channel_title = xmlNodeListGetString(root_node->doc, cur_sub_node->xmlChildrenNode, 1);
00096      else if (!strcmp(cur_sub_node->name, "description") && strstr(config[config_item_i].action, "{D}"))
00097       channel_desc = xmlNodeListGetString(root_node->doc, cur_sub_node->xmlChildrenNode, 1);
00098     }
00099  if (fd != NULL) fclose(fd);
00100  if (fd_new != NULL)
00101  {
00102   fclose(fd_new);
00103   rename(filename_new, filename);
00104  }
00105  free(channel_link); free(channel_title); free(channel_title);
00106  free(filename); free(filename_new); free(digest);
00107 }
00108 
/**
 * Convert a date such as "Mon, 17 Apr 2006 22:45:19" to a time_t.
 *
 * The text is parsed with the strptime() format "%a, %d %b %Y %H:%M:%S";
 * whatever follows the seconds (for example " GMT") is ignored. The
 * broken-down time is then converted with mktime(), i.e. it is interpreted
 * in the local time zone with DST determined by the library.
 *
 * @param data NUL-terminated date text
 * @return the converted time, or -1 when @p data does not match the format
 */
time_t string_to_time(char *data)
{
 /* strptime() only stores the fields named in the format, so start from a
    fully zeroed structure instead of stack garbage. */
 struct tm tm = {0};

 if (strptime(data, "%a, %d %b %Y %H:%M:%S", &tm) == NULL) return -1;
 tm.tm_isdst = -1; /* let mktime() work out whether DST applies */
 return mktime(&tm);
}
00116 
00117 enum HTTP_STATE process_header(char *data, int len, int config_item_i)
00118 {
00119  size_t pos, _len, cookie_pos;
00120  int cookie_i, i;
00121  time_t server_time = time(NULL), time_diff = 0;  /* time_diff = localtime - servertime */
00122  char *url, *host, *request;
00123  uint16_t port;
00124 
00125  if (len < 16) return HTTP_STATE_ERROR; /* 16 = strlen("HTTP/1.x xxx") */
00126  if (strncmp("HTTP/1.", data, 7)) return HTTP_STATE_ERROR; /* are there any HTTP/0.9 servers? ;-) */
00127  pos = 9;
00128  switch(data[pos])
00129  {
00130   case '2':
00131    if (data[pos+2] == '4') return HTTP_STATE_ERROR; /* HTTP/1.0 204 No Content */
00132    while ((pos = (size_t)strchr(data+pos, '\n')) && (pos = pos-(size_t)data+1) <= len)
00133    {
00134     if (!strncmp("Date:", data+pos, 5))
00135     {
00136      server_time = ((server_time = string_to_time(data+pos+5)) != -1)?(server_time):(time(NULL));
00137      time_diff = time(NULL) - server_time;
00138     }
00139     else if (!strncmp("Set-Cookie:", data+pos, 11)) 
00140     {
00141      if (strchr(data+pos+11, '\r') && strchr(data+pos+11, '\n'))
00142       _len = MIN(strchr(data+pos+11, ';'), MIN(strchr(data+pos+11, '\r'), strchr(data+pos+11, '\n')))-(data+pos+11);
00143      else if (strchr(data+pos+11, '\r'))
00144       _len = MIN(strchr(data+pos+11, ';'), strchr(data+pos+11, '\r'))-(data+pos+11);
00145      else
00146       _len = MIN(strchr(data+pos+11, ';'), strchr(data+pos+11, '\n'))-(data+pos+11);
00147      cookie_i = -1;
00148      for (i = 0; i < config[config_item_i].cookies_count; ++i)
00149       if (!strncmp(data+pos+11, config[config_item_i].cookies[i].data, strchr(data+pos+11, '=')-(data+pos+11)))
00150        cookie_i = i;
00151      if (cookie_i == -1)
00152      {
00153       cookie_i = config[config_item_i].cookies_count++;
00154       config[config_item_i].cookies = (struct cookie *)realloc(config[config_item_i].cookies, config[config_item_i].cookies_count*sizeof(struct cookie));
00155      }
00156      else
00157       free(config[config_item_i].cookies[cookie_i].data);
00158      config[config_item_i].cookies[cookie_i].data = (char *)malloc(_len+1);
00159      strlcpy(config[config_item_i].cookies[cookie_i].data, data+pos+11, _len+1);
00160      cookie_pos = ((cookie_pos = (size_t)strstr(data+pos+11, "expires="))?(cookie_pos-(size_t)data):(0));
00161      if (cookie_pos && cookie_pos < (size_t)strchr(data+pos, '\n'))
00162      {
00163       if ((config[config_item_i].cookies[cookie_i].expires = string_to_time(data+cookie_pos+8)) != -1)
00164        config[config_item_i].cookies[cookie_i].expires += time_diff;
00165      }
00166      else
00167       config[config_item_i].cookies[cookie_i].expires = -1;
00168      DEBUG4("process_header: %d cookie: %s, expires: %d", cookie_i, config[config_item_i].cookies[cookie_i].data, (int)config[config_item_i].cookies[cookie_i].expires)
00169     }
00170    }
00171    return HTTP_STATE_OK;
00172   case '3':
00173    while ((pos = (size_t)strchr(data+pos, '\n')) && (pos = pos-(size_t)data+1) <= len)
00174    {
00175     if (!strncmp("Location:", data+pos, 9))
00176     {
00177      _len = MIN(strchr(data+pos+9, '\r'), strchr(data+pos+9, '\n'))-(data+pos+9);
00178      pos+=9; while (data[pos] == ' ') ++pos;
00179      _len = MIN(strchr(data+pos, '\r'), strchr(data+pos, '\n'))-(data+pos);
00180      url = (char *)malloc(_len+1);
00181      strlcpy(url, data+pos, _len+1);
00182      DEBUG2("process_header: Redirect: %s", url)
00183      if (parse_url(url, &host, &port, &request) == -1)
00184      {
00185       free(url); free(host); free(request); return HTTP_STATE_ERROR;
00186      }
00187      if (strcmp(host, config[config_item_i].host))
00188      {
00189       for (; config[config_item_i].cookies_count; --config[config_item_i].cookies_count)
00190        free(config[config_item_i].cookies[config[config_item_i].cookies_count-1].data);
00191       free(config[config_item_i].cookies);
00192      }
00193      config[config_item_i].host = realloc(config[config_item_i].host, strlen(host)+1);
00194      strcpy(config[config_item_i].host, host);
00195      config[config_item_i].port = port;
00196      config[config_item_i].request = realloc(config[config_item_i].request, strlen(request)+1);
00197      strcpy(config[config_item_i].request, request);
00198      free(url); free(host); free(request);
00199      return HTTP_STATE_REDIRECT;
00200     }
00201    }
00202   default:
00203    return HTTP_STATE_ERROR;
00204  }
00205 }
00206 
00207 void *fetch_thread(void *parameters)
00208 {
00209  char *content, *header, *request;
00210  int contentsize, i, len;
00211  char *pos_rn, *pos_n, *pos_c; /* positions of "\r\n\r\n", "\n\n", content */
00212  xmlDoc *xml_doc;
00213  xmlNode *xml_node;
00214 
00215 fetch_thread_loop:
00216  request = (char *)malloc(strlen(config[(size_t)parameters].request)+1);
00217  strcpy(request, config[(size_t)parameters].request);
00218  len = 0;
00219  for (i = 0; i<config[(size_t)parameters].cookies_count; ++i)
00220   if (config[(size_t)parameters].cookies[i].expires == -1 || config[(size_t)parameters].cookies[i].expires >= time(NULL))
00221   {
00222    if (!len)
00223    {
00224     len = strlen(config[(size_t)parameters].cookies[i].data)+11;
00225     request = (char *)realloc(request, strlen(config[(size_t)parameters].request)+len);
00226     strcat(request, "Cookie: ");
00227     strcat(request, config[(size_t)parameters].cookies[i].data);
00228    }
00229    else
00230    {
00231     len += strlen(config[(size_t)parameters].cookies[i].data)+2;
00232     request = (char *)realloc(request, strlen(config[(size_t)parameters].request)+len);
00233     strcat(request, "; ");
00234     strcat(request, config[(size_t)parameters].cookies[i].data);
00235    }
00236   }
00237  if (len) strcat(request, "\r\n");
00238  DEBUG2("Request: %s", request);
00239  if ((content = fetch(&contentsize, config[(size_t)parameters].host, config[(size_t)parameters].port, request)) == NULL) goto skip;
00240  free(request);
00241  pos_rn = strstr(content, "\r\n\r\n");
00242  pos_n = strstr(content, "\n\n");
00243  if ((pos_rn != NULL) && (pos_n != NULL))
00244   pos_c = MIN(pos_rn+4, pos_n+2);
00245  else
00246   if ((pos_rn == NULL) && (pos_n == NULL))
00247    pos_c = content; /* this shouln't happen, but... ;-) */
00248   else
00249    if (pos_rn != NULL)
00250     pos_c = pos_rn+4;
00251    else
00252     pos_c = pos_n+2;
00253 
00254  header = (char *)malloc(pos_c-content+1);
00255  strlcpy(header, content, pos_c-content+1);
00256  switch (process_header(header, pos_c-content, (size_t)parameters))
00257  {
00258   case HTTP_STATE_ERROR: /* error - wait and try again */
00259    free(header);
00260    goto skip;
00261   case HTTP_STATE_REDIRECT: /* redirection - go now to the new location */
00262    free(content);
00263    free(header);
00264    goto fetch_thread_loop;
00265   case HTTP_STATE_OK:
00266    free(header);
00267  }
00268 
00269  LIBXML_TEST_VERSION
00270 
00271  xml_doc = xmlReadMemory(pos_c, content+contentsize-pos_c, NULL, NULL, XML_PARSE_NOERROR | XML_PARSE_NOWARNING);
00272  if (xml_doc != NULL)
00273  {
00274   xml_node = xmlDocGetRootElement(xml_doc);
00275   process_rss(xml_node, (size_t)parameters);
00276   xmlFreeDoc(xml_doc);
00277   xmlCleanupParser();
00278  }
00279 skip:
00280  free(content);
00281  DEBUG2("fetch_thread: sleeping %dsec...", config[(size_t)parameters].freq)
00282  sleep(config[(size_t)parameters].freq);
00283  goto fetch_thread_loop;
00284 }
00285 
00286 char *fetch(int *responsesize, const char *host, const uint16_t port, const char *request)
00287 {
00288  DEBUG1("fetch: start")
00289 
00290  int fetch_socket;
00291  int w, r; /* wrote, read */
00292  char *buffer = (char *)malloc(FETCH_BUFFER_SIZE);
00293  char *buffer2 = (char *)malloc(FETCH_BUFFER_SIZE);
00294  struct sockaddr_in address;
00295  struct hostent *h;
00296  
00297  *responsesize=-1;
00298  DEBUG1("fetch: opening socket");
00299  if ((fetch_socket = socket(AF_INET, SOCK_STREAM, 0)) <= 0) return NULL;
00300  DEBUG1("fetch: socket open")
00301  address.sin_family = AF_INET;
00302  address.sin_port = htons(port);
00303  DEBUG2("fetch: resolving %s", host);
00304  if ((h = gethostbyname(host)) == NULL) return NULL;
00305  memcpy(&address.sin_addr, *(h->h_addr_list), sizeof(address.sin_addr));
00306  if (connect(fetch_socket, (struct sockaddr *)&address, sizeof(address)) != 0) return NULL;
00307  DEBUG1("fetch: connection accepted")
00308  w = 0;
00309  while (w < strlen(request))
00310  {
00311   send(fetch_socket, request+w, (WRITE_BUFFER_SIZE < strlen(request+w))?(WRITE_BUFFER_SIZE):(strlen(request+w)), 0);
00312   w+=WRITE_BUFFER_SIZE;
00313  }
00314  send(fetch_socket, "\r\n", 2, 0);
00315  *responsesize=0;
00316  while ((r = read(fetch_socket, buffer, FETCH_BUFFER_SIZE)) > 0)
00317  {
00318   *responsesize+=r;
00319   buffer2=(char *)realloc(buffer2, (*responsesize)+1);
00320   strncat(buffer2, buffer, r);
00321  }
00322  close(fetch_socket);
00323  free(buffer);
00324 
00325  DEBUG1("fetch: end")
00326  return buffer2;
00327 }

Generated on Fri May 26 08:40:18 2006 for RSSbgr by doxygen 1.3.6