00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00026 #include "fetch.h"
00027
/**
 * Search an open file for a whitespace-delimited token equal to @p string.
 *
 * The stream is rewound first, so the whole file is scanned on every call.
 *
 * @param fd      Stream to scan; NULL is treated as "not found".
 * @param string  Token to look for; NULL is treated as "not found".
 * @return 1 if a token equal to @p string exists in the file, 0 otherwise.
 */
int find_in_file(FILE *fd, char *string)
{
    /* process_rss stores digests of up to 32 characters; this leaves ample
     * room for one token plus its terminator. */
    char line[64];
    char tail[2];

    DEBUG3("find_in_file: %p, %s", fd, string);
    if (fd == NULL || string == NULL) return 0;
    rewind(fd);
    /* Loop on the conversion result, not feof(): a failed read must never
     * lead to comparing an unset or stale buffer. */
    while (fscanf(fd, "%63s", line) == 1)
    {
        /* A token that filled the buffer and continues with a non-blank
         * character was truncated: discard its remainder and do not let a
         * fragment produce a false match. */
        if (strlen(line) == sizeof line - 1 &&
            fscanf(fd, "%1[^ \t\n\v\f\r]", tail) == 1)
        {
            (void)fscanf(fd, "%*[^ \t\n\v\f\r]");
            continue;
        }
        if (!strcmp(line, string))
            return 1;
    }
    return 0;
}
00048
00049 void process_rss(xmlNode *root_node, int config_item_i)
00050 {
00051 xmlNode *cur_node, *cur_sub_node, *cur_sub_sub_node;
00052 char *channel_link = NULL, *channel_title = NULL, *channel_desc = NULL;
00053 char *item_link = NULL, *item_title = NULL, *item_desc = NULL;
00054 char *filename, *filename_new;
00055 char *digest = (char *)malloc(33);
00056 char *command;
00057 FILE *fd, *fd_new;
00058
00059 filename = (char *)malloc(PATH_MAX);
00060 md5(digest, config[config_item_i].url);
00061 sprintf(filename, "%s/%s", directory, digest);
00062 filename_new = (char *)malloc(PATH_MAX);
00063 sprintf(filename_new, "%s.new", filename);
00064 fd = fopen(filename, "r");
00065 fd_new = fopen(filename_new, "w");
00066 if (!strcmp(root_node->name, "rss"))
00067 for (cur_node = root_node->children; cur_node != NULL; cur_node = cur_node->next)
00068 if (!strcmp(cur_node->name, "channel"))
00069 for (cur_sub_node = cur_node->children; cur_sub_node != NULL; cur_sub_node = cur_sub_node->next)
00070 {
00071 if (!strcmp(cur_sub_node->name, "item"))
00072 {
00073 for (cur_sub_sub_node = cur_sub_node->children; cur_sub_sub_node != NULL; cur_sub_sub_node = cur_sub_sub_node->next)
00074 {
00075 if (!strcmp(cur_sub_sub_node->name, "link"))
00076 item_link = xmlNodeListGetString(root_node->doc, cur_sub_sub_node->xmlChildrenNode, 1);
00077 else if (!strcmp(cur_sub_sub_node->name, "title") && strstr(config[config_item_i].action, "{t}"))
00078 item_title = xmlNodeListGetString(root_node->doc, cur_sub_sub_node->xmlChildrenNode, 1);
00079 else if (!strcmp(cur_sub_sub_node->name, "description") && strstr(config[config_item_i].action, "{d}"))
00080 item_desc = xmlNodeListGetString(root_node->doc, cur_sub_sub_node->xmlChildrenNode, 1);
00081 }
00082 md5(digest, item_link);
00083 if (!find_in_file(fd, digest))
00084 {
00085 command=replace_fields(config[config_item_i].action, channel_link, channel_title, channel_desc, item_link, item_title, item_desc);
00086 execute(command);
00087 free(command);
00088 }
00089 if (fd_new != NULL) fprintf(fd_new, "%s\n", digest);
00090 free(item_link); free(item_title); free(item_desc); item_link = item_title = item_desc = NULL;
00091 }
00092 else if (!strcmp(cur_sub_node->name, "link") && strstr(config[config_item_i].action, "{L}"))
00093 channel_link = xmlNodeListGetString(root_node->doc, cur_sub_node->xmlChildrenNode, 1);
00094 else if (!strcmp(cur_sub_node->name, "title") && strstr(config[config_item_i].action, "{T}"))
00095 channel_title = xmlNodeListGetString(root_node->doc, cur_sub_node->xmlChildrenNode, 1);
00096 else if (!strcmp(cur_sub_node->name, "description") && strstr(config[config_item_i].action, "{D}"))
00097 channel_desc = xmlNodeListGetString(root_node->doc, cur_sub_node->xmlChildrenNode, 1);
00098 }
00099 if (fd != NULL) fclose(fd);
00100 if (fd_new != NULL)
00101 {
00102 fclose(fd_new);
00103 rename(filename_new, filename);
00104 }
00105 free(channel_link); free(channel_title); free(channel_title);
00106 free(filename); free(filename_new); free(digest);
00107 }
00108
/**
 * Parse a date of the form "Tue, 15 Nov 1994 08:12:31" into a time_t.
 *
 * Anything following the seconds field (such as a zone name) is ignored.
 *
 * NOTE(review): mktime() interprets the broken-down time as local time,
 * while HTTP dates are normally GMT; process_header() only uses differences
 * between values parsed here. Confirm before using the result as an
 * absolute timestamp.
 *
 * @param data  NUL-terminated date string; NULL yields -1.
 * @return The converted time, or -1 if the string cannot be parsed.
 */
time_t string_to_time(char *data)
{
    struct tm tm;

    if (data == NULL) return -1;
    /* strptime() only stores the fields it parses, so start from a fully
     * defined structure. */
    memset(&tm, 0, sizeof tm);
    if (strptime(data, "%a, %d %b %Y %H:%M:%S", &tm) == NULL) return -1;
    tm.tm_isdst = -1;   /* let mktime() decide whether DST applies */
    return mktime(&tm);
}
00116
00117 enum HTTP_STATE process_header(char *data, int len, int config_item_i)
00118 {
00119 size_t pos, _len, cookie_pos;
00120 int cookie_i, i;
00121 time_t server_time = time(NULL), time_diff = 0;
00122 char *url, *host, *request;
00123 uint16_t port;
00124
00125 if (len < 16) return HTTP_STATE_ERROR;
00126 if (strncmp("HTTP/1.", data, 7)) return HTTP_STATE_ERROR;
00127 pos = 9;
00128 switch(data[pos])
00129 {
00130 case '2':
00131 if (data[pos+2] == '4') return HTTP_STATE_ERROR;
00132 while ((pos = (size_t)strchr(data+pos, '\n')) && (pos = pos-(size_t)data+1) <= len)
00133 {
00134 if (!strncmp("Date:", data+pos, 5))
00135 {
00136 server_time = ((server_time = string_to_time(data+pos+5)) != -1)?(server_time):(time(NULL));
00137 time_diff = time(NULL) - server_time;
00138 }
00139 else if (!strncmp("Set-Cookie:", data+pos, 11))
00140 {
00141 if (strchr(data+pos+11, '\r') && strchr(data+pos+11, '\n'))
00142 _len = MIN(strchr(data+pos+11, ';'), MIN(strchr(data+pos+11, '\r'), strchr(data+pos+11, '\n')))-(data+pos+11);
00143 else if (strchr(data+pos+11, '\r'))
00144 _len = MIN(strchr(data+pos+11, ';'), strchr(data+pos+11, '\r'))-(data+pos+11);
00145 else
00146 _len = MIN(strchr(data+pos+11, ';'), strchr(data+pos+11, '\n'))-(data+pos+11);
00147 cookie_i = -1;
00148 for (i = 0; i < config[config_item_i].cookies_count; ++i)
00149 if (!strncmp(data+pos+11, config[config_item_i].cookies[i].data, strchr(data+pos+11, '=')-(data+pos+11)))
00150 cookie_i = i;
00151 if (cookie_i == -1)
00152 {
00153 cookie_i = config[config_item_i].cookies_count++;
00154 config[config_item_i].cookies = (struct cookie *)realloc(config[config_item_i].cookies, config[config_item_i].cookies_count*sizeof(struct cookie));
00155 }
00156 else
00157 free(config[config_item_i].cookies[cookie_i].data);
00158 config[config_item_i].cookies[cookie_i].data = (char *)malloc(_len+1);
00159 strlcpy(config[config_item_i].cookies[cookie_i].data, data+pos+11, _len+1);
00160 cookie_pos = ((cookie_pos = (size_t)strstr(data+pos+11, "expires="))?(cookie_pos-(size_t)data):(0));
00161 if (cookie_pos && cookie_pos < (size_t)strchr(data+pos, '\n'))
00162 {
00163 if ((config[config_item_i].cookies[cookie_i].expires = string_to_time(data+cookie_pos+8)) != -1)
00164 config[config_item_i].cookies[cookie_i].expires += time_diff;
00165 }
00166 else
00167 config[config_item_i].cookies[cookie_i].expires = -1;
00168 DEBUG4("process_header: %d cookie: %s, expires: %d", cookie_i, config[config_item_i].cookies[cookie_i].data, (int)config[config_item_i].cookies[cookie_i].expires)
00169 }
00170 }
00171 return HTTP_STATE_OK;
00172 case '3':
00173 while ((pos = (size_t)strchr(data+pos, '\n')) && (pos = pos-(size_t)data+1) <= len)
00174 {
00175 if (!strncmp("Location:", data+pos, 9))
00176 {
00177 _len = MIN(strchr(data+pos+9, '\r'), strchr(data+pos+9, '\n'))-(data+pos+9);
00178 pos+=9; while (data[pos] == ' ') ++pos;
00179 _len = MIN(strchr(data+pos, '\r'), strchr(data+pos, '\n'))-(data+pos);
00180 url = (char *)malloc(_len+1);
00181 strlcpy(url, data+pos, _len+1);
00182 DEBUG2("process_header: Redirect: %s", url)
00183 if (parse_url(url, &host, &port, &request) == -1)
00184 {
00185 free(url); free(host); free(request); return HTTP_STATE_ERROR;
00186 }
00187 if (strcmp(host, config[config_item_i].host))
00188 {
00189 for (; config[config_item_i].cookies_count; --config[config_item_i].cookies_count)
00190 free(config[config_item_i].cookies[config[config_item_i].cookies_count-1].data);
00191 free(config[config_item_i].cookies);
00192 }
00193 config[config_item_i].host = realloc(config[config_item_i].host, strlen(host)+1);
00194 strcpy(config[config_item_i].host, host);
00195 config[config_item_i].port = port;
00196 config[config_item_i].request = realloc(config[config_item_i].request, strlen(request)+1);
00197 strcpy(config[config_item_i].request, request);
00198 free(url); free(host); free(request);
00199 return HTTP_STATE_REDIRECT;
00200 }
00201 }
00202 default:
00203 return HTTP_STATE_ERROR;
00204 }
00205 }
00206
00207 void *fetch_thread(void *parameters)
00208 {
00209 char *content, *header, *request;
00210 int contentsize, i, len;
00211 char *pos_rn, *pos_n, *pos_c;
00212 xmlDoc *xml_doc;
00213 xmlNode *xml_node;
00214
00215 fetch_thread_loop:
00216 request = (char *)malloc(strlen(config[(size_t)parameters].request)+1);
00217 strcpy(request, config[(size_t)parameters].request);
00218 len = 0;
00219 for (i = 0; i<config[(size_t)parameters].cookies_count; ++i)
00220 if (config[(size_t)parameters].cookies[i].expires == -1 || config[(size_t)parameters].cookies[i].expires >= time(NULL))
00221 {
00222 if (!len)
00223 {
00224 len = strlen(config[(size_t)parameters].cookies[i].data)+11;
00225 request = (char *)realloc(request, strlen(config[(size_t)parameters].request)+len);
00226 strcat(request, "Cookie: ");
00227 strcat(request, config[(size_t)parameters].cookies[i].data);
00228 }
00229 else
00230 {
00231 len += strlen(config[(size_t)parameters].cookies[i].data)+2;
00232 request = (char *)realloc(request, strlen(config[(size_t)parameters].request)+len);
00233 strcat(request, "; ");
00234 strcat(request, config[(size_t)parameters].cookies[i].data);
00235 }
00236 }
00237 if (len) strcat(request, "\r\n");
00238 DEBUG2("Request: %s", request);
00239 if ((content = fetch(&contentsize, config[(size_t)parameters].host, config[(size_t)parameters].port, request)) == NULL) goto skip;
00240 free(request);
00241 pos_rn = strstr(content, "\r\n\r\n");
00242 pos_n = strstr(content, "\n\n");
00243 if ((pos_rn != NULL) && (pos_n != NULL))
00244 pos_c = MIN(pos_rn+4, pos_n+2);
00245 else
00246 if ((pos_rn == NULL) && (pos_n == NULL))
00247 pos_c = content;
00248 else
00249 if (pos_rn != NULL)
00250 pos_c = pos_rn+4;
00251 else
00252 pos_c = pos_n+2;
00253
00254 header = (char *)malloc(pos_c-content+1);
00255 strlcpy(header, content, pos_c-content+1);
00256 switch (process_header(header, pos_c-content, (size_t)parameters))
00257 {
00258 case HTTP_STATE_ERROR:
00259 free(header);
00260 goto skip;
00261 case HTTP_STATE_REDIRECT:
00262 free(content);
00263 free(header);
00264 goto fetch_thread_loop;
00265 case HTTP_STATE_OK:
00266 free(header);
00267 }
00268
00269 LIBXML_TEST_VERSION
00270
00271 xml_doc = xmlReadMemory(pos_c, content+contentsize-pos_c, NULL, NULL, XML_PARSE_NOERROR | XML_PARSE_NOWARNING);
00272 if (xml_doc != NULL)
00273 {
00274 xml_node = xmlDocGetRootElement(xml_doc);
00275 process_rss(xml_node, (size_t)parameters);
00276 xmlFreeDoc(xml_doc);
00277 xmlCleanupParser();
00278 }
00279 skip:
00280 free(content);
00281 DEBUG2("fetch_thread: sleeping %dsec...", config[(size_t)parameters].freq)
00282 sleep(config[(size_t)parameters].freq);
00283 goto fetch_thread_loop;
00284 }
00285
00286 char *fetch(int *responsesize, const char *host, const uint16_t port, const char *request)
00287 {
00288 DEBUG1("fetch: start")
00289
00290 int fetch_socket;
00291 int w, r;
00292 char *buffer = (char *)malloc(FETCH_BUFFER_SIZE);
00293 char *buffer2 = (char *)malloc(FETCH_BUFFER_SIZE);
00294 struct sockaddr_in address;
00295 struct hostent *h;
00296
00297 *responsesize=-1;
00298 DEBUG1("fetch: opening socket");
00299 if ((fetch_socket = socket(AF_INET, SOCK_STREAM, 0)) <= 0) return NULL;
00300 DEBUG1("fetch: socket open")
00301 address.sin_family = AF_INET;
00302 address.sin_port = htons(port);
00303 DEBUG2("fetch: resolving %s", host);
00304 if ((h = gethostbyname(host)) == NULL) return NULL;
00305 memcpy(&address.sin_addr, *(h->h_addr_list), sizeof(address.sin_addr));
00306 if (connect(fetch_socket, (struct sockaddr *)&address, sizeof(address)) != 0) return NULL;
00307 DEBUG1("fetch: connection accepted")
00308 w = 0;
00309 while (w < strlen(request))
00310 {
00311 send(fetch_socket, request+w, (WRITE_BUFFER_SIZE < strlen(request+w))?(WRITE_BUFFER_SIZE):(strlen(request+w)), 0);
00312 w+=WRITE_BUFFER_SIZE;
00313 }
00314 send(fetch_socket, "\r\n", 2, 0);
00315 *responsesize=0;
00316 while ((r = read(fetch_socket, buffer, FETCH_BUFFER_SIZE)) > 0)
00317 {
00318 *responsesize+=r;
00319 buffer2=(char *)realloc(buffer2, (*responsesize)+1);
00320 strncat(buffer2, buffer, r);
00321 }
00322 close(fetch_socket);
00323 free(buffer);
00324
00325 DEBUG1("fetch: end")
00326 return buffer2;
00327 }