/* ** Copyright (c) 2001 The OSSP Project ** Copyright (c) 2001 Cable & Wireless Deutschland ** ** This file is part of OSSP lmtp2nntp, an LMTP speaking local ** mailer which forwards mails as Usenet news articles via NNTP. ** It can be found at http://www.ossp.org/pkg/lmtp2nntp/. ** ** This program is free software; you can redistribute it and/or ** modify it under the terms of the GNU General Public License ** as published by the Free Software Foundation; either version ** 2.0 of the License, or (at your option) any later version. ** ** This program is distributed in the hope that it will be useful, ** but WITHOUT ANY WARRANTY; without even the implied warranty of ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ** General Public License for more details. ** ** You should have received a copy of the GNU General Public License ** along with this file; if not, write to the Free Software ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 ** USA, or contact the OSSP project . ** ** msg.c: mail message manipulation library */ #include #include #include "msg.h" #include "str.h" #include "argz.h" /* third party */ #include "l2.h" #ifdef HAVE_CONFIG_H #include "config.h" #endif #if defined(HAVE_DMALLOC_H) && defined(DMALLOC) #include "dmalloc.h" #endif msg_t *msg_create(void) { msg_t *msg; if ((msg = (msg_t *)malloc(sizeof(msg_t))) == NULL) return NULL; msg->azEnvgroups = NULL; msg->asEnvgroups = 0; msg->cpMsg = NULL; msg->azHeaders = NULL; msg->asHeaders = 0; msg->cpFid = NULL; msg->cpBody = NULL; msg->cpMsgid = NULL; msg->mail_from = NULL; msg->azRcpt = NULL; msg->asRcpt = 0; msg->azNewsgroups = NULL; msg->asNewsgroups = 0; msg->l2 = NULL; /* this is a copy only */ return msg; } void msg_destroy(msg_t *msg) { if (msg == NULL) return; if (msg->azEnvgroups != NULL) free(msg->azEnvgroups); if (msg->cpMsg != NULL) free(msg->cpMsg); if (msg->azHeaders != NULL) free(msg->azHeaders); if (msg->cpFid != NULL) free(msg->cpFid); if (msg->cpBody != NULL) free(msg->cpBody); if (msg->cpMsgid != NULL) free(msg->cpMsgid); if (msg->mail_from != NULL) free(msg->mail_from); if (msg->azRcpt != NULL) free(msg->azRcpt); if (msg->azNewsgroups != NULL) free(msg->azNewsgroups); msg->l2 = NULL; /* this is a copy only, the "parent" needs to clean this up */ free(msg); return; } msg_rc_t msg_split(msg_t *msg) { char *cpName; char *cpValue; char *cpRem; /* Remainder */ char *cp; char *cpHeaders; /* INPUTS * * msg->cpMsg * must contain the wholly RFC0822 formatted message with native * (unescaped) dots at the beginning of a line, the 'From ' envelope, * headers, double newline, body, NUL, no trailing dot; * * OUTPUTS * * msg->cpMsg * free()d and set to NULL * * msg->azHeaders, msg->asHeaders contains the headers in argz format, one * logical NUL-terminated line per header which might be wrapped into * multiple '\n'-ended physical lines. The "From " envelope, "Received:", * "Path:", "To:" and "Cc:" headers are removed silently. The * "Newsgroups:" and "Message-ID" headers are removed and their values are * stored in separate structures (see below). * * msg->cpBody * contains the unmodified body of the message, NUL-terminated, no * trailing dot. * * msg->cpMsgid * contains the message id including surrounding angle brackets. * * msg->azNewsgroups, asNewsgroups * is a argz-type array of strings containing the Newsgroups based on the * header information. */ log0(msg, DEBUG, "split message into header and body"); if (str_parse(msg->cpMsg, "m/((?:.*?)\\n)\\n(.*)$/s", &cpHeaders, &msg->cpBody) <= 0) return MSG_ERR_SPLITHEADBODY; free(msg->cpMsg); msg->cpMsg = NULL; log0(msg, DEBUG, "replace envelope From w/o colon by X-F: pseudotag"); /* This eliminates the special case of having one header, which is really * an embedded envelope, not ending with a colon while all others do. * After splitting headers into name and value pairs this envelope ist * stripped off. */ if (strncasecmp(cpHeaders, "From", 4) == 0) memcpy(cpHeaders, "X-F:", 4); log0(msg, DEBUG, "unwrap header lines"); /* poor man's s///g simulator as current str library doesn't support global substitution */ while (str_parse(cpHeaders, "s/(.*?)\\n[ \\t]+(.*)/$1 $2/s", &cpRem) > 0) { free(cpHeaders); cpHeaders = cpRem; } log0(msg, DEBUG, "split header lines into names and values"); while (str_parse(cpHeaders, "m/^[> \\t]*([\\x21-\\x7e]+?:)[ \\t]*([^\\n]*?)[ \\t]*\\n(.*)/s", &cpName, &cpValue, &cpRem) > 0) { free(cpHeaders); cpHeaders = cpRem; argz_add(&msg->azHeaders, &msg->asHeaders, cpName); argz_add(&msg->azHeaders, &msg->asHeaders, cpValue); free(cpName); free(cpValue); } log0(msg, DEBUG, "check for headers we care about and do whatever neccessary"); msg->cpMsgid = NULL; msg->azNewsgroups = NULL; msg->asNewsgroups = 0; cp = msg->azHeaders; while (cp != NULL) { log1(msg, DEBUG, "processing header \"%s\"", cp); if (strcasecmp("X-F:", cp) == 0) { argz_delete(&msg->azHeaders, &msg->asHeaders, cp); /* del name */ argz_delete(&msg->azHeaders, &msg->asHeaders, cp); /* del value */ continue; } if (strcasecmp("Path:", cp) == 0) { argz_delete(&msg->azHeaders, &msg->asHeaders, cp); /* del name */ argz_delete(&msg->azHeaders, &msg->asHeaders, cp); /* del value */ continue; } if (strcasecmp("Received:", cp) == 0) { argz_delete(&msg->azHeaders, &msg->asHeaders, cp); /* del name */ if ((msg->cpFid == NULL) && (str_parse(cp, "m/\\sid\\s+cpFid) > 0)) log1(msg, DEBUG, "found foreign-ID \"%s\" for logging", msg->cpFid); argz_delete(&msg->azHeaders, &msg->asHeaders, cp); /* del value */ continue; } if (strcasecmp("To:", cp) == 0) { argz_delete(&msg->azHeaders, &msg->asHeaders, cp); /* del name */ argz_delete(&msg->azHeaders, &msg->asHeaders, cp); /* del value */ continue; } if (strcasecmp("Cc:", cp) == 0) { argz_delete(&msg->azHeaders, &msg->asHeaders, cp); /* del name */ argz_delete(&msg->azHeaders, &msg->asHeaders, cp); /* del value */ continue; } if (strcasecmp("Message-ID:", cp) == 0) { if (msg->cpMsgid != NULL) return MSG_ERR_SPLITIDMULTI; argz_delete(&msg->azHeaders, &msg->asHeaders, cp); /* del name */ if ((cp == NULL) || (strlen(cp) == 0)) /* get value */ return MSG_ERR_SPLITIDEMPTY; if ((msg->cpMsgid = strdup(cp)) == NULL) return MSG_ERR_MEM; argz_delete(&msg->azHeaders, &msg->asHeaders, cp); /* del value */ continue; } if (strcasecmp("Newsgroups:", cp) == 0) { argz_delete(&msg->azHeaders, &msg->asHeaders, cp); /* del name */ if (argz_add(&msg->azNewsgroups, &msg->asNewsgroups, cp) != 0) /* get value */ return MSG_ERR_MEM; argz_delete(&msg->azHeaders, &msg->asHeaders, cp); /* del value */ continue; } if ((cp = argz_next(msg->azHeaders, msg->asHeaders, cp)) == NULL) /* next value */ break; if ((cp = argz_next(msg->azHeaders, msg->asHeaders, cp)) == NULL) /* next name */ break; } log0(msg, DEBUG, "checking Message-ID"); if (msg->cpMsgid == NULL) return MSG_ERR_SPLITIDNONE; log0(msg, DEBUG, "checking Newsgroups"); if (msg->azNewsgroups != NULL) { argz_stringify(msg->azNewsgroups, msg->asNewsgroups, ','); if (argz_create_sep(msg->azNewsgroups, ',', &msg->azNewsgroups, &msg->asNewsgroups) != 0) return MSG_ERR_MEM; } log0(msg, DEBUG, "adding mandatory Path: header"); argz_add(&msg->azHeaders, &msg->asHeaders, "Path:"); argz_add(&msg->azHeaders, &msg->asHeaders, "lmtp2nntp!not-for-mail"); log0(msg, DEBUG, "split complete"); return MSG_OK; } msg_rc_t msg_join(msg_t *msg) { char *cp; char *cpRem; char **aHeaders; int i; int o; char *cpCut; char *cpWrap; char c; char cOld; int n; char *cpHeaders; char *azNewheaders; size_t asNewheaders; log0(msg, DEBUG, "verify Newsgroups"); if (msg->azNewsgroups == NULL) return MSG_ERR_JOINGROUPNONE; argz_stringify(msg->azNewsgroups, msg->asNewsgroups, ','); if (strlen(msg->azNewsgroups) == 0) return MSG_ERR_JOINGROUPEMPTY; argz_add(&msg->azHeaders, &msg->asHeaders, "Newsgroups:"); argz_add(&msg->azHeaders, &msg->asHeaders, msg->azNewsgroups); log0(msg, DEBUG, "verify Message-ID"); if (msg->cpMsgid == NULL) return MSG_ERR_JOINIDNONE; if (strlen(msg->cpMsgid) == 0) return MSG_ERR_JOINIDEMPTY; argz_add(&msg->azHeaders, &msg->asHeaders, "Message-ID:"); argz_add(&msg->azHeaders, &msg->asHeaders, msg->cpMsgid); log0(msg, DEBUG, "merge name/value pairs into single string"); argz_add(&msg->azHeaders, &msg->asHeaders, ""); /* append empty string */ if ((aHeaders = (char **)malloc((argz_count(msg->azHeaders, msg->asHeaders) + 1) * sizeof(char *))) == NULL) return MSG_ERR_MEM; argz_extract(msg->azHeaders, msg->asHeaders, aHeaders); /* replace the trailing NUL, which is *(cp-1) of the predecessor, with a * space at every second string. Break action when terminating NULL string * is detected */ i=0; while(1) { if ((cp = aHeaders[++i]) == NULL) break; *(cp-1) = ' '; if ((cp = aHeaders[++i]) == NULL) break; } free(aHeaders); log0(msg, DEBUG, "fold headers"); /* A logical line is split into one or more physical '\n'-terminated * lines. The physical line is never longer than WRAPAT characters. This * includes the folded data and the header name + colon + space for the * first line and WRAPUSING string prefix for all other lines. Leading and * trailing blanks of folded lines are removed while blanks inside the * line are preserved. The header is never left alone in a physical line. * Fragments exceeding WRAPAT characters without having a blank as a * splitting point are forcibly cut at a non-blank character. */ azNewheaders = NULL; asNewheaders = 0; cp = NULL; while ((cp = argz_next(msg->azHeaders, msg->asHeaders, cp)) != NULL) { if (strlen(cp) > WRAPAT) { cpRem = cp; cpWrap = NULL; for (o = 0; (cpRem[o] != ':') && (cpRem[o] != NUL); o++); /* offset name so at least one char of value remains in first line */ o += 2; /* skip ": " */ while ((strlen(cpRem) + (cpWrap == NULL ? 0 : strlen(WRAPUSING))) > WRAPAT) { for (i = WRAPAT - 1 - (cpWrap == NULL ? 0 : strlen(WRAPUSING)); (i >= o) && !isspace((int)cpRem[i]); i--); if (i < o) i = WRAPAT - 1 - (cpWrap == NULL ? 0 : strlen(WRAPUSING) - 1); /* sorry, forced cut at non-blank */ cpCut = cpRem; cpRem += i; for (; (isspace((int)*cpRem) && (*cpRem != NUL)); cpRem++); /* skip next lines leading blanks */ for (; (i >= o) && isspace((int)cpCut[i-1]); i--); /* chop off this lines trailing blanks */ if (i >= o) { /* only keep line fragment if some non-blanks inside */ if (cpWrap == NULL) { if ((cpWrap = (char *)malloc(i+strlen(WRAPUSING)+1)) == NULL) return MSG_ERR_MEM; *cpWrap = NUL; o = 1; } else { if ((cpWrap = (char *)realloc(cpWrap, strlen(cpWrap)+i+strlen(WRAPUSING)+1)) == NULL) return MSG_ERR_MEM; strcat(cpWrap, WRAPUSING); } strncat(cpWrap, cpCut, i); } } if (strlen(cpRem) > 0) { if ((cpWrap = (char *)realloc(cpWrap, strlen(cpWrap)+strlen(cpRem)+strlen(WRAPUSING)+1)) == NULL) return MSG_ERR_MEM; strcat(cpWrap, WRAPUSING); strcat(cpWrap, cpRem); } argz_add(&azNewheaders, &asNewheaders, cpWrap); log2(msg, DEBUG, "a folded header \"%{text}D\"", cpWrap, strlen(cpWrap)); free(cpWrap); } else { argz_add(&azNewheaders, &asNewheaders, cp); log2(msg, DEBUG, "verbatim header \"%{text}D\"", cp, strlen(cp)); } } free(msg->azHeaders); msg->azHeaders = azNewheaders; msg->asHeaders = asNewheaders; log0(msg, DEBUG, "strigify headers"); argz_stringify(msg->azHeaders, msg->asHeaders, '\n'); cpHeaders = msg->azHeaders; /******************************************************************** * header + CRLF + body + '.' + CRLF + NUL, replacing NL with CRLF * ********************************************************************/ log0(msg, DEBUG, "assemble header and body"); n = 0; /* count size of headers, reserve space for NL to CRLF conversion */ for (i = 0; ((c = cpHeaders[i]) != NUL); i++) { if (c == '\n') n++; n++; } /* if headers don't end with NL, reserve space for CRLF */ if (i >= 0 && cpHeaders[i - 1] != '\n') n+=2; /* reserve space for CRLF between headers and body */ n+=2; /* count size of body, reserve space for NL-DOT escape and NL to CRLF conversion */ cOld = '\n'; for (i = 0; ((c = msg->cpBody[i]) != NUL); i++) { if (c == '\n') n++; if (c == '.' && cOld == '\n') n++; n++; cOld = c; } /* if body doesn't end with NL, reserve space for CRLF */ if (i >= 0 && msg->cpBody[i - 1] != '\n') n+=2; /* reserve space for terminating '.'-CRLF-NUL at the end of the message */ n+=4; if ((msg->cpMsg = (char *)malloc(n)) == NULL) return MSG_ERR_MEM; n = 0; /* copy headers, do NL to CRLF conversion */ for (i = 0; ((c = cpHeaders[i]) != NUL); i++) { if (c == '\n') msg->cpMsg[n++] = '\r'; msg->cpMsg[n++] = c; } /* if headers don't end with NL, append CRLF */ if (i >= 0 && cpHeaders[i - 1] != '\n') { msg->cpMsg[n++] = '\r'; msg->cpMsg[n++] = '\n'; } /* add CRLF between headers and body */ msg->cpMsg[n++] = '\r'; msg->cpMsg[n++] = '\n'; /* copy body, do NL-DOT escape and NL to CRLF conversion */ cOld = '\n'; for (i = 0; ((c = msg->cpBody[i]) != NUL); i++) { if (c == '\n') msg->cpMsg[n++] = '\r'; if (c == '.' && cOld == '\n') msg->cpMsg[n++] = '.'; msg->cpMsg[n++] = c; cOld = c; } /* if body doesn't end with NL, append CRLF */ if (i >= 0 && msg->cpBody[i - 1] != '\n') { msg->cpMsg[n++] = '\r'; msg->cpMsg[n++] = '\n'; } /* add terminating '.'-CRLF-NUL at the end of the message */ msg->cpMsg[n++] = '.'; msg->cpMsg[n++] = '\r'; msg->cpMsg[n++] = '\n'; msg->cpMsg[n] = NUL; log0(msg, DEBUG, "join complete"); return MSG_OK; } char *msg_error(msg_rc_t rc) { char *str; str = "MSG: no description"; if (rc == MSG_OK ) str = "MSG: no error"; else if (rc == MSG_ERR_MEM ) str = "MSG: memory"; else if (rc == MSG_ERR_SPLITHEADBODY ) str = "MSG: split into header and body failed"; else if (rc == MSG_ERR_SPLITLEN ) str = "MSG: header is too short"; else if (rc == MSG_ERR_SPLITMISSINGFROM ) str = "MSG: header is missing 'From ' envelope"; else if (rc == MSG_ERR_SPLITIDNONE ) str = "MSG: header is missing 'Message-ID'"; else if (rc == MSG_ERR_SPLITIDEMPTY ) str = "MSG: header has empty 'Message-ID'"; else if (rc == MSG_ERR_SPLITIDMULTI ) str = "MSG: header has multiple 'Message-ID's"; else if (rc == MSG_ERR_JOINGROUPNONE ) str = "MSG: join with no 'Newsgroup'"; else if (rc == MSG_ERR_JOINGROUPEMPTY ) str = "MSG: join with empty 'Newsgroup'"; else if (rc == MSG_ERR_JOINIDNONE ) str = "MSG: join with no 'Message-ID'"; else if (rc == MSG_ERR_JOINIDEMPTY ) str = "MSG: join with empty 'Message-ID'"; return str; }