530 lines
17 KiB
Objective-C
530 lines
17 KiB
Objective-C
|
|
#import "MultipartFormDataParser.h"
|
|
#import "DDData.h"
|
|
#import "HTTPLogging.h"
|
|
|
|
#pragma mark log level

// Log level used by the HTTPLog* macros in this file.
// Both build configurations currently select the same level (WARN).
#ifdef DEBUG
static const int httpLogLevel = HTTP_LOG_LEVEL_WARN;
#else
static const int httpLogLevel = HTTP_LOG_LEVEL_WARN;
#endif

// printf-style conversion (without the leading '%') matching NSInteger.
// NSInteger is `long` on every LP64 target (x86_64, arm64, ...), not only on
// x86_64, so the choice is keyed on __LP64__ rather than on one architecture.
#if defined(__LP64__) && __LP64__
#define FMTNSINT "li"
#else
#define FMTNSINT "i"
#endif
|
|
|
|
|
|
//-----------------------------------------------------------------
|
|
// interface MultipartFormDataParser (private)
|
|
//-----------------------------------------------------------------
|
|
|
|
|
|
@interface MultipartFormDataParser (private)

// Locates the first boundary line (the one without a leading CRLF) in
// workingData and forwards any bytes in front of it to the delegate as preamble.
// Returns the offset just past the boundary, or -1 when more data is needed.
- (int) processPreamble:(NSData*) workingData;

// Returns the offset just past the first CRLF found at or after `offset`,
// or -1 when `data` holds no CRLF from that point on.
- (int) offsetTillNewlineSinceOffset:(int) offset inData:(NSData*) data;

// Returns the offset of the first CRLFCRLF sequence at or after `offset`
// (the end of a part header), or -1 when it has not been received yet.
- (int) findHeaderEnd:(NSData*) workingData fromOffset:(int) offset;

// Returns the offset of the first occurrence of the boundary delimiter
// (CRLF "--" boundary) at or after `offset`, or -1 when there is none.
- (int) findContentEnd:(NSData*) data fromOffset:(int) offset;

// Returns how many trailing bytes of the first `length` bytes of `data` must be
// held back so that base64 / quoted-printable content is not split mid-token.
// NOTE(review): `length` is declared NSUInteger here while the implementation
// in this file takes `int` - confirm which type is intended.
- (int) numberOfBytesToLeavePendingWithData:(NSData*) data length:(NSUInteger) length encoding:(int) encoding;

// Decodes `data` according to the given content-transfer-encoding constant;
// encodings other than base64 and quoted-printable are returned unchanged.
+ (NSData*) decodedDataFromData:(NSData*) data encoding:(int) encoding;

@end
|
|
|
|
|
|
//-----------------------------------------------------------------
|
|
// implementation MultipartFormDataParser
|
|
//-----------------------------------------------------------------
|
|
|
|
|
|
@implementation MultipartFormDataParser
|
|
@synthesize delegate,formEncoding;
|
|
|
|
// Creates a parser for a multipart body delimited by `boundary`.
//
// boundary      - the boundary parameter of the Content-Type header, without
//                 the leading "--". Must be non-nil and representable in ASCII.
// _formEncoding - string encoding handed to MultipartMessageHeader when part
//                 headers are parsed.
//
// Returns nil when the boundary is nil or cannot be encoded as ASCII.
- (id) initWithBoundary:(NSString*) boundary formEncoding:(NSStringEncoding) _formEncoding {
    self = [super init];
    if( nil == self ) {
        return nil;
    }

    if( nil == boundary ) {
        HTTPLogWarn(@"MultipartFormDataParser: init with zero boundary");
        return nil;
    }

    // Every delimiter except the very first one is preceded by CRLF, so the
    // CRLF and the "--" prefix are stored as part of the searched byte pattern.
    boundaryData = [[@"\r\n--" stringByAppendingString:boundary] dataUsingEncoding:NSASCIIStringEncoding];
    if( nil == boundaryData ) {
        // dataUsingEncoding: returns nil for characters outside ASCII. Without
        // boundary bytes the parser could never work (appendData: would only
        // hit its assert), so fail construction right here.
        HTTPLogWarn(@"MultipartFormDataParser: init with a boundary that can't be encoded as ASCII");
        return nil;
    }

    pendingData = [[NSMutableData alloc] init];
    currentEncoding = contentTransferEncoding_binary;
    currentHeader = nil;

    formEncoding = _formEncoding;
    reachedEpilogue = NO;
    processedPreamble = NO;

    return self;
}
|
|
|
|
|
|
// Replaces pendingData with the bytes of workingData starting at `offset`,
// i.e. with everything that has not been handled yet.
// workingData may be pendingData itself, so the tail is copied before the
// buffer is overwritten.
- (void) stashUnhandledData:(NSData*) workingData fromOffset:(NSInteger) offset {
    NSUInteger length = workingData.length;
    NSUInteger start = ( offset > 0 ) ? (NSUInteger) offset : 0;

    if( start >= length ) {
        // everything was consumed
        [pendingData setLength:0];
        return;
    }
    if( workingData == pendingData && 0 == start ) {
        // pendingData already holds exactly the unhandled bytes
        return;
    }
    NSData* tail = [NSData dataWithBytes:(const char*) workingData.bytes + start length:length - start];
    [pendingData setData:tail];
}

// Feeds the next chunk of the multipart body to the parser.
//
// The chunk is appended to whatever was left unhandled by the previous call
// and the state machine (preamble -> [header -> content]* -> epilogue) is run
// over it, invoking the optional delegate callbacks as parts are recognised.
// Bytes that can't be interpreted yet (an incomplete header, or a tail that
// might be the beginning of a boundary) are kept in pendingData.
//
// Returns YES when the chunk was accepted (the parser may still be waiting
// for more data); NO when there is no boundary to parse with or a part header
// could not be parsed.
- (BOOL) appendData:(NSData *)data {
    // Can't parse without boundary;
    if( nil == boundaryData ) {
        HTTPLogError(@"MultipartFormDataParser: Trying to parse multipart without specifying a valid boundary");
        assert(false);
        return NO;
    }

    NSData* workingData = data;
    if( pendingData.length ) {
        [pendingData appendData:data];
        workingData = pendingData;
    }

    // The parse position inside workingData. Before returning, everything
    // before `offset` is dropped and only the unhandled rest stays pending.
    int offset = 0;

    // Never hand out the last boundary-length bytes of a chunk: they could be
    // the first half of a boundary that was split across two chunks.
    NSUInteger sizeToLeavePending = boundaryData.length;

    if( !reachedEpilogue && workingData.length <= sizeToLeavePending ) {
        // not enough data even to start parsing; save it to pending data.
        if( !pendingData.length ) {
            [pendingData appendData:data];
        }
        if( !checkForContentEnd || pendingData.length < 2 ) {
            return YES;
        }

        const char* pendingBytes = (const char*) pendingData.bytes;
        if( pendingBytes[0] == '-' && pendingBytes[1] == '-' ) {
            // "--" right after a boundary: the multipart end. All that comes
            // next is the epilogue.
            HTTPLogVerbose(@"MultipartFormDataParser: End of multipart message");
            // The end marker has been consumed; clear the flag (as the main
            // loop does) so the following bytes are not tested for "--" again.
            checkForContentEnd = NO;
            waitingForCRLF = YES;
            reachedEpilogue = YES;
            offset += 2;
        }
        else {
            checkForContentEnd = NO;
            waitingForCRLF = YES;
            return YES;
        }
    }

    while( true ) {
        if( checkForContentEnd ) {
            // Raised after a boundary was consumed: the next two bytes tell
            // whether that boundary was the closing one ("--").
            if( (NSUInteger) offset + 1 < workingData.length ) {
                const char* bytes = (const char*) workingData.bytes;
                if( bytes[offset] == '-' && bytes[offset + 1] == '-' ) {
                    // we found the multipart end. all coming next is an epilogue.
                    HTTPLogVerbose(@"MultipartFormDataParser: End of multipart message");
                    checkForContentEnd = NO;
                    reachedEpilogue = YES;
                    // still wait for CRLF, that comes after boundary, but before epilogue.
                    waitingForCRLF = YES;
                    offset += 2;
                }
                else {
                    // it's not content end, we have to wait till separator line end before next part comes
                    waitingForCRLF = YES;
                    checkForContentEnd = NO;
                }
            }
            else {
                // Not enough data to decide. Keep the unhandled byte (there is
                // at most one) and recheck when the next chunk arrives.
                if( (NSUInteger) offset < workingData.length ) {
                    [pendingData setData:[NSData dataWithBytes:(const char*) workingData.bytes + workingData.length - 1 length:1]];
                }
                else {
                    // there is no unhandled data now, wait for more chunks
                    [pendingData setData:[NSData data]];
                }
                return YES;
            }
        }

        if( waitingForCRLF ) {
            // A boundary was read, but the end of the boundary line was not
            // reached yet.
            offset = [self offsetTillNewlineSinceOffset:offset inData:workingData];
            if( -1 == offset ) {
                // No CRLF in this chunk. The rest of the line carries no
                // information, except a trailing CR whose LF may still come.
                NSUInteger workingLength = workingData.length;
                if( workingLength > 0 && ((const char*) workingData.bytes)[workingLength - 1] == '\r' ) {
                    [pendingData setData:[NSData dataWithBytes:(const char*) workingData.bytes + workingLength - 1 length:1]];
                }
                else {
                    [pendingData setData:[NSData data]];
                }
                return YES;
            }
            waitingForCRLF = NO;
        }

        if( !processedPreamble ) {
            // got to find the first boundary before the actual content begins.
            offset = [self processPreamble:workingData];
            // wait for more data for preamble
            if( -1 == offset )
                return YES;
            // invoke continue to skip newline after boundary.
            continue;
        }

        if( reachedEpilogue ) {
            // pass all epilogue data to the delegate.
            if( [delegate respondsToSelector:@selector(processEpilogueData:)] ) {
                NSUInteger workingLength = workingData.length;
                NSUInteger start = ( (NSUInteger) offset < workingLength ) ? (NSUInteger) offset : workingLength;
                NSData* epilogueData = [NSData dataWithBytesNoCopy:(char*) workingData.bytes + start length:workingLength - start freeWhenDone:NO];
                [delegate processEpilogueData:epilogueData];
            }
            // Everything buffered has been handled; without this, data left in
            // pendingData would be delivered again as epilogue on the next call.
            [pendingData setLength:0];
            return YES;
        }

        if( nil == currentHeader ) {
            // nil == currentHeader is a state flag: a part header is expected.
            // It is reset to nil whenever a part is over.

            // Headers are looked up as a whole (terminated by CRLFCRLF);
            // they are not expected to be large.
            int headerEnd = [self findHeaderEnd:workingData fromOffset:offset];
            if( -1 == headerEnd ) {
                // didn't receive the full header yet: drop all handled data
                // and keep only the unhandled part.
                [self stashUnhandledData:workingData fromOffset:offset];
                return YES;
            }

            // let the header parser do its job from now on.
            NSData* headerData = [NSData dataWithBytesNoCopy:(char*) workingData.bytes + offset length:headerEnd + 2 - offset freeWhenDone:NO];
            currentHeader = [[MultipartMessageHeader alloc] initWithData:headerData formEncoding:formEncoding];

            if( nil == currentHeader ) {
                // we've found the data is in wrong format.
                HTTPLogError(@"MultipartFormDataParser: MultipartFormDataParser: wrong input format, coulnd't get a valid header");
                return NO;
            }
            if( [delegate respondsToSelector:@selector(processStartOfPartWithHeader:)] ) {
                [delegate processStartOfPartWithHeader:currentHeader];
            }
            HTTPLogVerbose(@"MultipartFormDataParser: MultipartFormDataParser: Retrieved part header.");

            // skip the two trailing \r\n, in addition to the whole header.
            offset = headerEnd + 4;
        }

        // after we've got the header, we try to find the boundary in the data.
        int contentEnd = [self findContentEnd:workingData fromOffset:offset];

        if( contentEnd == -1 ) {
            // No boundary yet, so the data belongs to the current part.
            // Signed arithmetic on purpose: the unhandled rest can be shorter
            // than the amount that must stay pending.
            NSInteger sizeToPass = (NSInteger) workingData.length - (NSInteger) offset - (NSInteger) sizeToLeavePending;

            if( sizeToPass > 0 ) {
                // For base64 / quoted-printable content make sure the chunk
                // is not cut in the middle of an encoded token. The helper
                // inspects exactly the bytes that are about to be passed.
                NSData* candidate = [NSData dataWithBytesNoCopy:(char*) workingData.bytes + offset length:(NSUInteger) sizeToPass freeWhenDone:NO];
                int leaveTrailing = [self numberOfBytesToLeavePendingWithData:candidate length:(NSUInteger) sizeToPass encoding:currentEncoding];
                if( leaveTrailing > 0 ) {
                    sizeToPass -= leaveTrailing;
                }
            }

            if( sizeToPass <= 0 ) {
                // wait for more data, keeping everything that is unhandled.
                [self stashUnhandledData:workingData fromOffset:offset];
                return YES;
            }

            // decode the chunk and let the delegate use it (store in a file, for example)
            NSData* encodedChunk = [NSData dataWithBytesNoCopy:(char*) workingData.bytes + offset length:(NSUInteger) sizeToPass freeWhenDone:NO];
            NSData* decodedData = [MultipartFormDataParser decodedDataFromData:encodedChunk encoding:currentEncoding];

            if( [delegate respondsToSelector:@selector(processContent:WithHeader:)] ) {
                HTTPLogVerbose(@"MultipartFormDataParser: Processed %ld bytes of body", (long) sizeToPass);
                [delegate processContent:decodedData WithHeader:currentHeader];
            }

            // store the unprocessed data (possible boundary start plus any
            // held-back encoded bytes) till the next chunks come.
            [self stashUnhandledData:workingData fromOffset:(NSInteger) offset + sizeToPass];
            return YES;
        }
        else {
            // Here we found the boundary: pass the last piece of the part to
            // the delegate (decoded like every other piece) and go on with
            // the next parts.
            if( [delegate respondsToSelector:@selector(processContent:WithHeader:)] ) {
                NSData* encodedChunk = [NSData dataWithBytesNoCopy:(char*) workingData.bytes + offset length:contentEnd - offset freeWhenDone:NO];
                NSData* decodedData = [MultipartFormDataParser decodedDataFromData:encodedChunk encoding:currentEncoding];
                [delegate processContent:decodedData WithHeader:currentHeader];
            }

            if( [delegate respondsToSelector:@selector(processEndOfPartWithHeader:)] ) {
                [delegate processEndOfPartWithHeader:currentHeader];
                HTTPLogVerbose(@"MultipartFormDataParser: End of body part");
            }
            currentHeader = nil;

            // continue with the remaining data (if any), right after the boundary.
            offset = contentEnd + (int) boundaryData.length;
            // the next iteration checks whether this was the closing boundary
            // and then skips everything up to the end of the boundary line.
            checkForContentEnd = YES;
        }
    }
}
|
|
|
|
|
|
//-----------------------------------------------------------------
|
|
#pragma mark private methods
|
|
|
|
// Finds the CRLF that terminates a boundary line.
//
// offset - position in `data` to start searching from.
// data   - the bytes being parsed.
//
// Returns the offset of the first byte after the CRLF, or -1 when no complete
// CRLF is present at or after `offset`.
- (int) offsetTillNewlineSinceOffset:(int) offset inData:(NSData*) data {
    const char* bytes = (const char*) data.bytes;
    NSUInteger length = data.length;

    if( offset < 0 ) {
        return -1;
    }

    // A CRLF needs two bytes, so stop while two bytes are still available.
    // Written as `position + 1 < length` so that empty data can't underflow.
    for( NSUInteger position = (NSUInteger) offset; position + 1 < length; position++ ) {
        if( bytes[position] == '\r' && bytes[position + 1] == '\n' ) {
            return (int)(position + 2);
        }

        // The boundary line might have any number of whitespaces before CRLF,
        // according to rfc2046. In debug, we might also want to know if the
        // input is somehow misformatted.
#ifdef DEBUG
        if( !isspace((unsigned char) bytes[position]) ) {
            HTTPLogWarn(@"MultipartFormDataParser: Warning, non-whitespace character '%c' between boundary bytes and CRLF in boundary line",bytes[position] );
        }
        if( !isspace((unsigned char) bytes[position + 1]) ) {
            HTTPLogWarn(@"MultipartFormDataParser: Warning, non-whitespace character '%c' between boundary bytes and CRLF in boundary line",bytes[position + 1] );
        }
#endif
    }

    // no endl found within current data
    return -1;
}
|
|
|
|
|
|
// Consumes the preamble, i.e. everything in front of the first boundary.
//
// The first boundary is searched without its leading CRLF. Bytes in front of
// it are passed to the delegate's optional processPreambleData:.
//
// Returns the offset of the first byte after the boundary when it was found
// (processedPreamble and waitingForCRLF are raised in that case), or -1 when
// the boundary is not contained in `data` yet. In the latter case the last
// boundary-length bytes are kept in pendingData, because they may hold the
// beginning of a boundary that continues in the next chunk.
- (int) processPreamble:(NSData*) data {
    // skip the first two bytes: the first boundary won't have CRLF preceding.
    const char* boundaryBytes = (const char*) boundaryData.bytes + 2;
    NSUInteger boundaryLength = boundaryData.length - 2;

    const char* dataBytes = (const char*) data.bytes;
    NSUInteger dataLength = data.length;

    NSUInteger offset = 0;
    BOOL found = NO;

    // Only search when a whole boundary fits; this also keeps the unsigned
    // subtraction below from wrapping around.
    if( dataLength >= boundaryLength ) {
        NSUInteger lastCandidate = dataLength - boundaryLength;
        for( offset = 0; offset <= lastCandidate; offset++ ) {
            NSUInteger matched = 0;
            while( matched < boundaryLength && boundaryBytes[matched] == dataBytes[offset + matched] ) {
                matched++;
            }
            if( matched == boundaryLength ) {
                found = YES;
                break;
            }
        }
    }

    if( !found ) {
        // the end of preamble wasn't found in this chunk
        NSUInteger sizeToKeep = ( dataLength < boundaryLength ) ? dataLength : boundaryLength;
        NSUInteger sizeToProcess = dataLength - sizeToKeep;

        if( sizeToProcess > 0 && [delegate respondsToSelector:@selector(processPreambleData:)] ) {
            NSData* preambleData = [NSData dataWithBytesNoCopy:(char*) dataBytes length:sizeToProcess freeWhenDone:NO];
            [delegate processPreambleData:preambleData];
            HTTPLogVerbose(@"MultipartFormDataParser: processed preamble");
        }
        // `data` may be pendingData itself; dataWithBytes: copies the tail
        // before the ivar is replaced.
        pendingData = [NSMutableData dataWithBytes:dataBytes + sizeToProcess length:sizeToKeep];
        return -1;
    }

    if( offset && [delegate respondsToSelector:@selector(processPreambleData:)] ) {
        NSData* preambleData = [NSData dataWithBytesNoCopy:(char*) dataBytes length:offset freeWhenDone:NO];
        [delegate processPreambleData:preambleData];
    }

    // tells the caller to skip CRLF after the boundary.
    processedPreamble = YES;
    waitingForCRLF = YES;

    return (int)(offset + boundaryLength);
}
|
|
|
|
|
|
|
|
// Looks for the end of a part header.
//
// workingData - the bytes being parsed.
// offset      - position to start searching from.
//
// Returns the offset of the first CRLFCRLF sequence at or after `offset`,
// or -1 when the sequence is not (completely) contained in the data yet.
- (int) findHeaderEnd:(NSData*) workingData fromOffset:(int)offset {
    const char* bytes = (const char*) workingData.bytes;
    NSUInteger inputLength = workingData.length;

    if( offset < 0 ) {
        return -1;
    }

    // Four bytes are inspected per position, so the last valid position is
    // inputLength - 4. Bytes are compared one by one: no unaligned 16-bit
    // loads and no dependency on byte order.
    for( NSUInteger position = (NSUInteger) offset; position + 4 <= inputLength; position++ ) {
        if( bytes[position]     == '\r' && bytes[position + 1] == '\n' &&
            bytes[position + 2] == '\r' && bytes[position + 3] == '\n' ) {
            return (int) position;
        }
    }

    // wait for more data
    return -1;
}
|
|
|
|
|
|
// Looks for the boundary delimiter (CRLF "--" boundary) that ends the
// content of the current part.
//
// data   - the bytes being parsed.
// offset - position to start searching from.
//
// Returns the offset at which the delimiter starts, or -1 when `data` does
// not contain a complete delimiter at or after `offset`.
- (int) findContentEnd:(NSData*) data fromOffset:(int) offset {
    NSUInteger boundaryLength = boundaryData.length;
    NSUInteger dataLength = data.length;

    if( offset < 0 || 0 == boundaryLength ) {
        return -1;
    }

    NSUInteger start = (NSUInteger) offset;
    // Checked explicitly: the unsigned differences below must not wrap around
    // when fewer bytes than a whole delimiter are left.
    if( start > dataLength || dataLength - start < boundaryLength ) {
        return -1;
    }

    NSRange hit = [data rangeOfData:boundaryData options:0 range:NSMakeRange(start, dataLength - start)];
    if( NSNotFound == hit.location ) {
        return -1;
    }
    return (int) hit.location;
}
|
|
|
|
|
|
// Tells how many of the last bytes of a content chunk must be held back so
// that BASE64 or Quoted-Printable encoded data is not cut inside an encoded
// token.
//
// data     - buffer whose first `length` bytes are about to be passed on.
// length   - number of bytes of `data` considered.
// encoding - content-transfer-encoding constant of the current part.
//
// Returns the number of trailing bytes to leave pending; 0 for encodings
// other than base64 and quoted-printable.
//
// NOTE(review): the private category declares `length` as NSUInteger while
// this implementation takes `int`; the signature is left as it was.
- (int) numberOfBytesToLeavePendingWithData:(NSData*) data length:(int) length encoding:(int) encoding {
    // never look past the bytes that actually back `data`
    if( length > 0 && (NSUInteger) length > data.length ) {
        length = (int) data.length;
    }

    if( encoding == contentTransferEncoding_base64 ) {
        if( length <= 0 ) {
            return 0;
        }
        const char* bytes = (const char*) data.bytes;

        // Walk backwards to the last CRLF; the current base64 line starts
        // right after it (or at the beginning of the data if there is none).
        int lineStart = 0;
        for( int i = length - 2; i >= 0; i-- ) {
            if( bytes[i] == '\r' && bytes[i + 1] == '\n' ) {
                lineStart = i + 2;
                break;
            }
        }

        // The length of the data passed since the last line start has to be
        // a multiple of 4 (one base64 quantum); the remainder stays pending.
        return (length - lineStart) & 0x3;
    }

    if( encoding == contentTransferEncoding_quotedPrintable ) {
        // we don't pass less than 3 bytes anyway.
        if( length <= 2 )
            return length;

        // check whether the last bytes are the start of an encoded symbol ("=XY").
        const char* bytes = (const char*) data.bytes + length - 2;
        if( bytes[0] == '=' )
            return 2;
        if( bytes[1] == '=' )
            return 1;
        return 0;
    }

    return 0;
}
|
|
|
|
|
|
//-----------------------------------------------------------------
|
|
#pragma mark decoding
|
|
|
|
|
|
// Decodes a chunk of part content according to its content-transfer-encoding.
//
// data     - the encoded bytes.
// encoding - one of the contentTransferEncoding_* constants.
//
// Returns the base64- or quoted-printable-decoded bytes for those two
// encodings; for any other encoding `data` itself is returned.
+ (NSData*) decodedDataFromData:(NSData*) data encoding:(int) encoding {
    if( contentTransferEncoding_base64 == encoding ) {
        return [data base64Decoded];
    }

    if( contentTransferEncoding_quotedPrintable == encoding ) {
        return [self decodedDataFromQuotedPrintableData:data];
    }

    // nothing to decode
    return data;
}
|
|
|
|
|
|
// Value of an ASCII hexadecimal digit, or -1 when `c` is not one.
static inline int MultipartFormDataParserHexDigitValue(unsigned char c) {
    if( c >= '0' && c <= '9' ) return c - '0';
    if( c >= 'A' && c <= 'F' ) return c - 'A' + 10;
    if( c >= 'a' && c <= 'f' ) return c - 'a' + 10;
    return -1;
}

// Decodes Quoted-Printable data.
// http://tools.ietf.org/html/rfc2045#section-6.7
//
// "=XY" (two hex digits) becomes the byte 0xXY; "=" followed by a line break
// is a soft line break and is removed; all other bytes are copied unchanged.
// An "=" that is followed by neither is copied literally.
//
// Returns a new data object with the decoded bytes.
+ (NSData*) decodedDataFromQuotedPrintableData:(NSData *)data {
    const unsigned char* bytes = (const unsigned char*) data.bytes;
    NSUInteger length = data.length;

    // capacity only: the result must start out empty
    NSMutableData* result = [NSMutableData dataWithCapacity:length];

    NSUInteger literalStart = 0;    // start of the run of bytes copied as they are
    NSUInteger position = 0;

    while( position < length ) {
        if( bytes[position] != '=' ) {
#ifdef DEBUG
            if( bytes[position] > 126 ) {
                HTTPLogWarn(@"MultipartFormDataParser: Warning, character with code above 126 appears in quoted printable encoded data");
            }
#endif
            position++;
            continue;
        }

        // flush the literal bytes collected in front of the '='
        if( position > literalStart ) {
            [result appendBytes:bytes + literalStart length:position - literalStart];
        }

        NSUInteger remaining = length - position - 1;   // bytes after the '='

        if( remaining >= 1 && bytes[position + 1] == '\r' ) {
            // soft newline: drop "=", CR and the LF if it is there
            position += 2;
            if( position < length && bytes[position] == '\n' ) {
                position++;
            }
            literalStart = position;
            continue;
        }

        int high = ( remaining >= 2 ) ? MultipartFormDataParserHexDigitValue(bytes[position + 1]) : -1;
        int low  = ( remaining >= 2 ) ? MultipartFormDataParserHexDigitValue(bytes[position + 2]) : -1;

        if( high >= 0 && low >= 0 ) {
            unsigned char decodedByte = (unsigned char)((high << 4) | low);
            [result appendBytes:&decodedByte length:1];
            position += 3;
            literalStart = position;
            continue;
        }

        if( remaining < 2 ) {
            HTTPLogWarn(@"MultipartFormDataParser: warning, trailing '=' in quoted printable data");
        }
        // Not a valid escape: keep the '=' as a literal byte by starting the
        // next literal run at it.
        literalStart = position;
        position++;
    }

    // the bytes after the last escape sequence
    if( length > literalStart ) {
        [result appendBytes:bytes + literalStart length:length - literalStart];
    }

    return result;
}
|
|
|
|
|
|
@end
|