//
//  ICeCoffEEParser.m
//  ICeCoffEE
//
//  Created by Nicholas Riley on 6/21/07.
//  Copyright 2007 Nicholas Riley. All rights reserved.
//

#import "ICeCoffEEParser.h"
#import "ICeCoffEE.h"

// Returns (by reference) the character sets that terminate a URL on its left
// and right side.  Both sets are built on first use and cached in function
// statics; the returned objects are owned by this function.
void ICCF_Delimiters(NSCharacterSet **leftPtr, NSCharacterSet **rightPtr) {
    static NSCharacterSet *urlLeftDelimiters = nil, *urlRightDelimiters = nil;

    if (urlLeftDelimiters == nil || urlRightDelimiters == nil) {
        // drop whichever half may already exist; messaging nil is harmless
        [urlLeftDelimiters release];
        [urlRightDelimiters release];

        // characters that delimit a URL on either side
        NSMutableCharacterSet *commonDelimiters =
            [[[NSCharacterSet whitespaceAndNewlineCharacterSet] mutableCopy] autorelease];

        // nonprintable and non-ASCII characters: everything outside 0x21...0x7e
        NSCharacterSet *printableASCII = [NSCharacterSet characterSetWithRange: NSMakeRange(0x21, 0x5e)];
        [commonDelimiters formUnionWithCharacterSet: [printableASCII invertedSet]];

        [commonDelimiters formUnionWithCharacterSet: [NSCharacterSet punctuationCharacterSet]];
        // XXX obsoleted by RFC 3986 now... use §2.1, 2.2, 2.3
        // RFC 2396 §2.2, 2.3, 2.4, plus % and # from "delims" set and {}, []
        [commonDelimiters removeCharactersInString: @";/?:@&=+$,-_.!~*'(){}[]%#"];

        // left side: common delimiters plus opening brackets
        NSMutableCharacterSet *leftSet = [[commonDelimiters mutableCopy] autorelease];
        [leftSet addCharactersInString: @"<(["];
        urlLeftDelimiters = [leftSet copy]; // make immutable again - for efficiency

        // right side: common delimiters plus closing brackets
        NSMutableCharacterSet *rightSet = [[commonDelimiters mutableCopy] autorelease];
        [rightSet addCharactersInString: @">)]"];
        urlRightDelimiters = [rightSet copy]; // make immutable again - for efficiency
    }

    *leftPtr = urlLeftDelimiters;
    *rightPtr = urlRightDelimiters;
}

// The Internet Config instance shared by this file; NULL while not started.
static ICInstance ICCF_icInst = NULL;

// Starts Internet Config and stores the instance in ICCF_icInst.  If an
// instance already exists it is logged and stopped first.  Asserts if
// ICStart reports an error.
void ICCF_StartIC() {
    if (ICCF_icInst != NULL) {
        ICLog(@"ICCF_StartIC: Internet Config is already running!");
        ICCF_StopIC();
    }

    OSStatus err = ICStart(&ICCF_icInst, kICCFCreator);
    NSCAssert1(err == noErr, ICCF_LocalizedString(@"Unable to start Internet Config (error %d)"), err);
}

// Stops the Internet Config instance and clears ICCF_icInst; only logs a
// message if no instance exists.
void ICCF_StopIC() {
    if (ICCF_icInst != NULL) {
        ICStop(ICCF_icInst);
        ICCF_icInst = NULL;
        return;
    }

    ICLog(@"ICCF_StopIC: Internet Config is not running!");
}
// Returns the Internet Config instance created by ICCF_StartIC; asserts if
// there is none.
ICInstance ICCF_GetInst() {
    NSCAssert(ICCF_icInst != NULL, @"Internal error: Called ICCF_GetInst without ICCF_StartIC");
    return ICCF_icInst;
}

// Trims trailing punctuation, leading non-alphanumerics and matched enclosing
// brackets from 'string', then hands the remainder to ICParseURL.
//
// input/output 'range' is the range of source document which contains 'string';
// on return it is narrowed to the URL bounds reported by ICParseURL.
// Failures are reported through NSCAssert / ICCF_OSErrCAssert.
void ICCF_ParseURL(NSString *string, NSRange *range) {
    OSStatus err;
    Handle h;
    long selStart = 0, selEnd = range->length; // local offsets within 'string'
    long trimmedStart, trimmedLength;          // where the trimmed text sits within the original 'string'
    char *urlData = NULL;

    NSCAssert(selEnd == [string length], @"Internal error: URL string is wrong length");

    @try {
        // an empty string would make the characterAtIndex: calls below raise a range exception
        NSCAssert(selEnd > 0, @"No URL is selected");

        if ([[NSCharacterSet characterSetWithCharactersInString: @";,.!"] characterIsMember:
                [string characterAtIndex: selEnd - 1]]) {
            selEnd--;
        }

        // the string may have consisted solely of trailing punctuation
        NSCAssert(selStart < selEnd, @"No URL is selected");

        NSCharacterSet *alphanumericCharacterSet = [NSCharacterSet alphanumericCharacterSet];
        unichar opening, closing;
        while (![alphanumericCharacterSet characterIsMember: (opening = [string characterAtIndex: selStart])]) {
            closing = [string characterAtIndex: selEnd - 1];
            // drop the closing bracket only when it matches the opening one being skipped
            if ((opening == '(' && closing == ')') ||
                (opening == '{' && closing == '}') ||
                (opening == '[' && closing == ']')) {
                selEnd--;
            }
            selStart++;
            NSCAssert(selStart < selEnd, @"No URL is selected");
        }

        trimmedStart = selStart;
        trimmedLength = selEnd - selStart;
        string = [string substringWithRange: NSMakeRange(trimmedStart, trimmedLength)];

        ICLog(@"Parsing URL |%@|", string);

        NSCAssert([string canBeConvertedToEncoding: NSASCIIStringEncoding], @"No URL is selected");

        urlData = (char *)malloc( (trimmedLength + 1) * sizeof(char));
        NSCAssert(urlData != NULL, @"Internal error: can't allocate memory for URL string");

        // XXX getCString: is deprecated in 10.4, but this is safe and shouldn't assert because we've already verified the string can be converted to ASCII, which should be a subset of any possible system encoding.  The replacement (getCString:maxLength:encoding:) is not available until 10.4, so we leave this until we dump Internet Config and gain IDN friendliness.
        [string getCString: urlData];

        h = NewHandle(0);
        NSCAssert(h != NULL, @"Internal error: can't allocate URL handle");

        // urlData holds only the trimmed text, so the length and selection passed
        // to ICParseURL must be expressed relative to that buffer, not to the
        // untrimmed string (which would select past the terminating NUL).
        selStart = 0;
        selEnd = trimmedLength;
        err = ICParseURL(ICCF_GetInst(), "\pmailto", urlData, trimmedLength, &selStart, &selEnd, h);
        DisposeHandle(h);

        ICCF_OSErrCAssert(err, @"ICParseURL");

        // translate the bounds within the trimmed buffer back to document offsets
        range->length = selEnd - selStart;
        range->location += trimmedStart + selStart;
    } @finally {
        free(urlData); // free(NULL) is a no-op if we never got as far as allocating
    }
}

// Returns YES if 'character' occurs in 's' within 'range'.
static BOOL ICCF_StringIncludesCharacter(NSString *s, unichar character, NSRange range) {
    NSRange result = [s rangeOfCharacterFromSet:
                        [NSCharacterSet characterSetWithCharactersInString:
                            [NSString stringWithCharacters: &character length: 1]]
                                        options: NSLiteralSearch
                                          range: range];
    return (result.location != NSNotFound);
}

// Returns YES if 'range' of 's' contains any of the characters : / . @
static BOOL ICCF_IsLikelyURI(NSString *s, NSRange range) {
    return ([s rangeOfCharacterFromSet: [NSCharacterSet characterSetWithCharactersInString: @":/.@"]
                               options: NSLiteralSearch
                                 range: range].location != NSNotFound);
}

// Returns YES if 'range' of 's' consists solely of hexadecimal digits and colons
// (vacuously YES for an empty range).
static BOOL ICCF_IsLikelyIPv6Address(NSString *s, NSRange range) {
    return ([s rangeOfCharacterFromSet:
                [[NSCharacterSet characterSetWithCharactersInString: @"ABCDEFabcdef0123456789:"] invertedSet]
                               options: NSLiteralSearch
                                 range: range].location == NSNotFound);
}

// Expands 'range' (a selection or insertion point within 's') outward to the
// nearest URL delimiters on each side, with special handling for <...> and for
// paired (), {} and [], then passes the candidate text to ICCF_ParseURL and
// returns the range it produces.
NSRange ICCF_URLEnclosingRange(NSString *s, NSRange range) {
    NSCharacterSet *urlLeftDelimiters = nil, *urlRightDelimiters = nil;
    NSRange delimiterRange;
    unsigned extraLen;
    BOOL multiLine = NO;

    ICCF_CheckRange(range);

    ICCF_Delimiters(&urlLeftDelimiters, &urlRightDelimiters);

    // right delimiter selected?  Yes, this can break with ...)URL(....  Oh well.
    // (the upper bound check keeps characterAtIndex: from raising when the range starts at the end of the string)
    if (range.location > 0 && range.location < [s length] &&
        [urlRightDelimiters characterIsMember: [s characterAtIndex: range.location]]) {
        --range.location;
        ++range.length;
        ICLog(@"expanding past initial %c, now |%@|", [s characterAtIndex: range.location + 1], [s substringWithRange: range]);
    }

expandFront:
    // XXX instead of 0, make this stop at the max URL length to prevent protracted searches
    // XXX backport to ICeCoffEETerminal
    // add 1 to range to trap delimiters that are on the edge of the selection (i.e., <...)
    delimiterRange = [s rangeOfCharacterFromSet: urlLeftDelimiters
                                        options: NSLiteralSearch | NSBackwardsSearch
                                          range: NSMakeRange(0, range.location + (range.location != [s length]))];
    if (delimiterRange.location == NSNotFound) {
        // extend to beginning of string
        range.length += range.location;
        range.location = 0;
    } else {
        NSCAssert(delimiterRange.length == 1, @"Internal error: delimiter matched range is not of length 1");
        if ([s characterAtIndex: delimiterRange.location] == '<') {
            // XXX move to expandBoth to handle clicking in middle
            multiLine = YES;
            urlRightDelimiters = [NSCharacterSet characterSetWithCharactersInString: @">"];
        }
        range.length += range.location - delimiterRange.location - 1;
        range.location = delimiterRange.location + 1;
    }

expandBack:
    // XXX instead of length of string, make this stop at the max URL length to prevent protracted searches
    // add 1 to range to trap delimiters that are on the edge of the selection (i.e., ...>)
    extraLen = [s length] - range.location - range.length;
    delimiterRange = [s rangeOfCharacterFromSet: urlRightDelimiters
                                        options: NSLiteralSearch
                                          range: NSMakeRange(range.location + range.length - (range.length != 0),
                                                             extraLen + (range.length != 0))];
    if (delimiterRange.location == NSNotFound) {
        // extend to end of string
        range.length += extraLen;
        extraLen = 0;
    } else {
        NSCAssert(delimiterRange.length == 1, @"Internal error: delimiter matched range is not of length 1");
        // make the range end just before the delimiter (unsigned arithmetic; may shrink by one)
        range.length += delimiterRange.location - range.location - range.length;
        extraLen = [s length] - NSMaxRange(range);

        unichar opening, closing = [s characterAtIndex: delimiterRange.location];

        if (closing == '>' && !multiLine && ICCF_StringIncludesCharacter(s, '<', NSMakeRange(0, range.location))) {
            urlLeftDelimiters = [NSCharacterSet characterSetWithCharactersInString: @"<"];
            goto expandFront; // XXX move to expandBoth to handle clicking in middle
        }

        // grow URL past closing paren/brace/bracket if we've seen an open paren/brace/bracket
        if (closing == ')') opening = '(';
        else if (closing == '}') opening = '{';
        else if (closing == ']') opening = '[';
        else goto expandBoth;

        if (!ICCF_StringIncludesCharacter(s, opening, range))
            goto expandBoth;

        if (extraLen == 1) {
            range.length += 1;
            --extraLen;
            ICLog(@"expanding past %c, now |%@|", closing, [s substringWithRange: range]);
        } else {
            range.length += 2;
            ICLog(@"expanding past %c, now |%@|", closing, [s substringWithRange: range]);
            goto expandBack;
        }
    }

expandBoth:
    if (range.location <= 1) goto checkRange; // nowhere to expand

    unichar opening = [s characterAtIndex: range.location - 1], closing;
    if (opening == '(') closing = ')';
    else if (opening == '{') closing = '}';
    else if (opening == '[') closing = ']';
    else goto checkRange;

    ICLog(@"extraLen = %d", extraLen);

    // check if we're inside a partial delimited URL: not foolproof, but handles (foo), {UUID} and [IPv6]
    if (delimiterRange.location != NSNotFound && [s characterAtIndex: delimiterRange.location] == closing &&
        ((opening == '[' && ICCF_IsLikelyIPv6Address(s, range)) || !ICCF_IsLikelyURI(s, range))) {
        ICLog(@"expanding past %c...%c, was |%@|", opening, closing, [s substringWithRange: range]);
        range.location -= 2;
        if (extraLen > 1)
            range.length += 4;
        else
            range.length += 2 + extraLen;
        ICLog(@"expanding past %c...%c, now |%@|", opening, closing, [s substringWithRange: range]);
        goto expandFront;
    }

    // The closing bracket is looked up inside 'range' only: ICCF_StringIncludesCharacter has
    // just confirmed one is there, whereas searching the whole string could find an earlier
    // ']' located before range.location and make the unsigned subtraction below underflow.
    if (ICCF_StringIncludesCharacter(s, closing, range) &&
        ((opening == '[' && ICCF_IsLikelyIPv6Address(s, NSMakeRange(range.location,
              [s rangeOfString: @"]" options: NSLiteralSearch range: range].location - range.location))) ||
         !ICCF_IsLikelyURI(s, range))) {
        range.location -= 2;
        range.length += 2;
        ICLog(@"expanding past %c, now |%@|", opening, [s substringWithRange: range]);
        goto expandFront;
    }

checkRange:
    ICCF_CheckRange(range);

    ICCF_ParseURL([s substringWithRange: range], &range);

    return range;
}