[322] | 1 | //
|
---|
| 2 | // ICeCoffEEParser.m
|
---|
| 3 | // ICeCoffEE
|
---|
| 4 | //
|
---|
| 5 | // Created by Nicholas Riley on 6/21/07.
|
---|
| 6 | // Copyright 2007 Nicholas Riley. All rights reserved.
|
---|
| 7 | //
|
---|
| 8 |
|
---|
| 9 | #import "ICeCoffEEParser.h"
|
---|
| 10 | #import "ICeCoffEE.h"
|
---|
| 11 |
|
---|
// Hands back the character sets that bound a URL: *leftPtr receives the set
// that stops a leftward scan, *rightPtr the set that stops a rightward scan.
// Both sets are built the first time through and kept in function-local
// statics, so the caller must not release them.
void ICCF_Delimiters(NSCharacterSet **leftPtr, NSCharacterSet **rightPtr) {
    static NSCharacterSet *urlLeftDelimiters = nil, *urlRightDelimiters = nil;

    // Fast path: both sets are already cached.
    if (urlLeftDelimiters != nil && urlRightDelimiters != nil) {
        *leftPtr = urlLeftDelimiters;
        *rightPtr = urlRightDelimiters;
        return;
    }

    // Release whatever is currently cached before rebuilding (a no-op for nil).
    [urlLeftDelimiters release];
    [urlRightDelimiters release];

    // Characters that end a URL on either side: whitespace, everything outside
    // 0x21-0x7e, and punctuation...
    NSMutableCharacterSet *sharedDelimiters =
        [[[NSCharacterSet whitespaceAndNewlineCharacterSet] mutableCopy] autorelease];
    NSCharacterSet *printableASCII = [NSCharacterSet characterSetWithRange: NSMakeRange(0x21, 0x5e)];
    [sharedDelimiters formUnionWithCharacterSet: [printableASCII invertedSet]]; // nonprintable and non-ASCII characters
    [sharedDelimiters formUnionWithCharacterSet: [NSCharacterSet punctuationCharacterSet]];
    // ...minus the characters that may legitimately appear inside a URL.
    // XXX obsoleted by RFC 3986 now... use §2.1, 2.2, 2.3
    [sharedDelimiters removeCharactersInString: @";/?:@&=+$,-_.!~*'(){}[]%#"]; // RFC 2396 §2.2, 2.3, 2.4, plus % and # from "delims" set and {}, []

    // Opening brackets only delimit on the left...
    NSMutableCharacterSet *leftSide = [[sharedDelimiters mutableCopy] autorelease];
    [leftSide formUnionWithCharacterSet: [NSCharacterSet characterSetWithCharactersInString: @"<(["]];

    // ...and closing brackets only on the right.
    NSMutableCharacterSet *rightSide = [[sharedDelimiters mutableCopy] autorelease];
    [rightSide formUnionWithCharacterSet: [NSCharacterSet characterSetWithCharactersInString: @">)]"]];

    // Immutable copies are what get cached - for efficiency.
    urlLeftDelimiters = [leftSide copy];
    urlRightDelimiters = [rightSide copy];

    *leftPtr = urlLeftDelimiters;
    *rightPtr = urlRightDelimiters;
}
|
---|
| 38 |
|
---|
// The Internet Config instance shared by the ICCF_*IC functions in this file;
// NULL whenever no instance has been started.
static ICInstance ICCF_icInst = NULL;

// Starts Internet Config and stores the instance in ICCF_icInst.  If an
// instance is already stored, that is logged and the old instance is stopped
// first.  A failing ICStart is reported through NSCAssert1.
void ICCF_StartIC() {
    BOOL alreadyRunning = (ICCF_icInst != NULL);
    if (alreadyRunning) {
        ICLog(@"ICCF_StartIC: Internet Config is already running!");
        ICCF_StopIC();
    }

    OSStatus startStatus = ICStart(&ICCF_icInst, kICCFCreator);
    NSCAssert1(startStatus == noErr, ICCF_LocalizedString(@"Unable to start Internet Config (error %d)"), startStatus);
}
|
---|
| 51 |
|
---|
// Stops the stored Internet Config instance and clears ICCF_icInst.  Calling
// this while no instance is stored only logs a message.
void ICCF_StopIC() {
    if (ICCF_icInst != NULL) {
        ICStop(ICCF_icInst);
        ICCF_icInst = NULL;
        return;
    }

    ICLog(@"ICCF_StopIC: Internet Config is not running!");
}
|
---|
| 60 |
|
---|
// Returns the stored Internet Config instance.  Asserts if ICCF_StartIC has
// not stored one yet.
ICInstance ICCF_GetInst() {
    ICInstance running = ICCF_icInst;

    NSCAssert(running != NULL, @"Internal error: Called ICCF_GetInst without ICCF_StartIC");
    return running;
}
|
---|
| 65 |
|
---|
// Narrows 'range' to the URL that Internet Config recognizes inside 'string'.
//
// string - the candidate text; its length must equal range->length.
// range  - input: the range of the source document which contains 'string';
//          output: the range of the source document covered by the URL.
//
// Before the text is handed to ICParseURL it is trimmed: one trailing ';',
// ',', '.' or '!' is dropped, then leading non-alphanumeric characters are
// skipped.  Whenever a skipped character is '(', '{' or '[' and the current
// last character is the matching closer, that closer is dropped as well.
//
// Every failure (nothing left after trimming, non-ASCII text, allocation
// failure, ICParseURL error) is reported through NSCAssert or
// ICCF_OSErrCAssert; the C buffer is freed on every exit path.
void ICCF_ParseURL(NSString *string, NSRange *range) {
    OSStatus err;
    Handle h;
    ICInstance inst;
    long selStart = 0, selEnd = range->length; // offsets within the untrimmed 'string'
    long trimStart, urlLength;                 // where the trimmed text starts, and how long it is
    char *urlData = NULL;

    NSCAssert(selEnd == [string length], @"Internal error: URL string is wrong length");
    // An empty string has no last character to examine below.
    NSCAssert(selEnd > 0, @"No URL is selected");

    @try {
        if ([[NSCharacterSet characterSetWithCharactersInString: @";,.!"] characterIsMember:
             [string characterAtIndex: selEnd - 1]]) {
            selEnd--;
        }
        // The string may have consisted of nothing but that one punctuation
        // character; the loop below relies on selStart < selEnd when it reads
        // the character at selEnd - 1.
        NSCAssert(selStart < selEnd, @"No URL is selected");

        NSCharacterSet *alphanumericCharacterSet = [NSCharacterSet alphanumericCharacterSet];
        unichar opening, closing;
        while (![alphanumericCharacterSet characterIsMember:
                 (opening = [string characterAtIndex: selStart])]) {
            closing = [string characterAtIndex: selEnd - 1];
            if ((opening == '(' && closing == ')') ||
                (opening == '{' && closing == '}') ||
                (opening == '[' && closing == ']')) {
                selEnd--;
            }
            selStart++;
            NSCAssert(selStart < selEnd, @"No URL is selected");
        }

        // From here on 'string' (and the C buffer made from it) starts at
        // trimStart of the original text, so offsets must be rebased.
        trimStart = selStart;
        urlLength = selEnd - selStart;
        string = [string substringWithRange: NSMakeRange(trimStart, urlLength)];

        ICLog(@"Parsing URL |%@|", string);

        NSCAssert([string canBeConvertedToEncoding: NSASCIIStringEncoding], @"No URL is selected");

        urlData = (char *)malloc((urlLength + 1) * sizeof(char));
        NSCAssert(urlData != NULL, @"Internal error: can't allocate memory for URL string");

        // XXX getCString: is deprecated in 10.4, but this is safe and shouldn't assert because we've already verified the string can be converted to ASCII, which should be a subset of any possible system encoding. The replacement (getCString:maxLength:encoding:) is not available until 10.4, so we leave this until we dump Internet Config and gain IDN friendliness.
        [string getCString: urlData];

        // Fetch the instance before allocating the handle so that an assertion
        // in ICCF_GetInst cannot leak the handle.
        inst = ICCF_GetInst();

        h = NewHandle(0);
        NSCAssert(h != NULL, @"Internal error: can't allocate URL handle");

        // Describe the selection relative to urlData: the whole trimmed text.
        // Passing the untrimmed length/offsets here would make Internet Config
        // look at the terminator and at uninitialized bytes behind it.
        selStart = 0;
        selEnd = urlLength;
        err = ICParseURL(inst, "\pmailto", urlData, urlLength, &selStart, &selEnd, h);
        DisposeHandle(h);

        ICCF_OSErrCAssert(err, @"ICParseURL");

        // selStart/selEnd now bound the URL within the trimmed text; add the
        // skipped prefix back to get document coordinates.
        range->length = selEnd - selStart;
        range->location += trimStart + selStart;
    } @finally {
        free(urlData);
    }
}
|
---|
| 120 |
|
---|
// Returns YES when 'character' occurs somewhere within 'range' of 's'.
static BOOL ICCF_StringIncludesCharacter(NSString *s, unichar character, NSRange range) {
    // Wrap the single character in a character set so the search can be
    // limited to 'range'.
    NSString *wanted = [NSString stringWithCharacters: &character length: 1];
    NSCharacterSet *wantedSet = [NSCharacterSet characterSetWithCharactersInString: wanted];
    NSRange hit = [s rangeOfCharacterFromSet: wantedSet options: NSLiteralSearch range: range];

    if (hit.location == NSNotFound)
        return NO;
    return YES;
}
|
---|
| 127 |
|
---|
// Returns YES when 'range' of 's' contains at least one of ':', '/', '.' or '@'.
static BOOL ICCF_IsLikelyURI(NSString *s, NSRange range) {
    NSCharacterSet *uriMarkers = [NSCharacterSet characterSetWithCharactersInString: @":/.@"];
    NSRange firstMarker = [s rangeOfCharacterFromSet: uriMarkers options: NSLiteralSearch range: range];

    if (firstMarker.location == NSNotFound)
        return NO;
    return YES;
}
|
---|
| 132 |
|
---|
// Returns YES when 'range' of 's' holds nothing but hexadecimal digits and
// colons (an empty range therefore also yields YES).
static BOOL ICCF_IsLikelyIPv6Address(NSString *s, NSRange range) {
    NSCharacterSet *addressCharacters =
        [NSCharacterSet characterSetWithCharactersInString: @"ABCDEFabcdef0123456789:"];
    // Look for the first character that could NOT be part of such an address.
    NSRange offender = [s rangeOfCharacterFromSet: [addressCharacters invertedSet]
                                          options: NSLiteralSearch
                                            range: range];

    if (offender.location != NSNotFound)
        return NO;
    return YES;
}
|
---|
| 138 |
|
---|
// Grows 'range' (a selection or insertion point within 's') outward until it
// covers the surrounding URL candidate, hands that candidate to ICCF_ParseURL
// and returns the resulting range within 's'.
//
// The function is a small state machine driven by gotos:
//   expandFront - search backwards from the selection for a left delimiter
//                 (from ICCF_Delimiters) and move the start just past it; a
//                 '<' switches the right delimiter set to '>' only.
//   expandBack  - search forwards for a right delimiter and move the end up to
//                 it; a closing ')', '}' or ']' whose opener occurs inside the
//                 range is absorbed and the search continues.
//   expandBoth  - if the character before the range is '(', '{' or '[',
//                 decide whether the bracketed text belongs to the URL and, if
//                 so, widen the range and start over at expandFront.
//   checkRange  - validate the range and let ICCF_ParseURL narrow it.
//
// Errors are reported through the assertions in this function and in the
// helpers it calls.
NSRange ICCF_URLEnclosingRange(NSString *s, NSRange range) {
    NSCharacterSet *urlLeftDelimiters = nil, *urlRightDelimiters = nil;
    NSRange delimiterRange;
    unsigned extraLen; // number of characters of 's' that follow 'range'
    BOOL multiLine = NO; // set once a '<' left delimiter has been found

    ICCF_CheckRange(range);

    ICCF_Delimiters(&urlLeftDelimiters, &urlRightDelimiters);

    // right delimiter selected?  Yes, this can break with ...)URL(....  Oh well.
    // range.location may equal [s length] (insertion point at the very end, see
    // the search range in expandFront); there is no character to inspect then.
    if (range.location > 0 && range.location < [s length] &&
        [urlRightDelimiters characterIsMember: [s characterAtIndex: range.location]]) {
        --range.location;
        ++range.length;
        ICLog(@"expanding past initial %c, now |%@|", [s characterAtIndex: range.location + 1],
              [s substringWithRange: range]);
    }

expandFront:
    // XXX instead of 0, make this stop at the max URL length to prevent protracted searches
    // XXX backport to ICeCoffEETerminal
    // add 1 to range to trap delimiters that are on the edge of the selection (i.e., <...)
    delimiterRange = [s rangeOfCharacterFromSet: urlLeftDelimiters
                                        options: NSLiteralSearch | NSBackwardsSearch
                                          range: NSMakeRange(0, range.location + (range.location != [s length]))];
    if (delimiterRange.location == NSNotFound) {
        // extend to beginning of string
        range.length += range.location;
        range.location = 0;
    } else {
        NSCAssert(delimiterRange.length == 1, @"Internal error: delimiter matched range is not of length 1");
        if ([s characterAtIndex: delimiterRange.location] == '<') { // XXX move to expandBoth to handle clicking in middle
            multiLine = YES;
            urlRightDelimiters = [NSCharacterSet characterSetWithCharactersInString: @">"];
        }
        // start the range on the character just after the delimiter
        range.length += range.location - delimiterRange.location - 1;
        range.location = delimiterRange.location + 1;
    }


expandBack:
    // XXX instead of length of string, make this stop at the max URL length to prevent protracted searches
    // add 1 to range to trap delimiters that are on the edge of the selection (i.e., ...>)
    extraLen = [s length] - range.location - range.length;
    delimiterRange = [s rangeOfCharacterFromSet: urlRightDelimiters
                                        options: NSLiteralSearch
                                          range: NSMakeRange(range.location + range.length - (range.length != 0),
                                                             extraLen + (range.length != 0))];
    if (delimiterRange.location == NSNotFound) {
        // extend to end of string
        range.length += extraLen;
        extraLen = 0;
    } else {
        NSCAssert(delimiterRange.length == 1, @"Internal error: delimiter matched range is not of length 1");
        // end the range on the character just before the delimiter
        range.length += delimiterRange.location - range.location - range.length;
        extraLen = [s length] - NSMaxRange(range);

        unichar opening, closing = [s characterAtIndex: delimiterRange.location];
        // stopped at '>' without having started from '<': if a '<' exists
        // earlier in the text, redo the front search looking only for '<'
        if (closing == '>' && !multiLine && ICCF_StringIncludesCharacter(s, '<', NSMakeRange(0, range.location))) {
            urlLeftDelimiters = [NSCharacterSet characterSetWithCharactersInString: @"<"];
            goto expandFront; // XXX move to expandBoth to handle clicking in middle
        }
        // grow URL past closing paren/brace/bracket if we've seen an open paren/brace/bracket
        if (closing == ')') opening = '(';
        else if (closing == '}') opening = '{';
        else if (closing == ']') opening = '[';
        else goto expandBoth;
        if (!ICCF_StringIncludesCharacter(s, opening, range))
            goto expandBoth;

        if (extraLen == 1) {
            // the closer is the last character of the text: include it and stop
            range.length += 1;
            --extraLen;
            ICLog(@"expanding past %c, now |%@|", closing, [s substringWithRange: range]);
        } else {
            // include the closer and the character after it, then keep searching
            range.length += 2;
            ICLog(@"expanding past %c, now |%@|", closing, [s substringWithRange: range]);
            goto expandBack;
        }
    }

expandBoth:
    if (range.location <= 1)
        goto checkRange; // nowhere to expand
    unichar opening = [s characterAtIndex: range.location - 1], closing;
    if (opening == '(') closing = ')';
    else if (opening == '{') closing = '}';
    else if (opening == '[') closing = ']';
    else goto checkRange;

    ICLog(@"extraLen = %d", extraLen);
    // check if we're inside a partial delimited URL: not foolproof, but handles (foo), {UUID} and [IPv6]
    if (delimiterRange.location != NSNotFound && [s characterAtIndex: delimiterRange.location] == closing &&
        ((opening == '[' && ICCF_IsLikelyIPv6Address(s, range)) || !ICCF_IsLikelyURI(s, range))) {
        ICLog(@"expanding past %c...%c, was |%@|", opening, closing, [s substringWithRange: range]);
        // take in the opener plus one character before it, and the closer plus
        // (when available) one character after it
        range.location -= 2;
        if (extraLen > 1)
            range.length += 4;
        else
            range.length += 2 + extraLen;
        ICLog(@"expanding past %c...%c, now |%@|", opening, closing, [s substringWithRange: range]);
        goto expandFront;
    }

    // the matching closer lies inside the range: take in the opener if the
    // text up to ']' looks like an IPv6 address, or the range does not look
    // like a URI on its own
    if (ICCF_StringIncludesCharacter(s, closing, range) &&
        ((opening == '[' &&
          ICCF_IsLikelyIPv6Address(s,
                                   NSMakeRange(range.location,
                                               [s rangeOfString: @"]" options: NSLiteralSearch range: range].location -
                                               range.location)))
         || !ICCF_IsLikelyURI(s, range))) {
        range.location -= 2;
        range.length += 2;
        ICLog(@"expanding past %c, now |%@|", opening, [s substringWithRange: range]);
        goto expandFront;
    }

checkRange:
    ICCF_CheckRange(range);

    ICCF_ParseURL([s substringWithRange: range], &range);

    return range;
}
---|