source: trunk/ICeCoffEE/ICeCoffEE/ICeCoffEEParser.m@ 474

Last change on this file since 474 was 474, checked in by Nicholas Riley, 16 years ago

Only scan for closing bracket after opening bracket in IPv6 addresses (duh).

File size: 10.3 KB
RevLine 
[322]1//
2// ICeCoffEEParser.m
3// ICeCoffEE
4//
5// Created by Nicholas Riley on 6/21/07.
6// Copyright 2007 Nicholas Riley. All rights reserved.
7//
8
9#import "ICeCoffEEParser.h"
10#import "ICeCoffEE.h"
11
// Hands back the character sets that bound a URL on its left and on its right.
// Both sets are built on first use and cached in function-level statics; the
// caller does not own the returned sets.
//
// leftPtr  - out: characters that end the backwards scan for a URL's start
// rightPtr - out: characters that end the forwards scan for a URL's end
void ICCF_Delimiters(NSCharacterSet **leftPtr, NSCharacterSet **rightPtr) {
    static NSCharacterSet *cachedLeft = nil;
    static NSCharacterSet *cachedRight = nil;

    // fast path: both sets were built by an earlier call
    if (cachedLeft != nil && cachedRight != nil) {
        *leftPtr = cachedLeft;
        *rightPtr = cachedRight;
        return;
    }

    // characters that delimit a URL on either side
    NSMutableCharacterSet *shared =
        [[[NSCharacterSet whitespaceAndNewlineCharacterSet] mutableCopy] autorelease];

    // discard a half-built cache (messaging nil is harmless)
    [cachedLeft release];
    [cachedRight release];

    // nonprintable and non-ASCII characters: everything outside 0x21...0x7e
    NSCharacterSet *printableASCII = [NSCharacterSet characterSetWithRange: NSMakeRange(0x21, 0x5e)];
    [shared formUnionWithCharacterSet: [printableASCII invertedSet]];
    [shared formUnionWithCharacterSet: [NSCharacterSet punctuationCharacterSet]];
    // XXX obsoleted by RFC 3986 now... use §2.1, 2.2, 2.3
    // RFC 2396 §2.2, 2.3, 2.4, plus % and # from "delims" set and {}, []
    [shared removeCharactersInString: @";/?:@&=+$,-_.!~*'(){}[]%#"];

    // opening brackets only delimit on the left...
    NSMutableCharacterSet *leftBuilder = [[shared mutableCopy] autorelease];
    [leftBuilder addCharactersInString: @"<(["];
    cachedLeft = [leftBuilder copy]; // immutable copy - for efficiency

    // ...and closing brackets only on the right
    NSMutableCharacterSet *rightBuilder = [[shared mutableCopy] autorelease];
    [rightBuilder addCharactersInString: @">)]"];
    cachedRight = [rightBuilder copy]; // immutable copy - for efficiency

    *leftPtr = cachedLeft;
    *rightPtr = cachedRight;
}
38
// The Internet Config instance shared by this file: set by ICCF_StartIC,
// reset to NULL by ICCF_StopIC, handed out by ICCF_GetInst.
static ICInstance ICCF_icInst = NULL;
40
// Starts an Internet Config session and stores it in ICCF_icInst.
// A session that is still open is logged and stopped first.
// Asserts if ICStart reports an error.
void ICCF_StartIC() {
    const BOOL alreadyRunning = (ICCF_icInst != NULL);

    if (alreadyRunning) {
        ICLog(@"ICCF_StartIC: Internet Config is already running!");
        ICCF_StopIC();
    }

    OSStatus startStatus = ICStart(&ICCF_icInst, kICCFCreator);
    NSCAssert1(startStatus == noErr,
               ICCF_LocalizedString(@"Unable to start Internet Config (error %d)"), startStatus);
}
51
// Stops the Internet Config session held in ICCF_icInst and clears it.
// Only logs a message when no session is open.
void ICCF_StopIC() {
    if (ICCF_icInst != NULL) {
        ICStop(ICCF_icInst);
        ICCF_icInst = NULL;
        return;
    }

    ICLog(@"ICCF_StopIC: Internet Config is not running!");
}
60
// Returns the current Internet Config session.
// Asserts if no session has been started.
ICInstance ICCF_GetInst() {
    ICInstance currentInstance = ICCF_icInst;

    NSCAssert(currentInstance != NULL, @"Internal error: Called ICCF_GetInst without ICCF_StartIC");

    return currentInstance;
}
65
// Trims a candidate URL and has Internet Config confirm and delimit it.
//
// string - the candidate text; must be exactly range->length characters long
// range  - input: the range of the source document which contains 'string'
//          output: the range of the source document which contains the URL
//
// One trailing ';', ',', '.' or '!' is dropped, then leading non-alphanumeric
// characters are skipped (dropping a matching closing bracket from the end for
// each opening bracket skipped).  What remains must be pure ASCII and is handed
// to ICParseURL.  Failures are reported through NSCAssert / ICCF_OSErrCAssert.
void ICCF_ParseURL(NSString *string, NSRange *range) {
    OSStatus err;
    Handle h;
    long selStart = 0, selEnd = range->length; // local offsets within the untrimmed 'string'
    long urlLength;                            // length of the trimmed text given to Internet Config
    long urlStart, urlEnd;                     // local offsets within the trimmed text
    char *urlData = NULL;

    NSCAssert(selEnd == [string length], @"Internal error: URL string is wrong length");
    // an empty string has no last character to look at below
    NSCAssert(selEnd > 0, @"No URL is selected");

    @try {
        if ([[NSCharacterSet characterSetWithCharactersInString: @";,.!"] characterIsMember:
             [string characterAtIndex: selEnd - 1]]) {
            selEnd--;
        }
        // the trailing punctuation may have been all there was; the loop below
        // indexes selEnd - 1, so it needs at least one character left
        NSCAssert(selStart < selEnd, @"No URL is selected");

        NSCharacterSet *alphanumericCharacterSet = [NSCharacterSet alphanumericCharacterSet];
        unichar opening, closing;
        while (![alphanumericCharacterSet characterIsMember:
                 (opening = [string characterAtIndex: selStart])]) {
            closing = [string characterAtIndex: selEnd - 1];
            // skipping an opening bracket also drops its partner from the end
            if ((opening == '(' && closing == ')') ||
                (opening == '{' && closing == '}') ||
                (opening == '[' && closing == ']')) {
                selEnd--;
            }
            selStart++;
            NSCAssert(selStart < selEnd, @"No URL is selected");
        }

        urlLength = selEnd - selStart;
        string = [string substringWithRange: NSMakeRange(selStart, urlLength)];

        ICLog(@"Parsing URL |%@|", string);

        NSCAssert([string canBeConvertedToEncoding: NSASCIIStringEncoding], @"No URL is selected");

        urlData = (char *)malloc((urlLength + 1) * sizeof(char));
        NSCAssert(urlData != NULL, @"Internal error: can't allocate memory for URL string");

        // XXX getCString: is deprecated in 10.4, but this is safe and shouldn't assert because we've already verified the string can be converted to ASCII, which should be a subset of any possible system encoding.  The replacement (getCString:maxLength:encoding:) is not available until 10.4, so we leave this until we dump Internet Config and gain IDN friendliness.
        [string getCString: urlData];

        // fetch the instance before allocating the handle: ICCF_GetInst asserts
        // when Internet Config is not running, and the handle must not leak then
        ICInstance inst = ICCF_GetInst();

        h = NewHandle(0);
        NSCAssert(h != NULL, @"Internal error: can't allocate URL handle");

        // urlData holds only the trimmed text, so the length and the selection
        // passed to Internet Config must be relative to that text, not to the
        // untrimmed string
        urlStart = 0;
        urlEnd = urlLength;
        err = ICParseURL(inst, "\pmailto", urlData, urlLength, &urlStart, &urlEnd, h);
        DisposeHandle(h); // only the adjusted offsets are of interest, not the URL itself

        ICCF_OSErrCAssert(err, @"ICParseURL");

        // map the offsets within the trimmed text back onto the source document
        range->length = urlEnd - urlStart;
        range->location += selStart + urlStart;
    } @finally {
        free(urlData);
    }
}
120
// Reports whether 'character' occurs anywhere within 'range' of 's'.
static BOOL ICCF_StringIncludesCharacter(NSString *s, unichar character, NSRange range) {
    NSString *wanted = [NSString stringWithCharacters: &character length: 1];
    NSCharacterSet *wantedSet = [NSCharacterSet characterSetWithCharactersInString: wanted];
    NSRange hit = [s rangeOfCharacterFromSet: wantedSet options: NSLiteralSearch range: range];

    if (hit.location == NSNotFound)
        return NO;
    return YES;
}
127
// Heuristic: text is treated as a likely URI when 'range' of 's' contains
// at least one of ':', '/', '.' or '@'.
static BOOL ICCF_IsLikelyURI(NSString *s, NSRange range) {
    NSCharacterSet *uriHints = [NSCharacterSet characterSetWithCharactersInString: @":/.@"];
    NSRange firstHint = [s rangeOfCharacterFromSet: uriHints options: NSLiteralSearch range: range];

    if (firstHint.location == NSNotFound)
        return NO;
    return YES;
}
132
// Heuristic: text is treated as a likely IPv6 address when 'range' of 's'
// contains nothing but hexadecimal digits and colons.
static BOOL ICCF_IsLikelyIPv6Address(NSString *s, NSRange range) {
    NSCharacterSet *allowed =
        [NSCharacterSet characterSetWithCharactersInString: @"ABCDEFabcdef0123456789:"];
    NSCharacterSet *disallowed = [allowed invertedSet];
    NSRange offender = [s rangeOfCharacterFromSet: disallowed options: NSLiteralSearch range: range];

    if (offender.location != NSNotFound)
        return NO;
    return YES;
}
138
// Grows 'range' (a selection or insertion point within 's') outwards until it
// covers the URL-like text around it, then lets ICCF_ParseURL narrow it down.
//
// s     - the text being searched
// range - the selection within 's'; it is checked with ICCF_CheckRange on entry
//
// Returns the range within 's' as adjusted by ICCF_ParseURL.
//
// The search runs as a small state machine driven by goto:
//   expandFront - move the start back to just after the nearest left delimiter
//   expandBack  - move the end forward to just before the nearest right delimiter,
//                 stepping over closing brackets whose opening bracket is in range
//   expandBoth  - step outside a bracketed pair entirely ((foo), {UUID}, [IPv6])
//   checkRange  - validate the result and hand it to ICCF_ParseURL
NSRange ICCF_URLEnclosingRange(NSString *s, NSRange range) {
    NSCharacterSet *urlLeftDelimiters = nil, *urlRightDelimiters = nil;
    NSRange delimiterRange;
    unsigned extraLen;   // number of characters of 's' after the end of 'range'
    BOOL multiLine = NO; // set once a '<' has been found in front of the range

    ICCF_CheckRange(range);

    ICCF_Delimiters(&urlLeftDelimiters, &urlRightDelimiters);

    // right delimiter selected?  Yes, this can break with ...)URL(....  Oh well.
    // range.location may equal [s length] (expandFront allows for that); there is
    // no character to examine at that index, so skip the test in that case.
    if (range.location > 0 && range.location < [s length] &&
        [urlRightDelimiters characterIsMember: [s characterAtIndex: range.location]]) {
        --range.location;
        ++range.length;
        ICLog(@"expanding past initial %c, now |%@|", [s characterAtIndex: range.location + 1],
              [s substringWithRange: range]);
    }

expandFront:
    // XXX instead of 0, make this stop at the max URL length to prevent protracted searches
    // XXX backport to ICeCoffEETerminal
    // add 1 to range to trap delimiters that are on the edge of the selection (i.e., <...)
    delimiterRange = [s rangeOfCharacterFromSet: urlLeftDelimiters
                                        options: NSLiteralSearch | NSBackwardsSearch
                                          range: NSMakeRange(0, range.location + (range.location != [s length]))];
    if (delimiterRange.location == NSNotFound) {
        // extend to beginning of string
        range.length += range.location;
        range.location = 0;
    } else {
        NSCAssert(delimiterRange.length == 1, @"Internal error: delimiter matched range is not of length 1");
        if ([s characterAtIndex: delimiterRange.location] == '<') { // XXX move to expandBoth to handle clicking in middle
            // after a '<', only '>' ends the search in expandBack
            multiLine = YES;
            urlRightDelimiters = [NSCharacterSet characterSetWithCharactersInString: @">"];
        }
        // start just after the delimiter, keeping the end of the range where it was
        range.length += range.location - delimiterRange.location - 1;
        range.location = delimiterRange.location + 1;
    }


expandBack:
    // XXX instead of length of string, make this stop at the max URL length to prevent protracted searches
    // add 1 to range to trap delimiters that are on the edge of the selection (i.e., ...>)
    extraLen = [s length] - range.location - range.length;
    delimiterRange = [s rangeOfCharacterFromSet: urlRightDelimiters
                                        options: NSLiteralSearch
                                          range: NSMakeRange(range.location + range.length - (range.length != 0),
                                                             extraLen + (range.length != 0))];
    if (delimiterRange.location == NSNotFound) {
        // extend to end of string
        range.length += extraLen;
        extraLen = 0;
    } else {
        NSCAssert(delimiterRange.length == 1, @"Internal error: delimiter matched range is not of length 1");
        // end just before the delimiter
        range.length += delimiterRange.location - range.location - range.length;
        extraLen = [s length] - NSMaxRange(range);

        unichar opening, closing = [s characterAtIndex: delimiterRange.location];
        // stopped at '>' without having started from '<': if there is a '<' anywhere
        // before the range, redo the front expansion looking for '<' only
        if (closing == '>' && !multiLine && ICCF_StringIncludesCharacter(s, '<', NSMakeRange(0, range.location))) {
            urlLeftDelimiters = [NSCharacterSet characterSetWithCharactersInString: @"<"];
            goto expandFront; // XXX move to expandBoth to handle clicking in middle
        }
        // grow URL past closing paren/brace/bracket if we've seen an open paren/brace/bracket
        if (closing == ')') opening = '(';
        else if (closing == '}') opening = '{';
        else if (closing == ']') opening = '[';
        else goto expandBoth;
        if (!ICCF_StringIncludesCharacter(s, opening, range))
            goto expandBoth;

        if (extraLen == 1) {
            // the closing character is the last one in the string: take it and stop
            range.length += 1;
            --extraLen;
            ICLog(@"expanding past %c, now |%@|", closing, [s substringWithRange: range]);
        } else {
            // take the closing character plus one more, then keep searching
            range.length += 2;
            ICLog(@"expanding past %c, now |%@|", closing, [s substringWithRange: range]);
            goto expandBack;
        }
    }

expandBoth:
    if (range.location <= 1)
        goto checkRange; // nowhere to expand
    unichar opening = [s characterAtIndex: range.location - 1], closing;
    if (opening == '(') closing = ')';
    else if (opening == '{') closing = '}';
    else if (opening == '[') closing = ']';
    else goto checkRange;

    ICLog(@"extraLen = %d", extraLen);
    // check if we're inside a partial delimited URL: not foolproof, but handles (foo), {UUID} and [IPv6]
    if (delimiterRange.location != NSNotFound && [s characterAtIndex: delimiterRange.location] == closing &&
        ((opening == '[' && ICCF_IsLikelyIPv6Address(s, range)) || !ICCF_IsLikelyURI(s, range))) {
        ICLog(@"expanding past %c...%c, was |%@|", opening, closing, [s substringWithRange: range]);
        // move two characters out at the front and up to two at the back
        // (fewer at the back when the string ends sooner)
        range.location -= 2;
        if (extraLen > 1)
            range.length += 4;
        else
            range.length += 2 + extraLen;
        ICLog(@"expanding past %c...%c, now |%@|", opening, closing, [s substringWithRange: range]);
        goto expandFront;
    }

    // the range already contains the closing character: move out at the front only.
    // For '[', only the text before the first ']' is tested for looking like IPv6.
    if (ICCF_StringIncludesCharacter(s, closing, range) &&
        ((opening == '[' &&
          ICCF_IsLikelyIPv6Address(s,
                                   NSMakeRange(range.location,
                                               [s rangeOfString: @"]" options: NSLiteralSearch range: range].location -
                                               range.location)))
         || !ICCF_IsLikelyURI(s, range))) {
        range.location -= 2;
        range.length += 2;
        ICLog(@"expanding past %c, now |%@|", opening, [s substringWithRange: range]);
        goto expandFront;
    }

checkRange:
    ICCF_CheckRange(range);

    ICCF_ParseURL([s substringWithRange: range], &range);

    return range;
}
Note: See TracBrowser for help on using the repository browser.