source: trunk/ICeCoffEE/ICeCoffEE/ICeCoffEEParser.m

Last change on this file was 474, checked in by Nicholas Riley, 16 years ago

Only scan for closing bracket after opening bracket in IPv6 addresses (duh).

File size: 10.3 KB
Line 
1//
2// ICeCoffEEParser.m
3// ICeCoffEE
4//
5// Created by Nicholas Riley on 6/21/07.
6// Copyright 2007 Nicholas Riley. All rights reserved.
7//
8
9#import "ICeCoffEEParser.h"
10#import "ICeCoffEE.h"
11
// Hands back, by reference, the character sets treated as the left and right
// boundaries of a URL.  Both sets are built on first use and cached in
// function-local statics, so the returned objects are owned by this function
// and must not be released by the caller.
//
// leftPtr:  receives the set of characters that end a backwards scan.
// rightPtr: receives the set of characters that end a forwards scan.
void ICCF_Delimiters(NSCharacterSet **leftPtr, NSCharacterSet **rightPtr) {
    static NSCharacterSet *urlLeftDelimiters = nil, *urlRightDelimiters = nil;

    // Fast path: both sets were built by an earlier call.
    if (urlLeftDelimiters != nil && urlRightDelimiters != nil) {
        *leftPtr = urlLeftDelimiters;
        *rightPtr = urlRightDelimiters;
        return;
    }

    NSMutableCharacterSet *common = [[NSCharacterSet whitespaceAndNewlineCharacterSet] mutableCopy];

    // Drop whichever cached set (if any) exists before rebuilding both.
    [urlLeftDelimiters release];
    [urlRightDelimiters release];
    [common autorelease];

    // Everything outside 0x21...0x7e: nonprintable and non-ASCII characters.
    NSCharacterSet *printableASCII = [NSCharacterSet characterSetWithRange: NSMakeRange(0x21, 0x5e)];
    [common formUnionWithCharacterSet: [printableASCII invertedSet]];
    [common formUnionWithCharacterSet: [NSCharacterSet punctuationCharacterSet]];
    // XXX obsoleted by RFC 3986 now... use §2.1, 2.2, 2.3
    // RFC 2396 §2.2, 2.3, 2.4, plus % and # from "delims" set and {}, []
    [common removeCharactersInString: @";/?:@&=+$,-_.!~*'(){}[]%#"];

    // Opening brackets only delimit on the left...
    NSMutableCharacterSet *leftSet = [[common mutableCopy] autorelease];
    [leftSet addCharactersInString: @"<(["];
    urlLeftDelimiters = [leftSet copy]; // immutable copy - for efficiency

    // ...and closing brackets only on the right.
    NSMutableCharacterSet *rightSet = [[common mutableCopy] autorelease];
    [rightSet addCharactersInString: @">)]"];
    urlRightDelimiters = [rightSet copy]; // immutable copy - for efficiency

    *leftPtr = urlLeftDelimiters;
    *rightPtr = urlRightDelimiters;
}
38
static ICInstance ICCF_icInst = NULL;

// Starts an Internet Config session and stores it in ICCF_icInst.  If a session
// is already stored, that is logged and the old session is stopped first.
// Asserts (NSCAssert1) when ICStart does not return noErr.
void ICCF_StartIC() {
    if (ICCF_icInst != NULL) {
        ICLog(@"ICCF_StartIC: Internet Config is already running!");
        ICCF_StopIC();
    }

    OSStatus startStatus = ICStart(&ICCF_icInst, kICCFCreator);
    NSCAssert1(startStatus == noErr, ICCF_LocalizedString(@"Unable to start Internet Config (error %d)"), startStatus);
}
51
// Stops the Internet Config session held in ICCF_icInst and clears the
// pointer.  When no session is stored, this only logs that fact.
void ICCF_StopIC() {
    if (ICCF_icInst != NULL) {
        ICStop(ICCF_icInst);
        ICCF_icInst = NULL;
        return;
    }

    ICLog(@"ICCF_StopIC: Internet Config is not running!");
}
60
// Returns the Internet Config session stored by ICCF_StartIC.
// Asserts (NSCAssert) if no session is currently stored.
ICInstance ICCF_GetInst() {
    ICInstance currentInstance = ICCF_icInst;

    NSCAssert(currentInstance != NULL, @"Internal error: Called ICCF_GetInst without ICCF_StartIC");
    return currentInstance;
}
65
// Trims punctuation and brackets surrounding a candidate URL, then asks
// Internet Config (ICParseURL) to locate the URL within what is left.
//
// string: the candidate URL text.
// range:  input/output.  On entry, the range of the source document which
//         contains 'string' (range->length must equal [string length]).  On
//         return, the range of the source document covering the URL that
//         ICParseURL reported.
//
// Failures are reported through NSCAssert / ICCF_OSErrCAssert: an empty string,
// a string with no alphanumeric character left after trimming, or a non-ASCII
// candidate all assert with "No URL is selected".
void ICCF_ParseURL(NSString *string, NSRange *range) {
    OSStatus err;
    Handle h = NULL;
    long selStart = 0, selEnd = range->length; // local offsets within 'string'
    long urlStart, urlLength;                  // where the trimmed candidate sits within the original 'string'
    char *urlData = NULL;

    NSCAssert(selEnd == [string length], @"Internal error: URL string is wrong length");
    // Without this, characterAtIndex: below would be asked for index -1 and
    // raise NSRangeException instead of reporting a missing URL.
    NSCAssert(selEnd > 0, @"No URL is selected");

    @try {
        // Drop one trailing sentence-punctuation character.
        if ([[NSCharacterSet characterSetWithCharactersInString: @";,.!"] characterIsMember:
             [string characterAtIndex: selEnd - 1]]) {
            selEnd--;
        }
        // A string that was nothing but that one character has nothing left to examine.
        NSCAssert(selStart < selEnd, @"No URL is selected");

        // Skip leading non-alphanumerics; when the skipped character is an
        // opening bracket whose partner ends the candidate, drop the partner too.
        NSCharacterSet *alphanumericCharacterSet = [NSCharacterSet alphanumericCharacterSet];
        unichar opening, closing;
        while (![alphanumericCharacterSet characterIsMember:
                 (opening = [string characterAtIndex: selStart])]) {
            closing = [string characterAtIndex: selEnd - 1];
            if ((opening == '(' && closing == ')') ||
                (opening == '{' && closing == '}') ||
                (opening == '[' && closing == ']')) {
                selEnd--;
            }
            selStart++;
            NSCAssert(selStart < selEnd, @"No URL is selected");
        }

        urlStart = selStart;
        urlLength = selEnd - selStart;
        string = [string substringWithRange: NSMakeRange(urlStart, urlLength)];

        ICLog(@"Parsing URL |%@|", string);

        NSCAssert([string canBeConvertedToEncoding: NSASCIIStringEncoding], @"No URL is selected");

        urlData = (char *)malloc((urlLength + 1) * sizeof(char));
        NSCAssert(urlData != NULL, @"Internal error: can't allocate memory for URL string");

        // XXX getCString: is deprecated in 10.4, but this is safe and shouldn't assert because we've already verified the string can be converted to ASCII, which should be a subset of any possible system encoding. The replacement (getCString:maxLength:encoding:) is not available until 10.4, so we leave this until we dump Internet Config and gain IDN friendliness.
        [string getCString: urlData];

        h = NewHandle(0);
        NSCAssert(h != NULL, @"Internal error: can't allocate URL handle");

        // urlData holds only the trimmed candidate, so the text length and the
        // selection handed to ICParseURL must be expressed relative to it, not
        // to the untrimmed string (which would cover uninitialised bytes and
        // shift the selection by the number of leading characters skipped).
        selStart = 0;
        selEnd = urlLength;
        err = ICParseURL(ICCF_GetInst(), "\pmailto", urlData, urlLength, &selStart, &selEnd, h);

        ICCF_OSErrCAssert(err, @"ICParseURL");

        // Map the reported offsets back into source-document coordinates.
        range->length = selEnd - selStart;
        range->location += urlStart + selStart;
    } @finally {
        // The parsed URL text itself is not used; release the handle on every
        // path, including when an assertion above raised.
        if (h != NULL)
            DisposeHandle(h);
        free(urlData);
    }
}
120
// Returns YES when 'character' occurs somewhere within 'range' of 's'.
static BOOL ICCF_StringIncludesCharacter(NSString *s, unichar character, NSRange range) {
    // Build a one-member character set holding just the character sought.
    NSString *needle = [NSString stringWithCharacters: &character length: 1];
    NSCharacterSet *needleSet = [NSCharacterSet characterSetWithCharactersInString: needle];

    NSRange found = [s rangeOfCharacterFromSet: needleSet options: NSLiteralSearch range: range];
    if (found.location == NSNotFound)
        return NO;
    return YES;
}
127
// Heuristic: 'range' of 's' is treated as URI-like when it contains at least
// one of the characters ':', '/', '.' or '@'.
static BOOL ICCF_IsLikelyURI(NSString *s, NSRange range) {
    NSCharacterSet *uriHintCharacters = [NSCharacterSet characterSetWithCharactersInString: @":/.@"];
    NSRange firstHint = [s rangeOfCharacterFromSet: uriHintCharacters
                                           options: NSLiteralSearch
                                             range: range];

    if (firstHint.location == NSNotFound)
        return NO;
    return YES;
}
132
// Heuristic: 'range' of 's' is treated as an IPv6 address when every character
// in it is a hexadecimal digit or a colon.  An empty range therefore passes.
static BOOL ICCF_IsLikelyIPv6Address(NSString *s, NSRange range) {
    NSCharacterSet *addressCharacters =
        [NSCharacterSet characterSetWithCharactersInString: @"ABCDEFabcdef0123456789:"];
    NSCharacterSet *foreignCharacters = [addressCharacters invertedSet];

    // Look for the first character that could not be part of an address.
    NSRange firstForeign = [s rangeOfCharacterFromSet: foreignCharacters
                                              options: NSLiteralSearch
                                                range: range];
    if (firstForeign.location != NSNotFound)
        return NO;
    return YES;
}
138
// Grows 'range' (a range within 's') outward until it is bounded by URL
// delimiter characters, then hands the enclosed text to ICCF_ParseURL and
// returns the range ICCF_ParseURL leaves behind.
//
// The work is a small goto-driven state machine:
//   expandFront - search backwards from the range for a left delimiter
//   expandBack  - search forwards from the range for a right delimiter
//   expandBoth  - if the range is immediately preceded by an opening
//                 ( { or [, decide whether the bracketed text is itself part
//                 of the URL and, if so, widen the range and start over
//   checkRange  - validate the result and parse it
//
// The delimiter sets come from ICCF_Delimiters, but are narrowed to just "<"
// or ">" once an angle bracket has been seen.
139NSRange ICCF_URLEnclosingRange(NSString *s, NSRange range) {
140 NSCharacterSet *urlLeftDelimiters = nil, *urlRightDelimiters = nil;
141 NSRange delimiterRange;
   // number of characters of 's' that lie after the end of 'range'
142 unsigned extraLen;
   // set once a '<' has been found as the left delimiter
143 BOOL multiLine = NO;
144
   // NOTE(review): ICCF_CheckRange is defined elsewhere; the characterAtIndex:
   // call below needs range.location < [s length], which this check presumably
   // guarantees - confirm.
145 ICCF_CheckRange(range);
146
147 ICCF_Delimiters(&urlLeftDelimiters, &urlRightDelimiters);
148
149 // right delimiter selected? Yes, this can break with ...)URL(.... Oh well.
150 if (range.location > 0 && [urlRightDelimiters characterIsMember: [s characterAtIndex: range.location]]) {
151 --range.location;
152 ++range.length;
153 ICLog(@"expanding past initial %c, now |%@|", [s characterAtIndex: range.location + 1],
154 [s substringWithRange: range]);
155 }
156
157expandFront:
158 // XXX instead of 0, make this stop at the max URL length to prevent protracted searches
159 // XXX backport to ICeCoffEETerminal
160 // add 1 to range to trap delimiters that are on the edge of the selection (i.e., <...)
161 delimiterRange = [s rangeOfCharacterFromSet: urlLeftDelimiters
162 options: NSLiteralSearch | NSBackwardsSearch
163 range: NSMakeRange(0, range.location + (range.location != [s length]))];
164 if (delimiterRange.location == NSNotFound) {
165 // extend to beginning of string
166 range.length += range.location;
167 range.location = 0;
168 } else {
169 NSCAssert(delimiterRange.length == 1, @"Internal error: delimiter matched range is not of length 1");
170 if ([s characterAtIndex: delimiterRange.location] == '<') { // XXX move to expandBoth to handle clicking in middle
   // inside <...>: from now on only '>' ends the URL
171 multiLine = YES;
172 urlRightDelimiters = [NSCharacterSet characterSetWithCharactersInString: @">"];
173 }
   // move the start of the range to just after the delimiter, keeping its end fixed
174 range.length += range.location - delimiterRange.location - 1;
175 range.location = delimiterRange.location + 1;
176 }
177
178
179expandBack:
180 // XXX instead of length of string, make this stop at the max URL length to prevent protracted searches
181 // add 1 to range to trap delimiters that are on the edge of the selection (i.e., ...>)
182 extraLen = [s length] - range.location - range.length;
183 delimiterRange = [s rangeOfCharacterFromSet: urlRightDelimiters
184 options: NSLiteralSearch
185 range: NSMakeRange(range.location + range.length - (range.length != 0),
186 extraLen + (range.length != 0))];
187 if (delimiterRange.location == NSNotFound) {
188 // extend to end of string
189 range.length += extraLen;
190 extraLen = 0;
191 } else {
192 NSCAssert(delimiterRange.length == 1, @"Internal error: delimiter matched range is not of length 1");
   // make the range end just before the delimiter; when the delimiter is the
   // last character of the range this unsigned sum wraps and shrinks it by one
193 range.length += delimiterRange.location - range.location - range.length;
194 extraLen = [s length] - NSMaxRange(range);
195
196 unichar opening, closing = [s characterAtIndex: delimiterRange.location];
   // stopped at '>' without having started from '<', yet a '<' exists earlier
   // in the string: redo the front expansion looking only for '<'
197 if (closing == '>' && !multiLine && ICCF_StringIncludesCharacter(s, '<', NSMakeRange(0, range.location))) {
198 urlLeftDelimiters = [NSCharacterSet characterSetWithCharactersInString: @"<"];
199 goto expandFront; // XXX move to expandBoth to handle clicking in middle
200 }
201 // grow URL past closing paren/brace/bracket if we've seen an open paren/brace/bracket
202 if (closing == ')') opening = '(';
203 else if (closing == '}') opening = '{';
204 else if (closing == ']') opening = '[';
205 else goto expandBoth;
206 if (!ICCF_StringIncludesCharacter(s, opening, range))
207 goto expandBoth;
208
209 if (extraLen == 1) {
   // the closing character is the last character of the string: take it and stop
210 range.length += 1;
211 --extraLen;
212 ICLog(@"expanding past %c, now |%@|", closing, [s substringWithRange: range]);
213 } else {
   // take the closing character plus one more, then search again; the next
   // search starts on the last character of the range
214 range.length += 2;
215 ICLog(@"expanding past %c, now |%@|", closing, [s substringWithRange: range]);
216 goto expandBack;
217 }
218 }
219
220expandBoth:
   // both widening steps below subtract 2 from range.location, so at least two
   // characters must precede the range
221 if (range.location <= 1)
222 goto checkRange; // nowhere to expand
223 unichar opening = [s characterAtIndex: range.location - 1], closing;
224 if (opening == '(') closing = ')';
225 else if (opening == '{') closing = '}';
226 else if (opening == '[') closing = ']';
227 else goto checkRange;
228
229 ICLog(@"extraLen = %d", extraLen);
230 // check if we're inside a partial delimited URL: not foolproof, but handles (foo), {UUID} and [IPv6]
231 if (delimiterRange.location != NSNotFound && [s characterAtIndex: delimiterRange.location] == closing &&
232 ((opening == '[' && ICCF_IsLikelyIPv6Address(s, range)) || !ICCF_IsLikelyURI(s, range))) {
233 ICLog(@"expanding past %c...%c, was |%@|", opening, closing, [s substringWithRange: range]);
   // widen by two characters at the front and by up to two at the back
   // (limited by what is left of the string), then rescan from the front;
   // expandFront's search includes the character at range.location
234 range.location -= 2;
235 if (extraLen > 1)
236 range.length += 4;
237 else
238 range.length += 2 + extraLen;
239 ICLog(@"expanding past %c...%c, now |%@|", opening, closing, [s substringWithRange: range]);
240 goto expandFront;
241 }
242
   // the matching closer already lies inside the range: for '[', only the text
   // up to the first ']' has to look like an IPv6 address
243 if (ICCF_StringIncludesCharacter(s, closing, range) &&
244 ((opening == '[' &&
245 ICCF_IsLikelyIPv6Address(s,
246 NSMakeRange(range.location,
247 [s rangeOfString: @"]" options: NSLiteralSearch range: range].location -
248 range.location)))
249 || !ICCF_IsLikelyURI(s, range))) {
   // widen at the front only (same end position), then rescan from the front
250 range.location -= 2;
251 range.length += 2;
252 ICLog(@"expanding past %c, now |%@|", opening, [s substringWithRange: range]);
253 goto expandFront;
254 }
255
256checkRange:
257 ICCF_CheckRange(range);
258
   // ICCF_ParseURL updates 'range' in place
259 ICCF_ParseURL([s substringWithRange: range], &range);
260
261 return range;
262}
Note: See TracBrowser for help on using the repository browser.