source: trunk/ICeCoffEE/ICeCoffEE/ICeCoffEEParser.m@ 342

Last change on this file since 342 was 322, checked in by Nicholas Riley, 17 years ago

ICeCoffEE.[hm]: Move parsing functions (ICCF_CheckRange,
ICCF_Delimiters, ICCF_ParseURL) and Internet Config start/stop
routines (ICCF_Stop/StartIC) to ICeCoffEEParser.[hm] so they can be
tested outside the APE. Also move ICCF_MAX_URL_LEN definition, and
extract guts of NSTextView parsing into ICCF_URLEnclosingRange.
Remove comment about TXNClick; if MLTE is deprecated I'm not going to
mess with it.

ICeCoffEEParser.[hm]: Moved everything discussed above to here.

ICeCoffEEServices.m: Some comments, now I realize how irritating the
service localization problem is.

ICeCoffEETerminal.m: Remove long-unused reference to
ICeCoffEEScanner.h (was from 1.2?).

ICeCoffEEScanner.[hm]: Removed, no longer in use.

TestParser.m: Very simple first pass at testing. There's much more I
want to do here.

urls.plist: First pass at URL test cases.

ICeCoffEE.xcodeproj: Add TestParser target (yes, it uses ZeroLink
because my machine is slow and it actually helps).

File size: 8.4 KB
Line 
1//
2// ICeCoffEEParser.m
3// ICeCoffEE
4//
5// Created by Nicholas Riley on 6/21/07.
6// Copyright 2007 Nicholas Riley. All rights reserved.
7//
8
9#import "ICeCoffEEParser.h"
10#import "ICeCoffEE.h"
11
// Longest run of characters ICCF will consider as a potential URL.  This is the
// RFC-ordained max URL length; its practical purpose is to avoid passing IC/LS
// multi-megabyte documents.  Debug builds use a much smaller limit.
#if !ICCF_DEBUG
const long ICCF_MAX_URL_LEN = 1024;
#else
const long ICCF_MAX_URL_LEN = 120; // XXX change later
#endif
18
// Sanity-checks a candidate URL range: asserts (via NSCAssert) that 'range' is
// non-empty and no longer than ICCF_MAX_URL_LEN characters.  Only the length of
// 'range' is examined; its location is ignored.
void ICCF_CheckRange(NSRange range) {
    NSCAssert(range.length > 0, ICCF_LocalizedString(@"No URL is selected"));
    // ICCF_MAX_URL_LEN is a signed long, but the (localized) format uses %lu and
    // range.length is unsigned: convert explicitly so that both the comparison
    // and the format argument are unambiguously unsigned long.
    NSCAssert1(range.length <= (unsigned long)ICCF_MAX_URL_LEN,
               ICCF_LocalizedString(@"The potential URL is longer than %lu characters"),
               (unsigned long)ICCF_MAX_URL_LEN);
}
23
// Returns a new (caller-owned, immutable) character set containing everything in
// 'common' plus the characters of 'extraCharacters'.
static NSCharacterSet *ICCF_CopyDelimiterSet(NSCharacterSet *common, NSString *extraCharacters) {
    NSMutableCharacterSet *combined = [[common mutableCopy] autorelease];
    [combined formUnionWithCharacterSet: [NSCharacterSet characterSetWithCharactersInString: extraCharacters]];
    return [combined copy]; // make immutable again - for efficiency
}

// Hands back the character sets that terminate a URL when scanning leftwards
// (*leftPtr) and rightwards (*rightPtr) from a selection.  Both sets are built
// on first use and cached in function-local statics for subsequent calls.
//
// The two sets share a common core and differ only in the parenthesis they
// contain: '(' ends a leftward scan, ')' ends a rightward scan.
void ICCF_Delimiters(NSCharacterSet **leftPtr, NSCharacterSet **rightPtr) {
    static NSCharacterSet *cachedLeft = nil, *cachedRight = nil;

    if (cachedLeft != nil && cachedRight != nil) {
        *leftPtr = cachedLeft;
        *rightPtr = cachedRight;
        return;
    }

    // At least one set is missing: drop whichever one exists and rebuild both.
    [cachedLeft release];
    [cachedRight release];

    NSCharacterSet *printableASCII = [NSCharacterSet characterSetWithRange: NSMakeRange(0x21, 0x5e)];
    NSMutableCharacterSet *common = [[[NSCharacterSet whitespaceAndNewlineCharacterSet] mutableCopy] autorelease];

    [common formUnionWithCharacterSet: [printableASCII invertedSet]]; // nonprintable and non-ASCII characters
    [common formUnionWithCharacterSet: [NSCharacterSet punctuationCharacterSet]];
    // XXX obsoleted by RFC 3986 now... use §2.1, 2.2, 2.3
    [common removeCharactersInString: @";/?:@&=+$,-_.!~*'()%#"]; // RFC 2396 §2.2, 2.3, 2.4, plus % and # from "delims" set

    cachedLeft = ICCF_CopyDelimiterSet(common, @"><(");
    cachedRight = ICCF_CopyDelimiterSet(common, @"><)");

    *leftPtr = cachedLeft;
    *rightPtr = cachedRight;
}
50
// The Internet Config instance used by this file: filled in by ICStart() in
// ICCF_StartIC, reset to NULL by ICCF_StopIC, and returned by ICCF_GetInst.
// NULL is treated as "Internet Config is not running".
static ICInstance ICCF_icInst = NULL;
52
// Starts Internet Config and stores the resulting instance in ICCF_icInst.
// If an instance is already recorded, that is logged and the old instance is
// stopped first.  Asserts (via NSCAssert1) if ICStart reports an error.
void ICCF_StartIC() {
    OSStatus err;

    if (ICCF_icInst != NULL) {
        ICLog(@"ICCF_StartIC: Internet Config is already running!");
        ICCF_StopIC();
    }
    err = ICStart(&ICCF_icInst, kICCFCreator);
    if (err != noErr) {
        // Don't keep whatever ICStart may have left behind after a failure;
        // otherwise ICCF_GetInst/ICCF_StopIC would treat it as a live instance.
        ICCF_icInst = NULL;
    }
    // OSStatus is a 32-bit signed value; cast so the argument matches %d exactly.
    NSCAssert1(err == noErr, ICCF_LocalizedString(@"Unable to start Internet Config (error %d)"), (int)err);
}
63
// Shuts down the Internet Config instance recorded in ICCF_icInst and clears
// it.  Calling this when no instance is recorded only produces a log message.
void ICCF_StopIC() {
    if (ICCF_icInst != NULL) {
        ICStop(ICCF_icInst);
        ICCF_icInst = NULL;
        return;
    }

    ICLog(@"ICCF_StopIC: Internet Config is not running!");
}
72
// Accessor for the current Internet Config instance.  Asserts (via NSCAssert)
// if no instance is recorded, i.e. ICCF_icInst is NULL.
ICInstance ICCF_GetInst() {
    ICInstance currentInstance = ICCF_icInst;

    NSCAssert(currentInstance != NULL, @"Internal error: Called ICCF_GetInst without ICCF_StartIC");
    return currentInstance;
}
77
// input/output 'range' is the range of source document which contains 'string'
//
// Trims one trailing ';', ',' or '.' and any leading non-alphanumeric
// characters from 'string', hands the remaining (ASCII-only) text to
// ICParseURL, and rewrites *range to the document range of the URL that
// Internet Config reports.  Failures are reported through assertions; the
// temporary C buffer is freed on both the normal and the exceptional path.
void ICCF_ParseURL(NSString *string, NSRange *range) {
    OSStatus err;
    Handle h;
    ICInstance inst;
    long trimStart = 0, trimEnd = range->length; // local offsets within 'string'
    long urlLen, selStart, selEnd;               // offsets within the trimmed text given to ICParseURL
    // Assigned inside NS_DURING and read in NS_HANDLER, so it must be volatile
    // to have a reliable value when the exception domain is setjmp-based.
    char * volatile urlData = NULL;

    NSCAssert(trimEnd == [string length], @"Internal error: URL string is wrong length");
    // Guard the 'trimEnd - 1' index below against an empty string.
    NSCAssert(trimEnd > 0, @"No URL is selected");

    NS_DURING
        // Sentence punctuation directly after a URL is not part of it.
        if ([[NSCharacterSet characterSetWithCharactersInString: @";,."] characterIsMember:
             [string characterAtIndex: trimEnd - 1]]) {
            trimEnd--;
        }
        // A URL has to start with a letter or digit; skip anything else.
        NSCharacterSet *alphanumericCharacterSet = [NSCharacterSet alphanumericCharacterSet];
        while (![alphanumericCharacterSet characterIsMember: [string characterAtIndex: trimStart]]) {
            trimStart++;
            NSCAssert(trimStart < trimEnd, @"No URL is selected");
        }

        urlLen = trimEnd - trimStart;
        string = [string substringWithRange: NSMakeRange(trimStart, urlLen)];

        ICLog(@"Parsing URL |%@|", string);

        NSCAssert([string canBeConvertedToEncoding: NSASCIIStringEncoding], @"No URL is selected");

        // Sized for the untrimmed text, so it is always large enough for the
        // trimmed text plus its terminating NUL.
        urlData = (char *)malloc( (range->length + 1) * sizeof(char));
        NSCAssert(urlData != NULL, @"Internal error: can't allocate memory for URL string");

        // XXX getCString: is deprecated in 10.4, but this is safe and shouldn't assert because we've already verified the string can be converted to ASCII, which should be a subset of any possible system encoding.  The replacement (getCString:maxLength:encoding:) is not available until 10.4, so we leave this until we dump Internet Config and gain IDN friendliness.
        [string getCString: urlData];

        // Fetch the instance before allocating the handle, so that a failed
        // assertion in ICCF_GetInst cannot leak the handle.
        inst = ICCF_GetInst();

        h = NewHandle(0);
        NSCAssert(h != NULL, @"Internal error: can't allocate URL handle");

        // The buffer holds only the trimmed text, so describe it to Internet
        // Config in the buffer's own coordinates: its real length, with the
        // whole of it selected.  (Passing the untrimmed length/offsets would
        // make IC read past the terminating NUL into uninitialized memory.)
        selStart = 0;
        selEnd = urlLen;
        err = ICParseURL(inst, "\pmailto", urlData, urlLen, &selStart, &selEnd, h);
        DisposeHandle(h);

        ICCF_OSErrCAssert(err, @"ICParseURL");

        // Map the result back to document coordinates: selStart is relative to
        // the trimmed text, which itself begins trimStart characters into 'string'.
        range->length = selEnd - selStart;
        range->location += trimStart + selStart;
    NS_HANDLER
        free(urlData);
        [localException raise];
    NS_ENDHANDLER

    free(urlData);
}
127
// Given a selection 'range' within 's', grows it leftwards and rightwards to
// the nearest URL delimiters (see ICCF_Delimiters), with special handling so
// that parenthesized portions inside a URL are kept, then passes the result to
// ICCF_ParseURL and returns the range it produces.  The range is validated
// with ICCF_CheckRange on entry and after each expansion phase.
NSRange ICCF_URLEnclosingRange(NSString *s, NSRange range) {
    NSCharacterSet *urlLeftDelimiters = nil, *urlRightDelimiters = nil;
    NSRange delimiterRange;
    unsigned extraLen;

    ICCF_CheckRange(range);

    ICCF_Delimiters(&urlLeftDelimiters, &urlRightDelimiters);

expandFront:
    // XXX instead of 0, make this stop at the max URL length to prevent protracted searches

    // XXX here's how this is supposed to work:
    // (http://web.sabi.net/) and <http://web.sabi.net/> should work if they are the entire document, even if clicking at the end/beginning of the document, not barfing with "no URL" (correct, as now) or selecting the final >, or ) (what would happen if we remove this "add 1" accommodation).  But how about "http://web.sabi.net/(foo)"?  That should work too, as long as it's not preceded by a (.
    // Should probably backport to ICeCoffEETerminal, now I finally understand the method to this madness.
    // add 1 to range to trap delimiters that are on the edge of the selection (i.e., <...)
    delimiterRange = [s rangeOfCharacterFromSet: urlLeftDelimiters
                                        options: NSLiteralSearch | NSBackwardsSearch
                                          range: NSMakeRange(0, range.location + (range.location != [s length]))];
    if (delimiterRange.location == NSNotFound) {
        // extend to beginning of string
        range.length += range.location;
        range.location = 0;
    } else {
        NSCAssert(delimiterRange.length == 1, @"Internal error: delimiter matched range is not of length 1");
        // start the range just after the delimiter
        range.length += range.location - delimiterRange.location - 1;
        range.location = delimiterRange.location + 1;

        // in url/(parens)stuff, handle clicking inside or after (parens).
        if ([s characterAtIndex: delimiterRange.location] == '(' &&
            range.location > 2 /* prevent wrapping, ordinarily not necessary */) {
            if ([s rangeOfString: @")" options: NSLiteralSearch range: range].location != NSNotFound ||
                [s rangeOfCharacterFromSet: [NSCharacterSet characterSetWithCharactersInString: @"/."]
                                   options: NSLiteralSearch range: range].location == NSNotFound) {
                // step back over the '(' and the character before it, then rescan
                range.location -= 2;
                range.length += 2;
                ICLog(@"expanding past (, now |%@|", [s substringWithRange: range]);
                goto expandFront;
            }
        }
    }

    ICCF_CheckRange(range);

expandBack:
    // XXX instead of length of string, make this stop at the max URL length to prevent protracted searches
    // add 1 to range to trap delimiters that are on the edge of the selection (i.e., ...>)
    extraLen = [s length] - range.location - range.length;
    delimiterRange = [s rangeOfCharacterFromSet: urlRightDelimiters
                                        options: NSLiteralSearch
                                          range: NSMakeRange(range.location + range.length - (range.length != 0),
                                                             extraLen + (range.length != 0))];
    if (delimiterRange.location == NSNotFound) {
        // extend to end of string
        range.length += extraLen;
    } else {
        NSCAssert(delimiterRange.length == 1, @"Internal error: delimiter matched range is not of length 1");
        // end the range just before the delimiter
        range.length += delimiterRange.location - range.location - range.length;

        // grow URL past closing paren if we've seen an open paren
        if ([s characterAtIndex: delimiterRange.location] == ')' &&
            [s rangeOfString: @"(" options: NSLiteralSearch range: range].location != NSNotFound) {
            if (delimiterRange.location + 2 <= [s length]) {
                // take in the ')' and the character after it, then rescan;
                // the "add 1" accommodation above re-examines that character
                range.length += 2;
                ICLog(@"expanding past ), now |%@|", [s substringWithRange: range]);
                goto expandBack;
            }
            // The ')' is the last character of the string: growing by 2 would
            // run past the end of 's'.  Take in just the ')'; the range now
            // reaches the end of the string, so there is nothing left to scan.
            range.length += 1;
            ICLog(@"expanding past ), now |%@|", [s substringWithRange: range]);
        }
    }

    ICCF_CheckRange(range);

    ICCF_ParseURL([s substringWithRange: range], &range);

    return range;
}
Note: See TracBrowser for help on using the repository browser.