source: trunk/ICeCoffEE/ICeCoffEE/ICeCoffEEParser.m @ 322

Last change on this file since 322 was 322, checked in by Nicholas Riley, 13 years ago

ICeCoffEE.[hm]: Move parsing functions (ICCF_CheckRange,
ICCF_Delimiters, ICCF_ParseURL) and Internet Config start/stop
routines (ICCF_Stop/StartIC) to ICeCoffEEParser.[hm] so they can be
tested outside the APE. Also move ICCF_MAX_URL_LEN definition, and
extract guts of NSTextView parsing into ICCF_URLEnclosingRange.
Remove comment about TXNClick; if MLTE is deprecated I'm not going to
mess with it.

ICeCoffEEParser.[hm]: Moved everything discussed above to here.

ICeCoffEEServices.m: Some comments, now I realize how irritating the
service localization problem is.

ICeCoffEETerminal.m: Remove long-unused reference to
ICeCoffEEScanner.h (was from 1.2?).

ICeCoffEEScanner.[hm]: Removed, no longer in use.

TestParser.m: Very simple first pass at testing. There's much more I
want to do here.

urls.plist: First pass at URL test cases.

ICeCoffEE.xcodeproj: Add TestParser target (yes, it uses ZeroLink
because my machine is slow and it actually helps).

File size: 8.4 KB
Line 
1//
2//  ICeCoffEEParser.m
3//  ICeCoffEE
4//
5//  Created by Nicholas Riley on 6/21/07.
6//  Copyright 2007 Nicholas Riley. All rights reserved.
7//
8
9#import "ICeCoffEEParser.h"
10#import "ICeCoffEE.h"
11
// Upper bound on the length of a candidate URL ("RFC-ordained" per the
// original note); it exists just to avoid passing IC/LS multi-megabyte documents.
#if ICCF_DEBUG
// XXX debug-build value; change later
const long ICCF_MAX_URL_LEN = 120;
#else
const long ICCF_MAX_URL_LEN = 1024;
#endif
18
// Sanity-checks a candidate URL range.
// Fails an NSCAssert (with a localized message) when 'range' is empty or
// when it is longer than ICCF_MAX_URL_LEN characters.
void ICCF_CheckRange(NSRange range) {
    NSCAssert(range.length > 0, ICCF_LocalizedString(@"No URL is selected"));
    // The format string is a localization key and must stay as-is; %lu wants an
    // unsigned long, while ICCF_MAX_URL_LEN is a signed long, so cast the argument.
    NSCAssert1(range.length <= ICCF_MAX_URL_LEN, ICCF_LocalizedString(@"The potential URL is longer than %lu characters"), (unsigned long)ICCF_MAX_URL_LEN);
}
23
// Returns (by reference) the character sets that terminate a URL when
// scanning leftwards (*leftPtr) and rightwards (*rightPtr) from a selection.
// Both sets are built on first use and cached in function-local statics;
// the caller does not own the returned objects.
void ICCF_Delimiters(NSCharacterSet **leftPtr, NSCharacterSet **rightPtr) {
    static NSCharacterSet *leftDelimiters = nil, *rightDelimiters = nil;

    if (!(leftDelimiters != nil && rightDelimiters != nil)) {
        NSMutableCharacterSet *common, *leftSet, *rightSet;

        // drop whatever half-built state might be left over
        [leftDelimiters release];
        [rightDelimiters release];

        // delimiters shared by both directions: start from whitespace/newlines...
        common = [[[NSCharacterSet whitespaceAndNewlineCharacterSet] mutableCopy] autorelease];
        // ...add nonprintable and non-ASCII characters (everything outside 0x21-0x7e)...
        [common formUnionWithCharacterSet:
            [[NSCharacterSet characterSetWithRange: NSMakeRange(0x21, 0x5e)] invertedSet]];
        // ...and punctuation...
        [common formUnionWithCharacterSet: [NSCharacterSet punctuationCharacterSet]];
        // ...except the characters which may appear in a URL.
        // XXX obsoleted by RFC 3986 now... use §2.1, 2.2, 2.3
        // RFC 2396 §2.2, 2.3, 2.4, plus % and # from "delims" set
        [common removeCharactersInString: @";/?:@&=+$,-_.!~*'()%#"];

        // scanning left stops at an opening paren, scanning right at a closing one
        leftSet = [[common mutableCopy] autorelease];
        [leftSet addCharactersInString: @"><("];
        rightSet = [[common mutableCopy] autorelease];
        [rightSet addCharactersInString: @"><)"];

        // make immutable again - for efficiency
        leftDelimiters = [leftSet copy];
        rightDelimiters = [rightSet copy];
    }

    *leftPtr = leftDelimiters;
    *rightPtr = rightDelimiters;
}
50
// The Internet Config instance used by this file; NULL whenever no
// session has been started (or after it has been stopped).
static ICInstance ICCF_icInst = NULL;

// Starts an Internet Config session and stores it in ICCF_icInst.
// If a session is already open it is logged and stopped first.
// Fails an NSCAssert with a localized message if ICStart reports an error.
void ICCF_StartIC() {
    if (ICCF_icInst != NULL) {
        ICLog(@"ICCF_StartIC: Internet Config is already running!");
        ICCF_StopIC();
    }

    OSStatus err = ICStart(&ICCF_icInst, kICCFCreator);
    NSCAssert1(err == noErr, ICCF_LocalizedString(@"Unable to start Internet Config (error %d)"), err);
}
63
// Stops the Internet Config session held in ICCF_icInst and resets it to NULL.
// Calling this with no session open only logs a message.
void ICCF_StopIC() {
    if (ICCF_icInst != NULL) {
        ICStop(ICCF_icInst);
        ICCF_icInst = NULL;
        return;
    }

    ICLog(@"ICCF_StopIC: Internet Config is not running!");
}
72
// Returns the current Internet Config instance.
// Fails an NSCAssert if no session has been started with ICCF_StartIC.
ICInstance ICCF_GetInst() {
    ICInstance inst = ICCF_icInst;

    NSCAssert(inst != NULL, @"Internal error: Called ICCF_GetInst without ICCF_StartIC");
    return inst;
}
77
// Narrows a candidate URL down to the URL proper, using Internet Config.
//
// string - the candidate text; must be exactly range->length characters long
// range  - input/output: on entry, the range of the source document which
//          contains 'string'; on return, the range of the source document
//          occupied by the URL that Internet Config found.
//
// One trailing ';', ',' or '.' and any leading non-alphanumeric characters are
// trimmed before the text is handed to ICParseURL.  Fails an NSCAssert
// ("No URL is selected") if nothing is left or the text is not ASCII, and
// asserts via ICCF_OSErrCAssert if ICParseURL returns an error.  Any exception
// raised while parsing is re-raised after the temporary buffer is freed.
void ICCF_ParseURL(NSString *string, NSRange *range) {
    OSStatus err;
    Handle h;
    long selStart = 0, selEnd = range->length; // trim offsets within the original 'string'
    long trimmedLen, urlStart, urlEnd;          // offsets within the trimmed text in 'urlData'
    char *urlData = NULL;

    NSCAssert(selEnd == [string length], @"Internal error: URL string is wrong length");
    // characterAtIndex: selEnd - 1 below needs at least one character
    NSCAssert(selEnd > 0, @"No URL is selected");

    NS_DURING
        // drop a single trailing punctuation mark, most likely sentence punctuation
        if ([[NSCharacterSet characterSetWithCharactersInString: @";,."] characterIsMember:
            [string characterAtIndex: selEnd - 1]]) {
            selEnd--;
        }
        // skip leading characters until the first alphanumeric one
        NSCharacterSet *alphanumericCharacterSet = [NSCharacterSet alphanumericCharacterSet];
        while (![alphanumericCharacterSet characterIsMember: [string characterAtIndex: selStart]]) {
            selStart++;
            NSCAssert(selStart < selEnd, @"No URL is selected");
        }

        trimmedLen = selEnd - selStart;
        string = [string substringWithRange: NSMakeRange(selStart, trimmedLen)];

        ICLog(@"Parsing URL |%@|", string);

        NSCAssert([string canBeConvertedToEncoding: NSASCIIStringEncoding], @"No URL is selected");

        // sized for the untrimmed text, so always large enough for the trimmed text plus NUL
        urlData = (char *)malloc( (range->length + 1) * sizeof(char));
        NSCAssert(urlData != NULL, @"Internal error: can't allocate memory for URL string");

        // XXX getCString: is deprecated in 10.4, but this is safe and shouldn't assert because we've already verified the string can be converted to ASCII, which should be a subset of any possible system encoding.  The replacement (getCString:maxLength:encoding:) is not available until 10.4, so we leave this until we dump Internet Config and gain IDN friendliness.
        [string getCString: urlData];

        h = NewHandle(0);
        NSCAssert(h != NULL, @"Internal error: can't allocate URL handle");

        // urlData contains only the trimmed text, so the length and selection given
        // to Internet Config must be relative to that buffer, not to the original
        // string; otherwise IC would read past the copied text and the offsets it
        // hands back would be shifted by the number of leading characters trimmed.
        urlStart = 0;
        urlEnd = trimmedLen;
        err = ICParseURL(ICCF_GetInst(), "\pmailto", urlData, trimmedLen, &urlStart, &urlEnd, h);
        DisposeHandle(h); // only the resulting offsets are of interest, not the URL text

        ICCF_OSErrCAssert(err, @"ICParseURL");

        // map the buffer-relative result back into source document coordinates
        range->length = urlEnd - urlStart;
        range->location += selStart + urlStart;
    NS_HANDLER
        free(urlData);
        [localException raise];
    NS_ENDHANDLER

    free(urlData);
}
127
// Given a selection 'range' within 's', returns the range of the URL
// enclosing that selection.
//
// The selection is first grown leftwards to the nearest left delimiter and
// rightwards to the nearest right delimiter (see ICCF_Delimiters), with special
// handling so that parenthesized parts inside a URL ("url/(parens)stuff") are
// kept; the result is then refined by ICCF_ParseURL.  ICCF_CheckRange is
// applied on entry and after each expansion, so an empty or over-long
// candidate fails an assertion.
NSRange ICCF_URLEnclosingRange(NSString *s, NSRange range) {
    NSCharacterSet *urlLeftDelimiters = nil, *urlRightDelimiters = nil;
    NSRange delimiterRange;
    unsigned extraLen;

    ICCF_CheckRange(range);

    ICCF_Delimiters(&urlLeftDelimiters, &urlRightDelimiters);

expandFront:
    // XXX instead of 0, make this stop at the max URL length to prevent protracted searches

    // XXX here's how this is supposed to work:
    // (http://web.sabi.net/) and <http://web.sabi.net/> should work if they are the entire document, even if clicking at the end/beginning of the document, not barfing with "no URL" (correct, as now) or selecting the final >, or ) (what would happen if we remove this "add 1" accommodation).  But how about "http://web.sabi.net/(foo)"?  That should work too, as long as it's not preceded by a (.
    // Should probably backport to ICeCoffEETerminal, now I finally understand the method to this madness.
    // add 1 to range to trap delimiters that are on the edge of the selection (i.e., <...)
    delimiterRange = [s rangeOfCharacterFromSet: urlLeftDelimiters
                                        options: NSLiteralSearch | NSBackwardsSearch
                                          range: NSMakeRange(0, range.location + (range.location != [s length]))];
    if (delimiterRange.location == NSNotFound) {
        // extend to beginning of string
        range.length += range.location;
        range.location = 0;
    } else {
        NSCAssert(delimiterRange.length == 1, @"Internal error: delimiter matched range is not of length 1");
        // start just after the delimiter
        range.length += range.location - delimiterRange.location - 1;
        range.location = delimiterRange.location + 1;

        // in url/(parens)stuff, handle clicking inside or after (parens).
        if ([s characterAtIndex: delimiterRange.location] == '(' &&
            range.location > 2 /* prevent wrapping, ordinarily not necessary */) {
            // keep going left if the matching ) is already in the range, or if what
            // we have so far contains neither '/' nor '.'
            if ([s rangeOfString: @")" options: NSLiteralSearch range: range].location != NSNotFound ||
                [s rangeOfCharacterFromSet: [NSCharacterSet characterSetWithCharactersInString: @"/."]
                                   options: NSLiteralSearch range: range].location == NSNotFound) {
                // take in the ( and the character before it, then search again
                range.location -= 2;
                range.length += 2;
                ICLog(@"expanding past (, now |%@|", [s substringWithRange: range]);
                goto expandFront;
            }
        }
    }

    ICCF_CheckRange(range);

expandBack:
    // XXX instead of length of string, make this stop at the max URL length to prevent protracted searches
    // add 1 to range to trap delimiters that are on the edge of the selection (i.e., ...>)
    extraLen = [s length] - range.location - range.length;
    delimiterRange = [s rangeOfCharacterFromSet: urlRightDelimiters
                                        options: NSLiteralSearch
                                          range: NSMakeRange(range.location + range.length - (range.length != 0),
                                                             extraLen + (range.length != 0))];
    if (delimiterRange.location == NSNotFound) {
        // extend to end of string
        range.length += extraLen;
    } else {
        NSCAssert(delimiterRange.length == 1, @"Internal error: delimiter matched range is not of length 1");
        // end just before the delimiter
        range.length += delimiterRange.location - range.location - range.length;

        // grow URL past closing paren if we've seen an open paren
        if ([s characterAtIndex: delimiterRange.location] == ')' &&
            [s rangeOfString: @"(" options: NSLiteralSearch range: range].location != NSNotFound) {
            if (delimiterRange.location + 1 < [s length]) {
                // take in the ) and the character after it, then search again
                range.length += 2;
                ICLog(@"expanding past ), now |%@|", [s substringWithRange: range]);
                goto expandBack;
            }
            // The ) is the last character of the string: take it in and stop.
            // Growing by 2 here would run the range one past the end of 's'.
            range.length += 1;
            ICLog(@"expanding past ), now |%@|", [s substringWithRange: range]);
        }
    }

    ICCF_CheckRange(range);

    ICCF_ParseURL([s substringWithRange: range], &range);

    return range;
}
Note: See TracBrowser for help on using the repository browser.