Ignore:
Timestamp:
02/11/08 06:35:36 (16 years ago)
Author:
Nicholas Riley
Message:

ICeCoffEEParser.m: Works much better, though multiline URLs are still not supported.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/ICeCoffEE/ICeCoffEE/ICeCoffEEParser.m

    r375 r383  
    2323        [set formUnionWithCharacterSet: [NSCharacterSet punctuationCharacterSet]];
    2424        // XXX obsoleted by RFC 3986 now... use §2.1, 2.2, 2.3
    25         [set removeCharactersInString: @";/?:@&=+$,-_.!~*'()%#"]; // RFC 2396 §2.2, 2.3, 2.4, plus % and # from "delims" set
     25        [set removeCharactersInString: @";/?:@&=+$,-_.!~*'(){}[]%#"]; // RFC 2396 §2.2, 2.3, 2.4, plus % and # from "delims" set and {}, []
    2626       
    2727        tmpSet = [[set mutableCopy] autorelease];
    28         [tmpSet formUnionWithCharacterSet: [NSCharacterSet characterSetWithCharactersInString: @"><("]];
     28        [tmpSet formUnionWithCharacterSet: [NSCharacterSet characterSetWithCharactersInString: @"><(["]];
    2929        urlLeftDelimiters = [tmpSet copy]; // make immutable again - for efficiency
    3030       
    3131        tmpSet = [[set mutableCopy] autorelease];
    32         [tmpSet formUnionWithCharacterSet: [NSCharacterSet characterSetWithCharactersInString: @"><)"]];
     32        [tmpSet formUnionWithCharacterSet: [NSCharacterSet characterSetWithCharactersInString: @"><)]"]];
    3333        urlRightDelimiters = [tmpSet copy]; // make immutable again - for efficiency
    3434    }
     
    114114}
    115115
     // Returns YES if `character` occurs at least once in `s` within `range`, NO otherwise.
     //   s         - string to search
     //   character - single UTF-16 code unit to look for
     //   range     - portion of `s` to search
     // Works by wrapping `character` in a one-character NSString, building a character set
     // from that string, and doing a literal search of `range` for any member of the set.
     // NOTE(review): `range` is handed to NSString unchanged; callers presumably guarantee
     // that it lies within `s` - confirm.
     116static BOOL ICCF_StringIncludesCharacter(NSString *s, unichar character, NSRange range) {
     117    return ([s rangeOfCharacterFromSet: [NSCharacterSet characterSetWithCharactersInString:
     118                                         [NSString stringWithCharacters: &character length: 1]]
     119                               options: NSLiteralSearch range: range].location != NSNotFound);
     120}
     121
     // Heuristic: returns YES if `range` of `s` contains at least one of the characters
     // ':', '/', '.' or '@'; returns NO if none of them appears.
     //   s     - string to inspect
     //   range - portion of `s` to inspect
     // Only the presence of one of these characters is tested; no URI syntax is parsed.
     // NOTE(review): `range` is handed to NSString unchanged; callers presumably guarantee
     // that it lies within `s` - confirm.
     122static BOOL ICCF_IsLikelyURI(NSString *s, NSRange range) {
     123    return ([s rangeOfCharacterFromSet: [NSCharacterSet characterSetWithCharactersInString: @":/.@"]
     124                               options: NSLiteralSearch range: range].location != NSNotFound);
     125}
     126
     // Heuristic: returns YES if every character of `s` within `range` is a hexadecimal
     // digit (0-9, A-F, a-f) or ':'; returns NO as soon as any other character is present.
     //   s     - string to inspect
     //   range - portion of `s` to inspect
     // Implemented by searching `range` for a member of the inverted set; finding none means
     // all characters are allowed. This is a character-class check only: the grouping and
     // colon placement of an IPv6 address are not validated, so e.g. a plain hex word passes.
     // NOTE(review): a range with no characters has nothing to reject, so it presumably
     // yields YES as well - confirm that callers never rely on NO for an empty range.
     127static BOOL ICCF_IsLikelyIPv6Address(NSString *s, NSRange range) {
     128    return ([s rangeOfCharacterFromSet:
     129             [[NSCharacterSet characterSetWithCharactersInString: @"ABCDEFabcdef0123456789:"] invertedSet]
     130                               options: NSLiteralSearch range: range].location == NSNotFound);
     131}
     132
    116133NSRange ICCF_URLEnclosingRange(NSString *s, NSRange range) {
    117134    NSCharacterSet *urlLeftDelimiters = nil, *urlRightDelimiters = nil;
     
    123140    ICCF_Delimiters(&urlLeftDelimiters, &urlRightDelimiters);
    124141   
     142    // right delimiter selected?  Yes, this can break with ...)URL(....  Oh well.
     143    if (range.location > 0 && [urlRightDelimiters characterIsMember: [s characterAtIndex: range.location]]) {
     144        --range.location;
     145        ++range.length;
     146        ICLog(@"expanding past initial %c, now |%@|", [s characterAtIndex: range.location + 1],
     147              [s substringWithRange: range]);
     148    }
     149       
    125150expandFront:
    126151    // XXX instead of 0, make this stop at the max URL length to prevent protracted searches
    127    
    128     // XXX here's how this is supposed to work:
    129     // (http://web.sabi.net/) and <http://web.sabi.net/> should work if they are the entire document, even if clicking at the end/beginning of the document, not barfing with "no URL" (correct, as now) or selecting the final >, or ) (what would happen if we remove this "add 1" accommodation).  But how about "http://web.sabi.net/(foo)"?  That should work too, as long as it's not preceded by a (.
    130     // Should probably backport to ICeCoffEETerminal, now I finally understand the method to this madness.
     152    // XXX backport to ICeCoffEETerminal
    131153    // add 1 to range to trap delimiters that are on the edge of the selection (i.e., <...)
    132154    delimiterRange = [s rangeOfCharacterFromSet: urlLeftDelimiters
     
    141163        range.length += range.location - delimiterRange.location - 1;
    142164        range.location = delimiterRange.location + 1;
    143        
    144         // in url/(parens)stuff, handle clicking inside or after (parens).
    145         /*if ([s characterAtIndex: delimiterRange.location] == '(' &&
    146             range.location > 2) { // prevent wrapping, ordinarily not necessary
    147             if ([s rangeOfString: @")" options: NSLiteralSearch range: range].location != NSNotFound ||
    148                 [s rangeOfCharacterFromSet: [NSCharacterSet characterSetWithCharactersInString: @"/."]
    149                                    options: NSLiteralSearch range: range].location == NSNotFound) {
    150                 range.location -= 2;
    151                 range.length += 2;
    152                 ICLog(@"expanding past (, now |%@|", [s substringWithRange: range]);
    153                 goto expandFront;
    154             }
    155         } */       
    156     }
    157    
    158     ICCF_CheckRange(range);
     165    }
    159166   
    160167expandBack:
     
    169176        // extend to end of string
    170177        range.length += extraLen;
     178        extraLen = 0;
    171179    } else {
    172180        NSCAssert(delimiterRange.length == 1, @"Internal error: delimiter matched range is not of length 1");
    173181        range.length += delimiterRange.location - range.location - range.length;
    174        
    175         // grow URL past closing paren/brace if we've seen an open paren/brace
    176         unichar closing = [s characterAtIndex: delimiterRange.location];
    177         NSString *opening;
    178         if (closing == ')') opening = @"(";
    179         else if (closing == '}') opening = @"{";
    180         else goto checkRange;
    181         if ([s rangeOfString: opening options: NSLiteralSearch range: range].location == NSNotFound)
    182             goto checkRange;
    183        
    184         if (extraLen == 0) {
     182        extraLen = [s length] - NSMaxRange(range);
     183
     184        // grow URL past closing paren/brace/bracket if we've seen an open paren/brace/bracket
     185        unichar opening, closing = [s characterAtIndex: delimiterRange.location];
     186        if (closing == ')') opening = '(';
     187        else if (closing == '}') opening = '{';
     188        else if (closing == ']') opening = '[';
     189        else goto expandBoth;
     190        if (!ICCF_StringIncludesCharacter(s, opening, range))
     191            goto expandBoth;
     192       
     193        if (extraLen == 1) {
    185194            range.length += 1;
     195            --extraLen;
    186196            ICLog(@"expanding past %c, now |%@|", closing, [s substringWithRange: range]);
    187197        } else {
     
    192202    }
    193203   
     204expandBoth:
     205    if (range.location <= 1)
     206        goto checkRange; // nowhere to expand
     207    unichar opening = [s characterAtIndex: range.location - 1], closing;
     208    if (opening == '(') closing = ')';
     209    else if (opening == '{') closing = '}';
     210    else if (opening == '[') closing = ']';
     211    else goto checkRange;
     212
     213    ICLog(@"extraLen = %d", extraLen);
     214    // check if we're inside a partial delimited URL: not foolproof, but handles (foo), {UUID} and [IPv6]
     215    if (delimiterRange.location != NSNotFound && [s characterAtIndex: delimiterRange.location] == closing &&
     216        ((opening == '[' && ICCF_IsLikelyIPv6Address(s, range)) || !ICCF_IsLikelyURI(s, range))) {
     217        ICLog(@"expanding past %c...%c, was |%@|", opening, closing, [s substringWithRange: range]);
     218        range.location -= 2;
     219        if (extraLen > 1)
     220            range.length += 4;
     221        else
     222            range.length += 2 + extraLen;
     223        ICLog(@"expanding past %c...%c, now |%@|", opening, closing, [s substringWithRange: range]);
     224        goto expandFront;
     225    }
     226
     227    if (ICCF_StringIncludesCharacter(s, closing, range) &&
     228        ((opening == '[' &&
     229          ICCF_IsLikelyIPv6Address(s, NSMakeRange(range.location,
     230                                                  [s rangeOfString: @"]"].location - range.location)))
     231         || !ICCF_IsLikelyURI(s, range))) {
     232        ICLog(@"expanding past %c, was |%@|", opening, [s substringWithRange: range]);
     233        ICLog(@"remaining: |%@|", [s substringWithRange: NSMakeRange(range.location, [s rangeOfString: [NSString stringWithCharacters: &closing length: 1]].location - range.location)]);
     234        range.location -= 2;
     235        range.length += 2;
     236        ICLog(@"expanding past %c, now |%@|", opening, [s substringWithRange: range]);
     237        goto expandFront;
     238    }
     239
    194240checkRange:
    195241    ICCF_CheckRange(range);
Note: See TracChangeset for help on using the changeset viewer.