jpayne@68: # word.tcl --
jpayne@68: #
jpayne@68: # This file defines various procedures for computing word boundaries in
jpayne@68: # strings. This file is primarily needed so Tk text and entry widgets behave
jpayne@68: # properly for different platforms.
jpayne@68: #
jpayne@68: # Copyright (c) 1996 Sun Microsystems, Inc.
jpayne@68: # Copyright (c) 1998 Scritpics Corporation.
jpayne@68: #
jpayne@68: # See the file "license.terms" for information on usage and redistribution
jpayne@68: # of this file, and for a DISCLAIMER OF ALL WARRANTIES.
jpayne@68: 
jpayne@68: # The following variables are used to determine which characters are
jpayne@68: # interpreted as white space.
jpayne@68: 
jpayne@68: if {$::tcl_platform(platform) eq "windows"} {
jpayne@68:     # Windows style - any but a unicode space char
jpayne@68:     if {![info exists ::tcl_wordchars]} {
jpayne@68: 	set ::tcl_wordchars {\S}
jpayne@68:     }
jpayne@68:     if {![info exists ::tcl_nonwordchars]} {
jpayne@68: 	set ::tcl_nonwordchars {\s}
jpayne@68:     }
jpayne@68: } else {
jpayne@68:     # Motif style - any unicode word char (number, letter, or underscore)
jpayne@68:     if {![info exists ::tcl_wordchars]} {
jpayne@68: 	set ::tcl_wordchars {\w}
jpayne@68:     }
jpayne@68:     if {![info exists ::tcl_nonwordchars]} {
jpayne@68: 	set ::tcl_nonwordchars {\W}
jpayne@68:     }
jpayne@68: }
jpayne@68: 
jpayne@68: # Arrange for caches of the real matcher REs to be kept, which enables the REs
jpayne@68: # themselves to be cached for greater performance (and somewhat greater
jpayne@68: # clarity too).
jpayne@68: 
jpayne@68: namespace eval ::tcl {
jpayne@68:     variable WordBreakRE
jpayne@68:     array set WordBreakRE {}
jpayne@68: 
jpayne@68:     proc UpdateWordBreakREs args {
jpayne@68: 	# Ignores the arguments
jpayne@68: 	global tcl_wordchars tcl_nonwordchars
jpayne@68: 	variable WordBreakRE
jpayne@68: 
jpayne@68: 	# To keep the RE strings short...
jpayne@68: 	set letter $tcl_wordchars
jpayne@68: 	set space $tcl_nonwordchars
jpayne@68: 
jpayne@68: 	set WordBreakRE(after)		"$letter$space|$space$letter"
jpayne@68: 	set WordBreakRE(before)		"^.*($letter$space|$space$letter)"
jpayne@68: 	set WordBreakRE(end)		"$space*$letter+$space"
jpayne@68: 	set WordBreakRE(next)		"$letter*$space+$letter"
jpayne@68: 	set WordBreakRE(previous)	"$space*($letter+)$space*\$"
jpayne@68:     }
jpayne@68: 
jpayne@68:     # Initialize the cache
jpayne@68:     UpdateWordBreakREs
jpayne@68:     trace add variable ::tcl_wordchars write ::tcl::UpdateWordBreakREs
jpayne@68:     trace add variable ::tcl_nonwordchars write ::tcl::UpdateWordBreakREs
jpayne@68: }
jpayne@68: 
jpayne@68: # tcl_wordBreakAfter --
jpayne@68: #
jpayne@68: # This procedure returns the index of the first word boundary after the
jpayne@68: # starting point in the given string, or -1 if there are no more boundaries in
jpayne@68: # the given string. The index returned refers to the first character of the
jpayne@68: # pair that comprises a boundary.
jpayne@68: #
jpayne@68: # Arguments:
jpayne@68: # str -		String to search.
jpayne@68: # start -	Index into string specifying starting point.
jpayne@68: 
jpayne@68: proc tcl_wordBreakAfter {str start} {
jpayne@68:     variable ::tcl::WordBreakRE
jpayne@68:     set result {-1 -1}
jpayne@68:     regexp -indices -start $start -- $WordBreakRE(after) $str result
jpayne@68:     return [lindex $result 1]
jpayne@68: }
jpayne@68: 
jpayne@68: # tcl_wordBreakBefore --
jpayne@68: #
jpayne@68: # This procedure returns the index of the first word boundary before the
jpayne@68: # starting point in the given string, or -1 if there are no more boundaries in
jpayne@68: # the given string. The index returned refers to the second character of the
jpayne@68: # pair that comprises a boundary.
jpayne@68: #
jpayne@68: # Arguments:
jpayne@68: # str -		String to search.
jpayne@68: # start -	Index into string specifying starting point.
jpayne@68: 
jpayne@68: proc tcl_wordBreakBefore {str start} {
jpayne@68:     variable ::tcl::WordBreakRE
jpayne@68:     set result {-1 -1}
jpayne@68:     regexp -indices -- $WordBreakRE(before) [string range $str 0 $start] result
jpayne@68:     return [lindex $result 1]
jpayne@68: }
jpayne@68: 
jpayne@68: # tcl_endOfWord --
jpayne@68: #
jpayne@68: # This procedure returns the index of the first end-of-word location after a
jpayne@68: # starting index in the given string. An end-of-word location is defined to be
jpayne@68: # the first whitespace character following the first non-whitespace character
jpayne@68: # after the starting point. Returns -1 if there are no more words after the
jpayne@68: # starting point.
jpayne@68: #
jpayne@68: # Arguments:
jpayne@68: # str -		String to search.
jpayne@68: # start -	Index into string specifying starting point.
jpayne@68: 
jpayne@68: proc tcl_endOfWord {str start} {
jpayne@68:     variable ::tcl::WordBreakRE
jpayne@68:     set result {-1 -1}
jpayne@68:     regexp -indices -start $start -- $WordBreakRE(end) $str result
jpayne@68:     return [lindex $result 1]
jpayne@68: }
jpayne@68: 
jpayne@68: # tcl_startOfNextWord --
jpayne@68: #
jpayne@68: # This procedure returns the index of the first start-of-word location after a
jpayne@68: # starting index in the given string. A start-of-word location is defined to
jpayne@68: # be a non-whitespace character following a whitespace character. Returns -1
jpayne@68: # if there are no more start-of-word locations after the starting point.
jpayne@68: #
jpayne@68: # Arguments:
jpayne@68: # str -		String to search.
jpayne@68: # start -	Index into string specifying starting point.
jpayne@68: 
jpayne@68: proc tcl_startOfNextWord {str start} {
jpayne@68:     variable ::tcl::WordBreakRE
jpayne@68:     set result {-1 -1}
jpayne@68:     regexp -indices -start $start -- $WordBreakRE(next) $str result
jpayne@68:     return [lindex $result 1]
jpayne@68: }
jpayne@68: 
jpayne@68: # tcl_startOfPreviousWord --
jpayne@68: #
jpayne@68: # This procedure returns the index of the first start-of-word location before
jpayne@68: # a starting index in the given string.
jpayne@68: #
jpayne@68: # Arguments:
jpayne@68: # str -		String to search.
jpayne@68: # start -	Index into string specifying starting point.
jpayne@68: 
jpayne@68: proc tcl_startOfPreviousWord {str start} {
jpayne@68:     variable ::tcl::WordBreakRE
jpayne@68:     set word {-1 -1}
jpayne@68:     if {$start > 0} {
jpayne@68: 	regexp -indices -- $WordBreakRE(previous) [string range $str 0 $start-1] \
jpayne@68: 		result word
jpayne@68:     }
jpayne@68:     return [lindex $word 0]
jpayne@68: }