strcomp.h

documentation
 #charset "us-ascii"
 
 /*
  *   Copyright 2000, 2006 Michael J. Roberts.
  *   
  *   This file is part of TADS 3.
  *   
  *   This header defines the StringComparator intrinsic class.  
  */
 
 /* include our base class definition */
 #include "systype.h"
 
 /*
  *   StringComparator intrinsic class.  This class provides support for
  *   dictionaries based on complex string matches, including truncation
  *   (matching an input word to a dictionary word when the input word is at
  *   least some minimum length, and matches the dictionary word up to the
  *   full length of the input word, but the input word is shorter than the
  *   dictionary word); case folding (matching upper-case letters to
  *   lower-case letters and vice versa); and character equivalences (for
  *   matching accented characters to non-accented equivalents, or matching
  *   special characters to multi-character equivalents, such as matching a
  *   German "ess-zet" ("sharp-s") ligature to a pair of lower-case "s"
  *   characters in input).  
  */
 intrinsic class StringComparator 'string-comparator/030000': Object
 {
     /*
      *   Constructor:
      *   
      *   new StringComparator(truncLen, caseSensitive, mappings)
      *   
      *   truncLen = the minimum truncation length.  An input string that
      *   matches a dictionary string up to the full length of the input
      *   string, and is shorter than the dictionary string but at least this
      *   truncation length, will match the dictionary string.  If truncLen is
      *   zero or nil, no truncated matches are allowed.
      *   
      *   caseSensitive = true if matches are to be sensitive to case, nil if
      *   not.  If this parameter is nil, then an upper-case letter in an
      *   input string will match a lower-case letter in a dictionary string,
      *   and vice versa.  If this parameter is true, each character must
      *   match exactly.
      *   
      *   mappings = a list of equivalent character mappings.  Each mapping
      *   in the list is a sublist in this format:
      *   
      *.     ['dictChar', 'inputString', ucFlags, lcFlags]
      *   
      *   'dictChar' is a one-character string giving the character to be
      *   mapped in dictionary strings.  'inputString' is a string of one or
      *   more characters that is to be considered equivalent to the
      *   dictionary character when the inputString appears in an input
      *   string.  ucFlags and lcFlags are integer values giving the flag
      *   values to bitwise-OR into the match result when this mapping is
      *   used to match an upper-case or lower-case input string,
      *   respectively.
      *   
      *   For example, a mapping to allow the German ess-zet character (whose
      *   Unicode value is 0x00DF) to match "ss" sequences in input strings,
      *   with no result flag additions, would look like this:
      *   
      *.    ['\x00DF', 'ss', 0, 0]
      *   
      *   Only one mapping is allowed for each dictionary character.  If more
      *   than one mapping is given for a single dictionary character, only
      *   the last one in the list is actually used.
      *   
      *   Flag values 0x0001 through 0x0080 are reserved for use by
      *   StringComparator itself.  Callers are free to use any flag values
      *   0x0100 and above.  Because the system flags are combined into the
      *   match result with bitwise OR, a caller-defined flag value 'f' must
      *   not overlap the reserved bits: callers should not define any flag
      *   value for which (f & 0xFF) != 0.  
      */
 
     /*
      *   Calculate a hash value.  'str' is the string to hash.  This returns
      *   an integer giving the hash value for the given string. 
      */
     calcHash(str);
 
     /*
      *   Match two values.  The first value is the input string, and the
      *   second is the dictionary string.  Each character in the dictionary
      *   string can match the corresponding input string character exactly
      *   (with or without case sensitivity, as specified in our
      *   constructor), or can match the equivalence mapping sequence for the
      *   dictionary character.
      *   
      *   The return value is zero if the values do not match.  If the values
      *   do match, the return value is a non-zero integer, which will be a
      *   bitwise OR combination of all of the flag values applicable to the
      *   match.  This is a combination of the pre-defined flag values
      *   (StrCompMatch, StrCompCaseFold and StrCompTrunc, defined below
      *   this class) and any flag values from equivalence mappings.  The
      *   flag values from ALL equivalence mappings that were actually used
      *   to make the match are included.  StrCompMatch is set for every
      *   match, so a matching result is never zero even when no other flag
      *   applies.  
      */
     matchValues(inputStr, dictStr);
 }
 
 /*
  *   Pre-defined matchValues result flags.  These are set when applicable in
  *   the return value of matchValues().
  *   
  *   This class reserves flag values 0x0001 through 0x0080.  Callers should
  *   not use any flag values with any of these bits set.  Even though we
  *   don't define values for all of these flags currently, the ones we don't
  *   use are reserved for possible use in future versions; to ensure
  *   compatibility with future versions, callers should not use any of the
  *   reserved flags for their own purposes.  
  */
 
 /* 
  *   Match - this flag is set in the return code for all matching strings.
  *   Its purpose is to ensure that the return value from matchValues() is
  *   non-zero for every match, even when no other flag values are
  *   applicable, so that a match can always be told apart from the zero
  *   "no match" result.
  */
 #define StrCompMatch     0x0001
 
 /* 
  *   Case folding - this flag is set when the input string and dictionary
  *   string match, but one or more characters differ in case (in other
  *   words, an upper-case letter in the input string matched a lower-case
  *   letter in the dictionary string, or vice versa).  
  */
 #define StrCompCaseFold  0x0002
 
 /* 
  *   Truncation - this flag is set when the input string is shorter than the
  *   dictionary string (but matches the dictionary string completely up to
  *   the input string's full length, and is at least as long as the
  *   truncation length specified in the constructor).  This flag can only
  *   be returned when truncation is allowed (as indicated by a non-zero
  *   truncation length in the constructor), because truncated strings will
  *   never match at all when truncation isn't allowed.  
  */
 #define StrCompTrunc     0x0004
 
TADS 3 Library Manual
Generated on 9/8/2006 from TADS version 3.0.11