#!/bin/sh
#
# $Id: tryaffix.X,v 1.13 2005/04/27 01:18:35 geoff Exp $
#
# Copyright 1987-1989, 1992, 1993, 1999, 2001, 2005, Geoff Kuenning,
# Claremont, CA
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. All modifications to the source code must be clearly marked as
#    such. Binary redistributions based on modified source code
#    must be clearly marked as modified versions in the documentation
#    and/or other materials provided with the distribution.
# 4. The code that causes the 'ispell -v' command to display a prominent
#    link to the official ispell Web site may not be removed.
# 5. The name of Geoff Kuenning may not be used to endorse or promote
#    products derived from this software without specific prior
#    written permission.
#
# THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
# MODIFIED VERSION (see license clause 3).  Changes relative to revision 1.13:
#   - The trial roots produced by sed are sorted before being handed to
#     comm, which requires both of its inputs to be sorted.
#   - The argument count is checked before the dictionary name is
#     shifted off, so running with no arguments prints the usage message.
#   - affix+addition arguments are split with shell parameter expansion
#     instead of expr.
#   - The example in the comments below now includes the dict-file argument.
#
# Try out affixes to see if they produce valid roots
#
# Usage:
#
#       tryaffix [-p | -s] [-c] dict-file affix[+addition] ...
#
# The -p and -s flags specify whether prefixes or suffixes
# are being tried; if neither is specified, suffixes are assumed.
#
# If the -c flag is given, statistics on the various affixes are given:
# a count of words it potentially applies to, and an estimate of the
# number of dictionary bytes the flag would save.  The estimate will
# be high if the flag generates words that are currently generated
# by other flags.
#
# The dictionary file, dict-file, must already be expanded and sorted,
# and things will work best if uppercase has been folded to lower with
# 'tr'.  Because the file is compared with comm, it must be sorted in
# the collating order that sort and comm use in the current locale.
#
# The "affixes" are things to be stripped from the dictionary
# file to produce trial roots:  for English, "con" and "ing"
# are examples.  The "additions" are letters that would have
# been stripped off the root before adding the affix.  For
# example, the affix "ing" strips "e" for words ending in "e"
# (as in "like --> liking") so we might run:
#
#       tryaffix dict-file ing ing+e
#
# to cover both cases.
#
# Each affix is inserted verbatim into the pattern half of a sed "s"
# command and each addition into its replacement half, so sed regular
# expression syntax applies and a "/" in either one breaks the command.
#
# $Log: tryaffix.X,v $
# Revision 1.13  2005/04/27 01:18:35  geoff
# Fix a typo in a comment.  Work around idiotic POSIX incompatibilities
# in sort.  Add secure temp-file handling.
#
# Revision 1.12  2005/04/14 14:40:13  geoff
# Use /tmp as the default temp directory
#
# Revision 1.11  2005/04/14 14:38:23  geoff
# Update license.  Protect against modernized (i.e., incompatible) and
# internationalized sort commands.
#
# Revision 1.10  2001/09/06 00:30:29  geoff
# Changes from Eli Zaretskii to support DJGPP compilation.
#
# Revision 1.9  2001/07/25 21:51:47  geoff
# Minor license update.
#
# Revision 1.8  2001/07/23 20:24:04  geoff
# Update the copyright and the license.
#
# Revision 1.7  1999/01/07 01:57:48  geoff
# Update the copyright.
#
# Revision 1.6  1994/01/25 07:12:18  geoff
# Get rid of all old RCS log lines in preparation for the 3.1 release.
#
#
USAGE='tryaffix [-p | -s] [-c] dict-file affix[+addition] ...'

# Print the usage message on stderr and exit with status 1.
usage()
{
    echo "$USAGE" 1>&2
    exit 1
}

# Defaults: list the roots (no statistics) and treat affixes as suffixes.
# $pre and $suf are the regex anchors placed around the affix in the sed
# pattern; exactly one of them is non-empty.
counts=no
pre=
suf='$'

# Consume leading option flags.  An empty or missing $1 falls through to
# the "*" arm, so this loop also terminates when the arguments run out.
while :
do
    case "$1" in
        -p)
            pre='^'
            suf=
            ;;
        -s)
            pre=
            suf='$'
            ;;
        -c)
            counts=yes
            ;;
        -*)
            usage
            ;;
        *)
            break
            ;;
    esac
    shift
done

# Check before shifting: in some shells a shift with nothing left to
# shift aborts the script, which would suppress the usage message.
if [ $# -eq 0 ]
then
    usage
fi

dict="$1"
shift
if [ ! -r "$dict" ]
then
    echo "Can't read $dict" 1>&2
    usage
elif [ $# -eq 0 ]
then
    # A dictionary was given but no affixes to try.
    usage
fi

for spec in "$@"
do
    # Build a sed command that prints only the lines on which the
    # anchored affix was found, with the affix replaced by the addition
    # (or by nothing when no addition was given).
    case "$spec" in
        *+*)
            # Split at the LAST "+": the affix is everything before it,
            # the addition everything after it.
            affix=${spec%+*}
            addition=${spec##*+}
            sedscript="s/$pre$affix$suf/$addition/p"
            ;;
        *)
            sedscript="s/$pre$spec$suf//p"
            ;;
    esac

    # The trial roots that are themselves dictionary words are the lines
    # common to sed's output and the dictionary.  Stripping a suffix does
    # not preserve sort order (sorted "abs", "as" yields "ab", "a"), and
    # comm needs sorted input, so the trial roots are sorted first.
    if [ "$counts" = no ]
    then
        echo ===== "$spec" =====
        sed -n -e "$sedscript" "$dict" | sort | comm -12 - "$dict"
    else
        stats=$(sed -n -e "$sedscript" "$dict" | sort \
            | comm -12 - "$dict" | wc -lc)
        # $stats is deliberately unquoted so that word splitting drops
        # the column padding wc puts around its numbers.
        echo "$spec" $stats
    fi
done