/[libdata]/trunk/admin/include/fuzzy.php
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/admin/include/fuzzy.php

Parent Directory Parent Directory | Revision Log Revision Log


Revision 72 - (hide annotations)
Thu Mar 18 20:33:37 2004 UTC (20 years, 1 month ago) by dpavlin
File size: 4801 byte(s)
changes made in version 2.00

1 dpavlin 10 <?php
/**********************************************************
Function: newResPageHeader
Author: Paul Bramscher
Last Modified: 06.25.2003
***********************************************************
Emits the opening HTML of the "enter a new resource" page
as shown while possible title matches are being reported:
the <HTML>/<HEAD> section (title and stylesheet link), the
shared admin header include, and the opening <center> tag
with the page heading.

It lives in its own function because it must not always be
sent: when there are no fuzzy (or exact) matches the user
is redirected to formResource, and a redirect cannot happen
once this output has been written.

Takes no parameters and returns nothing; its only effect is
the output it writes and the files it includes.
**********************************************************/
function newResPageHeader() {

    // Pull the site configuration into this function's scope.
    // NOTE(review): presumably this is what defines $GLOBAL_ADMIN_CSS and
    // $GLOBAL_ADMIN_HEADER used below -- confirm against global_vars.php.
    include ("global_vars.php");

    // Document head
    echo "<HTML>\n";
    echo "<HEAD>\n";
    echo "<title>LibData: Enter a New Resource</title>\n";
    printf("<link rel=\"stylesheet\" href=\"%s\" type=\"text/css\">\n", $GLOBAL_ADMIN_CSS);
    echo "</HEAD>\n";

    // Shared admin page header, then the heading for this page
    include($GLOBAL_ADMIN_HEADER);
    echo "<center>\n";
    echo "<h3>Adding New Resource..</h3>\n";

}
32    
33    
/**********************************************************
Function: resTitlePat
Original Author: Paul Bramscher <brams006@umn.edu>
Last Modified: 03.03.2004
***********************************************************
Comments:
This function takes $string as input and builds eight
permutations of it into an SQL query against resource
titles. These permutations are meant as possible matches.
This is not an exact science, and this algorithm could
certainly be further tweaked.

Pass definitions:

pass1 = left 50% of string, remainder wildcarded
pass2 = middle 60% (20% trimmed from each end), both ends
        wildcarded
pass3 = compacted without spaces
pass4 = a/an/the articles removed (whole words only,
        case-insensitive)
pass5 = every other character wildcarded, starting at 0
pass6 = every other character wildcarded, starting at 1
pass7 = replacing 'ies' with 'y'
pass8 = replacing 'y' with 'ies'

Only titles whose LENGTH() lies between half and twice the
length of the cleaned input are considered.

Parameters:
  $string  The candidate resource title as submitted.
           Single quotes and backslashes are removed before
           it is placed into the query.

Returns:
  The row count reported by xx_num_rows() for the query.
  When that count is greater than zero the page header and
  a table listing the matching resources are also printed.
**********************************************************/
function resTitlePat($string) {

    // Save an un-escaped copy for display purposes
    $title = stripslashes($string);

    // Clean up the string -- single quotes and backslashes must not reach
    // the quoted SQL literals. stripslashes() alone only un-escapes, so an
    // escaped backslash would still leave a live one behind; strip those too.
    $string = str_replace("'", "", $string);
    $string = stripslashes($string);
    $string = str_replace("\\", "", $string);

    // String sizes (cast to int so they are embedded as plain integers)
    $length = strlen($string);
    $max_length = (int) ceil($length * 2);

    // Permute #1: Match left 50%
    $left50_pos = (int) floor($length * .5);
    $pass1 = (string) substr($string, 0, $left50_pos);

    // Permute #2: Cut-out and float match the middle substring
    // (20% trimmed from each end)
    $left20_pos = (int) floor($length * .2);
    $pass2 = (string) substr($string, $left20_pos, $length - ($left20_pos * 2));

    // Permute #3: Compacting, remove all spaces
    $pass3 = str_replace(" ", "", $string);

    // Permute #4: Pulling out a/an/the articles. Match whole words only so
    // that e.g. the trailing "a " of "data " is left alone, and ignore case
    // so a leading "The " is handled as well.
    $pass4 = preg_replace('/\b(?:the|an|a)\s+/i', '', $string);
    if ($pass4 === null) {
        // preg failure: fall back to the unmodified string
        $pass4 = $string;
    }

    // Permute #5 and #6: Wildcard every other position, starting with
    // position 0 and position 1 respectively ("_" matches one character
    // in SQL LIKE)
    $pass5 = $string;
    $pass6 = $string;
    for ($x = 0; $x < $length; $x++) {
        if ($x % 2 == 0) {
            $pass5[$x] = "_";
        } else {
            $pass6[$x] = "_";
        }
    }

    // Permute #7: Replacing 'ies' with 'y'
    $pass7 = str_replace("ies", "y", $string);

    // Permute #8: Replacing 'y' with 'ies'
    $pass8 = str_replace("y", "ies", $string);

    // Assemble the SQL: pass1 is anchored at the start of the title, the
    // remaining passes may float anywhere within it.
    $floating = array($pass2, $pass3, $pass4, $pass5, $pass6, $pass7, $pass8);
    $sql = "SELECT resource_id, title FROM resource WHERE (title LIKE '"
        . $pass1
        . "%' OR title LIKE '%"
        . implode("%' OR title LIKE '%", $floating)
        . "%') AND (LENGTH(title) >= "
        . $left50_pos
        . " AND LENGTH(title) <= "
        . $max_length
        . ")";

    // Debugging
    // printf("sql was: %s", $sql);

    $rs = xx_tryquery($sql);
    $hits = xx_num_rows($rs);
    if ($hits > 0) {

        // Titles come from user input (directly, or via the database), so
        // they are HTML-escaped before being printed. ENT_SUBSTITUTE is used
        // where available so invalid byte sequences do not blank the string.
        $html_flags = defined('ENT_SUBSTITUTE') ? (ENT_QUOTES | ENT_SUBSTITUTE) : ENT_QUOTES;

        // Draw page header
        newResPageHeader();

        // Table
        printf("<table width = \"60%%\" border = \"3\" cellpadding =\"4\" class=\"backLight\">\n");
        printf("<tr><td>\n");
        printf("<b>Messages:</b><br>\n");

        printf("Possible duplicate resource titles found with title '%s'. Are you sure you want to continue?<br><br>\n", htmlspecialchars($title, $html_flags));
        while ($row = xx_fetch_array($rs, xx_ASSOC)) {
            $title = $row["title"];
            $resource_id = $row["resource_id"];

            printf("<b>Resource ID:</b> %d ", $resource_id);
            printf("<b>Title:</b> %s<BR>\n", htmlspecialchars($title, $html_flags));

        } // this result set

        // Close things
        printf("<br><br>\n");
        printf("</td></tr></table>\n");
        printf("</center>\n");

    }

    return $hits;

} // end of php function
182 dpavlin 72 ?>

  ViewVC Help
Powered by ViewVC 1.1.26