This repository has been archived by the owner on Mar 5, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Autokeyword.php
237 lines (200 loc) · 10.9 KB
/
Autokeyword.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
<?php
/******************************************************************
Projectname: Automatic Keyword Generator
Version: 0.3
Author: Ver Pangonilo <smp_AT_itsp.info>
Last modified: 26 July 2006
Copyright (C): 2006 Ver Pangonilo, All Rights Reserved
* GNU General Public License (Version 2, June 1991)
*
* This program is free software; you can redistribute
* it and/or modify it under the terms of the GNU
* General Public License as published by the Free
* Software Foundation; either version 2 of the License,
* or (at your option) any later version.
*
* This program is distributed in the hope that it will
* be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A
* PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
Description:
This class automatically generates META keywords for your
web pages based on the contents of your articles. This
eliminates the tedious process of working out which keywords
best suit your article. Keyword generation is based on the
number of times a word or phrase occurs within an article.
This automatic keyword generator will create single words,
two-word phrases and three-word phrases. Single words will be
filtered against a list of common words.
Change Log:
===========
0.2 Ver Pangonilo - 22 July 2005
================================
Added user configurable parameters and commented codes
for easier end user understanding.
0.3 Vasilich (vasilich_AT_grafin.kiev.ua) - 26 July 2006
=========================================================
Added an encoding parameter to work with UTF texts, and a
minimum number of occurrences for each word/phrase.
******************************************************************/
/**
 * Generates META keywords (single words, two-word phrases and three-word
 * phrases) from a piece of text, ranked by how often each one occurs.
 *
 * Usage:
 *   $generator = new Gears_Autokeyword(array('content' => $html));
 *   $keywords  = $generator->get_keywords();
 *
 * Recognised $params keys (all optional except 'content'):
 *   content                   text/HTML to analyse
 *   min_word_length           minimum length of a single-word keyword
 *   min_word_occur            minimum occurrences of a single-word keyword
 *   min_2words_length         minimum length of each word in a 2-word phrase
 *   min_2words_phrase_length  minimum combined letter count of a 2-word phrase
 *   min_2words_phrase_occur   minimum occurrences of a 2-word phrase
 *   min_3words_length         minimum length of each word in a 3-word phrase
 *   min_3words_phrase_length  minimum combined letter count of a 3-word phrase
 *   min_3words_phrase_occur   minimum occurrences of a 3-word phrase
 */
class Gears_Autokeyword {

    /**
     * Words that are never emitted as single-word keywords.
     * Edit this list to suit your needs. Entries must be lower case because
     * the analysed content is lower-cased before comparison.
     */
    protected static $commonWords = array(
        "able", "about", "above", "act", "add", "afraid", "after", "again", "against", "age",
        "ago", "agree", "all", "almost", "alone", "along", "already", "also", "although", "always",
        "am", "amount", "an", "and", "anger", "angry", "animal", "another", "answer", "any",
        "appear", "apple", "are", "arrive", "arm", "arms", "around", "as", "ask", "at",
        "attempt", "aunt", "away", "back", "bad", "bag", "bay", "be", "became", "because",
        "become", "been", "before", "began", "begin", "behind", "being", "bell", "belong", "below",
        "beside", "best", "better", "between", "beyond", "big", "body", "bone", "born", "borrow",
        "both", "bottom", "box", "boy", "break", "bring", "brought", "bug", "built", "busy",
        "but", "buy", "by", "call", "came", "can", "cause", "choose", "close", "consider",
        "come", "considerable", "contain", "continue", "could", "cry", "cut", "dare", "dark", "deal",
        "dear", "decide", "deep", "did", "die", "do", "does", "dog", "done", "doubt",
        "down", "during", "each", "ear", "early", "eat", "effort", "either", "else", "end",
        "enjoy", "enough", "enter", "even", "ever", "every", "except", "expect", "explain", "fail",
        "fall", "far", "fat", "favor", "fear", "feel", "feet", "fell", "felt", "few",
        "fill", "find", "fit", "fly", "follow", "for", "forever", "forget", "from", "front",
        "gave", "get", "gives", "goes", "gone", "good", "got", "gray", "great", "green",
        "grew", "grow", "guess", "had", "half", "hang", "happen", "has", "hat", "have",
        "he", "hear", "heard", "held", "hello", "help", "her", "here", "hers", "high",
        "hill", "him", "his", "hit", "hold", "hot", "how", "however", "i", "if",
        "ill", "in", "indeed", "instead", "into", "iron", "is", "it", "its", "just",
        "keep", "kept", "knew", "know", "known", "late", "least", "led", "left", "lend",
        "less", "let", "like", "likely", "likr", "lone", "long", "look", "lot", "make",
        "many", "may", "me", "mean", "met", "might", "mile", "mine", "moon", "more",
        "most", "move", "much", "must", "my", "near", "nearly", "necessary", "neither", "never",
        "next", "no", "none", "nor", "not", "note", "nothing", "now", "number", "of",
        "off", "often", "oh", "on", "once", "only", "or", "other", "ought", "our",
        "out", "please", "prepare", "probable", "pull", "pure", "push", "put", "raise", "ran",
        "rather", "reach", "realize", "reply", "require", "rest", "run", "said", "same", "sat",
        "saw", "say", "see", "seem", "seen", "self", "sell", "sent", "separate", "set",
        "shall", "she", "should", "side", "sign", "since", "so", "sold", "some", "soon",
        "sorry", "stay", "step", "stick", "still", "stood", "such", "sudden", "suppose", "take",
        "taken", "talk", "tall", "tell", "ten", "than", "thank", "that", "the", "their",
        "them", "then", "there", "therefore", "these", "they", "this", "those", "though", "through",
        "till", "to", "today", "told", "tomorrow", "too", "took", "tore", "tought", "toward",
        "tried", "tries", "trust", "try", "turn", "two", "under", "until", "up", "upon",
        "us", "use", "usual", "various", "verb", "very", "visit", "want", "was", "we",
        "well", "went", "were", "what", "when", "where", "whether", "which", "while", "white",
        "who", "whom", "whose", "why", "will", "with", "within", "without", "would", "yes",
        "yet", "you", "young", "your", "br", "img", "p", "lt", "gt", "quot", "copy"
    );

    //the normalised site contents (lower case, tags and punctuation removed)
    protected $contents;

    //character encoding handed to the mb_* functions
    protected $encoding = 'UTF-8';

    //the generated keywords (not used by this class; kept so that
    //subclasses referring to it keep working)
    protected $keywords;

    //minimum word length / occurrences for single word metakeys
    protected $wordLengthMin = 5;
    protected $wordOccuredMin = 2;

    //minimum length of each word / minimum occurrences for
    //2 word phrase metakeys
    protected $word2WordPhraseLengthMin = 3;
    protected $phrase2WordLengthMinOccur = 2;

    //minimum length of each word for 3 word phrase metakeys
    protected $word3WordPhraseLengthMin = 3;

    //minimum combined letter count (spaces excluded) of a 2 word phrase
    protected $phrase2WordLengthMin = 10;

    //minimum occurrences of a 3 word phrase
    protected $phrase3WordLengthMinOccur = 2;

    //minimum combined letter count (spaces excluded) of a 3 word phrase
    protected $phrase3WordLengthMin = 10;

    /**
     * @param array  $params   Options, see the class comment. A missing
     *                         'content' key is treated as empty content.
     * @param string $encoding Character encoding of the content. It is also
     *                         installed as the mbstring internal encoding,
     *                         as this class has always done.
     */
    public function __construct($params, $encoding = 'UTF-8') {
        $this->encoding = $encoding;
        mb_internal_encoding($encoding);

        $content = isset($params['content']) ? (string) $params['content'] : '';
        $this->contents = $this->replace_chars($content);

        // single word
        $this->wordLengthMin = $this->option($params, 'min_word_length', $this->wordLengthMin);
        $this->wordOccuredMin = $this->option($params, 'min_word_occur', $this->wordOccuredMin);

        // 2 word phrase
        $this->word2WordPhraseLengthMin = $this->option($params, 'min_2words_length', $this->word2WordPhraseLengthMin);
        $this->phrase2WordLengthMin = $this->option($params, 'min_2words_phrase_length', $this->phrase2WordLengthMin);
        $this->phrase2WordLengthMinOccur = $this->option($params, 'min_2words_phrase_occur', $this->phrase2WordLengthMinOccur);

        // 3 word phrase
        $this->word3WordPhraseLengthMin = $this->option($params, 'min_3words_length', $this->word3WordPhraseLengthMin);
        $this->phrase3WordLengthMin = $this->option($params, 'min_3words_phrase_length', $this->phrase3WordLengthMin);
        $this->phrase3WordLengthMinOccur = $this->option($params, 'min_3words_phrase_occur', $this->phrase3WordLengthMinOccur);
    }

    /**
     * Returns every keyword as one comma separated string: single words
     * first, then two-word phrases, then three-word phrases.
     *
     * @return string '' when nothing qualifies as a keyword.
     */
    public function get_keywords() {
        $keywords = $this->parse_words() . $this->parse_2words() . $this->parse_3words();

        // substr('', 0, -2) returns false before PHP 8, so handle "no
        // keywords" explicitly and always return a string.
        if ($keywords === '') {
            return '';
        }

        // Each parse_*() result ends with the ", " glue; drop the last one.
        // The glue is two ASCII bytes, so a byte-wise substr() is correct.
        return substr($keywords, 0, -2);
    }

    /**
     * Normalises raw content: lower-cases it, strips HTML tags, replaces
     * punctuation and line breaks/tabs with spaces and collapses runs of
     * spaces into a single space.
     *
     * @param string $content
     * @return string
     */
    public function replace_chars($content) {
        $content = mb_strtolower((string) $content, $this->encoding);
        $content = strip_tags($content);

        // '&' is stripped together with ';' so that an entity such as
        // "&quot;" is reduced to the bare name "quot", which the stop-word
        // list filters out.
        $punctuations = array(',', ')', '(', '.', "'", '"',
            '<', '>', ';', '!', '?', '/', '-',
            '_', '[', ']', ':', '+', '=', '#',
            '$', '&', '©',
            "\n", "\r", "\t");
        $content = str_replace($punctuations, " ", $content);

        // replace multiple gaps
        return preg_replace('/ {2,}/', " ", $content);
    }

    /**
     * Single-word keywords: words of at least $wordLengthMin characters that
     * are not stop words, are not numeric and occur at least
     * $wordOccuredMin times.
     *
     * @return string Keywords ordered by descending count, each followed
     *                by ", " ('' when there are none).
     */
    public function parse_words() {
        // Flip once so every membership test is a hash lookup.
        $stopWords = array_flip(self::$commonWords);

        $candidates = array();
        foreach ($this->tokens() as $word) {
            if ($this->length($word) >= $this->wordLengthMin
                && !isset($stopWords[$word])
                && !is_numeric($word)) {
                $candidates[] = $word;
            }
        }

        return $this->rank($candidates, $this->wordOccuredMin);
    }

    /**
     * Two-word phrases: adjacent words that are each at least
     * $word2WordPhraseLengthMin characters long, whose combined letter count
     * is at least $phrase2WordLengthMin and that occur at least
     * $phrase2WordLengthMinOccur times.
     *
     * @return string Phrases ordered by descending count, each followed
     *                by ", " ('' when there are none).
     */
    public function parse_2words() {
        $words = $this->tokens();
        $last = count($words) - 1;

        $phrases = array();
        for ($i = 0; $i < $last; $i++) {
            $first = $words[$i];
            $second = $words[$i + 1];

            if ($this->length($first) >= $this->word2WordPhraseLengthMin
                && $this->length($second) >= $this->word2WordPhraseLengthMin
                && $this->length($first . $second) >= $this->phrase2WordLengthMin) {
                $phrases[] = $first . " " . $second;
            }
        }

        return $this->rank($phrases, $this->phrase2WordLengthMinOccur);
    }

    /**
     * Three-word phrases: adjacent words that are each at least
     * $word3WordPhraseLengthMin characters long, whose combined letter count
     * is at least $phrase3WordLengthMin and that occur at least
     * $phrase3WordLengthMinOccur times.
     *
     * @return string Phrases ordered by descending count, each followed
     *                by ", " ('' when there are none).
     */
    public function parse_3words() {
        $words = $this->tokens();
        $last = count($words) - 2;

        $phrases = array();
        for ($i = 0; $i < $last; $i++) {
            $first = $words[$i];
            $second = $words[$i + 1];
            $third = $words[$i + 2];

            // Every limit is a minimum, so all comparisons are inclusive.
            if ($this->length($first) >= $this->word3WordPhraseLengthMin
                && $this->length($second) >= $this->word3WordPhraseLengthMin
                && $this->length($third) >= $this->word3WordPhraseLengthMin
                && $this->length($first . $second . $third) >= $this->phrase3WordLengthMin) {
                $phrases[] = $first . " " . $second . " " . $third;
            }
        }

        return $this->rank($phrases, $this->phrase3WordLengthMinOccur);
    }

    /**
     * Keeps only the entries whose count reaches $min_occur.
     *
     * @param array $array_count_values Map of word/phrase => count.
     * @param int   $min_occur          Minimum count to keep an entry.
     * @return array Filtered map, original order preserved.
     */
    public function occure_filter($array_count_values, $min_occur) {
        $occur_filtered = array();
        foreach ($array_count_values as $word => $occured) {
            if ($occured >= $min_occur) {
                $occur_filtered[$word] = $occured;
            }
        }
        return $occur_filtered;
    }

    /**
     * Joins the KEYS of $array, appending $gule after every key (including
     * the last one), e.g. array('a' => 2, 'b' => 1) gives "a, b, ".
     *
     * @param string $gule  Separator appended after each key.
     * @param array  $array Map whose keys are joined; values are ignored.
     * @return string '' for an empty array.
     */
    public function implode($gule, $array) {
        $joined = "";
        foreach ($array as $key => $unused) {
            $joined .= $key . $gule;
        }
        return $joined;
    }

    /**
     * Reads an optional setting from $params, falling back to $default.
     */
    private function option($params, $key, $default) {
        return isset($params[$key]) ? $params[$key] : $default;
    }

    /**
     * Splits the normalised contents on single spaces and trims each token.
     * Empty tokens are kept so that word adjacency is unchanged.
     */
    private function tokens() {
        return array_map('trim', explode(" ", (string) $this->contents));
    }

    /**
     * Character (not byte) length of $text in the configured encoding.
     */
    private function length($text) {
        return mb_strlen($text, $this->encoding);
    }

    /**
     * Counts the candidates, drops those occurring fewer than $minOccur
     * times, sorts by descending count and joins them with ", ".
     */
    private function rank(array $candidates, $minOccur) {
        $frequent = $this->occure_filter(array_count_values($candidates), $minOccur);
        arsort($frequent);
        return $this->implode(", ", $frequent);
    }
}