Source for file _antispam.funcs.php
Documentation is available at _antispam.funcs.php
* This file implements Antispam handling functions.
* This file is part of the Quam Plures project - {@link http://quamplures.net/}.
* See also {@link https://launchpad.net/quam-plures}.
* @copyright (c) 2009 - 2011 by the Quam Plures developers - {@link http://quamplures.net/}
* @copyright (c)2003-2009 by Francois PLANQUE - {@link http://fplanque.net/}.
* Parts of this file are copyright (c)2004-2005 by Daniel HAHLER - {@link http://thequod.de/contact}.
* Parts of this file are copyright (c)2004 by Vegar BERG GULDAL - {@link http://funky-m.com/}.
* Parts of this file are copyright (c)2005 by The University of North Carolina at Charlotte as
* contributed by Jason Edgecombe {@link http://tst.uncc.edu/}.
* @license http://quamplures.net/license.html GNU General Public License (GPL)
* {@internal Open Source relicensing agreement:
* Daniel HAHLER grants Francois PLANQUE the right to license
* Daniel HAHLER's contributions to this file and the b2evolution project
* under any OSI approved OSS license (http://www.opensource.org/licenses/).
* Vegar BERG GULDAL grants Francois PLANQUE the right to license
* Vegar BERG GULDAL's contributions to this file and the b2evolution project
* under any OSI approved OSS license (http://www.opensource.org/licenses/).
* The University of North Carolina at Charlotte grants Francois PLANQUE the right to license
* Jason EDGECOMBE's contributions to this file and the b2evolution project
* under the GNU General Public License (http://www.opensource.org/licenses/gpl-license.php)
* and the Mozilla Public License (http://www.opensource.org/licenses/mozilla1.1.php).
* {@internal Below is a list of authors who have contributed to design/coding of this file: }}
* @author blueyed: Daniel HAHLER.
* @author fplanque: Francois PLANQUE.
* @author vegarg: Vegar BERG GULDAL.
if( !defined('QP_MAIN_INIT') ) die( 'Please, do not access this page directly.' );
* Insert a new abuse string into DB
// Cut the crap if the string is empty:
$abuse_string = trim( $abuse_string );
if( empty( $abuse_string ) )
// Check if the string already is in the blacklist:
// Insert new string into DB:
$sql = "INSERT INTO T_antispam( aspm_string, aspm_source )
VALUES( '". $DB->escape($abuse_string). "', '$aspm_source' )";
* antispam_update_source(-)
* Note: We search by string because we sometimes don't know the ID
* (e.g. when a downloaded string is already in the list/cache)
$sql = "UPDATE T_antispam
SET aspm_source = '$aspm_source'
WHERE aspm_string = '". $DB->escape($aspm_string). "'";
* Remove an entry from the ban list
$sql = "DELETE FROM T_antispam
WHERE aspm_ID = $string_ID";
* Check if a string contains abusive substrings
* Note: Letting the database do the LIKE %% match is a little faster than doing it in PHP,
* not to mention the incredibly long overhead of preloading the list into PHP
* @todo dh> IMHO this method is used too generically! It gets used for:
* - comment/message author email
* - message (email) content
* ..and validates all this against the antispam blacklist!
* We should rather differentiate here more and make it pluggable!
* @return string|false blacklisted keyword found, or false if no spam detected
// TODO: 'SELECT COUNT(*) FROM T_antispam WHERE aspm_string LIKE "%'.$url.'%" ?
$Timer->resume( 'antispam_url' ); // resuming to get the total number..
WHERE '. $DB->quote($haystack). ' LIKE CONCAT("%",aspm_string,"%")
LIMIT 0, 1', 0, 0, 'Check URL against antispam blacklist' );
$Debuglog->add( 'Spam block: '. $block );
return $block; // SPAM detected!
$Timer->pause( 'antispam_url' );
return false; // no problem.
* Get the base domain that could be blacklisted from an URL.
* We want to concentrate on the main domain and we want to prefix it with either . or // in order not
* to blacklist too broadly.
* {@internal This function gets tested in _misc.funcs.simpletest.php}}
* @param string URL or domain
* @return string|false the pattern to match this domain in the blacklist; false if we could not extract the base domain
// Remove http:// part + everything after the last path element ( '/' alone is ignored on purpose )
$domain = preg_replace( '~^ ([a-z]+://)? ([^/#]+) (/ ([^/]*/)+ )? .* ~xi', '\\2\\3', $url );
{ // All numeric = IP address, don't try to cut it any further
// Remove any www*. prefix:
$base_domain = preg_replace( '~^(www \w* \. )~xi', '', $domain );
if( empty($base_domain) )
{ // The guy is spamming with subdomains (or www):
// The guy is spamming with the base domain:
return '//'. $base_domain;
|