From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Message-ID: <43D11D03.1070003@stc.donpac.ru> Date: Fri, 20 Jan 2006 20:25:23 +0300 From: Eugene Prokopiev User-Agent: Mozilla/5.0 (X11; U; Linux i686; ru-RU; rv:1.7.2) Gecko/20040808 X-Accept-Language: ru-ru, ru MIME-Version: 1.0 To: unix9@yandex.ru, ALT Linux Community Subject: Re: [Comm] =?KOI8-R?Q?=EB=C1=CB_=CF=C2=D5=DE=C9=D4=D8_SpamAs?= =?KOI8-R?Q?sassin=3F?= References: <43CFC310.000003.22147@colgate.yandex.ru> In-Reply-To: <43CFC310.000003.22147@colgate.yandex.ru> Content-Type: multipart/mixed; boundary="------------050709010802010402090609" Cc: X-BeenThere: community@lists.altlinux.org X-Mailman-Version: 2.1.5 Precedence: list Reply-To: ALT Linux Community List-Id: ALT Linux Community List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 20 Jan 2006 17:24:47 -0000 Archived-At: List-Archive: List-Post: This is a multi-part message in MIME format. --------------050709010802010402090609 Content-Type: text/plain; charset=KOI8-R; format=flowed Content-Transfer-Encoding: 8bit unix9 пишет: > Приветствую всех! > Подскажите, как собственно вы настраиваете и потом обучаете SpamAssassin? А dspam попробовать не хотите? Преимущество перед SpamAssassin - это демон на С, интегрированный с ClamAV. Только что в бэкпорты отправилась (т.е. после ближайшей пересборки там появится) даже более свежая версия, нежели то, что есть в Сизифе - надеюсь, что со временем это исправится ;) Способы обучения: 1) его собственный web-интерфейс (неопакеченный, но есть в архиве с исходниками) 2) утилита dspam. 
Например, чтобы сказать ей, что содержимое некоего mbox пользователь user@domain.com считает спамом, нужно скомандовать нечто вроде: cat mbox | /usr/bin/dspam --class=spam --source=corpus --user 'user@domain.com' --mode=teft --feature=chained,noise Я даже сваял простейший питоновский скрипт, который вытягивает содержимое папок с названием Spam у всех IMAP-пользователей DBMail и скармливает его таким образом dspam'у - см. аттач. Разумеется WITHOUT ANY WARRANTY :) -- С уважением, Прокопьев Евгений --------------050709010802010402090609 Content-Type: text/plain; name="dspam-learn.py" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="dspam-learn.py" #!/usr/bin/python # Copyright (C) 2006 Eugene Prokopiev # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later # version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
import email
import getopt
import os
import re
import subprocess
import sys

# Default location of the dspam binary that receives the exported mail.
DSPAM_PATH = "/usr/bin/dspam"

# asctime()-style timestamp used on an mbox "From " separator line.
MBOX_DATE_FORMAT = "%a %b %d %H:%M:%S %Y"

# Splits 'Name <addr>' into ['Name ', 'addr', ''].
_ANGLE_BRACKETS = re.compile(r"[<>]")

SQL_MAILBOXES = """
    select distinct alias
    from dbmail_aliases
"""

# The dspam_flag column has to be added before the first run:
#   alter table dbmail_messages add dspam_flag smallint not null default 0::smallint;
SQL_MESSAGES = """
    select message_idnr, internal_date
    from dbmail_aliases
    inner join dbmail_users
        on dbmail_aliases.deliver_to = dbmail_users.user_idnr
    inner join dbmail_mailboxes
        on dbmail_users.user_idnr = dbmail_mailboxes.owner_idnr
    inner join dbmail_messages
        on dbmail_mailboxes.mailbox_idnr = dbmail_messages.mailbox_idnr
    inner join dbmail_physmessage
        on dbmail_messages.physmessage_id = dbmail_physmessage.id
    where dbmail_messages.deleted_flag=0
      and dbmail_messages.dspam_flag=0
      and dbmail_mailboxes.name = 'Spam'
      and alias = %s
"""

SQL_MESSAGEBLKS = """
    select messageblk, is_header
    from dbmail_messageblks
    inner join dbmail_messages
        on dbmail_messageblks.physmessage_id = dbmail_messages.physmessage_id
    where message_idnr = %s
    order by dbmail_messageblks.messageblk_idnr
"""

SQL_MARK_LEARNED = """
    update dbmail_messages set dspam_flag=1 where message_idnr = %s
"""


def _envelope_sender(header_block):
    """Return the sender to put on the mbox "From " line.

    The address is taken from the From: header of *header_block*: the text
    between angle brackets when there is exactly one bracketed address, the
    whole header value when there are no brackets, and "-" otherwise
    (including when the header is missing altogether).
    """
    from_value = email.message_from_string(header_block).get("From")
    if from_value is None:
        # Without this guard re.split(None) raises TypeError.
        return "-"
    parts = _ANGLE_BRACKETS.split(from_value)
    if len(parts) == 1:
        return parts[0]
    if len(parts) == 3:
        return parts[1]
    return "-"


def _write_message(out, blocks, internal_date):
    """Write one message to *out* in mbox form.

    *blocks* is a sequence of (messageblk, is_header) rows in storage order;
    a "From <sender> <date>" separator line is emitted in front of every
    header block.  *internal_date* must provide strftime(format).
    """
    for messageblk, is_header in blocks:
        if is_header == 1:
            # The separator must end with a newline, otherwise it is glued
            # to the first header line of the message.
            out.write("From %s %s\n" % (
                _envelope_sender(messageblk),
                internal_date.strftime(MBOX_DATE_FORMAT)))
        out.write(messageblk)


def _teach_dspam(dspam_path, alias, messages, cursor_messageblks):
    """Pipe *messages* into dspam as spam for user *alias*; return its exit status.

    *messages* is a sequence of (message_idnr, internal_date) rows.  The
    command is passed as an argument list, so the alias never goes through
    a shell.
    """
    command = [dspam_path, "--class=spam", "--source=corpus",
               "--user", alias, "--mode=teft", "--feature=chained,noise"]
    # universal_newlines gives a text-mode stdin where the pipe would
    # otherwise only accept bytes.
    dspam = subprocess.Popen(command, stdin=subprocess.PIPE,
                             universal_newlines=True)
    try:
        for message_idnr, internal_date in messages:
            cursor_messageblks.execute(SQL_MESSAGEBLKS, (message_idnr,))
            _write_message(dspam.stdin, cursor_messageblks.fetchall(),
                           internal_date)
    finally:
        dspam.stdin.close()
        status = dspam.wait()
    return status


def export(connection, dspam_path=DSPAM_PATH):
    """Feed every unlearned message from the "Spam" mailboxes to dspam.

    For each alias in dbmail_aliases the messages of the matching "Spam"
    mailbox that are not deleted and still have dspam_flag=0 are piped to
    dspam, and a one-line summary is printed.  Afterwards dspam_flag is set
    to 1 for exactly the messages that were handed to a dspam run that
    exited with status 0; everything else keeps dspam_flag=0 so that it is
    picked up by a later run.

    connection -- an open DB-API connection to the DBMail database whose
                  driver accepts %s placeholders.  The caller is responsible
                  for committing.
    dspam_path -- path of the dspam executable.

    Errors from the database driver and OSError from starting dspam are not
    caught.
    """
    cursor_mailboxes = connection.cursor()
    cursor_messages = connection.cursor()
    cursor_messageblks = connection.cursor()
    cursor_modify = connection.cursor()

    # Flags are only written after the last alias has been processed: the
    # same message is selected once per alias of its owner, and flagging it
    # early would hide it from the remaining aliases.
    taught = set()
    rejected = set()

    cursor_mailboxes.execute(SQL_MAILBOXES)
    for (alias,) in cursor_mailboxes.fetchall():
        cursor_messages.execute(SQL_MESSAGES, (alias,))
        messages = cursor_messages.fetchall()
        if messages:  # do not start dspam just to send it empty input
            message_ids = [message_idnr for message_idnr, _ in messages]
            status = _teach_dspam(dspam_path, alias, messages,
                                  cursor_messageblks)
            if status == 0:
                taught.update(message_ids)
            else:
                rejected.update(message_ids)
                sys.stderr.write(
                    "dspam exited with status %s for %s; "
                    "its messages stay unflagged\n" % (status, alias))
        print("mailbox : %s \t - spam messages : %s" % (alias, len(messages)))

    learned = sorted(taught - rejected)
    if learned:
        cursor_modify.executemany(
            SQL_MARK_LEARNED,
            [(message_idnr,) for message_idnr in learned])


def usage():
    """Print the command line help."""
    usage_text = """
dspam-learn - DBMail Spam mailboxes -> mbox -> DSPAM

arguments:
    -h|--help     - show this text
    -t|--type     - database driver type
    -s|--server   - server where DBMail database installed
    -d|--database - DBMail database name
    -l|--login    - login to database
    -p|--password - password to database

before using this script you need to run something like:
    alter table dbmail_messages add dspam_flag smallint not null default 0::smallint;
"""
    print(usage_text)


def _load_driver(name):
    """Import and return the DB-API module called *name*.

    Raises ImportError when no such module can be imported.
    """
    # __import__ returns the top-level package for dotted names, so the
    # requested module itself is looked up in sys.modules.
    __import__(name)
    return sys.modules[name]


def _conninfo_value(value):
    """Quote *value* for a libpq "keyword=value" connection string."""
    escaped = value.replace("\\", "\\\\").replace("'", "\\'")
    return "'%s'" % escaped


def main(argv):
    """Parse *argv*, connect to the DBMail database, export and commit.

    argv -- command line arguments without the program name.

    Exits with status 2 after printing the usage text when an option is not
    recognised, and with status 0 after printing it for -h/--help.
    """
    driver = "psycopg"
    server = "localhost"
    database = "dbmail"
    login = "dbmail"
    password = "dbmailpwd"

    try:
        opts, args = getopt.getopt(
            argv, "ht:s:d:l:p:",
            ["help", "type=", "server=", "database=", "login=", "password="])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        elif opt in ("-t", "--type"):
            driver = arg
        elif opt in ("-s", "--server"):
            server = arg
        elif opt in ("-d", "--database"):
            database = arg
        elif opt in ("-l", "--login"):
            login = arg
        elif opt in ("-p", "--password"):
            password = arg

    # Import by name instead of exec'ing text taken from the command line.
    db = _load_driver(driver)
    connection = db.connect("host=%s dbname=%s user=%s password=%s" % (
        _conninfo_value(server),
        _conninfo_value(database),
        _conninfo_value(login),
        _conninfo_value(password)))
    export(connection)
    connection.commit()


if __name__ == "__main__":
    main(sys.argv[1:])