WordPress – collapse redundant tags

I’ve been experimenting with automation of WordPress posts. Probably I’m doing something wrong with the way I make the XML RPC calls, but I find that I end up with redundant tags in my database. For instance, if I tagged two separate, RPC-posted posts with “orange”, I get two different tags both called “orange”. Until I figure out how to fix this properly, here’s a little script that will clean up the database by consolidating all redundantly named tags to one tag. You might want to back up your database before using this…

#!/usr/bin/perl
use strict;
use DBI;
 
######configuration
my $PREFIX = 'wp_h5otpn_';
my $DB = '';
my $HOST = '';
my $USER = '';
my $PASS = '';
######
my $dbh = DBI->connect(qq(dbi:mysql:database=$DB;host=$HOST), $USER, $PASS) or die $!;
 
my $term_sth   = $dbh->prepare(qq(SELECT * FROM (SELECT name, count(name) AS c FROM ${PREFIX}terms GROUP BY name) AS d WHERE d.c > 1));
my $name_sth   = $dbh->prepare(qq(SELECT term_id FROM ${PREFIX}terms WHERE name = ?));
my $update_sth = $dbh->prepare(qq(UPDATE ${PREFIX}term_relationships SET term_taxonomy_id = (SELECT term_taxonomy_id FROM ${PREFIX}term_taxonomy WHERE term_id = ?) WHERE term_taxonomy_id = (SELECT term_taxonomy_id FROM ${PREFIX}term_taxonomy WHERE term_id = ?)));
my $delete1_sth = $dbh->prepare(qq(DELETE FROM ${PREFIX}term_taxonomy WHERE term_id = ?));
my $delete2_sth = $dbh->prepare(qq(DELETE FROM ${PREFIX}terms WHERE term_id = ?));
$term_sth->execute();
 
while ( my ( $name, $count ) = $term_sth->fetchrow_array() ) {
  $name_sth->execute( $name );
  my $new = undef;
  while ( my ( $term_id ) = $name_sth->fetchrow_array() ) {
    if ( ! $new ) {
      $new = $term_id;
      next;
    }
    warn "$name\t$term_id\t->\t$new";
    $update_sth->execute( $new, $term_id );
    $delete1_sth->execute( $term_id );
    $delete2_sth->execute( $term_id );
  }
}
 
__DATA__
SELECT t.term_id, t.name, r.*, s.* FROM wp_h5otpn_terms AS t, wp_h5otpn_term_taxonomy AS r, wp_h5otpn_term_relationships AS s WHERE s.term_taxonomy_id = r.term_taxonomy_id AND r.term_id = t.term_id AND r.taxonomy = 'post_tag' AND t.name = 'whatever';