i have one table store post id and it\'s tags like :
Post_id | Tags
--------------------------------------
1 | keyword1,keyword2,keyword3
do something like this:
-- TABLES
drop table if exists post_tags;
create table post_tags
(
post_id int unsigned not null auto_increment primary key,
tags_csv varchar(1024) not null
)
engine=innodb;
drop table if exists keywords;
create table keywords
(
keyword_id mediumint unsigned not null auto_increment primary key,
name varchar(255) unique not null
)
engine=innodb;
-- optimised for queries such as - select all posts that have keyword 3
drop table if exists post_keywords;
create table post_keywords
(
keyword_id mediumint unsigned not null,
post_id int unsigned not null,
primary key (keyword_id, post_id), -- clustered composite PK !
key (post_id)
)
engine=innodb;
-- STORED PROCEDURES
drop procedure if exists normalise_post_tags;
delimiter #
create procedure normalise_post_tags()
proc_main:begin
declare v_cursor_done tinyint unsigned default 0;
-- watch out for variable names that have the same names as fields !!
declare v_post_id int unsigned;
declare v_tags_csv varchar(1024);
declare v_keyword varchar(255);
declare v_keyword_id mediumint unsigned;
declare v_tags_done tinyint unsigned;
declare v_tags_idx int unsigned;
declare v_cursor cursor for select post_id, tags_csv from post_tags order by post_id;
declare continue handler for not found set v_cursor_done = 1;
set autocommit = 0;
open v_cursor;
repeat
fetch v_cursor into v_post_id, v_tags_csv;
-- split the out the v_tags_csv and insert
set v_tags_done = 0;
set v_tags_idx = 1;
while not v_tags_done do
set v_keyword = substring(v_tags_csv, v_tags_idx,
if(locate(',', v_tags_csv, v_tags_idx) > 0,
locate(',', v_tags_csv, v_tags_idx) - v_tags_idx,
length(v_tags_csv)));
if length(v_keyword) > 0 then
set v_tags_idx = v_tags_idx + length(v_keyword) + 1;
set v_keyword = trim(v_keyword);
-- add the keyword if it doesnt already exist
insert ignore into keywords (name) values (v_keyword);
select keyword_id into v_keyword_id from keywords where name = v_keyword;
-- add the post_keywords
insert ignore into post_keywords (keyword_id, post_id) values (v_keyword_id, v_post_id);
else
set v_tags_done = 1;
end if;
end while;
until v_cursor_done end repeat;
close v_cursor;
commit;
end proc_main #
delimiter ;
-- TEST DATA
insert into post_tags (tags_csv) values
('keyword1,keyword2,keyword3'),
('keyword1,keyword5'),
('keyword4,keyword3,keyword6,keyword1');
-- TESTING
call normalise_post_tags();
select * from post_tags order by post_id;
select * from keywords order by keyword_id;
select * from post_keywords order by keyword_id, post_id;
for each keyword, do
insert into newtable (id, keyword) select id, 'aKeyword' from oldtable where oldtable.keywords like '%aKeyword%'
if oldtable.keywords is just a VARCHAR, or
insert into newtable (id, keyword) select id, 'aKeyword' from oldtable where FIND_IN_SET('aKeyword',keywords)>0
if it's a SET type.