问题
I have a table with col1 (an int id), col2 (a varchar holding comma-separated values), and a third column for assigning a group to each row. The table looks like this:
col1 col2 group
..............................
1 2,3,4
2 5,6
3 1,2,5
4 7,8
5 11,3
6 22,8
This is only a sample of the real data. Now I have to assign a group number to each row in such a way that the output looks like this:
col1 col2 group
..............................
1 2,3,4 1
2 5,6 1
3 1,2,5 1
4 7,8 2
5 11,3 1
6 22,8 2
The logic for assigning the group number is that rows sharing any comma-separated value in col2 must get the same group number: everywhere '2' appears in col2, the group number has to be the same. The complication is that 2,3,4 appear together, so all three of those values, wherever they are found in col2, must be assigned the same group. The major point is that 2,3,4 and 1,2,5 both contain 2, so all of the values 1,2,3,4,5 have to be assigned the same group number. I tried a stored procedure using MATCH ... AGAINST on col2, but I am not getting the desired result.
Most important: I can't use normalization, because I can't afford to build a new table from my original table, which has millions of records; and even normalization would not be helpful in my context.
What I have achieved so far: I have set the group column to auto-increment and then wrote this procedure:
BEGIN
-- Asker's attempt: walk every row of `company` and copy that row's group
-- number onto every row whose col2 full-text-matches the row's col2 string.
--
-- This is a single pass with no transitive merging, so values that are only
-- linked through an intermediate row (2,3,4 -> 1,2,5 -> 5,6) are not
-- guaranteed to end up in one group.
--
-- MATCH ... AGAINST requires a FULLTEXT index on col2.
-- NOTE(review): full-text search ignores very short tokens by default, so
-- single-digit values are presumably never matched -- confirm against
-- ft_min_word_len / innodb_ft_min_token_size.
declare col1_new int;
-- col2 holds the comma-separated string, so it must not be declared as int.
-- NOTE(review): length 100 is an assumption; match the real column definition.
declare col2_new varchar(100);
declare group_new int;
declare done tinyint default 0;
-- `group` is a reserved word and has to be quoted everywhere it is used.
declare cur1 cursor for select col1, col2, `group` from company;
DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1;
open cur1;
REPEAT
fetch cur1 into col1_new, col2_new, group_new;
-- The fetch that hits end-of-data leaves the previous values in place;
-- skip the update then instead of repeating it for the last row.
if not done then
update company set `group` = group_new where
match(col2) against(concat("'", col2_new, "'"));
end if;
until done end repeat;
close cur1;
select * from company;
END
This procedure runs, with no syntax errors, but the problem is that I am not achieving exactly the desired result.
回答1:
It is possible to do, but I'm not sure how long this will take on your very big table. I assume that you are allowed to create a new table holding all groups and their numbers as the group column is populated.
Also, this cannot run on a live table. No design could make that possible, so it is not a limitation of my design in particular. Think about what would happen if you added a new row with values 7 and '6,7': that would bridge groups 1 and 2, and all the work would have to be dropped.
This proc needs to be re-run every time there are additions to the table. If that is not acceptable, run it once and then replace it with triggers that maintain the values and merge groups when needed.
Here is the procedure. It could benefit from some modularization, but it works. I've taken Jay Pipes' split_string procedure and included it.
First the DDL and some test data
-- Rows to be grouped: an id, a comma-separated list of values, and the
-- group number (left NULL until one has been assigned).
CREATE TABLE `company` (
    `col1` INT(11)      NULL DEFAULT NULL,
    `col2` VARCHAR(100) NULL DEFAULT NULL,
    `grp`  INT(11)      NULL DEFAULT NULL
);
-- Work table: one row per (value, group) pair, plus a flag recording whether
-- that value has already been processed.
CREATE TABLE `groups` (
    `number`    INT(11)    NOT NULL DEFAULT 0,
    `grp`       INT(11)    NOT NULL DEFAULT 0,
    `processed` TINYINT(1) NULL DEFAULT NULL,
    PRIMARY KEY (`number`, `grp`),
    INDEX `grp` (`grp`)
);
-- Sample rows from the question; grp is left unset.
INSERT INTO company
    (col1, col2)
VALUES
    (1, '2,3,4'),
    (2, '5,6'),
    (3, '1,2,5'),
    (4, '7,8'),
    (5, '11,3'),
    (6, '22,8');
And now the procedure
use test;
drop procedure if exists group_it;
delimiter //
-- group_it: assign a group number (company.grp) to every row of `company` so
-- that rows sharing any value in their comma-separated col2 list, directly or
-- through a chain of other rows, receive the same number.
--
-- Algorithm: pick an ungrouped row as the seed of a new group, put its values
-- into `groups` as unprocessed, then repeatedly take one unprocessed value,
-- pull in every row containing it (adding those rows' values as unprocessed
-- too) until no unprocessed value remains.
--
-- Side effects: resets company.grp on all rows, truncates `groups`, and
-- (via split_string) recreates the temporary table SplitValues.
-- Assumes col1 identifies a row uniquely; rows with a NULL col1 are skipped.
create procedure group_it ()
begin
  declare current_group int default 0;
  declare seed_id int;
  declare ids varchar(100);

  -- clear out all data from before (intentionally unfiltered: the whole
  -- table is regrouped from scratch on every run)
  update company set grp = null;
  -- `groups` is a reserved word as of MySQL 8.0.2 and must be quoted
  truncate table `groups`;

  main: loop
    -- take one unmapped row; it seeds a new group
    set seed_id := null;
    set ids := null;
    select col1, col2
      into seed_id, ids
      from company
     where grp is null
       and col1 is not null
     limit 1;
    -- detect "no rows left" through the id, not through col2, so a NULL col2
    -- on the seed row does not end the whole run early
    if seed_id is null then
      leave main;
    end if;

    set current_group := current_group + 1;

    -- Assign the seed row directly. Without this, a row whose col2 yields no
    -- matchable value (empty, only delimiters, non-numeric) would never get a
    -- grp and would be selected as the seed forever.
    update company set grp = current_group where col1 = seed_id;

    -- put each value into groups table and mark as unprocessed
    call split_string(ids, ',');
    insert ignore into `groups` (number, grp, processed)
    select value, current_group, false
      from SplitValues;

    -- while unprocessed value in groups
    begin
      declare unprocessed int;

      pending: loop
        set unprocessed := null;
        select number
          into unprocessed
          from `groups`
         where not processed
         limit 1;
        if unprocessed is null then
          leave pending;
        end if;

        begin
          -- find all rows in company that contain this value
          declare row_id int;
          declare ids2 varchar(100);
          declare cur2_done boolean default false;
          -- FIND_IN_SET matches a whole element of a comma-separated list,
          -- which is what the four anchored regexps were spelling out
          declare cur2 cursor for
            select col1, col2
              from company
             where find_in_set(unprocessed, col2) > 0;
          declare continue handler for not found set cur2_done := true;

          open cur2;
          numbers: loop
            -- reset before every fetch so that only the fetch itself can
            -- signal end-of-data
            set cur2_done := false;
            fetch cur2 into row_id, ids2;
            if cur2_done then
              close cur2;
              leave numbers;
            end if;

            update company set grp = current_group where col1 = row_id;

            -- add all new values to groups marked as unprocessed; values
            -- already present for this group are skipped by IGNORE
            call split_string(ids2, ',');
            insert ignore into `groups` (number, grp, processed)
            select value, current_group, false
              from SplitValues;
          end loop numbers;

          update `groups` set processed = true where number = unprocessed;
        end;
      end loop pending;
    end;
  end loop main;
end//
delimiter ;
This is Jay Pipes' split_string:
DELIMITER //
DROP PROCEDURE IF EXISTS split_string //
-- split_string: split `input` on `delimiter` and store the distinct,
-- non-blank pieces in the temporary table SplitValues(value).
--
-- Parameters:
--   input       string to split; NULL or '' produces an empty SplitValues
--   `delimiter` separator string; when it is empty the whole input is
--               stored as a single value
--
-- Side effects: drops and recreates the temporary table SplitValues on
-- every call, so results of a previous call are lost.
CREATE PROCEDURE split_string (
IN input TEXT
, IN `delimiter` VARCHAR(10)
)
SQL SECURITY INVOKER
COMMENT
'Splits a supplied string using using the given delimiter,
placing values in a temporary table'
BEGIN
  DECLARE cur_position INT DEFAULT 1 ;
  DECLARE remainder TEXT;
  DECLARE cur_string VARCHAR(1000);
  DECLARE delimiter_length TINYINT UNSIGNED;

  DROP TEMPORARY TABLE IF EXISTS SplitValues;
  -- NOTE(review): a VARCHAR(1000) primary key may exceed the index key
  -- length limit under multi-byte character sets -- confirm on the target
  -- server.
  CREATE TEMPORARY TABLE SplitValues (
    value VARCHAR(1000) NOT NULL PRIMARY KEY
  ) ENGINE=MyISAM;

  SET remainder = input;
  SET delimiter_length = CHAR_LENGTH(`delimiter`);

  IF delimiter_length = 0 THEN
    -- INSTR(x, '') returns 1 and the remainder would never shrink, so an
    -- empty delimiter would loop forever; treat the input as one value.
    SET cur_string = remainder;
    IF TRIM(cur_string) != '' THEN
      INSERT IGNORE INTO SplitValues (value) VALUES (cur_string);
    END IF;
  ELSE
    WHILE CHAR_LENGTH(remainder) > 0 AND cur_position > 0 DO
      SET cur_position = INSTR(remainder, `delimiter`);
      IF cur_position = 0 THEN
        -- no delimiter left: the rest of the string is the last piece
        SET cur_string = remainder;
      ELSE
        SET cur_string = LEFT(remainder, cur_position - 1);
      END IF;
      IF TRIM(cur_string) != '' THEN
        -- IGNORE: a piece that repeats in the input (e.g. '2,3,2') must not
        -- abort the split with a duplicate-key error
        INSERT IGNORE INTO SplitValues (value) VALUES (cur_string);
      END IF;
      SET remainder = SUBSTRING(remainder, cur_position + delimiter_length);
    END WHILE;
  END IF;
END //
DELIMITER ;
来源:https://stackoverflow.com/questions/9950216/grouping-a-comma-separated-value-on-common-data