I am writing a cache gen-server for the company Use. I am wondering how to search an item from the list as I want the cost of the search for comparing various data structure
Thanks for the Answers. I have written a Erlang code to find the timings for Inserting & Fetching Date into various data Structures like LIST, DICT, QUEUE, SET and PARALLEL MAP of the data structures. Only list's insert & fetch is completed but rest has parallel map insert. I would like to share the code & results of the code. Following is the module test.erl
-module(test).
-author('ankit@hover.in').
-export ([
%%list
create_list/1, fetch_list/2, test_list/1, parmap_create_list/1 , ins_list/3, parallel_list/1,
para_fetch_list/2, pfetch_list/1,
%%dict
create_dict/1, fetch_dict/2, test_dict/1, parmap_create_dict/1 , ins_dict/3, parallel_dict/1,
%queue
create_q/1, fetch_q/2, test_q/1, parmap_create_q/1, ins_q/3, parallel_q/1,
%set
create_set/1, fetch_set/2, test_set/1, parmap_create_set/1 , ins_set/3, parallel_set/1,
create/1, multi_create/0,
pcreate/1, parallel_multi_create/0
%test/1 , multi_test/0
]).
%%creation tests
%% For List
create_list(N) ->
lists:foldl ( fun(X,Prev) -> Prev ++ [X] end, [] , lists:seq(1,N)).
test_list(N) ->
{R1,List} = timer:tc(?MODULE, create_list, [N]),
%%Set = lists:foldl(fun(X,Prev) -> sets:add_element(X,Prev) end,SET, lists:seq(1,N)),
{R2,_} = timer:tc(?MODULE, fetch_list,[ N, List ]),
{list,inserT,R1,fetchT,R2}.
%% For Dict
create_dict(N) ->
lists:foldl( fun(X,Prev)-> dict:append([],X,Prev) end, dict:new(), lists:seq(1,N)).
test_dict(N) ->
{R1,Dict} = timer:tc(?MODULE, create_dict, [N]),
%%Set = lists:foldl(fun(X,Prev) -> sets:add_element(X,Prev) end,SET, lists:seq(1,N)),
{R2,_} = timer:tc(?MODULE, fetch_dict,[ N, Dict ]),
{dict,inserT,R1,fetchT,R2}.
%% For Queue
create_q(N) ->
lists:foldl(fun(X,Prev) -> queue:in(X,Prev) end, queue:new() , lists:seq(1,N)).
test_q(N)->
{R1,Q} = timer:tc(?MODULE, create_q,[N]),
%%Q = lists:foldl(fun(X,Prev) -> queue:in(X,Prev) end,Q0, lists:seq(1,N)),
{R2,_} = timer:tc(?MODULE, fetch_q, [ N,Q ] ),
{queue,inserT,R1,fetchT,R2}.
%% For Set
create_set(N) ->
SET = sets:new(),
lists:foldl(fun(X,Prev) -> sets:add_element(X,Prev) end,SET, lists:seq(1,N)).
test_set(N) ->
{R1,Set} = timer:tc(?MODULE, create_set, [N]),
%%Set = lists:foldl(fun(X,Prev) -> sets:add_element(X,Prev) end,SET, lists:seq(1,N)),
{R2,_} = timer:tc(?MODULE, fetch_set,[ N, Set ]),
{set,inserT,R1,fetchT,R2}.
create(N)->
[ timer:tc(?MODULE, X , [N]) || X <- [ test_list, test_dict, test_q,test_set ] ].
xmulti_create()->
[ ?MODULE:create(X) || X<- [10,100,1000,10000,100000] ].
multi_create() ->
InputRange = [10,100,1000,10000,100000],
lists:map(fun(X) ->
?MODULE:create(X)
end,InputRange).
%%fetching midpoint tests
fetch_q(N,Q)->
fetch_q(N,Q,100).
fetch_q(N,Q,Find) ->
F = fun(I) -> queue:out_r(I) end,
R = lists:foldl( fun(_X,{Bool,PrevQ} ) ->
{{value, Ele},QQ} = F(PrevQ),
Ret1 = case Ele of
Temp when Temp =:= Find ->
true;
_ ->
Bool
end,
Ret2 = QQ,
{Ret1,Ret2}
end,{false,Q},
lists:seq(1,N)),
R.
fetch_set(N,Set) ->
fetch_set(N,Set,100).
fetch_set(_N,Set,Find) ->
Return = sets:is_element(Find,Set),
Return.
fetch_list(N,List) ->
fetch_list(N,List,500).
fetch_list(_N,List,Find) ->
Ret = lists:foldl(fun(X,Prev) when X =:= Find -> true;
(X, Prev) -> Prev
end,false,List),
Ret.
fetch_dict(N,Dict) ->
{ok,List} = dict:find([],Dict),
fetch_dict(N,List,500).
fetch_dict(_N,List,Find) ->
Ret = lists:foldl(fun(X,Prev) when X =:= Find -> true;
(X, Prev) -> Prev
end,false,List),
Ret.
%% parallel operation
%% Parallel Map for Queue
parallel_q(N) ->
{R1,Set} = timer:tc(?MODULE, parmap_create_q, [N]),
{parallel_q,pcreate,R1}.
parmap_create_q(N) ->
PID = spawn(fun() ->
?MODULE:ins_q( queue:new(),0,N) end),
[PID ! {self(),X} || X <- lists:seq(1,N) ],
receive
{ok, Q} ->
%%io:format("~n Still insertin, Q till now ~p",[Q]),
ok;
{done, Q} ->
io:format("~n *******DONE*********~n ~p",[Q]);
_E ->
io:format("unexpected ~p",[_E])
end.
ins_q(Q,Ctr, Max) ->
receive
{From, N} ->
%%io:format("~n insert ~p, ctr ~p, Q ~p ~n",[N,Ctr,Q]),
NewQ = queue:in( N, Q) ,
case Ctr
of Temp when Temp < Max ->
From ! {ok, NewQ};
_->
From ! {done, NewQ}
end,
?MODULE:ins_q(NewQ,Ctr+1, Max);
_E ->
io:format("unexpected ~p",[_E]),
?MODULE:ins_q(Q, Ctr, Max)
end.
%% Parallel Map for set
parallel_set(N) ->
{R1,Set} = timer:tc(?MODULE, parmap_create_set, [N]),
{parallel_set,pcreate,R1}.
parmap_create_set(N) ->
PID = spawn(fun() ->
?MODULE:ins_set( sets:new(),0,N) end),
[PID ! {self(),X} || X <- lists:seq(1,N) ],
receive
{ok, Sets} ->
%%io:format("~n Still insertin, Sets till now ~p",[Sets]),
ok;
{done, Sets} ->
io:format("~n *******DONE*********~n ~p",[Sets]);
_E ->
io:format("unexpected ~p",[_E])
end.
ins_set(Sets,Ctr, Max) ->
receive
{From, N} ->
%%io:format("~n insert ~p, ctr ~p, Sets ~p ~n",[N,Ctr,Sets]),
NewSets = sets:add_element(N,Sets),
case Ctr
of Temp when Temp < Max ->
From ! {ok, NewSets};
_->
From ! {done, NewSets}
end,
?MODULE:ins_set(NewSets,Ctr+1, Max);
_E ->
io:format("unexpected ~p",[_E]),
?MODULE:ins_set(Sets, Ctr, Max)
end.
%% Parallel Map for dict
parallel_dict(N) ->
{R1,Set} = timer:tc(?MODULE, parmap_create_dict, [N]),
{parallel_dict,pcreate,R1}.
parmap_create_dict(N) ->
PID = spawn(fun() ->
?MODULE:ins_dict( dict:new(),0,N) end),
[PID ! {self(),X} || X <- lists:seq(1,N) ],
receive
{ok, Dict} ->
%%io:format("~n Still insertin, Sets till now ~p",[Dict]),
ok;
{done, Dict} ->
io:format("~n *******DONE*********~n ~p",[Dict]);
_E ->
io:format("unexpected ~p",[_E])
end.
ins_dict(Dict,Ctr, Max) ->
receive
{From, N} ->
%%io:format("~n insert ~p, ctr ~p, Dict ~p ~n",[N,Ctr,Dict]),
NewDict = dict:append([],N,Dict),
case Ctr
of Temp when Temp < Max ->
From ! {ok, NewDict};
_->
From ! {done, NewDict}
end,
?MODULE:ins_dict(NewDict,Ctr+1, Max);
_E ->
io:format("unexpected ~p",[_E]),
?MODULE:ins_dict(Dict, Ctr, Max)
end.
%% Parallel Map for list
parallel_list(N) ->
{R1,List} = timer:tc(?MODULE, parmap_create_list, [N]),
{R2,_} = timer:tc(?MODULE, para_fetch_list, [N,List]),
{parallel_list,pCreate,R1,pFetch,R2}.
parmap_create_list(N) ->
PID = spawn(fun() ->
?MODULE:ins_list( [],0,N) end),
[PID ! {self(),X} || X <- lists:seq(1,N) ],
receive
{ok, List} ->
%%io:format("~n Still insertin, List till now ~p",[List]),
ok;
{done, List} ->
io:format("~n *******DONE*********~n ~p",[List]);
_E ->
io:format("unexpected ~p",[_E])
end.
ins_list(List,Ctr, Max) ->
receive
{From, N} ->
%%io:format("~n insert ~p, ctr ~p, Sets ~p ~n",[N,Ctr,Dict]),
NewList = List ++ [N] ,
case Ctr
of Temp when Temp < Max ->
From ! {ok, NewList};
_->
From ! {done, NewList}
end,
?MODULE:ins_list(NewList,Ctr+1, Max);
_E ->
io:format("unexpected ~p",[_E]),
?MODULE:ins_list(List, Ctr, Max)
end.
para_fetch_list(List, FindN) ->
Pid = spawn(fun() ->
?MODULE:pfetch_list(FindN) end),
Self = self(),
Now1 = now(),
lists:map ( fun(X) ->
Pid ! {Self, X},
receive
{true,Find} ->
io:format("~n Found ~p ",[Find]),
true;
{false,Find} ->
%io:format("~n NOT FOUND ~p ",[Find]),
false;
_E ->
io:format("para main unexpected ~p",[_E])
after 4000 ->
io:format("timerout ",[])
end
end, List ) ,
Now2 = now(),
timer:now_diff(Now2,Now1).
pfetch_list(Find) ->
receive
{From, CurrN} ->
_Ret = case CurrN of
Temp when Temp =:= Find ->
From ! {true,Find},
true;
_ ->
From ! {false,Find},
false
end,
%%io:format("~n insert ~p, ctr ~p, Sets ~p ~n",[N,Ctr,Dict]),
?MODULE:pfetch_list(Find);
_E ->
io:format("pfetch unexpected ~p",[_E]),
?MODULE:pfetch_list(Find)
end.
pcreate(N)->
[ timer:tc(?MODULE, X , [N]) || X <- [ parallel_list ] ].
parallel_multi_create() ->
InputRange = [10,100,1000,10000],
lists:map(fun(X) ->
?MODULE:pcreate(X)
end,InputRange).
Following are the results: For parallel Map DS
(ankit@127.0.0.1)2> test:pcreate(1000).
[{5399,{parallel_list,pCreate,5352,pFetch,30}},
{9886,{parallel_dict,pcreate,9875}},
{87603,{parallel_q,pcreate,87593}},
{42271,{parallel_set,pcreate,42223}}]
For All DS
(ankit@127.0.0.1)3> test:multi_create().
%% For 10
[[{38,{list,inserT,14,fetchT,8}},
{91,{dict,inserT,67,fetchT,11}},
{47,{queue,inserT,13,fetchT,21}},
{81,{set,inserT,61,fetchT,7}}],
%%for 100
[{234,{list,inserT,191,fetchT,30}},
{690,{dict,inserT,642,fetchT,35}},
{218,{queue,inserT,60,fetchT,144}},
{559,{set,inserT,540,fetchT,6}}],
%% for 1000
[{13746,{list,inserT,13452,fetchT,262}},
{96137,{dict,inserT,96009,fetchT,116}},
{967,{queue,inserT,284,fetchT,674}},
{5562,{set,inserT,5547,fetchT,5}}],
%% for 10000
[{438301,{list,inserT,437256,fetchT,1027}},
{361450,{dict,inserT,360412,fetchT,1020}},
{7937,{queue,inserT,2292,fetchT,5636}},
{31293,{set,inserT,31279,fetchT,5}}],
% for 100000
[{43750210,{list,inserT,43739956,fetchT,10238}},
{51517971,{dict,inserT,51507134,fetchT,10819}},
{92503,{queue,inserT,29676,fetchT,62811}},
{682118,{set,inserT,682100,fetchT,5}}]]
Hope this information helps you to determine which is the best DS for using for different purpose. I'll update it when I am finished with whole code.
If you want to search a list use the functions in the list module which is part of the extensive documentation that comes with Erlang.
If you want to know the best data structures to use - that's a slightly different question which will require a bit more information.
Use keyfind
function from lists
module. For example:
30> List = [{"A1",["ankit","sush", "Hover", "x4", "a3","nilesh","mike","erlang"]}].
[{"A1",
["ankit","sush","Hover","x4","a3","nilesh","mike",
"erlang"]}]
31> lists:keyfind("A1", 1, List).
{"A1",
["ankit","sush","Hover","x4","a3","nilesh","mike","erlang"]}
If your list is a simple one Term based item list, then the simplest solution is:
listFind ( Element, [] ) ->
false;
listFind ( Element, [ Item | ListTail ] ) ->
case ( Item == Element ) of
true -> true;
false -> listFind(Element, ListTail)
end.
The above will work assuming that how you check the equality of your items in the list is with the == operator. You can modify that operator for accomodating other types of comparison/equality as you need.
If you want this kind of "List" you can easily hand craft your own search:
search_key(Key, [{Key,_}=V|_]) -> {ok, V};
search_key(Key, [_|T]) -> search_key(Key, T);
search_key(_, []) -> not_found.
search_val(Val, [{_,L}=X|T]) ->
case _search_val(Val, L) of
ok -> {ok, X};
not_found -> search_val(Val, T)
end;
search_val(_, []) -> not_found.
_search_val(Val, [Val|_])-> ok;
_search_val(Val, [_|T])-> _search_val(Val, T);
_search_val(_,[])-> not_found.
But I'm not sure what exactly you want. For example if you want search for key "A1" and than for value "mike" in the list, it will be different solution. And if you want know how store this sort in best structure it is just another one question. You should provide more information.
Just to simplify on the example at https://stackoverflow.com/a/15587565/56250:
listFind(Element, List) ->
lists:member(Element, List).
lists:member
works with empty lists. Quick look at the source (https://github.com/erlang/otp/blob/07b8f441ca711f9812fad9e9115bab3c3aa92f79/erts/emulator/beam/erl_bif_lists.c#L184) suggests it executes lazily.