How to implement a Least Frequently Used (LFU) cache?

后端 未结 7 1449
孤城傲影
孤城傲影 2021-01-31 04:50

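Least Frequently Used (LFU) is a type of cache algorithm used to manage memory within a computer. The standard characteristics of this method involve the system keeping track of the number of times a block is referenced in memory. When the cache is full and more room is needed, the system purges the item with the lowest reference frequency.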

相关标签:
7条回答
  • 2021-01-31 05:18

    You might benefit from the LFU implementation of ActiveMQ: LFUCache

    They have provided some good functionality.

    0 讨论(0)
  • 2021-01-31 05:22

    I have implemented the LFU cache below, using this as a reference - LFU paper. My implementation is working nicely.

    If anyone has further suggestions for improving it, please let me know.

    import java.util.HashMap;
    import java.util.Map;
    import java.util.Objects;
    import java.util.TreeMap;
    
    public class LFUCacheImplementation {
    
        private Map<Integer, Node> cache = new HashMap<>();
        private Map<Integer, Integer> counts = new HashMap<>();
        private TreeMap<Integer, DoublyLinkedList> frequencies = new TreeMap<>();
        private final int CAPACITY;
    
        public LFUCache(int capacity) {
            this.CAPACITY = capacity;
        }
    
        public int get(int key) {
            if (!cache.containsKey(key)) {
                return -1;
            }
    
            Node node = cache.get(key);
    
            int frequency = counts.get(key);
            frequencies.get(frequency).remove(new Node(node.key(), node.value()));
            removeFreq(frequency);
            frequencies.computeIfAbsent(frequency + 1, k -> new DoublyLinkedList()).add(new Node(node.key(), node.value()));
    
            counts.put(key, frequency + 1);
            return cache.get(key).value();
        }
    
        public void set(int key, int value) {
            if (!cache.containsKey(key)) {
    
                Node node = new Node(key, value);
    
                if (cache.size() == CAPACITY) {
    
                    int l_count = frequencies.firstKey();
                    Node deleteThisNode = frequencies.get(l_count).head();
                    frequencies.get(l_count).remove(deleteThisNode);
    
                    int deleteThisKey = deleteThisNode.key();
                    removeFreq(l_count);
                    cache.remove(deleteThisKey);
                    counts.remove(deleteThisKey);
                }
    
                cache.put(key, node);
                counts.put(key, 1);
                frequencies.computeIfAbsent(1, k -> new DoublyLinkedList()).add(node);
            }
        }
    
        private void removeFreq(int frequency) {
            if (frequencies.get(frequency).size() == 0) {
                frequencies.remove(frequency);
            }
        }
    
        public Map<Integer, Node> getCache() {
            return cache;
        }
    
        public Map<Integer, Integer> getCounts() {
            return counts;
        }
    
        public TreeMap<Integer, DoublyLinkedList> getFrequencies() {
            return frequencies;
        }
    }
    
    class Node {
        private int key;
        private int value;
        private Node next;
        private Node prev;
    
        public Node(int key, int value) {
            this.key = key;
            this.value = value;
        }
    
        public Node getNext() {
            return next;
        }
    
        public void setNext(Node next) {
            this.next = next;
        }
    
        public Node getPrev() {
            return prev;
        }
    
        public void setPrev(Node prev) {
            this.prev = prev;
        }
    
        public int key() {
            return key;
        }
    
        public int value() {
            return value;
        }
    
        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (!(o instanceof Node)) return false;
            Node node = (Node) o;
            return key == node.key &&
                    value == node.value;
        }
    
        @Override
        public int hashCode() {
            return Objects.hash(key, value);
        }
    
        @Override
        public String toString() {
            return "Node{" +
                    "key=" + key +
                    ", value=" + value +
                    '}';
        }
    }
    
    /**
     * Minimal doubly linked list of {@link Node}s that appends at the tail and exposes the head.
     */
    class DoublyLinkedList {
        private int size;
        private Node head;
        private Node tail;

        /**
         * Appends {@code node} at the tail. Any links the node still carries are overwritten.
         *
         * @param node the node to append; {@code null} is ignored
         */
        public void add(Node node) {
            if (node == null) {
                return;
            }
            node.setPrev(tail);
            node.setNext(null);
            if (tail == null) {
                head = node;
            } else {
                tail.setNext(node);
            }
            tail = node;
            size++;
        }

        /**
         * Removes the first list element that is {@code node} itself or equal to it according to
         * {@link Node#equals(Object)}.
         *
         * <p>The links of the element found in the list are used for unlinking, so callers may pass
         * an equal but detached node. If no element matches, the list and its size are left
         * unchanged. The search is a linear scan from the head.
         *
         * @param node the node (or an equal one) to remove; {@code null} is ignored
         */
        public void remove(Node node) {
            if (node == null) {
                return;
            }
            Node target = find(node);
            if (target == null) {
                return;
            }

            Node before = target.getPrev();
            Node after = target.getNext();
            if (before == null) {
                head = after;
            } else {
                before.setNext(after);
            }
            if (after == null) {
                tail = before;
            } else {
                after.setPrev(before);
            }

            // Leave the removed element fully detached so it can be added to another list.
            target.setPrev(null);
            target.setNext(null);
            size--;
        }

        /** Returns the first element identical or equal to {@code node}, or {@code null}. */
        private Node find(Node node) {
            for (Node current = head; current != null; current = current.getNext()) {
                if (current == node || current.equals(node)) {
                    return current;
                }
            }
            return null;
        }

        /** @return the first element, or {@code null} when the list is empty */
        public Node head() {
            return head;
        }

        /** @return the number of elements currently linked into the list */
        public int size() {
            return size;
        }
    }
    

    Client code to use the above cache implementation -

    import java.util.Map;
    
    /**
     * Small driver that fills an {@code LFUCache}, reads some keys and prints the cache's
     * internal maps to standard output.
     */
    public class Client {

        /** Inserts five entries into a cache of capacity four, reads keys 13 and 14, and prints. */
        public static void main(String[] args) {
            Client printer = new Client();
            LFUCache cache = new LFUCache(4);

            for (int key = 11; key <= 15; key++) {
                cache.set(key, function(key));
            }
            printer.print(cache.getFrequencies());

            for (int i = 0; i < 3; i++) {
                cache.get(13);
            }
            for (int i = 0; i < 4; i++) {
                cache.get(14);
            }
            printer.print(cache.getCache());
            printer.print(cache.getCounts());
            printer.print(cache.getFrequencies());
        }

        /**
         * Prints every entry of {@code map}; the output format depends on whether the value is a
         * {@code Node}, a {@code DoublyLinkedList}, or anything else.
         */
        public void print(Map<Integer, ? extends Object> map) {
            for (Map.Entry<Integer, ? extends Object> entry : map.entrySet()) {
                Integer key = entry.getKey();
                Object value = entry.getValue();

                if (value instanceof Node) {
                    System.out.println("Cache Key => " + key + ", Cache Value => " + value);
                    continue;
                }
                if (value instanceof DoublyLinkedList) {
                    printBucket(key, (DoublyLinkedList) value);
                    continue;
                }
                System.out.println("Count Key => " + key + ", Count Value => " + value);
            }
        }

        /** Prints one frequency bucket, one node per line, walking from the head. */
        private void printBucket(Integer frequency, DoublyLinkedList bucket) {
            System.out.println("Frequency Key => " + frequency + " Frequency Values => [");
            for (Node cursor = bucket.head(); cursor != null; cursor = cursor.getNext()) {
                System.out.println(cursor);
            }
            System.out.println(" ]");
        }

        /** Derives a demo value from a key by multiplying it by 31. */
        public static int function(int key) {
            final int prime = 31;
            return prime * key;
        }
    }
    
    0 讨论(0)
  • 2021-01-31 05:29

    How about a priority queue? You can keep elements sorted there with keys representing the frequency. Just update the object position in the queue after visiting it. You can update just from time to time for optimizing the performance (but reducing precision).

    0 讨论(0)
  • 2021-01-31 05:31

    Many implementations I have seen have runtime complexity O(log(n)). This means that, when the cache size is n, the time needed to insert an element into the cache or remove one from it is logarithmic. Such implementations usually use a min heap to maintain the usage frequencies of elements. The root of the heap contains the element with the lowest frequency and can be accessed in O(1) time. But to maintain the heap property we have to move an element inside the heap to its proper position every time it is used (and its frequency is incremented), or when we have to insert a new element into the cache (and so put it into the heap). The runtime complexity can be reduced to O(1), however, when we maintain a hashmap (Java) or unordered_map (C++) with the element as key. Additionally we need two sorts of lists: a frequency list and elements lists. Each elements list contains the elements that have the same frequency, and the frequency list contains the elements lists.

      frequency list
      1   3   6   7
      a   k   y   x
      c   l       z
      m   n
    

    Here in the example we see the frequency list that has 4 elements (4 elements lists). The element list 1 contains elements (a,c,m), the elements list 3 contains elements (k, l, n) etc. Now, when we use say element y, we have to increment its frequency and put it in the next list. Because the elements list with frequency 6 becomes empty, we delete it. The result is:

      frequency list
      1   3   7
      a   k   y
      c   l   x
      m   n   z
    

    We place the element y at the beginning of the elements list 7. When we have to remove elements from the list later, we will start from the end (first z, then x and then y). Now, when we use element n, we have to increment its frequency and put it into a new list, with frequency 4:

      frequency list
      1   3   4  7
      a   k   n  y
      c   l      x
      m          z
    

    I hope the idea is clear. I provide now my C++ implementation of the LFU cache, and will add later a Java implementation. The class has just 2 public methods, void set(key k, value v) and bool get(key k, value &v). In the get method the value to retrieve will be set per reference when the element is found, in this case the method returns true. When the element is not found, the method returns false.

    #include <cassert>
    #include <iterator>
    #include <list>
    #include <string>
    #include <unordered_map>
    #include <utility>
    
    using namespace std;
    
    typedef unsigned uint;
    
    // One cached item: a key together with the value stored for it.
    // Kept as a plain aggregate so it can be brace-initialised as {key, value}.
    template<class K, class V = K>
    struct Entry
    {
        K key;      // key the item is looked up by
        V value;    // value stored for that key
    };
    
    
    template<typename K, typename V = K>
    class LFUCache
    {
    
    typedef  typename list<typename Entry<K, V>> ElementList;
    typedef typename list <pair <uint, ElementList>> FrequencyList;
    
    private:
        unordered_map <K, pair<typename FrequencyList::iterator, typename ElementList::iterator>> cacheMap;
        FrequencyList elements;
        uint maxSize;
        uint curSize;
    
        void incrementFrequency(pair<typename FrequencyList::iterator, typename ElementList::iterator> p) {
            if (p.first == prev(elements.end())) {
                //frequency list contains single list with some frequency, create new list with incremented frequency (p.first->first + 1)
                elements.push_back({ p.first->first + 1, { {p.second->key, p.second->value} } });
                // erase and insert the key with new iterator pair
                cacheMap[p.second->key] = { prev(elements.end()), prev(elements.end())->second.begin() };
            }
            else {
                // there exist element(s) with higher frequency
                auto pos = next(p.first);
                if (p.first->first + 1 == pos->first)
                    // same frequency in the next list, add the element in the begin
                    pos->second.push_front({ p.second->key, p.second->value });
                else
                    // insert new list before next list
                    pos = elements.insert(pos, { p.first->first + 1 , {{p.second->key, p.second->value}} });
                // update cachMap iterators
                cacheMap[p.second->key] = { pos, pos->second.begin() };
            }
            // if element list with old frequency contained this singe element, erase the list from frequency list
            if (p.first->second.size() == 1)
                elements.erase(p.first);
            else
                // erase only the element with updated frequency from the old list
                p.first->second.erase(p.second);
        }
    
        void eraseOldElement() {
            if (elements.size() > 0) {
                auto key = prev(elements.begin()->second.end())->key;
                if (elements.begin()->second.size() < 2)
                    elements.erase(elements.begin());
                else
                    elements.begin()->second.erase(prev(elements.begin()->second.end()));
                cacheMap.erase(key);
                curSize--;
            }
        }
    
    public:
        LFUCache(uint size) {
            if (size > 0)
                maxSize = size;
            else
                maxSize = 10;
            curSize = 0;
        }
        void set(K key, V value) {
            auto entry = cacheMap.find(key);
            if (entry == cacheMap.end()) {
                if (curSize == maxSize)
                    eraseOldElement();
                if (elements.begin() == elements.end()) {
                    elements.push_front({ 1, { {key, value} } });
                }
                else if (elements.begin()->first == 1) {
                    elements.begin()->second.push_front({ key,value });
                }
                else {
                    elements.push_front({ 1, { {key, value} } });
                }
                cacheMap.insert({ key, {elements.begin(), elements.begin()->second.begin()} });
                curSize++;
            }
            else {
                entry->second.second->value = value;
                incrementFrequency(entry->second);
            }
        }
    
        bool get(K key, V &value) {
            auto entry = cacheMap.find(key);
            if (entry == cacheMap.end())
                return false;
            value = entry->second.second->value;
            incrementFrequency(entry->second);
            return true;
        }
    };
    

    Here are examples of usage:

        // Usage example: exercises eviction order with an int cache and a string-valued cache.
        int main()
        {
            bool rc = false;  // result of the most recent lookup
            int r = 0;        // receives the value of the most recent int lookup

            LFUCache<int> cache(3); // cache of size 3
            cache.set(1, 1);
            cache.set(2, 2);
            cache.set(3, 3);
            cache.set(2, 4);

            rc = cache.get(1, r);

            assert(rc);
            assert(r == 1);
            // evict old element, in this case 3
            cache.set(4, 5);
            rc = cache.get(3, r);
            assert(!rc);
            rc = cache.get(4, r);
            assert(rc);
            assert(r == 5);

            LFUCache<int, std::string> cache2(2);
            cache2.set(1, "one");
            cache2.set(2, "two");
            std::string val;
            rc = cache2.get(1, val);
            if (rc)
                assert(val == "one");
            else
                assert(false);

            cache2.set(3, "three"); // evict 2
            rc = cache2.get(2, val);
            assert(rc == false);
            rc = cache2.get(3, val);
            assert(rc);
            assert(val == "three");

            return 0;
        }
    
    0 讨论(0)
  • 2021-01-31 05:35
    1. According to me, the best way to implement a most-recently-used cache of objects would be to include a new variable as 'latestTS' for each object. TS stands for timestamp.

      // A static method that returns the current date and time as milliseconds since January 1st 1970
      long latestTS = System.currentTimeMillis();

    2. ConcurrentLinkedHashMap is not yet implemented in Concurrent Java Collections. (Ref: Java Concurrent Collection API). However, you can try and use ConcurrentHashMap and DoublyLinkedList

    3. About the case to be considered: in such case, as I have said that you can declare latestTS variable, based upon the value of latestTS variable, you can remove an entry and add the new object. (Don't forget to update frequency and latestTS of the new object added)

    As you have mentioned, you can use LinkedHashMap, as it gives element access in O(1) and also lets you traverse the entries in order. Please find the code for an LFU cache below. (PS: The code below answers the question in the title, i.e. "How to implement LFU cache".)

    import java.util.LinkedHashMap;
    import java.util.Map;
    
    public class LFUCache {
    
        class CacheEntry
        {
            private String data;
            private int frequency;
    
            // default constructor
            private CacheEntry()
            {}
    
            public String getData() {
                return data;
            }
            public void setData(String data) {
                this.data = data;
            }
    
            public int getFrequency() {
                return frequency;
            }
            public void setFrequency(int frequency) {
                this.frequency = frequency;
            }       
    
        }
    
        private static int initialCapacity = 10;
    
        private static LinkedHashMap<Integer, CacheEntry> cacheMap = new LinkedHashMap<Integer, CacheEntry>();
        /* LinkedHashMap is used because it has features of both HashMap and LinkedList. 
         * Thus, we can get an entry in O(1) and also, we can iterate over it easily.
         * */
    
        public LFUCache(int initialCapacity)
        {
            this.initialCapacity = initialCapacity;
        }
    
        public void addCacheEntry(int key, String data)
        {
            if(!isFull())
            {
                CacheEntry temp = new CacheEntry();
                temp.setData(data);
                temp.setFrequency(0);
    
                cacheMap.put(key, temp);
            }
            else
            {
                int entryKeyToBeRemoved = getLFUKey();
                cacheMap.remove(entryKeyToBeRemoved);
    
                CacheEntry temp = new CacheEntry();
                temp.setData(data);
                temp.setFrequency(0);
    
                cacheMap.put(key, temp);
            }
        }
    
        public int getLFUKey()
        {
            int key = 0;
            int minFreq = Integer.MAX_VALUE;
    
            for(Map.Entry<Integer, CacheEntry> entry : cacheMap.entrySet())
            {
                if(minFreq > entry.getValue().frequency)
                {
                    key = entry.getKey();
                    minFreq = entry.getValue().frequency;
                }           
            }
    
            return key;
        }
    
        public String getCacheEntry(int key)
        {
            if(cacheMap.containsKey(key))  // cache hit
            {
                CacheEntry temp = cacheMap.get(key);
                temp.frequency++;
                cacheMap.put(key, temp);
                return temp.data;
            }
            return null; // cache miss
        }
    
        public static boolean isFull()
        {
            if(cacheMap.size() == initialCapacity)
                return true;
    
            return false;
        }
    }
    
    0 讨论(0)
  • 2021-01-31 05:35

    Here's the O(1) implementation for LFU - http://dhruvbird.com/lfu.pdf

    0 讨论(0)
提交回复
热议问题