Click here to Skip to main content
15,885,164 members
Articles / Programming Languages / C++
Article

STL101 Part C - Functors

Rate me:
Please Sign up or sign in to vote.
3.52/5 (16 votes)
1 Apr 20026 min read 187.4K   39   58
This third article describes how to write function adaptors which allow customization of STL functions.

Introduction

Hopefully you're aware of my format by now - once again I'm going to provide you with code you can copy and paste into an empty console application rather than use a zip file. I'll see you on the other side...

#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <functional>
#include <numeric>

using std::binary_function;
using std::unary_function;
using std::accumulate;
using std::cout;
using std::string;
using std::vector;
using std::for_each;
using std::ostream_iterator;
using std::copy;
using std::endl;
using std::sort;
using std::mem_fun_ref;

struct Member
{
   string Surname;
   string Name;
   int    VideosOut;
   int DaysOverdue;

   Member::Member(string surname, string name, int videosOut = 0, 
      int daysOverdue = 0) : Surname(surname),
      Name(name), VideosOut(videosOut), DaysOverdue(daysOverdue) {};

   int Print()
   {
      cout << "Name: " << Surname <<", " << Name;

      if (VideosOut > 0) cout << "  Videos Out = " << VideosOut;

      if (DaysOverdue > 0) cout << "  Days overdue = " << DaysOverdue;

      cout << endl;

      return 0;
   }
};

void PrintItem(const Member & m)
{
   cout << "Name: " << m.Surname <<", " << m.Name;

   if (m.VideosOut > 0) cout << "  Videos Out = " << m.VideosOut;

   if (m.DaysOverdue > 0) cout << "  Days overdue = " << m.DaysOverdue;

   cout << endl; 
}

struct CountOverdueDays : public unary_function<int, int>
{
   CountOverdueDays(int nDayLimit) : m_nDayLimit(nDayLimit) { }
   int operator()(int nOverdueSoFar, const Member& m)  
   {  
      return m.DaysOverdue >= m_nDayLimit ? 
                   nOverdueSoFar + m.VideosOut : nOverdueSoFar;
   }
private:  
   const int m_nDayLimit;
};

struct SortByVideosOut : public binary_function<Member, Member, bool>
{
   bool operator() (const Member & m1, const Member & m2)
   {
      return (m1.VideosOut < m2.VideosOut);
   }
};

bool SortMember(const Member m1, const Member m2)
{
   if (m1.Surname < m2.Surname)
      return true;
   else if (m1.Surname > m2.Surname)
      return false;

   return (m1.Name < m2.Name);
}

struct CountIntSort : public binary_function<int, int, bool>
{
public:
   static int nCount;

   bool operator() (const int n1, const int n2)
   {
      ++ nCount;
      return (n1 < n2);
   }
};

int CountIntSort::nCount = 0;

struct CountIntSort2 : public binary_function<int, int, bool>
{
public:
   int nCount;

   CountIntSort2() : nCount(0){};

   bool operator() (const int n1, const int n2)
   {
      ++ nCount;
      return (n1 < n2);
   }
};

struct CountIntSort3 : public binary_function<int, int, bool>
{
public:
   int nCount;
   CountIntSort3 * pParent;
   int nCopies;

   CountIntSort3() : nCount(0), nCopies(0), pParent(0) {};

   CountIntSort3(CountIntSort3 & c) : nCount(0), nCopies(1)
   {
      pParent = &c;
   }

   ~CountIntSort3()
   {
      pParent->nCount += nCount;
      pParent->nCopies += nCopies;
   }

   bool operator() (const int n1, const int n2)
   {
      ++ nCount;
      return (n1 < n2);
   }
};

int _tmain(int argc, _TCHAR* argv[])
{
   vector<Member> vecMem;

   vecMem.push_back(Member("Zaphir", "Jill"));
   vecMem.push_back(Member("Smith", "John", 5, 0));
   vecMem.push_back(Member("Smith", "Phil", 1, 3));
   vecMem.push_back(Member("Jones", "Susan"));
   vecMem.push_back(Member("Kaputnik", "Joe", 10, 5));
   vecMem.push_back(Member("Jones", "Bill", 3, 7));

   for_each(vecMem.begin(), vecMem.end(), PrintItem);

   cout << endl;

   sort(vecMem.begin(), vecMem.end(), SortMember);

   cout << "Now the same list sorted\n\n";

   for_each(vecMem.begin(), vecMem.end(), mem_fun_ref(&Member::Print));

   cout << endl;

   int nOverdue = accumulate(vecMem.begin(), vecMem.end(),
      0 /* initval = identity of addition */, 
      CountOverdueDays(5));

   cout << "Number of videos overdue by five or more days: " << nOverdue;

   cout << "\n\nSorted by number of videos out:\n\n";

   SortByVideosOut v;

   sort (vecMem.begin(), vecMem.end(), v);

   for_each(vecMem.begin(), vecMem.end(), mem_fun_ref(&Member::Print));

   cout << endl;

   vector<int> vecInt;
   
   for (int i = 0; i < 10000; ++i)
      vecInt.push_back(rand()% 10000);

   CountIntSort::nCount = 0;

   CountIntSort c;

   // Use a copy so it always requires the same number of iterations

   vector<int> vecInt2 = vecInt; 

       sort(vecInt2.begin(), vecInt2.end(), c);

   cout << "Sorting a vector of 10000 ints took " 
        << CountIntSort::nCount << " calls to the sort function" 
        << endl;

   CountIntSort2 c2;

   vecInt2 = vecInt;

   sort(vecInt2.begin(), vecInt2.end(), c2);

   cout << "Sorting a vector of 10000 ints second try took " 
        << c2.nCount << " calls to the sort function" << endl;

   CountIntSort3 c3;

   vecInt2 = vecInt;

   sort(vecInt2.begin(), vecInt2.end(), c3);

   cout << "Sorting a vector of 10000 ints second try took " 
       << c3.nCount 
       << " calls to the sort function, and the predicate was copied " 
       << c3.nCopies << " times." << endl;

   string s;
   std::cin >> s;

   return 0;
}

This third installment deals with an imaginary video library program, for which a portion of the struct that holds customer data may look like this:

struct Member
{
    string Surname;
    string Name;
    int    VideosOut;
    int DaysOverdue;

    Member::Member(string surname, string name, int videosOut = 0, 
              daysOverdue = 0) : Surname(surname),
              Name(name), VideosOut(videosOut), DaysOverdue(daysOverdue) {};
}

Having created this struct, we can build a vector of imaginary customers like this:

vector<Member> vecMem;

vecMem.push_back(Member("Zaphir", "Jill"));
vecMem.push_back(Member("Smith", "John", 5, 0));
vecMem.push_back(Member("Smith", "Phil", 1, 3));
vecMem.push_back(Member("Jones", "Susan"));
vecMem.push_back(Member("Kaputnik", "Joe", 10, 5));
vecMem.push_back(Member("Jones", "Bill", 3, 7));

Now, normally, if I created a struct like this on the console, I'd create a stream inserter/extractor pair for it, so I could just call cout on each item, but I'm not trying to show how to do that. Instead, I am going to create my own function, which I will call with for_each. This function is often preferable to a hand written loop for a number of reasons, which are all covered in Effective STL by Scott Meyers. If you have enough interest in my articles to have read up to the third one, you need this book. Go to Fatbrain or Amazon and order it now. Don't worry, I'll wait for you...

You're back? Good. Now, here is how it works. A function like for_each requires a unary function, one which takes one parameter. So we write a print function like this:

void PrintItem(const Member & m)
{
    cout << "Name: " << m.Surname <<", " << m.Name;

    if (m.VideosOut > 0) cout << "  Videos Out = " << m.VideosOut;

    if (m.DaysOverdue > 0) cout << "  Days overdue = " << m.DaysOverdue;

    cout << endl; 
}

and then inside main we call it like this:

for_each(vecMem.begin(), vecMem.end(), PrintItem);

Simple, yes? A comparison function such as sort requires a binary function - it needs two objects in order to compare them. Let's write a function which sorts by name - a structure requires this sort of function to tell us what to sort by. Furthermore, in this case we will sort by surname, and sort by first name within groups of the same surname.

bool SortMember(const Member & m1, const Member & m2)
{
    if (m1.Surname < m2.Surname)
        return true;
    else if (m1.Surname > m2.Surname)
        return false;

    return (m1.Name < m2.Name);
}

Then it's just a case of using this function to sort by as follows:

sort(vecMem.begin(), vecMem.end(), SortMember);

Of course, all of these global functions get a bit messy, not to mention that they are not object oriented at all, and so we are provided with some adaptors to help us, namely mem_fun, and mem_fun_ref. Both of these allow us to pass in a function that is a member of the class we are storing in our container. memfun assumes we are storing pointers to objects, and mem_fun_ref that we are storing them by value. So if we add the following code to our Member struct:

int Print()
{
    cout << "Name: " << Surname <<", " << Name;

    if (VideosOut > 0) cout << "  Videos Out = " << VideosOut;

    if (DaysOverdue > 0) cout << "  Days overdue = " << DaysOverdue;

    cout << endl;

    return 0;
}

then we can do this:

for_each(vecMem.begin(), vecMem.end(), mem_fun_ref(&Member::Print));

We also have mem_fun1 and mem_fun1_ref, which allow us to specify functions that take one argument. Sadly there is no mem_fun2_ref, which means we cannot put a predicate into a class (a predicate is a function that takes two arguments and returns bool.)

We can however move beyond these global functions, by instead creating a struct or class that overloads operator (). Such a function is called a functor in STL land, and a functor where operator() returns a bool is called a predicate.

Before I talk about the effect of this, let me first thank Jörgen Sigvardsson for being the first of many to point out the flaws in my original article in regard to this section. He is the author of the CoutOverdueDays function that follows.

After several false starts, I think I've got to the bottom of the issue of functors having state. Meyers explains that if operator() modifies the state of the functor, we have a problem as we are unable to know exactly how many times it will be called. However, the fact that we can introduce state into a functor is one good reason to use one. A good example can be found in Chris Losinger's article here. Jörgen's CountOverdueDays functor looks like this:

struct CountOverdueDays
{
   CountOverdueDays(int nDayLimit) : m_nDayLimit(nDayLimit) { }

   int operator()(int nOverdueSoFar, const Member& m)    
   { 
       return m.DaysOverdue >= 
                m_nDayLimit ? nOverdueSoFar + m.VideosOut : nOverdueSoFar;
   }
private: 
   const int m_nDayLimit;
};

With the help of Jörgen, the following improved code demonstrates how to count how many videos are five or more days overdue:

int nOverdue = accumulate(vecMem.begin(), vecMem.end(),
   0 /* initval = identity of addition */, 
   CountOverdueDays(5));

cout << "Number of videos overdue by five or more days: " << nOverdue;

To allow you to easily see this is correct, I provide another sort, by the number of videos out.

struct SortByVideosOut : public 
     binary_function<MEMBER bool Member Member Member,> 
{
    bool operator() (const Member & m1, const Member & m2)
    {
        return (m1.VideosOut < m2.VideosOut);
    }
};

Anyhow, now we can output the list by number of videos out, by doing the following:

SortByVideosOut v;

sort (vecMem.begin(), vecMem.end(), v);

for_each(vecMem.begin(), vecMem.end(), mem_fun_ref(&Member::Print));

One good reason for using functors instead of global functions is that they have no function call overhead once they have been passed in as an argument. Another, as also pointed out by Jörgen, is that it allows you to use function adapters like binder1st and unary_negate, so long as you derive them from unary_function or binary_function. These two classes are found in the header <functional>.

Having come this far, I decided as an exercise to add the code to count how many times operator() is called for a common function, namely sort. The following code creates a binary predicate that does a normal sort, but also counts how often the operator is called using a static variable.

struct CountIntSort : public 
     binary_function<int, int, bool><int int bool int int,,>
{
public:
   static int nCount;

   bool operator() (const int n1, const int n2)
   {
      ++ nCount;
      return (n1 < n2);
   }
};

int CountIntSort::nCount = 0;

And in main()

vector<int> vecInt;

for (int i = 0; i < 10000; ++i)
   vecInt.push_back(rand()% 10000);

CountIntSort::nCount = 0;

CountIntSort c;

// Use a copy so it always requires the same number of iterations

vector<int> vecInt2 = vecInt; 

sort(vecInt2.begin(), vecInt2.end(), c);

cout << "Sorting a vector of 10000 ints took " 
     << CountIntSort::nCount << " calls to the sort function" << endl;

On my machine, the magic number is 205,832 calls to operator () to sort 10,000 ints. That static variable is a bit ugly though, so let's replace it with a class level member.

struct CountIntSort2 : public binary_function<int, int, bool>
{
public:
   int nCount;

   CountIntSort2() : nCount(0){};

   bool operator() (const int n1, const int n2)
   {
      ++ nCount;
      return (n1 < n2);
   }
};

Making another copy of the same vector and calling this returns us a value of 0 for the number of iterations. This was the same problem I faced when writing the original article, and it was Joaquín M López Muñoz who provides the solution in the discussion below.

Predicates are passed by value, not by reference. Therefore the counter spins nicely inside the STL, but the predicate instance you passed in remains unaffected. A full explanation is given by Joaquín below, in his message titled 'I got it', in the thread labeled 'I still don't get why predicates should be stateless'. Suffice it to say that this will not work. So how do we get away from the static variable? We need to overload our copy constructor, and our destructor in order to maintain a count per copy, and pass it back until it returns to our original predicate. While we're at it, let's count how many copies of the predicate are made during the copy operation.

struct CountIntSort3 : public binary_function<int, int, bool>
{
public:
   int nCount;
   CountIntSort3 * pParent;
   int nCopies;

   CountIntSort3() : nCount(0), nCopies(0), pParent(0) {};

   CountIntSort3(CountIntSort3 & c) : nCount(0), nCopies(1)
   {
      pParent = &c;
   }

   ~CountIntSort3()
   {
      pParent->nCount += nCount;
      pParent->nCopies += nCopies;
   }

   bool operator() (const int n1, const int n2)
   {
      ++ nCount;
      return (n1 < n2);
   }
};

Using this code instead, we once again get 205,832 calls to operator (), and the predicate is copied 3,315 times during the operation.

As has been noted before by others commenting on my first article, the Boost library provides a lot of extensions to STL, but I do not use them simply because I had a hard enough time getting everyone at work to install STLPort, and I don't see much point in learning stuff that leaves me less likely to know workarounds that can be used in every situation. I will be installing Boost when I do an article on stuff not found in the VC6 STL that can be useful, although that will mostly cover stuff in STLPort. I mention this again because I'd really like to be able to add these predicate functions to the class itself if I could.

As you can see, not only does STL come with a lot of powerful functions built in (a future article will discuss how many, for now I just try to introduce a couple in each article ), for_each provides an easy way to write our own functions, and the general mechanism of providing our own code for policy means that we can adapt the general algorithms to suit our needs. Next up, I am going to offer an article on associative containers, namely map and set. As I discovered, a set is a subset of map, and both are a very useful alternative if you're looking to keep your data sorted, or want a tradeoff between the high cost of inserting into a vector, and the high cost of searching a list.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
Software Developer (Senior)
Australia Australia
Programming computers ( self taught ) since about 1984 when I bought my first Apple ][. Was working on a GUI library to interface Win32 to Python, and writing graphics filters in my spare time, and then building n-tiered apps using asp, atl and asp.net in my job at Dytech. After 4 years there, I've started working from home, at first for Code Project and now for a vet telemedicine company. I owned part of a company that sells client education software in the vet market, but we sold that and I worked for the owners for five years before leaving to get away from the travel, and spend more time with my family. I now work for a company here in Hobart, doing all sorts of Microsoft based stuff in C++ and C#, with a lot of T-SQL in the mix.

Comments and Discussions

 
GeneralSo what's the conclusion? Pin
Joaquín M López Muñoz25-Mar-02 11:10
Joaquín M López Muñoz25-Mar-02 11:10 
GeneralRe: So what's the conclusion? Pin
Christian Graus25-Mar-02 11:18
protectorChristian Graus25-Mar-02 11:18 
GeneralI got it! Pin
Joaquín M López Muñoz25-Mar-02 12:00
Joaquín M López Muñoz25-Mar-02 12:00 
GeneralRe: I got it! Pin
Christian Graus25-Mar-02 12:17
protectorChristian Graus25-Mar-02 12:17 
GeneralRe: So what's the conclusion? Pin
Christian Graus25-Mar-02 11:53
protectorChristian Graus25-Mar-02 11:53 
GeneralRe: So what's the conclusion? Pin
Joaquín M López Muñoz25-Mar-02 12:11
Joaquín M López Muñoz25-Mar-02 12:11 
GeneralRe: So what's the conclusion? Pin
Chris Losinger27-Mar-02 5:52
professionalChris Losinger27-Mar-02 5:52 
GeneralRe: I still don't get why predicates should be stateless Pin
Ramon Casellas27-Mar-02 5:41
Ramon Casellas27-Mar-02 5:41 
Hola Joaquin,


There is a short discussion on this subject on Herb Sutter "More Effective C++", I don't recall the exact arguments, but I think that he states that with reference counting techniques you may be able to use stateful predicates (he gives samples regarding the implementation of remove_if in terms of find_if). However, other arguments he gives against the use of stateful predicates involve assumptions on the traversal order. Basically, if you cannot assume any particular order (e.g. not explicitly stated on the norm) then you are out.

Regards,
R.

ps: Refer to "More Effective C++" or his web page "http://www.gotw.ca" for further details.
GeneralPredicate should be stateless Pin
Robin24-Mar-02 20:13
Robin24-Mar-02 20:13 
GeneralRe: Predicate should be stateless Pin
Christian Graus24-Mar-02 22:58
protectorChristian Graus24-Mar-02 22:58 
GeneralRe: Predicate should be stateless Pin
Joaquín M López Muñoz25-Mar-02 3:21
Joaquín M López Muñoz25-Mar-02 3:21 
GeneralRe: Predicate should be stateless Pin
Robin25-Mar-02 6:27
Robin25-Mar-02 6:27 
GeneralRe: Predicate should be stateless Pin
Joaquín M López Muñoz25-Mar-02 3:39
Joaquín M López Muñoz25-Mar-02 3:39 
GeneralRe: Predicate should be stateless Pin
Robin25-Mar-02 6:30
Robin25-Mar-02 6:30 
GeneralRe: Predicate should be stateless Pin
Robin25-Mar-02 6:19
Robin25-Mar-02 6:19 
GeneralRe: Predicate should be stateless Pin
Christian Graus25-Mar-02 8:25
protectorChristian Graus25-Mar-02 8:25 
Generalgreat series.. really like it Pin
Bernhard6-Mar-02 4:22
Bernhard6-Mar-02 4:22 
GeneralRe: great series.. really like it Pin
Christian Graus6-Mar-02 8:16
protectorChristian Graus6-Mar-02 8:16 
GeneralRe: great series.. really like it Pin
Bernhard6-Mar-02 19:13
Bernhard6-Mar-02 19:13 
GeneralA more generic <code>CountOverdueFivePlus</code> Pin
Thomas Freudenberg25-Feb-02 6:01
Thomas Freudenberg25-Feb-02 6:01 
GeneralRe: A more generic <code>CountOverdueFivePlus</code> Pin
Jörgen Sigvardsson25-Feb-02 8:07
Jörgen Sigvardsson25-Feb-02 8:07 
GeneralRe: A more generic <code>CountOverdueFivePlus</code> Pin
Christian Graus25-Feb-02 8:12
protectorChristian Graus25-Feb-02 8:12 
GeneralRe: A more generic <code>CountOverdueFivePlus</code> Pin
Chris Losinger25-Feb-02 8:41
professionalChris Losinger25-Feb-02 8:41 
GeneralRe: A more generic <code>CountOverdueFivePlus</code> Pin
Christian Graus25-Feb-02 9:09
protectorChristian Graus25-Feb-02 9:09 
GeneralRe: A more generic <code>CountOverdueFivePlus</code> Pin
Chris Losinger25-Feb-02 9:25
professionalChris Losinger25-Feb-02 9:25 

General General    News News    Suggestion Suggestion    Question Question    Bug Bug    Answer Answer    Joke Joke    Praise Praise    Rant Rant    Admin Admin   

Use Ctrl+Left/Right to switch messages, Ctrl+Up/Down to switch threads, Ctrl+Shift+Left/Right to switch pages.