Steve Breese

A Chicago-based Full-stack JavaScript Developer

MEAN Tutorial Progress

M101JS: MongoDB for Node.js Developers

Homework Step Results Progress
1-1 Ensure Mongo dump file is present $ ls
dump
1-1 Run mongorestore to connect to MongoDB and restore these files
mongorestore dump
building a list of dbs and collections to restore from dump dir
reading metadata for m101.hw1_1 from 
restoring m101.hw1_1 from 
restoring indexes for collection m101.hw1_1 from metadata
finished restoring m101.hw1_1 (1 document)
done
1-1 Using the Mongo shell, perform a find() on the collection called hw1_1 in the database m101
$ mongo
> show dbs
m101    0.078GB
> use m101
switched to db m101
> db.hw1_1.find().pretty()
{
	"_id" : ObjectId("51e4524ef3651c651a42331c"),
	"answer" : "Hello from MongoDB!"
}
1-1 Cut and paste the value corresponding to the answer key (without quotes) into the Homework 1.1 submission text box and click Submit.
Hello from MongoDB!
1-2 Use mongorestore to restore the dump into your running mongod.
mongorestore dump
1-2 Install the dependencies.
npm install
1-2 Run the application to get the answer.
node app.js
Answer: I like kittens
1-2 Cut and paste the answer into the Homework 1.2 submission text box and click Submit.
I like kittens
1-3 Use mongorestore to restore the dump into your running mongod
mongorestore dump
1-3 install all the dependencies listed in the 'package.json' file
npm install
1-3 Run the application to get the answer to hw1-3
node app.js
1-3 Navigate to http://localhost:3000/

Hello, Agent 007.

1-3 Cut and paste the answer into the Homework 1.3 submission text box and click Submit.
Hello, Agent 007.
2-1 Solution to Homework: Multiple selectors in a find() command
> db.movieDetails.find({"rated" : "PG-13","year" : 2013, "awards.wins":0}).pretty()
{
	"_id" : ObjectId("5692a3e124de1e0ce2dfda22"),
	"title" : "A Decade of Decadence, Pt. 2: Legacy of Dreams",
	"year" : 2013,
	"rated" : "PG-13",
	"released" : ISODate("2013-09-13T04:00:00Z"),
	"runtime" : 65,
	"countries" : [
		"USA"
	],
	"genres" : [
		"Documentary"
	],
	"director" : "Drew Glick",
	"writers" : [
		"Drew Glick"
	],
	"actors" : [
		"Gordon Auld",
		"Howie Boulware Jr.",
		"Tod Boulware",
		"Chen Drachman"
	],
	"plot" : "A behind the scenes look at the making of A Tiger in the Dark: The Decadence Saga.",
	"poster" : null,
	"imdb" : {
		"id" : "tt2199902",
		"rating" : 8,
		"votes" : 50
	},
	"awards" : {
		"wins" : 0,
		"nominations" : 0,
		"text" : ""
	},
	"type" : "movie"
}

Answer:
A Decade of Decadence, Pt. 2: Legacy of Dreams
2-2 Solution to Homework: Arrays with nested documents
> db.movieDetails.find({"awards.oscars.award":"bestPicture"})
The query document in this find command is {"awards.oscars.award":"bestPicture"}
Answer:
{"awards.oscars.award":"bestPicture"}
2-3 Solution to Homework: Simple projection
> db.movies.find({year: 1996}, {"title": 1, "_id": 0}).pretty()
The projection document in this find command is {"title": 1, "_id": 0}
Answer:
{"title": 1, "_id": 0}
2-4 Solution to Homework: Matching a specific array element
> db.movieDetails.find({"countries.1":"Sweden"}).limit(1)
The title of the first movie is The Girl with the Dragon Tattoo
Answer:
The Girl with the Dragon Tattoo
2-5 Solution to Homework: Equality queries on arrays
### INCORRECT AS IT COULD INCLUDE OTHER GENRES: ###
> db.movieDetails.find( {$and : [{genres: { $all: ["Comedy", "Crime"] }}, {"genres.0" : "Comedy"}]}).count()
42
### CORRECT AS IT MUST CONTAIN THESE GENRES IN THIS ORDER ###
>db.movieDetails.find({genres: ["Comedy","Crime" ]}).count()
20
Answer:
20
2-6 Solution to Homework: Array operators
> db.movieDetails.find( {genres: { $all: ["Comedy", "Crime"] } }).count()
56
Answer:
56
2-7 Solution to Homework: Updating scalar fields
$set
Answer:
$set
3 Solution to find() and Cursors in the Node.js Driver
$ mongoimport -d crunchbase -c companies companies.json
$ mongo
> show dbs
> use crunchbase
> db.companies.find().count()
18801
> db.companies.find().pretty()
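The following app uses toArray(), which hands every matching document to the callback in a single array. Iterating the cursor with forEach() instead (as in the later examples) processes the results in batches, which provides lower memory overhead and faster execution times.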
var MongoClient = require('mongodb').MongoClient,
    assert = require('assert');


// Connect to the local crunchbase database and print every biotech company.
MongoClient.connect('mongodb://localhost:27017/crunchbase', function(err, db) {

    assert.equal(err, null);
    console.log("Successfully connected to MongoDB.");

    var biotechFilter = {"category_code": "biotech"};
    var matches = db.collection('companies').find(biotechFilter);

    // The callback receives all matching documents together as one array.
    matches.toArray(function(fetchErr, companies) {

        assert.equal(fetchErr, null);
        // An empty result set is treated as a failure.
        assert.notEqual(companies.length, 0);

        for (var i = 0; i < companies.length; i += 1) {
            var company = companies[i];
            console.log( company.name + " is a " + company.category_code + " company." );
        }

        db.close();

    });

});
Answer:
3 Solution to Projection in the Node.js Driver
var MongoClient = require('mongodb').MongoClient,
    assert = require('assert');


// Connect to the local crunchbase database and print a projected view of
// every biotech company, one document at a time.
MongoClient.connect('mongodb://localhost:27017/crunchbase', function(err, db) {

    assert.equal(err, null);
    console.log("Successfully connected to MongoDB.");

    var biotechOnly = {"category_code": "biotech"};
    // Keep only name and category_code; _id is switched off explicitly.
    var fields = {"name": 1, "category_code": 1, "_id": 0};

    var cursor = db.collection('companies').find(biotechOnly);
    cursor.project(fields);

    // Called once per document delivered by the cursor.
    function printCompany(company) {
        console.log(company.name + " is a " + company.category_code + " company.");
        console.log(company);
    }

    // Called when iteration ends; closes the connection.
    function finish(iterationErr) {
        assert.equal(iterationErr, null);
        return db.close();
    }

    cursor.forEach(printCompany, finish);

});
Answer:
3 Solution to The CrunchBase Dataset
var MongoClient = require('mongodb').MongoClient,
    commandLineArgs = require('command-line-args'),
    assert = require('assert');


// Parse the command-line flags before opening the connection.
var options = commandLineOptions();

// Query crunchbase.companies with the filter built from the flags and print
// each matching document followed by a summary.
MongoClient.connect('mongodb://localhost:27017/crunchbase', function(err, db) {

    assert.equal(err, null);
    console.log("Successfully connected to MongoDB.");

    var filter = queryDocument(options);
    var fields = {
        "_id": 1,
        "name": 1,
        "founded_year": 1,
        "number_of_employees": 1,
        "crunchbase_url": 1
    };

    var matchCount = 0;
    var cursor = db.collection('companies').find(filter, fields);

    // Print and count every document the cursor yields.
    function showMatch(company) {
        matchCount += 1;
        console.log( company );
    }

    // Report the query and the tally, then close the connection.
    function summarize(iterationErr) {
        assert.equal(iterationErr, null);
        console.log("Our query was:" + JSON.stringify(filter));
        console.log("Matching documents: " + matchCount);
        return db.close();
    }

    cursor.forEach(showMatch, summarize);

});


/**
 * Build the find() filter from the parsed command-line options.
 *
 * The filter always constrains founded_year to [firstYear, lastYear]; a
 * minimum number_of_employees is added only when "employees" is present.
 * The options object is logged before the filter is built.
 *
 * @param {Object} options - parsed options (firstYear, lastYear, employees)
 * @returns {Object} the query document
 */
function queryDocument(options) {

    console.log(options);

    var yearRange = {};
    yearRange["$gte"] = options.firstYear;
    yearRange["$lte"] = options.lastYear;

    var filter = { "founded_year": yearRange };

    var hasEmployeeFloor = "employees" in options;
    if (hasEmployeeFloor) {
        filter["number_of_employees"] = { "$gte": options.employees };
    }

    return filter;

}


/**
 * Parse the command-line flags for this script.
 *
 * Prints the usage text and calls process.exit() unless both firstYear and
 * lastYear are present in the parsed options.
 *
 * @returns {Object} the parsed options
 */
function commandLineOptions() {

    var definitions = [
        { name: "firstYear", alias: "f", type: Number },
        { name: "lastYear", alias: "l", type: Number },
        { name: "employees", alias: "e", type: Number }
    ];
    var cli = commandLineArgs(definitions);
    var options = cli.parse();

    var hasYearRange = ("firstYear" in options) && ("lastYear" in options);
    if (hasYearRange) {
        return options;
    }

    var usage = cli.getUsage({
        title: "Usage",
        description: "The first two options below are required. The rest are optional."
    });
    console.log(usage);
    process.exit();

    return options;

}

Command:
$ node app.js -f 2004 -l 2008 -e 100  # (first year, last year, min number of employees)
Our query was:{"founded_year":{"$gte":2004,"$lte":2008},"number_of_employees":{"$gte":100}}
Matching documents: 333
Answer:
3 Solution to $regex in the Node.js Driver
$ node app.js -o "personal finance"
$ node app-milestones.js -m "billion.+valuation"
Answer:
3 Solution to Dot Notation in the Node.js Driver
var MongoClient = require('mongodb').MongoClient,
    commandLineArgs = require('command-line-args'),
    assert = require('assert');


// Parse the command-line flags before opening the connection.
var options = commandLineOptions();


// Query crunchbase.companies and print each match, including the nested
// ipo.valuation_amount field, followed by a summary.
MongoClient.connect('mongodb://localhost:27017/crunchbase', function(err, db) {

    assert.equal(err, null);
    console.log("Successfully connected to MongoDB.");

    var filter = queryDocument(options);
    var fields = {
        "_id": 0,
        "name": 1,
        "founded_year": 1,
        "number_of_employees": 1,
        "ipo.valuation_amount": 1
    };

    var matchCount = 0;
    var companies = db.collection('companies');

    companies.find(filter, fields).forEach(
        function(company) {
            matchCount += 1;
            console.log( company );
        },
        function(iterationErr) {
            assert.equal(iterationErr, null);
            console.log("Our query was:" + JSON.stringify(filter));
            console.log("Matching documents: " + matchCount);
            return db.close();
        }
    );

});


/**
 * Build the find() filter from the parsed command-line options.
 *
 * Always constrains founded_year; adds a minimum number_of_employees when
 * "employees" is present; and, when "ipo" is "yes" or "no", requires
 * ipo.valuation_amount to be present and non-null or to be null respectively.
 * Any other "ipo" value adds nothing. The options object is logged first.
 *
 * @param {Object} options - parsed options (firstYear, lastYear, employees, ipo)
 * @returns {Object} the query document
 */
function queryDocument(options) {

    console.log(options);

    // Object-literal syntax: the year range is always part of the filter.
    var yearRange = { "$gte": options.firstYear, "$lte": options.lastYear };
    var filter = { "founded_year": yearRange };

    // Dot (.) notation works because the key is a plain identifier.
    if ("employees" in options) {
        filter.number_of_employees = { "$gte": options.employees };
    }

    if (!("ipo" in options)) {
        return filter;
    }

    // Bracket ([]) syntax is needed here because the key itself contains a dot.
    var valuationKey = "ipo.valuation_amount";
    if (options.ipo == "yes") {
        filter[valuationKey] = {"$exists": true, "$ne": null};
    } else if (options.ipo == "no") {
        filter[valuationKey] = null;
    }

    return filter;

}


/**
 * Parse the command-line flags for this script (year range, employee
 * floor and ipo selector).
 *
 * Prints the usage text and calls process.exit() unless both firstYear and
 * lastYear are present in the parsed options.
 *
 * @returns {Object} the parsed options
 */
function commandLineOptions() {

    var definitions = [
        { name: "firstYear", alias: "f", type: Number },
        { name: "lastYear", alias: "l", type: Number },
        { name: "employees", alias: "e", type: Number },
        { name: "ipo", alias: "i", type: String }
    ];
    var cli = commandLineArgs(definitions);
    var options = cli.parse();

    var missingRequired = !("firstYear" in options) || !("lastYear" in options);
    if (missingRequired) {
        var usage = cli.getUsage({
            title: "Usage",
            description: "The first two options below are required. The rest are optional."
        });
        console.log(usage);
        process.exit();
    }

    return options;

}
Command:
$ node app.js -f 2004 -l 2008 -e 100 -i yes
Our query was:{"founded_year":{"$gte":2004,"$lte":2008},"number_of_employees":{"$gte":100},"ipo.valuation_amount":{"$exists":true,"$ne":null}}
Matching documents: 11
$ node app.js -f 2004 -l 2008 -e 100 -i no
Our query was:{"founded_year":{"$gte":2004,"$lte":2008},"number_of_employees":{"$gte":100},"ipo.valuation_amount":null}
Matching documents: 322
Answer:
3 Solution to Dot Notation on Embedded Documents in Arrays
var MongoClient = require('mongodb').MongoClient,
    commandLineArgs = require('command-line-args'),
    assert = require('assert');


// Parse the command-line flags before opening the connection.
var options = commandLineOptions();


// Query crunchbase.companies and print each match followed by a summary.
MongoClient.connect('mongodb://localhost:27017/crunchbase', function(err, db) {

    assert.equal(err, null);
    console.log("Successfully connected to MongoDB.");

    var filter = queryDocument(options);

    // Dot (.) notation can name fields inside embedded documents
    // (ipo.valuation_amount) as well as fields of documents that are
    // embedded in arrays (offices.country_code).
    var fields = {};
    fields["_id"] = 0;
    fields["name"] = 1;
    fields["offices.country_code"] = 1;
    fields["ipo.valuation_amount"] = 1;

    var matchCount = 0;
    var cursor = db.collection('companies').find(filter, fields);

    // Print and count every document the cursor yields.
    function showMatch(company) {
        matchCount += 1;
        console.log( company );
    }

    // Report the query and the tally, then close the connection.
    function summarize(iterationErr) {
        assert.equal(iterationErr, null);
        console.log("Our query was:" + JSON.stringify(filter));
        console.log("Matching documents: " + matchCount);
        return db.close();
    }

    cursor.forEach(showMatch, summarize);

});


/**
 * Build the find() filter from the parsed command-line options.
 *
 * Always constrains founded_year. Optional parts, each added only when the
 * corresponding option is present: a minimum number_of_employees, an
 * ipo.valuation_amount condition for ipo "yes"/"no", and an exact match on
 * offices.country_code. The options object is logged first.
 *
 * @param {Object} options - parsed options (firstYear, lastYear, employees, ipo, country)
 * @returns {Object} the query document
 */
function queryDocument(options) {

    console.log(options);

    var filter = {
        "founded_year": { "$gte": options.firstYear, "$lte": options.lastYear }
    };

    if ("employees" in options) {
        filter["number_of_employees"] = { "$gte": options.employees };
    }

    if ("ipo" in options) {
        var wantsIpo = options.ipo == "yes";
        var wantsNoIpo = !wantsIpo && options.ipo == "no";

        if (wantsIpo) {
            filter["ipo.valuation_amount"] = {"$exists": true, "$ne": null};
        }
        if (wantsNoIpo) {
            filter["ipo.valuation_amount"] = null;
        }
    }

    // Dot notation reaches into the documents embedded in the offices array.
    if ("country" in options) {
        filter["offices.country_code"] = options.country;
    }

    return filter;

}


/**
 * Parse the command-line flags for this script (year range, employee
 * floor, ipo selector and country code).
 *
 * Prints the usage text and calls process.exit() unless both firstYear and
 * lastYear are present in the parsed options.
 *
 * @returns {Object} the parsed options
 */
function commandLineOptions() {

    var definitions = [
        { name: "firstYear", alias: "f", type: Number },
        { name: "lastYear", alias: "l", type: Number },
        { name: "employees", alias: "e", type: Number },
        { name: "ipo", alias: "i", type: String },
        { name: "country", alias: "c", type: String }
    ];
    var cli = commandLineArgs(definitions);
    var options = cli.parse();

    var hasYearRange = ("firstYear" in options) && ("lastYear" in options);
    if (!hasYearRange) {
        var usage = cli.getUsage({
            title: "Usage",
            description: "The first two options below are required. The rest are optional."
        });
        console.log(usage);
        process.exit();
    }

    return options;

}

Commands:
$ node app-countries.js -f 2004 -l 2008 -e 100 -c IRL
Our query was:{"founded_year":{"$gte":2004,"$lte":2008},"number_of_employees":{"$gte":100},"offices.country_code":"IRL"}
Matching documents: 5
Answer:
3 Solution to Sort, Skip, and Limit in the Node.js Driver
var MongoClient = require('mongodb').MongoClient,
    commandLineArgs = require('command-line-args'),
    assert = require('assert');


// Parse the command-line flags before opening the connection.
var options = commandLineOptions();


// Query crunchbase.companies, sort the matches and print a short summary
// of each one.
MongoClient.connect('mongodb://localhost:27017/crunchbase', function(err, db) {

    assert.equal(err, null);
    console.log("Successfully connected to MongoDB.");

    var filter = queryDocument(options);
    var fields = {
        "_id": 0,
        "name": 1,
        "founded_year": 1,
        "number_of_employees": 1
    };

    // sort() is given an array of [field, direction] pairs here; an object
    // such as {founded_year: -1} can be passed instead.
    var sortSpec = [["founded_year", 1], ["number_of_employees", -1]];

    var cursor = db.collection('companies').find(filter);
    cursor.project(fields);
    cursor.sort(sortSpec);

    var matchCount = 0;

    // Print one company as a name line plus two indented detail lines.
    function describe(company) {
        matchCount += 1;
        var foundedLine = "\n\tfounded " + company.founded_year;
        var employeesLine = "\n\t" + company.number_of_employees + " employees";
        console.log(company.name + foundedLine + employeesLine);
    }

    // Report the query and the tally, then close the connection.
    function summarize(iterationErr) {
        assert.equal(iterationErr, null);
        console.log("Our query was:" + JSON.stringify(filter));
        console.log("Matching documents: " + matchCount);
        return db.close();
    }

    cursor.forEach(describe, summarize);

});


/**
 * Build the find() filter from the parsed command-line options.
 *
 * The filter always constrains founded_year to [firstYear, lastYear]; a
 * minimum number_of_employees is added only when "employees" is present.
 *
 * @param {Object} options - parsed options (firstYear, lastYear, employees)
 * @returns {Object} the query document
 */
function queryDocument(options) {

    var filter = {};
    filter["founded_year"] = {
        "$gte": options.firstYear,
        "$lte": options.lastYear
    };

    if (!("employees" in options)) {
        return filter;
    }

    filter["number_of_employees"] = { "$gte": options.employees };
    return filter;

}


/**
 * Parse the command-line flags for this script.
 *
 * Prints the usage text and calls process.exit() unless both firstYear and
 * lastYear are present in the parsed options.
 *
 * @returns {Object} the parsed options
 */
function commandLineOptions() {

    var cli = commandLineArgs([
        { name: "firstYear", alias: "f", type: Number },
        { name: "lastYear", alias: "l", type: Number },
        { name: "employees", alias: "e", type: Number }
    ]);
    var options = cli.parse();

    var requiredFlags = ["firstYear", "lastYear"];
    var allPresent = requiredFlags.every(function(flag) {
        return flag in options;
    });

    if (!allPresent) {
        console.log(cli.getUsage({
            title: "Usage",
            description: "The first two options below are required. The rest are optional."
        }));
        process.exit();
    }

    return options;

}

Command:
$ node app-sort.js -f 2006 -l 2009 -e 100
Our query was:{"founded_year":{"$gte":2006,"$lte":2009},"number_of_employees":{"$gte":100}}
Matching documents: 204

Skip & Limit:

var MongoClient = require('mongodb').MongoClient,
    commandLineArgs = require('command-line-args'),
    assert = require('assert');


// Parse the command-line flags before opening the connection.
var options = commandLineOptions();


// Query crunchbase.companies with limit, skip and sort applied to the
// cursor, and print a short summary of each document displayed.
MongoClient.connect('mongodb://localhost:27017/crunchbase', function(err, db) {

    assert.equal(err, null);
    console.log("Successfully connected to MongoDB.");

    var filter = queryDocument(options);
    var fields = {
        "_id": 0,
        "name": 1,
        "founded_year": 1,
        "number_of_employees": 1
    };
    var sortSpec = [["founded_year", 1], ["number_of_employees", -1]];

    // The modifiers are applied in the same order as before: limit, skip,
    // then sort.
    var cursor = db.collection('companies').find(filter);
    cursor.project(fields);
    cursor.limit(options.limit);
    cursor.skip(options.skip);
    cursor.sort(sortSpec);

    var shownCount = 0;

    // Print one company as a name line plus two indented detail lines.
    function describe(company) {
        shownCount += 1;
        var foundedLine = "\n\tfounded " + company.founded_year;
        var employeesLine = "\n\t" + company.number_of_employees + " employees";
        console.log(company.name + foundedLine + employeesLine);
    }

    // Report the query and the tally, then close the connection.
    function summarize(iterationErr) {
        assert.equal(iterationErr, null);
        console.log("Our query was:" + JSON.stringify(filter));
        console.log("Documents displayed: " + shownCount);
        return db.close();
    }

    cursor.forEach(describe, summarize);

});

/**
 * Build the find() filter from the parsed command-line options.
 *
 * The filter always constrains founded_year to [firstYear, lastYear]; a
 * minimum number_of_employees is added only when "employees" is present.
 * The options object is logged before the filter is built.
 *
 * @param {Object} options - parsed options (firstYear, lastYear, employees, ...)
 * @returns {Object} the query document
 */
function queryDocument(options) {

    console.log(options);

    var lowerBound = options.firstYear;
    var upperBound = options.lastYear;
    var filter = {
        "founded_year": { "$gte": lowerBound, "$lte": upperBound }
    };

    if ("employees" in options) {
        var employeeFloor = { "$gte": options.employees };
        filter.number_of_employees = employeeFloor;
    }

    return filter;

}


/**
 * Parse the command-line flags for this script (year range, employee
 * floor, plus skip and limit, which default to 0 and 20000).
 *
 * Prints the usage text and calls process.exit() unless both firstYear and
 * lastYear are present in the parsed options.
 *
 * @returns {Object} the parsed options
 */
function commandLineOptions() {

    var definitions = [
        { name: "firstYear", alias: "f", type: Number },
        { name: "lastYear", alias: "l", type: Number },
        { name: "employees", alias: "e", type: Number },
        { name: "skip", type: Number, defaultValue: 0 },
        { name: "limit", type: Number, defaultValue: 20000 }
    ];
    var cli = commandLineArgs(definitions);
    var options = cli.parse();

    var missingRequired = !("firstYear" in options) || !("lastYear" in options);
    if (missingRequired) {
        var usage = cli.getUsage({
            title: "Usage",
            description: "The first two options below are required. The rest are optional."
        });
        console.log(usage);
        process.exit();
    }

    return options;

}
Commands:
$ node app-sortSkipLimit.js -f 2006 -l 2009 -e 250 --limit 10 --skip 0
Lesson: MongoDB will always (1) Sort, (2) Skip, then (3) Limit
Answer:
Katherine, Stacy
3 Solution for insertOne() and insertMany() in the Node.js Driver
Lecture Notes

Twitter Developer Documentation is here: https://dev.twitter.com/overview/documentation

Documentation on the Twitter streaming API is here: https://dev.twitter.com/streaming/overview

Documentation on the Twitter REST API is here: https://dev.twitter.com/rest/public

To use any of the Twitter APIs you will need access tokens. The simplest means of acquiring access tokens is described here: https://dev.twitter.com/oauth/overview/application-owner-access-tokens

The Twitter API client library for Node.js that I used in the lessons is found here: https://www.npmjs.com/package/twitter

Note that you can place your access tokens in a separate file (.env) and use the following package to load them. https://www.npmjs.com/package/dotenv

The package.json file for this lesson contains the dependencies for the twitter and dotenv packages. See the applications in the handouts for examples of how to use them. The documentation for the twitter and dotenv packages provides details on setting up your tokens as environment variables, loading them, and using them to access the Twitter API.
var MongoClient = require('mongodb').MongoClient,
    Twitter = require('twitter'),
    assert = require('assert');

// Load the Twitter credentials from .env into process.env.
require('dotenv').load();
var twitterClient = new Twitter({
    consumer_key: process.env.TWITTER_CONSUMER_KEY,
    consumer_secret: process.env.TWITTER_CONSUMER_SECRET,
    access_token_key: process.env.TWITTER_ACCESS_TOKEN_KEY,
    access_token_secret: process.env.TWITTER_ACCESS_TOKEN_SECRET
});


// Stream statuses that track "marvel" and store each one in social.statuses
// as it arrives.
MongoClient.connect('mongodb://localhost:27017/social', function(err, db) {

    assert.equal(null, err);
    console.log("Successfully connected to MongoDB.");

    twitterClient.stream('statuses/filter', {track: "marvel"}, function(stream) {
        stream.on('data', function(status) {
            console.log(status.text);
            db.collection("statuses").insertOne(status, function(err, res) {
                // Fail on the insert error itself rather than on a later
                // property access on a missing result.
                assert.equal(err, null);
                console.log("Inserted document with _id: " + res.insertedId + "\n");
            });
        });

        stream.on('error', function(error) {
            throw error;
        });
    });

});
Command:
$ node app-insertOne.js
mongo
> db.statuses.find({},{text: 1, _id: 0}).pretty() // Project out just the text & filter out _id

insertMany

var MongoClient = require('mongodb').MongoClient,
    Twitter = require('twitter'),
    assert = require('assert');

// Load the Twitter credentials from .env into process.env.
require('dotenv').load();
var client = new Twitter({
    consumer_key: process.env.TWITTER_CONSUMER_KEY,
    consumer_secret: process.env.TWITTER_CONSUMER_SECRET,
    access_token_key: process.env.TWITTER_ACCESS_TOKEN_KEY,
    access_token_secret: process.env.TWITTER_ACCESS_TOKEN_SECRET
});


// For each screen name, fetch the statuses newer than the most recent one
// already stored and insert them into social.statuses in one batch.
MongoClient.connect('mongodb://localhost:27017/social', function(err, db) {

    assert.equal(null, err);
    console.log("Successfully connected to MongoDB.");

    var screenNames = ["Marvel", "DCComics", "TheRealStanLee"];
    var done = 0;

    // Count one screen name as processed and close the connection after
    // the last one.
    function finishOne() {
        done += 1;
        if (done == screenNames.length) {
            db.close();
        }
    }

    screenNames.forEach(function(name) {

        // The stored status with the highest id for this user, if any.
        var cursor = db.collection("statuses").find({"user.screen_name": name});
        cursor.sort({ "id": -1 });
        cursor.limit(1);

        cursor.toArray(function(err, docs) {
            assert.equal(err, null);

            // Only ask for statuses after the newest stored one when we
            // already have something for this user.
            var params;
            if (docs.length == 1) {
                params = { "screen_name": name, "since_id": docs[0].id, "count": 10 };
            } else {
                params = { "screen_name": name, "count": 10 };
            }

            client.get('statuses/user_timeline', params, function(err, statuses, response) {

                assert.equal(err, null);

                // Nothing new for this user: skip the insert so an empty
                // batch is never handed to insertMany().
                // NOTE(review): the driver is expected to reject an empty
                // batch — confirm for the driver version in use.
                if (statuses.length === 0) {
                    console.log("No new statuses for " + name);
                    finishOne();
                    return;
                }

                db.collection("statuses").insertMany(statuses, function(err, res) {

                    assert.equal(err, null);
                    console.log(res);

                    finishOne();

                });
            });
        });
    });
});
Command:
$ node app-insertMany.js
Answer:
3 Solution for deleteOne() and deleteMany() in the Node.js Driver Create index:
> db.companies.find({permalink: "thomson-reuters"}, {name: 1, updated_at: 1})
> db.companies.createIndex({permalink: 1})
{
    "createdCollectionAutomatically" : false,
    "numIndexesBefore" : 1,
    "numIndexesAfter" : 2,
    "ok" : 1
}

app-deleteOne.js file

var MongoClient = require('mongodb').MongoClient,
    assert = require('assert');


// Walk crunchbase.companies in permalink order and delete, one at a time,
// every document whose permalink and updated_at equal those of the document
// immediately before it. The connection is closed once the cursor is
// exhausted and every delete has called back.
MongoClient.connect('mongodb://localhost:27017/crunchbase', function(err, db) {

    assert.equal(err, null);
    console.log("Successfully connected to MongoDB.");

    var query = {"permalink": {"$exists": true, "$ne": null}};
    var projection = {"permalink": 1, "updated_at": 1};

    var cursor = db.collection('companies').find(query);
    cursor.project(projection);
    cursor.sort({"permalink": 1});

    // Deletes issued whose callbacks have not run yet.
    var pendingDeletes = 0;
    var cursorExhausted = false;

    // Close only when no more deletes can be issued and none are in flight;
    // otherwise the connection would be closed underneath a pending delete.
    function closeIfFinished() {
        if (cursorExhausted && pendingDeletes === 0) {
            db.close();
        }
    }

    var previous = { "permalink": "", "updated_at": "" };
    cursor.forEach(
        function(doc) {

            if ( (doc.permalink == previous.permalink) && (doc.updated_at == previous.updated_at) ) {
                console.log(doc.permalink);

                pendingDeletes = pendingDeletes + 1;

                var filter = {"_id": doc._id};

                db.collection('companies').deleteOne(filter, function(err, res) {

                    assert.equal(err, null);
                    console.log(res.result);

                    pendingDeletes = pendingDeletes - 1;
                    closeIfFinished();

                });

            }

            previous = doc;

        },
        function(err) {

            assert.equal(err, null);

            cursorExhausted = true;
            closeIfFinished();

        }
    );

});
Command to run:
$ node app-deleteOne.js

Reset a Database

> db
crunchbase
> db.dropDatabase()
{ "dropped" : "crunchbase", "ok" : 1 }
> exit
$ mongoimport -d crunchbase -c companies companies.json

deleteMany

var MongoClient = require('mongodb').MongoClient,
    assert = require('assert');


// Walk crunchbase.companies in permalink order, collect the _id of every
// document whose permalink and updated_at equal those of the document
// immediately before it, then delete them all with a single deleteMany().
MongoClient.connect('mongodb://localhost:27017/crunchbase', function(err, db) {

    assert.equal(err, null);
    console.log("Successfully connected to MongoDB.");

    var query = {"permalink": {$exists: true, $ne: null}};
    var projection = {"permalink": 1, "updated_at": 1};

    var cursor = db.collection('companies').find(query);
    cursor.project(projection);
    cursor.sort({"permalink": 1});

    var markedForRemoval = [];

    var previous = { "permalink": "", "updated_at": "" };
    cursor.forEach(
        function(doc) {

            if ( (doc.permalink == previous.permalink) && (doc.updated_at == previous.updated_at) ) {
                markedForRemoval.push(doc._id);
            }

            previous = doc;
        },
        function(err) {

            assert.equal(err, null);

            var filter = {"_id": {"$in": markedForRemoval}};

            db.collection("companies").deleteMany(filter, function(err, res) {

                // Fail on the delete error itself rather than on a later
                // property access on a missing result.
                assert.equal(err, null);

                console.log(res.result);
                console.log(markedForRemoval.length + " documents removed.");

                return db.close();
            });
        }
    );

});
Command to run:
$ node app-deleteMany.js
Answer:
Homework: 3.1

Question:

When using find() in the Node.js driver, which of the following best describes when the driver will send a query to MongoDB?
Answer:
When we call a cursor method passing a callback function to process query results.
Homework: 3.2

Question:

Suppose you have a MongoDB collection called school.grades that is composed solely of these 20 documents.
var cursor = db.collection("grades").find({});
cursor.skip(6);
cursor.limit(2);
cursor.sort({"grade": 1});

Which student's documents will be returned as part of a subsequent call to toArray()?
Answer:
Bob, Seamus
Homework: 3.3 Import companies.json:
> use crunchbase
> db.dropDatabase()
> exit
$ mongoimport -d crunchbase -c companies companies.json

/*

Homework Description:

This application depends on the companies.json dataset distributed as a handout with the
"find() and Cursors in the Node.js Driver" lesson. You must first import that collection. Please ensure
you are working with an unmodified version of the collection before beginning this
exercise.

To import a fresh version of the companies.json data, please type the following:

mongoimport -d crunchbase -c companies companies.json


If you have already mongoimported this data you will first need to drop the crunchbase database
in the Mongo shell. Do that by typing the following two commands, one at a time, in the Mongo shell:

use crunchbase
db.dropDatabase()


The code below is complete with the exception of the queryDocument() function.
As in the lessons, the queryDocument() function builds an object that will be passed to find()
to match a set of documents from the crunchbase.companies collection.

For this assignment, please complete the queryDocument() function as described in the TODO
comments you will find in that function.


Once complete, run this application by typing:

node buildingQueryDocuments.js


When you are convinced you have completed the application correctly, please enter the
average number of employees per company reported in the output. Enter only the number reported.
It should be three numeric digits.

As a check that you have completed the exercise correctly, the total number of unique companies
reported by the application should equal 42.

If the grading system does not accept the first solution you enter, please do not make further
attempts to have your solution graded without seeking some help in the discussion forum.

*/

var MongoClient = require('mongodb').MongoClient,
    assert = require('assert');


var allOptions = [
    {
        firstYear: 2002,
        lastYear: 2016,
        city: "Palo Alto"
    },
    {
        lastYear: 2010,
        city: "New York"
    },
    {
        city: "London"
    }
];

var numQueriesFinished = 0;
var companiesSeen = {};

for (var i=0; i

Answer:
169
Homework: 3.4 Solution:
/*

Homework Description:

In completing this exercise, you will find the following lesson helpful as a refresher on the $or
operator.
https://university.mongodb.com/courses/MongoDB/M101JS/2016_January/courseware/Week_2_CRUD/56955ef3d8ca393adc3abe5c

This application depends on the companies.json dataset distributed as a handout with the
"find() and Cursors in the Node.js Driver" lesson. You must first import that collection. Please ensure
you are working with an unmodified version of the collection before beginning this
exercise.

To import a fresh version of the companies.json data, please type the following:

mongoimport -d crunchbase -c companies companies.json


If you have already mongoimported this data you will first need to drop the crunchbase database
in the Mongo shell. Do that by typing the following two commands, one at a time, in the Mongo shell:

use crunchbase
db.dropDatabase()


The code below is complete with the exception of the queryDocument() function.
As in the lessons, the queryDocument() function builds an object that will be passed to find()
to match a set of documents from the crunchbase.companies collection.

For this assignment, please complete the queryDocument() function as described in the TODO
comments you will find in that function.


Once complete, run this application by typing:

node overviewOrTags.js


When you are convinced you have completed the application correctly, please enter the
average number of employees per company reported in the output. Enter only the number reported.
It should be two numeric digits.

As a check that you have completed the exercise correctly, the total number of unique companies
reported by the application should equal 194.

If the grading system does not accept the first solution you enter, please do not make further
attempts to have your solution graded without seeking some help in the discussion forum.


*/


var MongoClient = require('mongodb').MongoClient,
    assert = require('assert');

var allOptions = [
    {
        overview: "wiki",
    },
    {
        milestones: "CMO"
    }
];

var numQueriesFinished = 0;
var companiesSeen = {};

for (var i=0; i

Results:
$ node overviewOrTags.js
Successfully connected to MongoDB for query: 0
Successfully connected to MongoDB for query: 1
Query 1 was:{"milestones.source_description":{"$regex":"CMO","$options":"i"}}
Matching documents: 3
Query 0 was:{"$or":[{"overview":{"$regex":"wiki","$options":"i"}},{"tag_list":{"$regex":"wiki","$options":"i"}}]}
Matching documents: 206
Companies found: 123people,3721-internet-assistant...
Total employees in companies identified: 9496
Total unique companies: 194
Average number of employees per company: 48
Answer to populate:
48
4.1 Solution to Quiz: MongoDB Schema Design What's the single most important factor in designing your application schema within MongoDB?

Answer:
Matching the data access patterns of your application.
4.2 Solution to Quiz: Modeling a Blog in Documents Given the document schema that we proposed for the blog, how many collections would need to be accessed to display a blog post with its comments and tags?

Answer:
1
4.3 Solution to $regex in the Node.js Driver

Answer:
4.4 Solution to Quiz: Living Without Constraint
What does Living Without Constraints refer to?
Answer:
Keeping your data consistent even though MongoDB lacks foreign key constraints
4.5 Solution to Quiz: Living Without Transactions
Which of the following operations operate atomically within a single document? Check all that apply.
Answer: ALL!
Update
findAndModify
$addToSet (within an update)
$push within an update
4.6 Solution to Quiz: One to One Relations
What's a good reason you might want to keep two documents that are related to each other one-to-one in separate collections? Check all that apply.
Answer:
To reduce the working set size of your application.
    Because the combined size of the documents would be larger than 16MB
    
4.7 Solution to Quiz: One to Many Relations
When is it recommended to represent a one to many relationship in multiple collections?
Answer:
Whenever the many is large
    
4.8 Solution to Multikeys
> db.student.find()
{ "_id" : 0, "name" : "Steve Breese", "teachers" : [0,1] }
{ "_id" : 1, "name" : "Andrew Erlichson", "teachers" : [0, 1, 3] }
> db.teachers.find()
{ "_id" : 0, "name" : "Rene Castellanos" }
{ "_id" : 1, "name" : "John L. Hennessy" }
mongo:
> db.student.ensureIndex({'teachers':1})
{
    "createdCollectionAutomatically" : false,
    "numIndexesBefore" : 1,
    "numIndexesAfter" : 2,
    "ok" : 1
}
> db.students.find({'teachers':{$all:[0,1]}})
{ "_id" : 0, "name" : "Steve Breese", "teachers" : [0,1] }
{ "_id" : 1, "name" : "Andrew Erlichson", "teachers" : [0, 1, 3] }
> db.students.find({'teachers':{$all:[0,1]}}).explain()
{
    "cursor" : "BtreeCursor teachers_1",
    "isMultiKey" : true,
}
4.9 Solution to Benefits of Embedding
Improved Read Performance
One Round Trip to the DB
Answer:
Whenever the many is large
    
4.10 Solution to Quiz: Trees
Given the following typical document for an e-commerce category hierarchy collection called categories
{
  _id: 34,
  name : "Snorkeling",
  parent_id: 12,
  ancestors: [12, 35, 90]
}
Which query will find all descendants of the snorkeling category?
Answer:
db.categories.find({ancestors:34})
    
4.1 Solution to Homework: Homework: 4.1
Given the following typical document for a e-commerce category hierarchy collection called categories
Suppose we are building a web site that will display companies data in several different views. Based on the lessons in this module and ignoring other concerns, which of the following conditions favor embedding milestones (as they are in the facebook.json example) over maintaining milestones in a separate collection. Check all that apply.
Wrong:
The number of milestones from a company rarely exceeds 10 per year.
An individual milestone entry will always be smaller than 16K bytes
One frequently displayed view of our data displays company details such as the "name", "founded_year", "twitter_username", etc. as well as milestones.

The number of milestones for a company rarely exceeds 10 per year.
Milestones will never contain more than 15 fields
An individual milestone entry will always be smaller than 16K bytes
One frequently displayed view of our data displays company details such as the "name", "founded_year", "twitter_username", etc. as well as milestones.

The number of milestones for a company rarely exceeds 10 per year.
Milestones will never contain more than 15 fields
An individual milestone entry will always be smaller than 16K bytes
One frequently displayed view of our data displays company details such as the "name", "founded_year", "twitter_username", etc. as well as milestones.
Some of the milestone fields such as "stoneable_type" and "stoneable" are frequently the same from one milestone to another.

The number of milestones for a company rarely exceeds 10 per year.
An individual milestone entry will always be smaller than 16K bytes
One frequently displayed view of our data displays company details such as the "name", "founded_year", "twitter_username", etc. as well as milestones.

The number of milestones for a company rarely exceeds 10 per year.
An individual milestone entry will always be smaller than 16K bytes
One frequently displayed view of our data displays company details such as the "name", "founded_year", "twitter_username", etc. as well as milestones.
Some of the milestone fields such as "stoneable_type" and "stoneable" are frequently the same from one milestone to another.

An individual milestone entry will always be smaller than 16K bytes
One frequently displayed view of our data displays company details such as the "name", "founded_year", "twitter_username", etc. as well as milestones.

Some of the milestone fields such as "stoneable_type" and "stoneable" are frequently the same from one milestone to another.

Correct Answer:
The number of milestones for a company rarely exceeds 10 per year.
One frequently displayed view of our data displays company details such as the "name", "founded_year", "twitter_username", etc. as well as milestones.
4.2 Solution to Homework: Homework: 4.2
Which of the following schemas will make it possible to find() all descendants of a category using a single query.
Wrong:
E - "children", "descendants"
Correct Answer: B
db.categories.insertOne({"_id": "Quantum Mechanics", "ancestors": ["Books", "Science", "Physics"], "parent": "Physics"})
db.categories.insertOne({"_id": "Classical Mechanics", "ancestors": ["Books", "Science", "Physics"], "parent": "Physics"})
db.categories.insertOne({"_id": "Physics", "ancestors": ["Books", "Science"], "parent": "Science"})
db.categories.insertOne({"_id": "Chemistry", "ancestors": ["Books", "Science"], "parent": "Science"})
db.categories.insertOne({"_id": "Science", "ancestors": ["Books"], "parent": "Books"})
db.categories.insertOne({"_id": "Books", "ancestors": [], "parent": null})
4.3 Solution to Homework: Homework: 4.3

Given:
{
    _id: 123456789,
    title: "Good Book",
    author: [ "Sam Goodman", "Mike Smith" ],
    published_date: ISODate("2010-09-24"),
    publisher_id: "Smith Publishing",
    available: 3,
    checkout: [ { patron_id: "33457", date: ISODate("2012-10-15") } ]
}
Which of the following is the primary advantage to this design?
Wrong:
3 - Can retrieve all data about a book, its publisher, and any patrons who checked out the book with a single query.
Correct Answer: 5th
5 - Can make atomic updates as books are checked out or turned in.
Storage Engines Solution to Quiz: Storage Engines: Introduction

The storage engine directly determines

Correct Answer:
The data file format
Format of indexes
MMAPv1 Solution to Quiz: Storage Engines: MMAPv1


Correct Answer:
MMAPv1 storage engine automatically allocates power-of-two-sized documents when new documents are inserted
MMAPv1 storage engine is built on top of the mmap system call that maps files into memory
MMAPv1 storage engine has collection level locking (not document level)
MMAPv1 does not manage memory
WiredTiger Solution to Quiz: Storage Engines: WiredTiger
killall mongod
mkdir WT
mongod --dbpath WT --storageEngine wiredTiger
> db.foo.stats()
{
    "WiredTiger" : {
        "formatVersion" : 1
    }
}

WiredTiger storage engine has
Document-level concurrency
Compression
Creating Indexes Solution to Quiz: Creating Indexes

Which optimization will typically have the greatest impact on the performance of a database.
> db.students.createIndex({student_id:1});
{
    "createdCollectionAutomatically" : false,
    "numIndexesBefore" : 1,
    "numIndexesAfter" : 2,
    "ok" : 1
}
> db.students.explain().find({student_id:5});
    "winningPlan" :
        "indexName" : "student_id_1",
> db.students.explain(true).find({student_id:5});
    "docsExamined" : 10
> db.students.createIndex({student_id:1, class_id: -1 });

Please provide the mongo shell command to add an index to a collection named students, having the index key be class, student_name.
Answer:
> db.students.createIndex({class:1, student_name: 1 });
Indexes Solution to Quiz: Indexes

Which optimization will typically have the greatest impact on the performance of a database.


Answer:
Adding appropriate indexes on large collections so that only a small percentage of queries need to scan the collection.
Discovering (and Deleting) Indexes Solution to Quiz: Discovering (and Deleting) Indexes

> db.students.getIndexes();
> db.students.dropIndex({student_id:1});

Which of the following is a valid way to discover indexes for a collection in mongoDB?
Answer:
db.collection.getIndexes();
Multikey Indexes Solution to Quiz: Multikey Indexes

Suppose we have a collection foo that has an index created as follows:
db.foo.createIndex( { a:1, b:1 } )

Which of the following inserts are valid to this collection?
Answer:
db.foo.insert( { a : "grapes", b : "oranges" } )
db.foo.insert( { a : ["apples", "oranges" ], b : "grapes" } )
db.foo.insert( { a : "grapes", b : [ 8, 9, 10 ] } )
Dot Notation and Multikey Solution to Quiz: Dot Notation and Multikey

> db.students.createIndex({'scores.score':1});
> db.students.explain().find({'scores': {$elemMatch: {type:'exam', score: {'$gt':99.8}}}});
    "winningPlan" :

Suppose you have a collection called people in the database earth with documents of the following form:
{
	"_id" : ObjectId("551458821b87e1799edbebc4"),
	"name" : "Eliot Horowitz",
	"work_history" : [
		{
			"company" : "DoubleClick",
			"position" : "Software Engineer"
		},
		{
			"company" : "ShopWiki",
			"position" : "Founder & CTO"
		},
		{
			"company" : "MongoDB",
			"position" : "Founder & CTO"
		}
	]
}
Answer:
db.people.createIndex({'work_history.company':-1});
Unique Indexes Solution to Quiz: Index Creation Option, Unique

> db.stuff.createIndex({thing:1});
> db.stuff.dropIndex({thing:1});
> db.stuff.remove({thing: 'apple'}, {justOne: true});
> db.stuff.createIndex({thing:1}, {unique:true});

Please provide the mongo shell command to create a unique index on student_id, class_id, ascending for the collection students.
> db.students.createIndex({student_id:1, class_id:1}, {unique:true});
Answer:


    
Sparse Indexes Solution to Quiz: Index Creation, Sparse

> db.employees.getIndexes();
> db.employees.createIndex({cell:1},{unique:true});
{
    "errmsg" : "E11000 duplicate key error index: test.employees.$cell_1 dup key: { : null }",
> db.employees.createIndex({cell:1},{unique:true, sparse: true});

Answer:
You can gain greater flexibility with creating Unique indexes. (when some documents lack the key you are indexing on)
The index will be smaller than it would if it were not sparse.
Background Indexes Solution to Quiz: Index Creation, Background

> db.students.getIndexes()
> db.students.createIndex({'scores.score':1});
> db.students.findOne();
    (BLOCKED while foreground index is running)
^C (do you want to kill the current op(s) on the server? (y/n): y
> db.students.createIndex({'scores.score':1},{background : true});

Which things are true about creating an index in the background in MongoDB. Check all that apply.
Answer:
Although the database server will continue to take requests, a background index creation still blocks the mongo shell that you are using to create the index.
Creating an index in the background takes longer than creating it in the foreground
Explain Solution to Quiz: Using Explain

> db.example.createIndex({a:1, b:1});
{
    "createdCollectionAutomatically" : false,
    "numIndexesBefore" : 1,
    "numIndexesAfter" : 2,
    "ok" : 1
}
db.example.createIndex({b:1});
{
    "createdCollectionAutomatically" : false,
    "numIndexesBefore" : 2,
    "numIndexesAfter" : 3,
    "ok" : 1
}
> var exp = db.example.explain();
> exp.help();
Explainable operations
    .aggregate(...) - explain an aggregation operation
    .count(...) - explain a count operation
    .find(...) - get an explainable query
    .group(...) - explain a group operation
    .remove(...) - explain a remove operation
    .update(...) - explain an update operation
Explainable collection methods
    .getCollection()
    .getVerbosity()
    .setVerbosity(verbosity)
> exp.find({a:17, b:55}).sort({b:-1});
> exp.find({c:200});
"winningPlan" : {
    "stage" : "COLLSCAN",  # Collection scan

> var cursor = db.example.find({a:99});
> cursor.explain();
> cursor.next();
{ "_id" : 99000, "a" : 99, "b" : 0, "c" : 0 }

Which of the following are valid ways to find out which indexes a particular query uses? Check all that apply.
Answer:
db.example.explain().remove( { a : 1, b : 2 } )
db.example.explain().find( { a : 1, b : 2 } )
curs = db.example.find( { a : 1, b : 2 } ); curs.explain()
var exp = db.example.explain(); exp.find( { a : 1, b : 2 } )
db.example.find( { a : 1, b : 2 } ).explain()
Verbosity Solution to Quiz: Explain: Verbosity

> exp.find({a:17, b:55});
    "executionStats" : {
        "nReturned" : 100,
        "executionTimeMillis" : 0,
        "totalKeysExamined" : 100,
        "totalDocsExamined" : 100,
> db.example.dropIndex({a:1, b:1});
> exp.find({a:17, b:55});
    "executionStats" : {
        "nReturned" : 100,
        "executionTimeMillis" : 5,
        "totalKeysExamined" : 10000,
        "totalDocsExamined" : 10000,
> db.example.createIndex({a:1,b:1});
> var exp = db.example.explain("allPlansExecution"); # Gives you more information
Given the following output from explain, what is the best description of what happened during the query?
> exp = db.example.explain("executionStats")
Explainable(test.example)
> exp.find( { a : 7 } )
{
	"queryPlanner" : {
		"plannerVersion" : 1,
		"namespace" : "test.example",
		"indexFilterSet" : false,
		"parsedQuery" : {
			"a" : {
				"$eq" : 7
			}
		},
		"winningPlan" : {
			"stage" : "COLLSCAN",
			"filter" : {
				"a" : {
					"$eq" : 7
				}
			},
			"direction" : "forward"
		},
		"rejectedPlans" : [ ]
	},
	"executionStats" : {
		"executionSuccess" : true,
		"nReturned" : 10000,
		"executionTimeMillis" : 619,
		"totalKeysExamined" : 0,
		"totalDocsExamined" : 999999,
		"executionStages" : {
			"stage" : "COLLSCAN",
			"filter" : {
				"a" : {
					"$eq" : 7
				}
			},
			"nReturned" : 10000,
			"executionTimeMillisEstimate" : 520,
			"works" : 1000001,
			"advanced" : 10000,
			"needTime" : 990000,
			"needFetch" : 0,
			"saveState" : 7812,
			"restoreState" : 7812,
			"isEOF" : 1,
			"invalidates" : 0,
			"direction" : "forward",
			"docsExamined" : 999999
		}
	},
	"serverInfo" : {
		"host" : "cross-mb-air.local",
		"port" : 27017,
		"version" : "3.0.1",
		"gitVersion" : "534b5a3f9d10f00cd27737fbcd951032248b5952"
	},
	"ok" : 1
}
Answer: The query scanned 999,999 documents, returning 10,000 in 619 milliseconds.


Covered Queries Solution to Quiz: Covered Queries

> var exp = db.numbers.explain("executionStats");
> exp.find({i:45,j:23});
> exp.find({i:45, j:23},{_id:0, i:1, j:1, k:1});
    "executionStats"
        "nReturned" : 100,
        "totalDocsExamined" : 0
You would like to perform a covered query on the example collection. You have the following indexes:
{ name : 1, dob : 1 }
{ _id : 1 }
{ hair : 1, name : 1 }
Which of the following is likely to be a covered query?
Answer:
  • db.example.find( { name : { $in : [ "Bart", "Homer" ] } }, {_id : 0, hair : 1, name : 1} )(dob is missing)
  • db.example.find( { _id : 1117008 }, { _id : 0, name : 1, dob : 1 } )(name & dob do not appear in the index)
  • db.example.find( { name : { $in : [ "Bart", "Homer" ] } }, {_id : 0, dob : 1, name : 1} )
  • db.example.find( { name : { $in : ["Alfred", "Bruce" ] } }, { name : 1, hair : 1 } ) (dob is missing & _id is being returned)
When Solution to Quiz: When is an Index Used?

Given collection foo with following index:
db.foo.createIndex( { a : 1, b : 1, c : 1 } )
Which of the following queries will use the index?
Answer:
  • db.foo.find( { a : 3 } )
  • db.foo.find( { b : 3, c : 4 } ) (b is not left-subset)
  • db.foo.find( { c : 1 } ).sort( { a : 1, b : 1 } )
  • db.foo.find({c:1}).sort({a:-1, b:1}) (sorting doesn't match index orientation)
Index Size Solution to Quiz: How Large is Your Index?

> db.students.stats()
    "totalIndexSize" : 668003728,
    "indexSizes" : {
        "_id_" : 324456384,
        "student_id_1_class_id_1" : 343547344
    }
> db.students.totalIndexSize()
668003728
Is it more important that your index or your data fit into memory?
Answer: Index


    
Index Entries
Index Cardinality
Solution to Quiz: Number of Index Entries

Let's say you update a document with a key called tags and that update causes the document to need to get moved on disk. Assume you are using the MMAPv1 storage engine. If the document has 100 tags in it, and if the tags array is indexed with a multikey index, how many index points need to be updated in the index to accommodate the move?
Answer: 100
Geospatial Solution to Quiz: Geospatial Indexes

> db.stores.createIndex({ location: '2d', type:1})
> db.stores.getIndexes()
> db.stores.find({location:{$near:[50,50]}})
Suppose you have a 2D geospatial index defined on the key location in the collection places. Write a query that will find the closest three places (the closest three documents) to the location 74, 140.
Answer:
db.places.find( { location : { $near : [74,140] } }).limit(3)
Geospatial Spherical Solution to Geospatial Spherical

> db.places.find().pretty()
{
    "name" : "Apple Store",
    "city" : "Palo Alto",
    "location" : {
        "type" : "Point",
        "coordinates" : [ -122, 37 ]
    }
}
> db.places.createIndex({'location': '2dsphere'})
> db.places.getIndexes()
> db.places.find({
    location:{
        $near: {
            $geometry: {
                type: "Point",
                coordinates: [-122, 37]},
            $maxDistance: 2000
        }
    }
}).pretty()
$ mongo < geonear.js # Redirect from this file to run the query.
Answer:
db.stores.find({
    loc:{
        $near: {
            $geometry: {
                type: "Point",
                coordinates: [-130, 39]},
            $maxDistance: 1000000
        }
    }
})
Text Indexes Solution to Quiz: Text Indexes

> db.sentences.find({'words':'dog shrub ruby.'})
{ "_id" : ObjectId(""), "words" : "dog shrub ruby." }
> db.sentences.find({'words' : 'dog'})
    (none found)
> db.sentences.createIndex({'words':'text'})
> db.sentences.find({$text:{$search:'dog'}})
> db.sentences.find({$text:{$search:'dog moss Ruby.'}})  # Logical or operator applied!!!
> db.sentences.find({$text:{$search:'dog tree obsidian'}}, {DisScore:{$meta: 'textScore'}}).sort({DisScore:{$meta:'textScore'}})
You create a text index on the "title" field of the movies collection, and then perform the following text search:
> db.movies.find( { $text : { $search : "Big Lebowski" } } )
Which of the following documents will be returned, assuming they are in the movies collection? Check all that apply.
Answer: ALL
  • { "title" : "The Big Lebowski" , star: "Jeff Bridges" }
  • { "title" : "Big" , star : "Tom Hanks" }
  • { "title" : "Big Fish" , star: "Ewan McGregor" }
Efficiency of Index Use Solution to Quiz: Efficiency of Index Use

> db.students.find({student_id: {$gt: 500000}, class_id: 54}).sort({student_id: 1}).explain()
    "executionStats" : {
        "nReturned" : 10118,
        "executionTimeMillis" : 2900,  # SLOW!!!
        "totalKeysExamined" : 850433,  # POOR SELECTIVITY!!!
    "winningPlan" : {  # used a compound index based on
        "stage" : "IXSCAN",
        "keyPattern" : {
            "student_id" : 1,
            "class_id" : 1
        }
    "rejectedPlans" : [
# index on class_id would be substantially more selective, so let's use HINT:
> db.students.find({student_id: {$gt: 500000}, class_id: 54}).sort({student_id: 1}).hint({class_id:1}).explain("executionStats")
    "executionStats" : {
        "nReturned" : 10118,
        "executionTimeMillis" : 79,    # Much faster!!!
        "totalKeysExamined" : 20071, # Much less!!!

In general, based on the preceding lecture, what is the primary factor that determines how efficiently an index can be used?
Answer: The selectivity of the index

        

Selectivity is the primary factor that determines how efficiently an index can be used. Ideally, the index enables us to select only those records required to complete the result set, without the need to scan a substantially larger number of index keys (or documents) in order to complete the query. Selectivity determines how many records any subsequent operations must work with. Fewer records means less execution time.

Efficiency of Index Use Example Solution to Quiz: Efficiency of Index Use Example

> db.students.createIndex({class_id: 1, final_grade: 1, student_id: 1})
> db.students.find({student_id: {$gt: 500000}, class_id: 54}).sort({final_grade: -1}).explain("executionStats")
    "winningPlan" : {   # No SORT stage!!!
        "inputStage" : {
            "stage" : "IXSCAN",
            "keyPattern" : {    # Used our new index!!!
                "class_id" : 1,     # Class is very selective
                "final_grade" : 1,  # Walk index keys in order to get sorted result set
                "student_id" : 1    # Match student_id: {$gt: 500000} range constraint
            }
    "executionStats" : {
        "executionTimeMillis" : 27  # Super fast!!!

In general, which of the following rules of thumb should you keep in mind when building compound indexes? Check all that apply. For this question, use the following definitions:
equality field: field on which queries will perform an equality test
sort field: field on which queries will specify a sort
range field: field on which queries perform a range test
Answer:
Equality fields before range fields
Sort fields before range fields
Equality fields before sort fields
Default logging facility Solution to

> db.students.find({student_id:10000})
{  "_id" : ObjectId(""), "student_id" : 10000, "scores" : [ { "type" : "exam", "score" : 23.69 }, { "type" : "quiz", "score" :
Sun Nov 18 [conn1] query school.students query: { student_id: 10000.0 } ntoreturn:0 nscanned:10000000 ... 4241ms  # took 4 seconds!!!
No question!


    
Profiling Solution to Quiz: Profiling

$ mongod -dbpath /usr/local/var/mongodb --profile 1 --slowms 2
> db.students.find({student_id:10000})  # <--- Slow query!!
> db.system.profile.find().pretty()
    "ns" : "school.students",
    "query" : {
        "student_id" : 10000
    }
    "nscanned" : 10000000,  # <-- Too many!!!
    "nreturned" : 1,
    "millis" : 4231,  # <-- Took too long!!! (4 seconds)
> db.system.profile.find({ns:/school.students/}).sort({ts:1}).pretty()
> db.system.profile.find({millis:{$gt:1}}).sort({ts:1}).pretty()  # sort by timestamp

Turn on profiler from shell
> db.getProfilingLevel()
1
> db.getProfilingStatus()
{"was" : 1, "slowms" : 2 }
> db.setProfilingStatus()
# Set the profiling level:
> db.setProfilingLevel(1,4) # <-- profile queries taking 4 milliseconds or longer (slowms is in milliseconds)
{ "was" : 1, "slowms" : 2, "ok" : 1 }
# Turn off profiling:
> db.setProfilingLevel(0)
{ "was" : 1, "slowms" : 2, "ok" : 1 }
> db.getProfilingStatus()
{"was" : 0, "slowms" : 4 }
> db.getProfilingLevel()
0
Write the query to look in the system profile collection for all queries that took longer than one second, ordered by timestamp descending.
Answer:
db.system.profile.find({millis:{$gt:1000}}).sort({ts:-1}).pretty()
Mongotop
Mongostat
Solution to Quiz: Mongostat

mongotop provides a method to track the amount of time a MongoDB instance spends reading and writing data. mongotop provides statistics on a per-collection level. By default, mongotop returns values every second.

$ mongotop 3
$ mongo --port 27018
> use school
> db.students.count();
$ mongostat --port 27018
insert query update delete
Which of the following statements about mongostat output are true?
Answer:
The getmore column concerns the number of requests per time interval to get additional data from a cursor
the faults column appears only in the mmapv1 output
5.1 Solution to Homework: Homework 5.1

Wrong answer:
  • db.products.find( { 'brand' : "GE" } )
  • db.products.find( { 'brand' : "GE" } ).sort( { price : 1 } )
  • db.products.find( { $and : [ { price : { $gt : 30 } },{ price : { $lt : 50 } } ] } ).sort( { brand : 1 } )
  • db.products.find( { brand : 'GE' } ).sort( { category : 1, brand : -1 } )
Wrong answer:
  • db.products.find( { 'brand' : "GE" } )
  • db.products.find( { 'brand' : "GE" } ).sort( { price : 1 } )
  • db.products.find( { $and : [ { price : { $gt : 30 } },{ price : { $lt : 50 } } ] } ).sort( { brand : 1 } )
  • db.products.find( { brand : 'GE' } ).sort( { category : 1, brand : -1 } )

The following 2 combinations are wrong:
"$and" & "category : 1, brand : -1"
"price : 1" & "category : 1, brand : -1"
I now realize that the "category : 1, brand : -1" is wrong because either all or none of the keys must have reverse sort direction.

Correct way to solve:
> db.createCollection("products")
> db.products.insert(
   [
     { item: "digital camera", brand: "GE", price: 119.95, category: 'electronics' },
     { item: "Walkman", brand: "Sony", price: 99.95, category: 'electronics' },
     { item: "iPod", brand: "Apple", price: 399.95, category: 'electronics' },
     { item: "Civic", brand: "Honda", price: 10000, category: 'car' },
     { item: "micorave", brand: "GE", price: 139.95, category: 'appliance' }
   ]
)
BulkWriteResult({
	"writeErrors" : [ ],
	"writeConcernErrors" : [ ],
	"nInserted" : 5,
	"nUpserted" : 0,
	"nMatched" : 0,
	"nModified" : 0,
	"nRemoved" : 0,
	"upserted" : [ ]
})
> db.products.createIndex({	"price" : -1 });
> db.products.createIndex({ "category" : 1, "brand" : 1 });
> db.products.explain().find( { 'brand' : "GE" } );
> db.products.explain().find( { 'brand' : "GE" } ).sort( { price : 1 } );
			"inputStage" : {
				"stage" : "IXSCAN",
				"keyPattern" : {
					"price" : -1
				},
> db.products.explain().find( { $and : [ { price : { $gt : 30 } },{ price : { $lt : 50 } } ] } ).sort( { brand : 1 } );
						"inputStage" : {
							"stage" : "IXSCAN",
							"keyPattern" : {
								"price" : -1
							},
> db.products.explain().find( { brand : 'GE' } ).sort( { category : 1, brand : -1 } )
				"inputStage" : {
					"stage" : "COLLSCAN",
> db.students.getIndexes();
> db.students.dropIndex({student_id:1});
Answer:
db.products.find( { 'brand' : "GE" } ).sort( { price : 1 } )
db.products.find( { $and : [ { price : { $gt : 30 } },{ price : { $lt : 50 } } ] } ).sort( { brand : 1 } )
5.2 Solution to Homework: Homework 5.2


Answer:
  • The query uses an index to determine the order in which to return result documents.
  • The query examines 251120 documents.
5.3 Solution to Homework: Homework 5.3

$ mongoimport -d m101 -c profile < sysprofile.json
connected to: localhost
imported 1515 documents
> use m101
> db.profile.find({ns:/school2.students/}).sort({millis:-1}).limit(1).pretty()
Answer: 15820


    
Aggregation Framework Solution to Quiz: Introduction to the Aggregation Framework
Which of the following are true of the aggregation pipeline?
Aggregation pipelines are similar to Linux shell pipelines.
The task a pipeline stage performs is tunable.
Pipelines use a MongoDB collection as input.
You may include a particular type of stage in an aggregation pipeline multiple times.
An individual stage within a pipeline performs a single task.
Filter Aggregation Operations Solution to Quiz: Familiar Aggregation Operations
> db.companies.aggregate([
    { $match : { founded_year: 2004 } },
    { $sort: { name: 1 } },
    { $skip: 10 },
    { $limit: 5 },
    { $project: {
        _id: 0,
        name: 1
    }  }
])

Which of the following are true with respect to constructing aggregation pipelines? Check all that apply.
  • You should try to include $match stages as early as possible in your pipeline.
  • To filter documents using a $match stage, we use the same syntax for constructing query documents (filters) as we do for find().
Reshaping Documents in $project Stages Solution to Quiz: Reshaping Documents in $project Stages
> db.companies.aggregate([
    { $match: {"funding_rounds.investments.financial_org.permalink": "greylock" } }
    ,
    { $project: {
        _id: 0,
        name: 1,
        ipo: "$ipo.pub_year",
        valuation: "$ipo.valuation_amount",
        funders: "$funding_rounds.investments.financial_org.permalink"
    } }
]).pretty()
> db.companies.aggregate([
    { $match: {"funding_rounds.investments.financial_org.permalink": "greylock" } }
    ,
    { $project: {
        _id: 0,
        name: 1,
        founded: {
            year: "$founded_year",
            month: "$founded_month",
            day: "$founded_day"
        }
    } }
]).pretty()

db.companies.aggregate([
    { $match: {"name": "Facebook" } }
    ,
    { $project: {
        _id: 0,
        name: 1,
        people: "$relationships.person.last_name"
    } }
])

What would be the third entry in the "people" array for Facebook if this query were run against the CrunchBase data set? If you can't figure it out, do a little experimentation using an aggregation query. You might want to add a $match stage to the pipeline provided.
Answer:Sandberg
$unwind
db.companies.aggregate([
    { $match: {"funding_rounds.investments.financial_org.permalink": "greylock" } },
    { $project: {
        _id: 0,
        name: 1,
        amount: "$funding_rounds.raised_amount",
        year: "$funding_rounds.funded_year"
    } }
])


// unwind
db.companies.aggregate([
    { $match: {"funding_rounds.investments.financial_org.permalink": "greylock" } },
    { $unwind: "$funding_rounds" },
    { $project: {
        _id: 0,
        name: 1,
        amount: "$funding_rounds.raised_amount",
        year: "$funding_rounds.funded_year"
    } }
])



// Add funder to output documents.
db.companies.aggregate([
    { $match: {"funding_rounds.investments.financial_org.permalink": "greylock" } },
    { $unwind: "$funding_rounds" },
    { $project: {
        _id: 0,
        name: 1,
        funder: "$funding_rounds.investments.financial_org.permalink",
        amount: "$funding_rounds.raised_amount",
        year: "$funding_rounds.funded_year"
    } }
])
Array Expressions
db.companies.aggregate([
    { $match: {"funding_rounds.investments.financial_org.permalink": "greylock" } },
    { $project: {
        _id: 0,
        name: 1,
        founded_year: 1,
        rounds: { $filter: {
            input: "$funding_rounds",
            as: "round",
            cond: { $gte: ["$$round.raised_amount", 100000000] } } }
    } },
    { $match: {"rounds.investments.financial_org.permalink": "greylock" } },
]).pretty()





db.companies.aggregate([
    { $match: { "founded_year": 2010 } },
    { $project: {
        _id: 0,
        name: 1,
        founded_year: 1,
        first_round: { $arrayElemAt: [ "$funding_rounds", 0 ] },
        last_round: { $arrayElemAt: [ "$funding_rounds", -1 ] }
    } }
]).pretty()


db.companies.aggregate([
    { $match: { "founded_year": 2010 } },
    { $project: {
        _id: 0,
        name: 1,
        founded_year: 1,
        first_round: { $slice: [ "$funding_rounds", 1 ] },
        last_round: { $slice: [ "$funding_rounds", -1 ] }
    } }
]).pretty()

db.companies.aggregate([
    { $match: { "founded_year": 2010 } },
    { $project: {
        _id: 0,
        name: 1,
        founded_year: 1,
        early_rounds: { $slice: [ "$funding_rounds", 1, 3 ] }
    } }
]).pretty()

db.companies.aggregate([
    { $match: { "founded_year": 2004 } },
    { $project: {
        _id: 0,
        name: 1,
        founded_year: 1,
        total_rounds: { $size: "$funding_rounds" }
    } }
]).pretty()
Using Accumulators in $project Stages Answer to Using Accumulators in $project Stages
db.companies.aggregate([
    { $match: { "funding_rounds": { $exists: true, $ne: [ ]} } },
    { $project: {
        _id: 0,
        name: 1,
        largest_round: { $max: "$funding_rounds.raised_amount" }
    } }
])


db.companies.aggregate([
    { $match: { "funding_rounds": { $exists: true, $ne: [ ]} } },
    { $project: {
        _id: 0,
        name: 1,
        total_funding: { $sum: "$funding_rounds.raised_amount" }
    } }
])


db.companies.aggregate([
    { $group: {
        _id: { founded_year: "$founded_year" },
        average_number_of_employees: { $avg: "$number_of_employees" }
    } },
    { $sort: { average_number_of_employees: -1 } }

])
$group
db.companies.aggregate([
    { $group: {
        _id: { founded_year: "$founded_year" },
        average_number_of_employees: { $avg: "$number_of_employees" }
    } },
    { $sort: { average_number_of_employees: -1 } }

])

db.companies.aggregate( [
    { $match : { founded_year : 2001 } },
    { $project : { _id: 0, name : 1, number_of_employees: 1 } },
    { $sort : { number_of_employees : -1 } }
] )


db.companies.aggregate( [
    { $match: { "relationships.person": { $ne: null } } },
    { $project: { relationships: 1, _id: 0 } },
    { $unwind: "$relationships" },
    { $group: {
        _id: "$relationships.person",
        count: { $sum: 1 }
    } },
    { $sort: { count: -1 } }
] )


_id in $group Stages Solution to _id in $group Stages
db.companies.aggregate([
    { $match: { founded_year: { $gte: 2010 } } },
    { $group: {
        _id: { founded_year: "$founded_year"},
        companies: { $push: "$name" }
    } },
    { $sort: { "_id.founded_year": 1 } }
]).pretty()


db.companies.aggregate([
    { $match: { founded_year: { $gte: 2010 } } },
    { $group: {
        _id: "$founded_year",
        companies: { $push: "$name" }
    } },
    { $sort: { "_id": 1 } }
]).pretty()



db.companies.aggregate([
    { $match: { founded_year: { $gte: 2010 } } },
    { $group: {
        _id: { founded_year: "$founded_year", category_code: "$category_code" },
        companies: { $push: "$name" }
    } },
    { $sort: { "_id.founded_year": 1 } }
]).pretty()


db.companies.aggregate([
    { $group: {
        _id: { ipo_year: "$ipo.pub_year" },
        companies: { $push: "$name" }
    } },
    { $sort: { "_id.ipo_year": 1 } }
]).pretty()


db.companies.aggregate( [
    { $match: { "relationships.person": { $ne: null } } },
    { $project: { relationships: 1, _id: 0 } },
    { $unwind: "$relationships" },
    { $group: {
        _id: "$relationships.person",
        count: { $sum: 1 }
    } },
    { $sort: { count: -1 } }
] )

$group vs. $project Solution to $group vs. $project
db.companies.aggregate([
    { $match: { funding_rounds: { $ne: [ ] } } },
    { $unwind: "$funding_rounds" },
    { $sort: { "funding_rounds.funded_year": 1,
               "funding_rounds.funded_month": 1,
               "funding_rounds.funded_day": 1 } },
    { $group: {
        _id: { company: "$name" },
        funding: {
            $push: {
                amount: "$funding_rounds.raised_amount",
                year: "$funding_rounds.funded_year"
            } }
    } },
] ).pretty()



db.companies.aggregate([
    { $match: { funding_rounds: { $exists: true, $ne: [ ] } } },
    { $unwind: "$funding_rounds" },
    { $sort: { "funding_rounds.funded_year": 1,
               "funding_rounds.funded_month": 1,
               "funding_rounds.funded_day": 1 } },
    { $group: {
        _id: { company: "$name" },
        first_round: { $first: "$funding_rounds" },
        last_round: { $last: "$funding_rounds" },
        num_rounds: { $sum: 1 },
        total_raised: { $sum: "$funding_rounds.raised_amount" }
    } },
    { $project: {
        _id: 0,
        company: "$_id.company",
        first_round: {
            amount: "$first_round.raised_amount",
            article: "$first_round.source_url",
            year: "$first_round.funded_year"
        },
        last_round: {
            amount: "$last_round.raised_amount",
            article: "$last_round.source_url",
            year: "$last_round.funded_year"
        },
        num_rounds: 1,
        total_raised: 1,
    } },
    { $sort: { total_raised: -1 } }
] ).pretty()





db.companies.find({ name: "Fox Interactive Media" })

HW 6.1
db.companies.aggregate( [
{ $match: { "relationships.person": { $ne: null } } },
{ $project: { relationships: 1, _id: 0 } },
{ $unwind: "$relationships" },
{ $group: {
_id: "$relationships.person",
count: { $sum: 1 }
} },
{ $sort: { count: -1 } }
] )


Incorrect answers:
  • 23 (23 is the total number of relationships Eric Di Benedetto is found in)
Find the number of unique companies he is associated with.
Answer: 15 Query:
// Count the number of distinct companies a given person is associated with.
db.companies.aggregate( [
    { $match: { "relationships.person": { $ne: null } } },
    { $project: { name: 1, relationships: 1, _id: 0 } },
    // One document per (company, relationship) pair.
    { $unwind: "$relationships" },
    // Collapse repeated relationships: one group per distinct (person, company name) pair.
    { $group: {
        _id: { person: "$relationships.person", name: "$name" },
        count: { $sum: 1 }
    } },
    { $match: { "_id.person.permalink": "eric-di-benedetto" } },
    // Each remaining document is one distinct company name, so counting them gives the answer.
    { $group: {
        _id: { this_dude: "$_id.person.permalink"},
        count: { $sum: 1 }
    } }
] )
HW 6.2
$ mongoimport -d test -c grades --drop grades.json
2016-02-10T12:28:32.102-0600	connected to: localhost
2016-02-10T12:28:32.103-0600	dropping: test.grades
2016-02-10T12:28:32.172-0600	imported 280 documents
> use test
> db.grades.aggregate([
    { $unwind: "$scores" },
    { $match: {"scores.type": { $ne: "quiz" } } },
    { $group: {
        _id: "$class_id",
        average: { $avg: "$scores.score" }
    } },
    { $sort: { "average": -1 } }
    ]).pretty()
{ "_id" : 1, "average" : 64.89462395242741 }
{ "_id" : 9, "average" : 58.32770509408073 }
{ "_id" : 5, "average" : 56.80408386154663 }
        
Answer: 1
HW 6.3 Solution to Homework 6.3
  1. $match (on the year)
  2. $project (including an array size)
  3. $match (on the size)
  4. $project (calculating the avg)
  5. $sort (by average amount raised)
  6. $limit (to 1)
Answer: Nimbit
// Among companies founded in 2004 with more than four funding rounds,
// return the one whose average raised_amount per round is smallest.
db.companies.aggregate([
    { $match: { founded_year: 2004 } },
    // Carry the per-round amounts forward and record how many rounds there are.
    { $project: {
        name: 1,
        raised_amount: "$funding_rounds.raised_amount",
        numberOfFundingRounds: { $size: "$funding_rounds" }
    } },
    { $match: { numberOfFundingRounds: { $gt: 4 } } },
    // Average the array of amounts collected by the first $project.
    { $project: {
        name: 1,
        raisedAvg: { $avg: "$raised_amount" }
    } },
    // Ascending sort + limit 1 => the smallest average.
    { $sort: { raisedAvg: 1 } },
    { $limit: 1 }
]).pretty()
{
	"_id" : ObjectId("52cdef7d4bab8bd675298d6e"),
	"name" : "Nimbit",
	"raisedAvg" : 1085127.2
}
        
Write Concern Solution to Quiz: Write Concern

Provided you assume that the disk is persistent, what are the w and j settings required to guarantee that an insert or update has been written all the way to disk?

Answer: w=1, j=1
Network Error Solution to Quiz: Network Error(e.g., TCP request)
What are the reasons why an application may receive an error back even if the write was successful?
Answer:
  • The network TCP connection between the application and the server was reset after the server received a write but before a response could be sent.
  • The MongoDB server terminates between receiving the write and responding to it.
  • The network fails between the time of the write and the time the client receives a response to the write.
Intro. to Replication Solution to Quiz: Introduction to Replication
What is the minimum original number of nodes needed to assure the election of a new Primary if a node goes down?
Answer: 3
Replica Set Elections Solution to Quiz: Replica Set Elections
Which types of nodes can participate in elections of a new primary?
Answer:
  • Regular replica set members
  • Hidden Members
  • Arbiters
Write Consistency Solution to Quiz: Write Consistency
During the time when failover is occurring, can writes successfully complete?
Answer: No
Creating a Replica Set Solution to Quiz: Creating a Replica Set
$ more create_replica_set.sh
# One data directory per replica set member.
mkdir -p /data/rs1 /data/rs2 /data/rs3
# Start three mongod servers (not the mongo shell) for replica set "rs1", each with its
# own data directory, log file and port. --fork backgrounds each server so the script
# can go on to start the next one.
mongod --replSet rs1 --logpath "1.log" --dbpath /data/rs1 --port 27017 --fork
mongod --replSet rs1 --logpath "2.log" --dbpath /data/rs2 --port 27018 --fork
mongod --replSet rs1 --logpath "3.log" --dbpath /data/rs3 --port 27019 --fork
$ bash < create_replica_set.sh
$ more 1.log
$ more init_replica.js
// Replica set "rs1" with three members on ports 27017-27019.
// Member 0 is configured with priority 0 and slaveDelay 5 (per the MongoDB replica set
// configuration docs: never eligible to become primary, applies operations 5 seconds late).
config = { _id: "rs1", members:[
            { _id : 0, host : "Andrews-iMac.local:27017", priority:0, slaveDelay: 5 },
            { _id : 1, host : "Andrews-iMac.local:27018" },
            { _id : 2, host : "Andrews-iMac.local:27019"} ]
}

// Apply the configuration, then print the resulting replica set state.
rs.initiate(config)
rs.status()
$ mongo --port 27018
bye
$ mongo --port 27018 < init_replica.js
bye
$ mongo --port 27018
rs1:PRIMARY> rs.status()
bye
$ mongo --port 27017
rs1:SECONDARY>
$ mongo --port 27019
rs1:SECONDARY> db.people.find()
error: { "$err" : "not master and slaveOk=false", "code" : 13435 }
rs1:SECONDARY> rs.slaveOk()
Answer: rs.slaveOk()
Replica Set Internals Solution to Quiz: Replica Set Internals
$ ps -ef | grep mongod
> use local
> db.oplog.rs.find().pretty()
$ ps -ef | grep mongod
60491  --port 27017 --smallfiles --fork
60494  --port 27018 --smallfiles --fork
> rs.status();
{
            "_id" : 1,
            "name" : "localhost:27018",
            "stateStr" : "(not reachable/healthy)",
            "uptime" : 0,
}
        

Which of the following statements are true about replication?
Answer:
  • Replication supports mixed-mode storage engines. For example, a mmapv1 primary and wiredTiger secondary.
  • A copy of the oplog is kept on both the primary and secondary servers.
  • The oplog is implemented as a capped collection.
  • You can only write to a primary node
  • You cannot read from a secondary by default.
Failover & Rollback Solution to Quiz: Failover and Rollback
What happens if a node comes back up as a secondary after a period of being offline and the oplog has looped on the primary?
Answer: The entire dataset will be copied from the primary.
Connecting to a Replica Set from the Node.js Driver Solution to Quiz: Connecting to a Replica Set from the Node.js Driver
$ mongo localhost:30001
> rs.initiate()
> rs.add("education.local:30002")
> rs.add("education.local:30003")
> rs.status()
$ vim app.js
var MongoClient = require('mongodb').MongoClient;

// Connect using a seed list of three hosts, insert one document into "repl",
// read it back, print it, and close the connection. Any error is rethrown.
MongoClient.connect("mongodb://localhost:30001,localhost:30002,localhost:30003/course", function(connectError, db) {
    if (connectError) {
        throw connectError;
    }

    // Final step: print the document that was read back and release the connection.
    function onFound(findError, found) {
        if (findError) {
            throw findError;
        }

        console.log(found);
        db.close();
    }

    // Middle step: once the insert has completed, look the document up again.
    function onInserted(insertError, insertResult) {
        if (insertError) {
            throw insertError;
        }

        db.collection("repl").findOne({ 'x' : 1 }, onFound);
    }

    db.collection("repl").insert({ 'x' : 1 }, onInserted);
});

If you leave a replica set node out of the seedlist within the driver, what will happen?
Answer: The missing node will be discovered as long as you list at least one valid node.
Failover in the Node.js Driver Solution to Quiz: Failover in the Node.js Driver
var MongoClient = require('mongodb').MongoClient;

// Connect using a seed list of three hosts, then dispatch one insert per second
// forever, each carrying an increasing documentNumber.
MongoClient.connect("mongodb://localhost:30001,localhost:30002,localhost:30003/course", function(connectError, db) {
    if (connectError) {
        throw connectError;
    }

    var nextNumber = 0;

    // Print the driver's result for one insert; rethrow if the insert failed.
    function reportInsert(insertError, result) {
        if (insertError) {
            throw insertError;
        }
        console.log(result);
    }

    // Dispatch a single insert without waiting for it, then schedule the next
    // dispatch one second later.
    function insertDocument() {
        var payload = { 'documentNumber' : nextNumber };
        nextNumber += 1;

        db.collection("repl").insert(payload, reportInsert);

        console.log("Dispatched insert");
        setTimeout(insertDocument, 1000);
    }

    insertDocument();
});


What will happen if this insert happens during a primary election?
db.collection('foo').insert({x:1}, callback);
Answer: The insert will be buffered until the election completes, then the callback will be called after the operation is sent and a response is received
Write Concern Revisited Solution to Quiz: Write Concern Revisited
        w How many nodes you wait for before you move on when doing an INSERT
        w=2 Wait for primary & secondary nodes to acknowledge write.
        j=1 Wait for primary to write to disk
        WTimeout = how long to wait for write to be acknowledged by secondaries.
        Places to configure:
        Set on a connection.
        Set on collection.
        Set on replica set.

            w=majority (wait for majority of nodes to replicate/avoid rollbacks)

> rs.conf()
"members" : [
     {
            "_id" : 0,
            "host" : "Andrews-iMac.local:27017",
            "priority" : 0,
            "slaveDelay" : 5
     }
        

If you set w=1 and j=1, is it possible to wind up rolling back a committed write to the primary on failover?
Answer: Yes
If the write reached the primary but the primary goes down before the write propagates to a secondary, a new primary is elected without that write. When the original primary returns, it will roll the write back because it is ahead of the new primary.
Read Preferences Solution to
Read Options:
  • Primary (default)
  • Primary Preferred
  • Secondary
  • Secondary Preferred
  • Nearest (ping time, tag set (data center))

Read from Secondary: Eventual Consistent Read

You can configure your applications via the drivers to read from secondary nodes within a replica set. What are the reasons that you might not want to do that? Check all that apply.

Answer:
  • You may not read what you previously wrote to MongoDB on a secondary because it will lag behind by some amount.
  • If the secondary hardware has insufficient memory to keep the read working set in memory, directing reads to it will likely slow it down.
  • If your write traffic is great enough, and your secondary is less powerful than the primary, you may overwhelm the secondary, which must process all the writes as well as the reads. Replication lag can result.
Implications of Replication Solution to Quiz: Review of Implications of Replication

If you set w=4 on a MongoClient and there are only three nodes in the replica set, how long will you wait in PyMongo for a response from an insert if you don't set a timeout?
Answer: You will get an immediate error
Sharding Solution to Quiz: Introduction to Sharding

If the shard key is not included in a find operation and there are 4 shards, each one a replica set with 3 nodes, how many nodes will see the find operation?
Answer: 4
Since the shard key is not included in the find operation, mongos has to send the query to all 4 of the shards. Each shard has 3 replica-set members, but only one member of each replica set (the primary, by default) is required to handle the find.
Building a Sharded Environment Solution to Quiz: Building a Sharded Environment
> db.students.explain().find({}).limit(10)
    "stage" : "SHARD_MERGE",
        "shardName" : "s0",
        "shardName" : "s1",
        "shardName" : "s2",
> db.students.explain().find({student_id:1000}).limit(10)
    "stage" : "SINGLE_SHARD",
        "shardName" : "s0",
            "inputStage" : {
                "student_index"
            }

If you want to build a production system with two shards, each one a replica set with three nodes, how many mongod processes must you start?
Answer: 9
6 mongod's will come from the shards, (2 replica sets x 3 nodes) + 3 config servers (recommended for production systems)
Implications of Sharding Solution to Quiz: Implications of Sharding
  • Every document needs to include the shard key
  • Shard key is immutable
  • Need index that starts with the shard key
  • When doing an update, shard key needs to be specified or multi-key = true
  • If query lacks shard key => scatter gather
  • No unique key unless it starts with the shard key

Suppose you wanted to shard the zip code collection after importing it. You want to shard on zip code. What index would be required to allow MongoDB to shard on zip code?
Answer: An index on zip or a non-multi-key index that starts with zip.
Sharding + Replication Solution to Quiz: Sharding + Replication

Suppose you want to run multiple mongos routers for redundancy. What level of the stack will assure that you can failover to a different mongos from within your application?
Answer: drivers
Choosing a Shard Key Solution to Quiz: Choosing a Shard Key

Thinking about the tradeoffs of shard key selection, select the true statements below.
  • Choosing posttime as the shard key will cause hotspotting as time progresses.
  • Choosing username as the shard key will distribute posts to the wall well across the shards.
  • Choosing visible_to (multikey index) as a shard key is illegal.
7.1 Solution to Homework: Homework: 7.1

Which of the following statements are true about replication in MongoDB? Check all that apply.
Answer:
  • The oplog utilizes a capped collection.
  • The minimum sensible number of voting nodes to a replica set is three.
  • By default, using the MongoClient connection class, w=1 and j=0
  • MongoDB replication is asynchronous.
7.2 Solution to Homework: Homework 7.2

Let's suppose you have a five member replica set and want to assure that writes are committed to the journal and are acknowledged by at least 3 nodes before you proceed forward. What would be the appropriate settings for w and j?
Answer: w="majority", j=1
7.3 Solution to Homework: Homework 7.3

Which of the following statements are true about choosing and using a shard key? Check all that apply.
Answer:
  • There must be an index on the collection that starts with the shard key.
  • MongoDB cannot enforce unique indexes on a sharded collection other than the shard key itself or indexes prefixed by the shard key.
  • Any update that does not contain the shard key will be sent to all shards.
7.4 Solution to Homework: Homework 7.4
You have a sharded system with three shards and have sharded the collections "students" in the "school" database across those shards. The output of sh.status() when connected to mongos looks like this:
			{ "student_id" : { "$minKey" : 1 } } -->> { "student_id" : 0 } on : s2 Timestamp(3, 0)
			{ "student_id" : 0 } -->> { "student_id" : 2 } on : s0 Timestamp(3, 1)
			{ "student_id" : 2 } -->> { "student_id" : 3497 } on : s1 Timestamp(3, 2)
			{ "student_id" : 3497 } -->> { "student_id" : 7778 } on : s1 Timestamp(3, 3)
			{ "student_id" : 7778 } -->> { "student_id" : { "$maxKey" : 1 } } on : s1 Timestamp(3, 4)

Answer: s1
7.5 Solution to Homework: Homework 7.5
In this homework you will build a small replica set on your own computer.
# Create three directories for the three mongod processes.
$ mkdir -p /data/rs1 /data/rs2 /data/rs3
# Now start three mongod instances
$ mongod --replSet m101 --logpath "1.log" --dbpath /data/rs1 --port 27017 --smallfiles --oplogSize 64 --fork
$ mongod --replSet m101 --logpath "2.log" --dbpath /data/rs2 --port 27018 --smallfiles --oplogSize 64 --fork
$ mongod --replSet m101 --logpath "3.log" --dbpath /data/rs3 --port 27019 --smallfiles --oplogSize 64 --fork
# Now connect to a mongo shell and make sure it comes up
$ mongo --port 27017
# Now create the replica set.
> config = { _id: "m101", members:[
          { _id : 0, host : "localhost:27017"},
          { _id : 1, host : "localhost:27018"},
          { _id : 2, host : "localhost:27019"} ]
         };
> rs.initiate(config);
# see the state of replication
> rs.status()

Answer: X9d9Y9f9Y969a9q9j9q9S939i9k93959q9S969v
Note: All 9's must be removed from the above string in order to get the correct validation code.
Solution to

Solution to

M101JS Files

Textbook: MongoDB: The Definitive Guide, 2nd Edition

Chapter Title Progress
1

Introduction

2

Getting Started

Start mongo shell on a different machine:
mongo some-host:30000/myDB
Have the ability to connect to a different database or server at any time:
$ mongo --nodb
> conn = new Mongo("some-host:30000")
connection to some-host:30000
> db = conn.getDB("myDB")
myDB
Run scripts:
mongo script1.js script2.js script3.js
Run a script using a connection to a non-default host/port mongod:
mongo --quiet server-1:30000/foo script1.js script2.js script3.js
run scripts from within the interactive shell using the load() function:
> load("script1.js")
I am script1.js
# JavaScript equivalents to shell helpers
Helper              Equivalent
use foo             db.getSisterDB("foo")
show dbs            db.getMongo().getDBs()
show collections    db.getCollectionNames()
Customize your prompt with Database name:
// Custom mongo shell prompt: "<current db>> ", or "(nodb)> " when the shell
// has no `db` defined (e.g. it was started with --nodb).
prompt = function() {
    if (typeof db == 'undefined') {
        return '(nodb)> ';
    }

    // Check the last db operation
    try {
        db.runCommand({getLastError:1});
    } catch (lastOpError) {
        // Show the failure but still render a prompt.
        print(lastOpError);
    }

    return db + "> ";
};

Textbook: Node.js in Action

Download source code
Chapter Title Progress
1

Welcome to Node.js

A readable stream lets you read data from a source.
// Open ./resource.json as a readable stream and log each chunk as it arrives.
var stream = fs.createReadStream('./resource.json');

stream.on('data', function onChunk(chunk) {
  console.log(chunk);
});

// Fired once after the last 'data' event, when there is nothing left to read.
stream.on('end', function onFinished() {
  console.log('finished');
});
DIRTy = data-intensive real-time applications
2

Building a multiroom chat application

2

Building a multiroom chat application

2.1

Application overview

2.2

Application requirements and initial setup

2.2.1

Serving HTTP and WebSocket

2.2.2

Creating the application file structure

  1. Launch Terminal
  2. Create project directory
    $ mkdir chatroom
    $ cd chatroom/
    $ mkdir lib
    $ mkdir -p public/javascripts
    $ mkdir public/stylesheets
    
2.2.3

Specifying dependencies

  1. Create and populate the package.json file
    $ touch package.json
    $ vi package.json
    i (Insert mode)
    (cut and paste the following:)
    {
      "name": "chatrooms",
      "version": "0.0.1",
      "description": "Minimalist multiroom chat server",
      "dependencies": {
        "socket.io": "~0.9.6",
        "mime": "~1.2.7"
      }
    }
    [Esc]       (turn off the Insert mode)
    :x[return]  (save & close)
    
2.2.4

Installing dependencies

$ npm install
2.3

Serving the application’s HTML, CSS, and client-side JavaScript

2.3.1

Creating a basic static file server

$ touch server.js
$ vi server.js
i (Insert mode)
var http  = require('http');
var fs    = require('fs');
var path  = require('path');
var mime  = require('mime');
var cache = {};
/**
 * Answer a request with a plain-text 404 response.
 *
 * @param {object} response - HTTP response object; writeHead, write and end are called on it.
 */
function send404(response) {
  var headers = {'Content-Type': 'text/plain'};
  var body = 'Error 404: resource not found.';

  response.writeHead(404, headers);
  response.write(body);
  response.end();
}
/**
 * Send a 200 response carrying the given file contents.
 * The content-type header is looked up from the file's base name via mime.lookup.
 *
 * @param {object} response - HTTP response object; writeHead and end are called on it.
 * @param {string} filePath - Path of the file being served (only its base name is used).
 * @param {*} fileContents - Body passed straight to response.end.
 */
function sendFile(response, filePath, fileContents) {
  var fileName = path.basename(filePath);
  var headers = {"content-type": mime.lookup(fileName)};

  response.writeHead(200, headers);
  response.end(fileContents);
}
/**
 * Serve a static file, using an in-memory cache keyed by path.
 *
 * A cached entry is sent immediately. Otherwise the file is read from disk,
 * stored in the cache and sent; any read failure (missing file, directory,
 * permissions, ...) results in a 404.
 *
 * The file is read directly rather than probed with the deprecated fs.exists
 * first: checking and then reading leaves a window in which the file can
 * disappear, and the read error already covers the "not found" case.
 *
 * @param {object} response - HTTP response object handed on to sendFile / send404.
 * @param {object} cache - Map of path -> file contents; updated on a successful read.
 * @param {string} absPath - Path of the file to serve; also the cache key.
 */
function serveStatic(response, cache, absPath) {
  if (cache[absPath]) {
    sendFile(response, absPath, cache[absPath]);
    return;
  }

  fs.readFile(absPath, function(err, data) {
    if (err) {
      send404(response);
      return;
    }
    cache[absPath] = data;
    sendFile(response, absPath, data);
  });
}
// HTTP server that maps every request URL onto a file under ./public.
// "/" serves public/index.html; any other URL is appended to "public".
var server = http.createServer(function(request, response) {
  var filePath = request.url == '/'
    ? 'public/index.html'
    : 'public' + request.url;

  // request.url is untrusted: ".." segments (e.g. "/../server.js") would otherwise
  // resolve outside public/ and expose arbitrary files. Refuse anything whose
  // normalized form does not stay underneath public/.
  var normalized = path.normalize(filePath);
  if (normalized.indexOf('public' + path.sep) !== 0) {
    send404(response);
    return;
  }

  var absPath = './' + filePath;
  serveStatic(response, cache, absPath);
});
// Start accepting connections on port 3000 and announce it once the server is listening.
server.listen(3000, function onListening() {
  console.log("Server listening on port 3000.");
});
[Esc]       (turn off the Insert mode)
:w[return]  (save)
:x[return]  (save & close)
$ node server.js
http://127.0.0.1:3000/
2.3.2

Adding the HTML and CSS files

Create a file in the public directory named index.html and place the HTML
$ cd public
$ touch index.html
$ vi index.html
i (Insert mode)

        

Chat




Chat commands:
  • Change nickname: /nick [username]
  • Join/create room: /join [room name]
[Esc] (turn off the Insert mode) :w[return] (save) :x[return] (save & close)
In the public/stylesheets directory, create a file named style.css and put the following CSS code in it.
$ cd stylesheets/
$ touch style.css
$ vi style.css
body {
  padding: 50px;
  font: 14px "Lucida Grande", Helvetica, Arial, sans-serif;
}
a {
  color: #00B7FF;
}
/* Fixed-width block, centred horizontally by the auto side margins. */
#content {
  width: 800px;
  margin-left: auto;
  margin-right: auto;
}
#room {
  background-color: #ddd;
  margin-bottom: 1em;
}
/* Fixed-size box; overflow: auto adds scrolling when the content does not fit. */
#messages {
  width: 690px;
  height: 300px;
  overflow: auto;
  background-color: #eee;
  margin-bottom: 1em;
  margin-right: 10px;
}
2.4

Handling chat-related messaging using Socket.IO

2.4.1

Setting up the Socket.IO server

Append the following two lines to server.js
$ cd ../..
vi server.js
...
var chatServer = require('./lib/chat_server');
chatServer.listen(server);
Create a new file, chat_server.js, inside the lib directory:
$ cd lib
$ vi chat_server.js
3

Node programming fundamentals

Angular 2 Official Tutorial

Chapter Title Progress
Introduction App Vision
5 Minute Quickstart - 1 See It Run!
5 Minute Quickstart - 2 Development Environment
5 Minute Quickstart - 3 First Angular Component
5 Minute Quickstart - 4 Add the index.html
5 Minute Quickstart - 5 Compile and run
5 Minute Quickstart - 6 Final structure
5 Minute Quickstart - 6 Final structure
5 Minute Quickstart - 7 Wrap Up
5 Minute Quickstart - 8 Appendices
5 Minute Quickstart - 9 Appendices
1. The Hero Editor Keep the app transpiling and running
1. The Hero Editor - 1 Show our Hero
1. The Hero Editor - 2 Editing Our Hero
1. The Hero Editor - 3 Editing Our Hero
1. The Hero Editor - 4 The Road We’ve Travelled
2. Master/Detail - 1 It Takes Many Heroes
2. Master/Detail - 2 Where We Left Off
2. Master/Detail - 3 Displaying Our Heroes
2. Master/Detail - 4 Selecting a Hero
2. Master/Detail - 5 Displaying Our Heroes
2. Master/Detail - 6 The Road We’ve Travelled
3. Multiple Components - 1 Where We Left Off
3. Multiple Components - 2 Displaying Our Heroes
3. Multiple Components - 3 Making a Hero Detail Component
3. Multiple Components - 4 Refresh the AppComponent
3. Multiple Components - 5 The Road We’ve Travelled
4. Services - 1 Introduction
4. Services - 2 Where We Left Off
4. Services - 3 Creating a Hero Service
4. Services - 4 Async Services and Promises
4. Services - 5 The Road We’ve Travelled
https://stripe.com/jobs/positions/support-engineer