How to use MongoDB with Python
This short tutorial shows how to use MongoDB with Python.
from pymongo import MongoClient
import pprint
import pandas as pd
# Open a client on the local MongoDB server, passing the
# 'primaryPreferred' read preference.
client = MongoClient(
    'mongodb://localhost:27017/',
    readPreference='primaryPreferred',
)

# Select the database and the collection used by the requests.
db = client['Connect_to_Spark_db']
mentions_events = db['mentions_events']

# Display a single document to get a feel for the collection's fields.
sample_document = mentions_events.find_one()
pprint.pprint(sample_document)
Test request: number of distinct actors
# Count the distinct Actor1Name values among the first 5,000,000 documents:
# group by actor, then count how many groups were produced.
agg = db.mentions_events.aggregate([
    {"$limit": 5000000},
    {
        "$group": {
            "_id": {"actor": "$Actor1Name"},
            "Count": {"$sum": 1},
        },
    },
    {"$count": "myCount"},
])
# Consume the cursor so the count is actually displayed; otherwise the
# result is discarded as soon as `agg` is rebound.
pprint.pprint(list(agg))
Request 1
# Request 1
# Sum NumMentions per (year, month, day, country, language) over the first
# 10,000 documents.
# NOTE(review): the day is read from "daymonthevent" while year and month are
# read from "*mention" fields — confirm this mix is intended.
group_key_req1 = {
    "annee": "$yearmention",
    "mois": "$monthmention",
    "jour": "$daymonthevent",
    "pays": "$ActionGeo_CountryCode",
    "langue": "$MentionDocTranslationInfo",
}
pipeline_req1 = [
    {"$limit": 10000},
    {
        "$group": {
            "_id": group_key_req1,
            "Nb_Articles": {"$sum": "$NumMentions"},
        },
    },
]
agg = db.mentions_events.aggregate(pipeline_req1)
pprint.pprint(list(agg))
Request 2
# Keep the documents where either actor is "TOURIST", then group them by the
# listed event/actor fields (one group per distinct combination).
# Pipeline keys must be quoted strings in Python; the unquoted mongo-shell
# form ({ $match: ... }) is not valid Python syntax.
agg = db.mentions_events.aggregate([
    {
        "$match": {
            "$or": [{"Actor1Name": "TOURIST"}, {"Actor2Name": "TOURIST"}],
        },
    },
    {
        "$group": {
            "_id": {
                "GLOBALEVENTID": "$GLOBALEVENTID",
                "Actor1Code": "$Actor1Code",
                "Actor1Name": "$Actor1Name",
                "Actor1CountryCode": "$Actor1CountryCode",
                "Actor2Code": "$Actor2Code",
                "Actor2Name": "$Actor2Name",
                "Actor2CountryCode": "$Actor2CountryCode",
                "EventCode": "$EventCode",
                "QuadClass": "$QuadClass",
                "SQLDATE": "$SQLDATE",
                "SOURCEURL": "$SOURCEURL",
            },
        },
    },
])
Request 3
# Per (month, country, language, actor), count the documents whose
# MentionDocTone is below zero and those whose MentionDocTone is above zero.
agg = db.mentions_events.aggregate([
    {
        "$project": {
            "monthmention": 1,
            "ActionGeo_CountryCode": 1,
            "MentionDocTranslationInfo": 1,
            "Actor1Name": 1,
            # "$MentionDocTone" (with the "$") reads the field. A bare
            # "MentionDocTone" is a string literal, and MongoDB orders
            # strings after numbers, so $lt would never match and $gt
            # would always match.
            "lessThan0": {
                "$cond": [{"$lt": ["$MentionDocTone", 0]}, 1, 0],
            },
            "moreThan0": {
                "$cond": [{"$gt": ["$MentionDocTone", 0]}, 1, 0],
            },
        },
    },
    {"$limit": 1000000},
    {
        "$group": {
            "_id": {
                "mois": "$monthmention",
                "pays": "$ActionGeo_CountryCode",
                "langue": "$MentionDocTranslationInfo",
                "actor": "$Actor1Name",
            },
            "Nb_Articles_Negatifs": {"$sum": "$lessThan0"},
            "Nb_Articles_Positifs": {"$sum": "$moreThan0"},
        },
    },
])
# A cursor can only be iterated once: materialise it first so the same rows
# are available both for printing and for the DataFrame.
req3 = list(agg)
pprint.pprint(req3)
df_3 = pd.DataFrame(req3)
{'_id': {'mois': 1, 'pays': 'GM', 'langue': 'ara', 'actor': 'IRANIAN'},
'Nb_Articles_Negatifs': 0,
'Nb_Articles_Positifs': 3}
Request 4
# Two-stage grouping over the first 10,000 documents:
#   1. average GoldsteinScale per (GLOBALEVENTID, Actor1Name);
#   2. sum those averages per Actor1Name.
# The result is sorted by ascending total.
per_event_average = {
    "$group": {
        "_id": {
            "GLOBALEVENTID": "$GLOBALEVENTID",
            "Actor1Name": "$Actor1Name",
        },
        "avgGoldstein": {"$avg": "$GoldsteinScale"},
    },
}
per_actor_total = {
    "$group": {
        "_id": {"Actor1Name": "$_id.Actor1Name"},
        "totalGoldstein": {"$sum": "$avgGoldstein"},
    },
}
pipeline_req4 = [
    {"$limit": 10000},
    per_event_average,
    per_actor_total,
    {"$sort": {"totalGoldstein": 1}},
]
agg = db.mentions_events.aggregate(pipeline_req4)
pprint.pprint(list(agg))