Published on 01 May 2021 in
Data Science
How to use MongoDB with Python
This short tutorial shows how to query MongoDB from Python using PyMongo.
from pymongo import MongoClient
import pprint
import pandas as pd
# Open a client on the local MongoDB server, asking for the
# 'primaryPreferred' read preference.
client = MongoClient(
    'mongodb://localhost:27017/',
    readPreference='primaryPreferred',
)

# Select the database and the collection that the queries run against.
db = client.Connect_to_Spark_db
mentions_events = db.mentions_events

# Print a single document to get a feel for the available fields.
pprint.pprint(mentions_events.find_one())
Test request: number of actors
# Count the distinct Actor1Name values found in the first 5,000,000 documents.
# Stage 1 caps the input, stage 2 collapses it to one document per actor,
# stage 3 counts those documents into the field "myCount".
cap_input = {"$limit": 5000000}
one_doc_per_actor = {
    "$group": {
        "_id": {"actor": "$Actor1Name"},
        "Count": {"$sum": 1},
    },
}
count_actors = {"$count": "myCount"}

agg = db.mentions_events.aggregate(
    [cap_input, one_doc_per_actor, count_actors]
)
Request 1
# Request 1
# Over the first 10,000 documents, sum NumMentions for every combination of
# yearmention, monthmention, daymonthevent, ActionGeo_CountryCode and
# MentionDocTranslationInfo.
request_1_key = {
    "annee": "$yearmention",
    "mois": "$monthmention",
    "jour": "$daymonthevent",
    "pays": "$ActionGeo_CountryCode",
    "langue": "$MentionDocTranslationInfo",
}
agg = db.mentions_events.aggregate([
    {"$limit": 10000},
    {
        "$group": {
            "_id": request_1_key,
            "Nb_Articles": {"$sum": "$NumMentions"},
        },
    },
])
pprint.pprint(list(agg))
Request 2
# Keep only the documents where either actor is named "TOURIST", then group
# them so that each distinct combination of the listed fields appears once.
#
# PyMongo pipelines are plain Python dicts: unlike in the mongo shell, every
# operator ("$match", "$or", "$group") and every key must be a quoted string.
agg = db.mentions_events.aggregate([
    {
        "$match": {
            "$or": [{"Actor1Name": "TOURIST"}, {"Actor2Name": "TOURIST"}],
        },
    },
    {
        "$group": {
            "_id": {
                "GLOBALEVENTID": "$GLOBALEVENTID",
                "Actor1Code": "$Actor1Code",
                "Actor1Name": "$Actor1Name",
                "Actor1CountryCode": "$Actor1CountryCode",
                "Actor2Code": "$Actor2Code",
                "Actor2Name": "$Actor2Name",
                "Actor2CountryCode": "$Actor2CountryCode",
                "EventCode": "$EventCode",
                "QuadClass": "$QuadClass",
                "SQLDATE": "$SQLDATE",
                "SOURCEURL": "$SOURCEURL",
            },
        },
    },
])
Request 3
# For every (month, country, language, actor) combination, count how many
# documents have a negative MentionDocTone and how many have a positive one.
agg = db.mentions_events.aggregate([
    {
        "$project": {
            "monthmention": 1,
            "ActionGeo_CountryCode": 1,
            "MentionDocTranslationInfo": 1,
            "Actor1Name": 1,
            # The "$" prefix makes "$MentionDocTone" a reference to the
            # field. Without it the operators would compare the literal
            # string "MentionDocTone" against 0 for every document.
            "lessThan0": {
                "$cond": [{"$lt": ["$MentionDocTone", 0]}, 1, 0],
            },
            "moreThan0": {
                "$cond": [{"$gt": ["$MentionDocTone", 0]}, 1, 0],
            },
        },
    },
    {"$limit": 1000000},
    {
        "$group": {
            "_id": {
                "mois": "$monthmention",
                "pays": "$ActionGeo_CountryCode",
                "langue": "$MentionDocTranslationInfo",
                "actor": "$Actor1Name",
            },
            "Nb_Articles_Negatifs": {"$sum": "$lessThan0"},
            "Nb_Articles_Positifs": {"$sum": "$moreThan0"},
        },
    },
])

# A cursor can only be iterated once, so materialise the results first and
# reuse the list for both printing and the DataFrame.
req3 = list(agg)
pprint.pprint(req3)
df_3 = pd.DataFrame(req3)
{'_id': {'mois': 1, 'pays': 'GM', 'langue': 'ara', 'actor': 'IRANIAN'},
'Nb_Articles_Negatifs': 0,
'Nb_Articles_Positifs': 3}
Request 4
# Over the first 10,000 documents: average GoldsteinScale per
# (GLOBALEVENTID, Actor1Name), then sum those averages per Actor1Name and
# list the actors by ascending total.
per_event_average = {
    "$group": {
        "_id": {
            "GLOBALEVENTID": "$GLOBALEVENTID",
            "Actor1Name": "$Actor1Name",
        },
        "avgGoldstein": {"$avg": "$GoldsteinScale"},
    },
}
per_actor_total = {
    "$group": {
        "_id": {"Actor1Name": "$_id.Actor1Name"},
        "totalGoldstein": {"$sum": "$avgGoldstein"},
    },
}
agg = db.mentions_events.aggregate([
    {"$limit": 10000},
    per_event_average,
    per_actor_total,
    {"$sort": {"totalGoldstein": 1}},
])
pprint.pprint(list(agg))