Commit ccc2d4c9 authored by MARIA CANTON RELLO
Browse files

Delete F1Project.ipynb

parent 98bf9159
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "iB7tCyIfDxDQ",
"outputId": "25597d03-a63c-4945-c630-39f55eb86b9c"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Collecting findspark\n",
" Downloading findspark-1.4.2-py2.py3-none-any.whl (4.2 kB)\n",
"Installing collected packages: findspark\n",
"Successfully installed findspark-1.4.2\n",
"Collecting pyspark\n",
" Downloading pyspark-3.2.0.tar.gz (281.3 MB)\n",
"\u001b[K |████████████████████████████████| 281.3 MB 38 kB/s \n",
"\u001b[?25hCollecting py4j==0.10.9.2\n",
" Downloading py4j-0.10.9.2-py2.py3-none-any.whl (198 kB)\n",
"\u001b[K |████████████████████████████████| 198 kB 68.9 MB/s \n",
"\u001b[?25hBuilding wheels for collected packages: pyspark\n",
" Building wheel for pyspark (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for pyspark: filename=pyspark-3.2.0-py2.py3-none-any.whl size=281805911 sha256=f2571b69b12f966e59ced159d02f148d9f7047c77170eece822c51eaeec0c6ac\n",
" Stored in directory: /root/.cache/pip/wheels/0b/de/d2/9be5d59d7331c6c2a7c1b6d1a4f463ce107332b1ecd4e80718\n",
"Successfully built pyspark\n",
"Installing collected packages: py4j, pyspark\n",
"Successfully installed py4j-0.10.9.2 pyspark-3.2.0\n",
"Requirement already satisfied: tweepy in /usr/local/lib/python3.7/dist-packages (3.10.0)\n",
"Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.7/dist-packages (from tweepy) (1.3.0)\n",
"Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.7/dist-packages (from tweepy) (1.15.0)\n",
"Requirement already satisfied: requests[socks]>=2.11.1 in /usr/local/lib/python3.7/dist-packages (from tweepy) (2.23.0)\n",
"Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from requests-oauthlib>=0.7.0->tweepy) (3.1.1)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (2.10)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (2021.10.8)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (3.0.4)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (1.24.3)\n",
"Requirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (1.7.1)\n"
]
}
],
"source": [
"!pip install findspark #installing libraries\n",
"!pip install pyspark #installing libraries\n",
"!pip install tweepy"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "QYzArZLyDzzw"
},
"outputs": [],
"source": [
"import tweepy\n",
"\n",
"import pandas as pd\n",
"import re\n",
"\n",
"#Entorno apache spark\n",
"import findspark\n",
"\n",
"import pyspark as ps\n",
"import warnings\n",
"from pyspark.sql import SQLContext\n",
"\n",
"from pyspark.sql.functions import col, udf\n",
"from textblob import TextBlob\n",
"from datetime import datetime\n",
"\n",
"#Tratamiento de datos para entrenamiento\n",
"from pyspark.ml.feature import HashingTF, IDF, Tokenizer\n",
"from pyspark.ml.feature import StringIndexer\n",
"from pyspark.ml import Pipeline\n",
"\n",
"\n",
"##Logistic regression\n",
"from pyspark.ml.classification import LogisticRegression"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "lMlVt3kpD5Nu"
},
"outputs": [],
"source": [
"#Creating the authentication object\n",
"auth = tweepy.OAuthHandler(\"XXXXXXXXXXXXXXXXXXXX\",\"XXXXXXXXXXXXXXXXXXXXXXXXX\") #Eliminados por razones de privacidad\n",
"#Setting your access token and secret\n",
"auth.set_access_token(\"XXXXXXXXXXXXXXXXXXXXXXXXX\", \"XXXXXXXXXXXXXXXXXXXXXXXXX\") #Eliminados por razones de privacidad\n",
"#Creating the API object while passing in auth information\n",
"api = tweepy.API (auth, wait_on_rate_limit = True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "MYCcSYYdD7q4"
},
"outputs": [],
"source": [
"search='Checo Perez'\n",
"\n",
"num_tweets= 1000\n",
"\n",
"date_since=\"2021-12-13\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Hj3b45ksD8l-",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "be73ff05-3908-4785-8d68-cc306b4f4b18"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Checo Perez\n",
"107.30596139971132\n",
"245\n",
"14\n",
"296\n"
]
}
],
"source": [
"\n",
"\n",
"tweets = tweepy.Cursor(api.search,q = search, lang='en', since = date_since).items(num_tweets)\n",
"\n",
"\n",
"positive=0\n",
"negative=0\n",
"neutral=0\n",
"sentiment=0\n",
"\n",
"for tweet in tweets:\n",
" final_text= tweet.text.replace('RT', '')\n",
" if final_text.startswith(' @'):\n",
" position =final_text.index(':')\n",
" final_text=final_text[position+2:]\n",
" if final_text.startswith('@'):\n",
" position =final_text.index(' ')\n",
" final_text=final_text[position+2:]\n",
" analysis= TextBlob(final_text)\n",
" sentiment+=analysis.polarity\n",
" dm= analysis.polarity\n",
" \n",
" if dm >0:\n",
" positive +=1\n",
" elif dm <0:\n",
" negative+=1\n",
" else:\n",
" neutral+=1\n",
" \n",
"print(search)\n",
"print(sentiment)\n",
"print(positive)\n",
"print(negative)\n",
"print(neutral)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "mIIQImaFD8sQ"
},
"outputs": [],
"source": [
""
]
}
],
"metadata": {
"colab": {
"name": "Untitled0.ipynb",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
\ No newline at end of file
%% Cell type:code id: tags:
```
!pip install findspark #installing libraries
!pip install pyspark #installing libraries
!pip install tweepy
```
%%%% Output: stream
Collecting findspark
Downloading findspark-1.4.2-py2.py3-none-any.whl (4.2 kB)
Installing collected packages: findspark
Successfully installed findspark-1.4.2
Collecting pyspark
Downloading pyspark-3.2.0.tar.gz (281.3 MB)
 |████████████████████████████████| 281.3 MB 38 kB/s
[?25hCollecting py4j==0.10.9.2
Downloading py4j-0.10.9.2-py2.py3-none-any.whl (198 kB)
 |████████████████████████████████| 198 kB 68.9 MB/s
[?25hBuilding wheels for collected packages: pyspark
Building wheel for pyspark (setup.py) ... [?25l[?25hdone
Created wheel for pyspark: filename=pyspark-3.2.0-py2.py3-none-any.whl size=281805911 sha256=f2571b69b12f966e59ced159d02f148d9f7047c77170eece822c51eaeec0c6ac
Stored in directory: /root/.cache/pip/wheels/0b/de/d2/9be5d59d7331c6c2a7c1b6d1a4f463ce107332b1ecd4e80718
Successfully built pyspark
Installing collected packages: py4j, pyspark
Successfully installed py4j-0.10.9.2 pyspark-3.2.0
Requirement already satisfied: tweepy in /usr/local/lib/python3.7/dist-packages (3.10.0)
Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.7/dist-packages (from tweepy) (1.3.0)
Requirement already satisfied: six>=1.10.0 in /usr/local/lib/python3.7/dist-packages (from tweepy) (1.15.0)
Requirement already satisfied: requests[socks]>=2.11.1 in /usr/local/lib/python3.7/dist-packages (from tweepy) (2.23.0)
Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from requests-oauthlib>=0.7.0->tweepy) (3.1.1)
Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (2.10)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (2021.10.8)
Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (3.0.4)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (1.24.3)
Requirement already satisfied: PySocks!=1.5.7,>=1.5.6 in /usr/local/lib/python3.7/dist-packages (from requests[socks]>=2.11.1->tweepy) (1.7.1)
%% Cell type:code id: tags:
```
# Standard library
import os
import re
import warnings
from datetime import datetime

# Third-party: Twitter client, data handling, sentiment analysis
import pandas as pd
import tweepy
from textblob import TextBlob

# Apache Spark environment
import findspark
import pyspark as ps
from pyspark.sql import SQLContext
from pyspark.sql.functions import col, udf

# Data preparation for training
from pyspark.ml import Pipeline
from pyspark.ml.feature import HashingTF, IDF, Tokenizer
from pyspark.ml.feature import StringIndexer

# Logistic regression
from pyspark.ml.classification import LogisticRegression
```
%% Cell type:code id: tags:
```
# Build the Twitter client from credentials kept outside the notebook.
# Secrets are read from environment variables instead of string literals so
# they never end up in the saved .ipynb or in version control. A missing
# variable raises KeyError naming the variable that has to be set.
auth = tweepy.OAuthHandler(
    os.environ["TWITTER_CONSUMER_KEY"],
    os.environ["TWITTER_CONSUMER_SECRET"],
)
# Attach the user-level access token and its secret to the handler.
auth.set_access_token(
    os.environ["TWITTER_ACCESS_TOKEN"],
    os.environ["TWITTER_ACCESS_TOKEN_SECRET"],
)
# Create the API object; wait_on_rate_limit=True asks tweepy to wait for the
# rate limit to replenish instead of failing the request.
api = tweepy.API(auth, wait_on_rate_limit=True)
```
%% Cell type:code id: tags:
```
# Parameters read by the tweet-collection cell below.
search: str = "Checo Perez"        # text passed as the search query `q`
num_tweets: int = 1_000            # maximum number of items drawn from the cursor
date_since: str = '2021-12-13'     # date string passed as the `since` argument
```
%% Cell type:code id: tags:
```
# Patterns for the boilerplate that precedes the author's own words.
# Both are anchored at the start so text inside the message is never touched
# (a plain str.replace('RT', '') would also mangle words containing "RT").
_RETWEET_PREFIX = re.compile(r'^RT @\w+:\s*')
_LEADING_MENTION = re.compile(r'^@\w+\s*')


def _strip_tweet_prefixes(text):
    """Return ``text`` without a leading retweet marker and reply mention.

    A leading ``RT @user:`` is removed first, then one leading ``@user``.
    Text that has neither prefix is returned unchanged, and a tweet that
    consists only of a mention yields an empty string instead of raising.

    Examples:
        "RT @user: great race" -> "great race"
        "@user great race"     -> "great race"
    """
    without_retweet = _RETWEET_PREFIX.sub('', text)
    return _LEADING_MENTION.sub('', without_retweet)


# Lazily iterate over at most `num_tweets` English search results.
# NOTE(review): `since` is forwarded as an extra keyword argument; confirm the
# search endpoint honours it (the documented form appears to be a
# `since:YYYY-MM-DD` operator inside the query string).
tweets = tweepy.Cursor(api.search, q=search, lang='en', since=date_since).items(num_tweets)

# Running tallies: tweets per polarity sign, plus the summed polarity.
positive = 0
negative = 0
neutral = 0
sentiment = 0

for tweet in tweets:
    final_text = _strip_tweet_prefixes(tweet.text)

    # Read the polarity once and reuse it for the total and for the bucket.
    polarity = TextBlob(final_text).polarity
    sentiment += polarity

    if polarity > 0:
        positive += 1
    elif polarity < 0:
        negative += 1
    else:
        neutral += 1

# Report: query, summed polarity, then positive / negative / neutral counts.
print(search)
print(sentiment)
print(positive)
print(negative)
print(neutral)
```
%%%% Output: stream
Checo Perez
107.30596139971132
245
14
296
%% Cell type:code id: tags:
```
```
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment