diff --git a/Ricardo/NLP.ipynb b/Ricardo/NLP.ipynb new file mode 100644 index 0000000000..31c91cdff5 --- /dev/null +++ b/Ricardo/NLP.ipynb @@ -0,0 +1,626 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
| \n", + " | url | \n", + "is_spam | \n", + "
|---|---|---|
| 0 | \n", + "https://briefingday.us8.list-manage.com/unsubs... | \n", + "True | \n", + "
| 1 | \n", + "https://www.hvper.com/ | \n", + "True | \n", + "
| 2 | \n", + "https://briefingday.com/m/v4n3i4f3 | \n", + "True | \n", + "
| 3 | \n", + "https://briefingday.com/n/20200618/m#commentform | \n", + "False | \n", + "
| 4 | \n", + "https://briefingday.com/fan | \n", + "True | \n", + "
| \n", + " | url | \n", + "is_spam | \n", + "
|---|---|---|
| 0 | \n", + "https://briefingday.us8.list-manage.com/unsubs... | \n", + "1 | \n", + "
| 1 | \n", + "https://www.hvper.com/ | \n", + "1 | \n", + "
| 2 | \n", + "https://briefingday.com/m/v4n3i4f3 | \n", + "1 | \n", + "
| 3 | \n", + "https://briefingday.com/n/20200618/m#commentform | \n", + "0 | \n", + "
| 4 | \n", + "https://briefingday.com/fan | \n", + "1 | \n", + "
| \n", + " | url | \n", + "is_spam | \n", + "
|---|---|---|
| 0 | \n", + "[https, briefingday, us, list, manage, com, un... | \n", + "1 | \n", + "
| 1 | \n", + "[https, www, hvper, com] | \n", + "1 | \n", + "
| 2 | \n", + "[https, briefingday, com, v, i] | \n", + "1 | \n", + "
| 3 | \n", + "[https, briefingday, com, m, commentform] | \n", + "0 | \n", + "
| 4 | \n", + "[https, briefingday, com, fan] | \n", + "1 | \n", + "
| \n", + " | url | \n", + "is_spam | \n", + "
|---|---|---|
| 0 | \n", + "[http, briefingday, list, manage, unsubscribe] | \n", + "1 | \n", + "
| 1 | \n", + "[http, hvper] | \n", + "1 | \n", + "
| 2 | \n", + "[http, briefingday] | \n", + "1 | \n", + "
| 3 | \n", + "[http, briefingday, commentform] | \n", + "0 | \n", + "
| 4 | \n", + "[http, briefingday] | \n", + "1 | \n", + "
SVC(kernel='linear', random_state=42)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
SVC(kernel='linear', random_state=42)