{ "cells": [ { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# Getting Started with sanityze" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## If you are developing locally\n", "\n", "After checking out the repository, you can run the following command to install the package and its dependencies:\n", "\n", "```bash\n", "$ poetry install\n", "```\n", "\n", "Open this notebook in JupyterLab and run the cells in section \"Examples\"." ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "## If you are using the public package\n", "\n", "To use `sanityze` in a project, you can install it from PyPI:\n", "\n", "```bash\n", "$ pip install sanityze\n", "```\n", "\n", "Then, you can import it in your code:\n", "\n", "```python\n", "from sanityze.cleanser import *\n", "from sanityze.spotters import *\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Examples" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | product_name | \n", "price | \n", "
---|---|---|
0 | \n", "laptop | \n", "1200 | \n", "
1 | \n", "printer foo@gaga.com | \n", "150 | \n", "
2 | \n", "tablet | \n", "300 | \n", "
3 | \n", "desk 5555 5555 5555 4444 | \n", "450 | \n", "
4 | \n", "chair | \n", "200 | \n", "
\n", " | product_name | \n", "price | \n", "
---|---|---|
0 | \n", "laptop | \n", "1200 | \n", "
1 | \n", "printer EMAILADDRS | \n", "150 | \n", "
2 | \n", "tablet | \n", "300 | \n", "
3 | \n", "desk 5555 5555 5555 4444 | \n", "450 | \n", "
4 | \n", "chair | \n", "200 | \n", "
\n", " | first_name | \n", "last_name | \n", "email_address | \n", "visa_cc | \n", "master_cc | \n", "balance | \n", "active_member | \n", "age | \n", "
---|---|---|---|---|---|---|---|---|
0 | \n", "Jacob | \n", "King | \n", "the following is my email address JacobKing100... | \n", "this is my credit card: 4658481398602920 | \n", "5339168719695860 | \n", "100 | \n", "1 | \n", "24 | \n", "
1 | \n", "Chloe | \n", "Lavoie | \n", "the following is my email address ChloeLavoie2... | \n", "this is my credit card: 4532546510575280 | \n", "5284482559079650 | \n", "200 | \n", "0 | \n", "36 | \n", "
2 | \n", "Myles | \n", "Clark | \n", "MylesClark300@hotmail.com | \n", "this is my credit card: 4539650939655290 | \n", "5338287181016540 | \n", "300 | \n", "1 | \n", "23 | \n", "
3 | \n", "Daniel | \n", "Murray | \n", "DanielMurray400@outlook.ca | \n", "this is my credit card: 4716505160113470 | \n", "5581255820397210 | \n", "400 | \n", "0 | \n", "28 | \n", "
4 | \n", "Lucy | \n", "Landry | \n", "LucyLandry500@ubc.ca | \n", "this is my credit card: 4716908400371550 | \n", "5453813871212040 | \n", "500 | \n", "1 | \n", "37 | \n", "
\n", " | first_name | \n", "last_name | \n", "email_address | \n", "visa_cc | \n", "master_cc | \n", "balance | \n", "active_member | \n", "age | \n", "
---|---|---|---|---|---|---|---|---|
0 | \n", "Jacob | \n", "King | \n", "the following is my email address EMAILADDRS | \n", "this is my credit card: CREDITCARD | \n", "CREDITCARD | \n", "100 | \n", "1 | \n", "24 | \n", "
1 | \n", "Chloe | \n", "Lavoie | \n", "the following is my email address EMAILADDRS | \n", "this is my credit card: CREDITCARD | \n", "CREDITCARD | \n", "200 | \n", "0 | \n", "36 | \n", "
2 | \n", "Myles | \n", "Clark | \n", "EMAILADDRS | \n", "this is my credit card: CREDITCARD | \n", "CREDITCARD | \n", "300 | \n", "1 | \n", "23 | \n", "
3 | \n", "Daniel | \n", "Murray | \n", "EMAILADDRS | \n", "this is my credit card: CREDITCARD | \n", "CREDITCARD | \n", "400 | \n", "0 | \n", "28 | \n", "
4 | \n", "Lucy | \n", "Landry | \n", "EMAILADDRS | \n", "this is my credit card: CREDITCARD | \n", "CREDITCARD | \n", "500 | \n", "1 | \n", "37 | \n", "
5 | \n", "Austin | \n", "Cote | \n", "EMAILADDRS | \n", "this is my credit card: CREDITCARD | \n", "CREDITCARD | \n", "600 | \n", "1 | \n", "31 | \n", "
6 | \n", "Leo | \n", "Leblanc | \n", "EMAILADDRS | \n", "this is my credit card: CREDITCARD | \n", "this is my master card number: CREDITCARD | \n", "700 | \n", "0 | \n", "41 | \n", "
7 | \n", "Luke | \n", "Cote | \n", "EMAILADDRS | \n", "CREDITCARD | \n", "CREDITCARD | \n", "800 | \n", "1 | \n", "43 | \n", "
8 | \n", "Chloe | \n", "Martin | \n", "EMAILADDRS | \n", "CREDITCARD | \n", "CREDITCARD | \n", "900 | \n", "0 | \n", "58 | \n", "
9 | \n", "Sophia | \n", "Taylor | \n", "EMAILADDRS | \n", "CREDITCARD | \n", "CREDITCARD | \n", "1000 | \n", "1 | \n", "67 | \n", "
10 | \n", "Sebastian | \n", "Li | \n", "EMAILADDRS | \n", "CREDITCARD | \n", "CREDITCARD | \n", "1100 | \n", "0 | \n", "25 | \n", "
11 | \n", "Theodore | \n", "Walker | \n", "EMAILADDRS | \n", "CREDITCARD | \n", "CREDITCARD | \n", "1200 | \n", "1 | \n", "29 | \n", "
12 | \n", "Grayson | \n", "Moore | \n", "EMAILADDRS | \n", "CREDITCARD | \n", "CREDITCARD | \n", "1300 | \n", "0 | \n", "38 | \n", "
13 | \n", "Madelyn | \n", "Ross | \n", "EMAILADDRS | \n", "CREDITCARD | \n", "CREDITCARD | \n", "1400 | \n", "1 | \n", "64 | \n", "
14 | \n", "Charlie | \n", "Johnson | \n", "EMAILADDRS | \n", "CREDITCARD | \n", "CREDITCARD | \n", "1500 | \n", "0 | \n", "66 | \n", "
15 | \n", "Isaac | \n", "Davis | \n", "EMAILADDRS | \n", "CREDITCARD | \n", "CREDITCARD | \n", "1600 | \n", "1 | \n", "55 | \n", "
16 | \n", "Grace | \n", "Thomas | \n", "EMAILADDRS | \n", "CREDITCARD | \n", "CREDITCARD | \n", "1700 | \n", "0 | \n", "43 | \n", "
17 | \n", "Kayden | \n", "Thomas | \n", "EMAILADDRS | \n", "CREDITCARD | \n", "CREDITCARD | \n", "1800 | \n", "1 | \n", "48 | \n", "
18 | \n", "Peyton | \n", "Bergeron | \n", "EMAILADDRS | \n", "CREDITCARD | \n", "CREDITCARD | \n", "1900 | \n", "0 | \n", "58 | \n", "
19 | \n", "Evelyn | \n", "Johnston | \n", "EMAILADDRS | \n", "CREDITCARD | \n", "CREDITCARD | \n", "2000 | \n", "0 | \n", "29 | \n", "
\n", " | first_name | \n", "last_name | \n", "balance | \n", "active_member | \n", "age | \n", "
---|---|---|---|---|---|
0 | \n", "Jacob | \n", "King | \n", "100 | \n", "1 | \n", "24 | \n", "
1 | \n", "Chloe | \n", "Lavoie | \n", "200 | \n", "0 | \n", "36 | \n", "
2 | \n", "Myles | \n", "Clark | \n", "300 | \n", "1 | \n", "23 | \n", "
3 | \n", "Daniel | \n", "Murray | \n", "400 | \n", "0 | \n", "28 | \n", "
4 | \n", "Lucy | \n", "Landry | \n", "500 | \n", "1 | \n", "37 | \n", "
\n", " | first_name | \n", "last_name | \n", "balance | \n", "active_member | \n", "age | \n", "
---|---|---|---|---|---|
0 | \n", "Jacob | \n", "King | \n", "100 | \n", "1 | \n", "24 | \n", "
1 | \n", "Chloe | \n", "Lavoie | \n", "200 | \n", "0 | \n", "36 | \n", "
2 | \n", "Myles | \n", "Clark | \n", "300 | \n", "1 | \n", "23 | \n", "
3 | \n", "Daniel | \n", "Murray | \n", "400 | \n", "0 | \n", "28 | \n", "
4 | \n", "Lucy | \n", "Landry | \n", "500 | \n", "1 | \n", "37 | \n", "
5 | \n", "Austin | \n", "Cote | \n", "600 | \n", "1 | \n", "31 | \n", "
6 | \n", "Leo | \n", "Leblanc | \n", "700 | \n", "0 | \n", "41 | \n", "
7 | \n", "Luke | \n", "Cote | \n", "800 | \n", "1 | \n", "43 | \n", "
8 | \n", "Chloe | \n", "Martin | \n", "900 | \n", "0 | \n", "58 | \n", "
9 | \n", "Sophia | \n", "Taylor | \n", "1000 | \n", "1 | \n", "67 | \n", "
10 | \n", "Sebastian | \n", "Li | \n", "1100 | \n", "0 | \n", "25 | \n", "
11 | \n", "Theodore | \n", "Walker | \n", "1200 | \n", "1 | \n", "29 | \n", "
12 | \n", "Grayson | \n", "Moore | \n", "1300 | \n", "0 | \n", "38 | \n", "
13 | \n", "Madelyn | \n", "Ross | \n", "1400 | \n", "1 | \n", "64 | \n", "
14 | \n", "Charlie | \n", "Johnson | \n", "1500 | \n", "0 | \n", "66 | \n", "
15 | \n", "Isaac | \n", "Davis | \n", "1600 | \n", "1 | \n", "55 | \n", "
16 | \n", "Grace | \n", "Thomas | \n", "1700 | \n", "0 | \n", "43 | \n", "
17 | \n", "Kayden | \n", "Thomas | \n", "1800 | \n", "1 | \n", "48 | \n", "
18 | \n", "Peyton | \n", "Bergeron | \n", "1900 | \n", "0 | \n", "58 | \n", "
19 | \n", "Evelyn | \n", "Johnston | \n", "2000 | \n", "0 | \n", "29 | \n", "
\n", " | first_name | \n", "last_name | \n", "email_address | \n", "visa_cc | \n", "master_cc | \n", "balance | \n", "active_member | \n", "age | \n", "
---|---|---|---|---|---|---|---|---|
0 | \n", "Jacob | \n", "King | \n", "the following is my email address d3ebf4160b78... | \n", "this is my credit card: 49bd7b28d230d310f17685... | \n", "c077ea8331f1357b655546c0c6dd030c | \n", "100 | \n", "1 | \n", "24 | \n", "
1 | \n", "Chloe | \n", "Lavoie | \n", "the following is my email address ccc33e19aed8... | \n", "this is my credit card: c44d39f19fdd5f1b07c2e4... | \n", "1beee7a164fd2c76e8b4e588131d64c9 | \n", "200 | \n", "0 | \n", "36 | \n", "
2 | \n", "Myles | \n", "Clark | \n", "8a7ab41909ffabb0e5bd3e759e926550 | \n", "this is my credit card: b31abe94871fbe42c4805a... | \n", "ff05ef02f05938f288220125c64d72e3 | \n", "300 | \n", "1 | \n", "23 | \n", "
3 | \n", "Daniel | \n", "Murray | \n", "e786525bbe5cdbd0e1b0723580c0be35 | \n", "this is my credit card: 1d5378ff80e8dbdf12c3d1... | \n", "8c33e7d6d9f5b8b3e9f68b11416e9fe3 | \n", "400 | \n", "0 | \n", "28 | \n", "
4 | \n", "Lucy | \n", "Landry | \n", "66aa0f296907a96a0a15a64c54a0271c | \n", "this is my credit card: f2d2c9b2a8fdfd376aa806... | \n", "2fe45c134eebaf4476d23fea7642824a | \n", "500 | \n", "1 | \n", "37 | \n", "
5 | \n", "Austin | \n", "Cote | \n", "1aac2a4671c819e7ba448156eb2a945d | \n", "this is my credit card: e0690ff65144d6ff24fc2c... | \n", "9e3172495cc711edec297ef8a5f095a5 | \n", "600 | \n", "1 | \n", "31 | \n", "
6 | \n", "Leo | \n", "Leblanc | \n", "b575f97818252f3ae9de320f38e6a26b | \n", "this is my credit card: bee6a0d7b4c91337eef146... | \n", "this is my master card number: dfcc41738316c7d... | \n", "700 | \n", "0 | \n", "41 | \n", "
7 | \n", "Luke | \n", "Cote | \n", "726ff593c99ae2734b61f7ba08cc0339 | \n", "c68e0b958a085134e22585ac92c08e3c | \n", "6bb66b6d89abbbf0718f66720547ce57 | \n", "800 | \n", "1 | \n", "43 | \n", "
8 | \n", "Chloe | \n", "Martin | \n", "40b148d282dfa8d609ab18207188463a | \n", "22dabc63d739da192ef30a2bbcb06e61 | \n", "a8f9b7c5b5e4c4b3f8a4d37d51078cac | \n", "900 | \n", "0 | \n", "58 | \n", "
9 | \n", "Sophia | \n", "Taylor | \n", "a6c9c2015492ed280d2da2b94f3f37a4 | \n", "444225bea558baa2a4c006f688853d1f | \n", "6ad1e9a84f5dbd02512391a07c001ed5 | \n", "1000 | \n", "1 | \n", "67 | \n", "
10 | \n", "Sebastian | \n", "Li | \n", "026dec7828d784304966748f8aac14e4 | \n", "10fc94534572f34b028c61523f4605fe | \n", "d45be3c831abd474467bdd0b5d9e4dd6 | \n", "1100 | \n", "0 | \n", "25 | \n", "
11 | \n", "Theodore | \n", "Walker | \n", "0c89577b11f335687d51f6769baf809b | \n", "b28d6de54411d3884d8b24d55e285004 | \n", "52ccea324a0c1a50a266a33283645042 | \n", "1200 | \n", "1 | \n", "29 | \n", "
12 | \n", "Grayson | \n", "Moore | \n", "4e7ca558fbc639c4cb24f8588c118b3d | \n", "ba76eb7fc1be6293e5feba89f5c7639f | \n", "39c34c75de06effa59da842781653c4a | \n", "1300 | \n", "0 | \n", "38 | \n", "
13 | \n", "Madelyn | \n", "Ross | \n", "94cdc647d86ef5a8b54d5ff54b4c35b4 | \n", "211acd46154d7a437dcc03b3ce46e5ce | \n", "bdefeaadfd160b9ff82eb33bd2726b1d | \n", "1400 | \n", "1 | \n", "64 | \n", "
14 | \n", "Charlie | \n", "Johnson | \n", "3c235fdb2aa343a247c0f51ceda5eabe | \n", "ba97f2ad35d4d9f6fe00ed50e2762327 | \n", "9fe8835d0d95deccf7dfd99c02c9c294 | \n", "1500 | \n", "0 | \n", "66 | \n", "
15 | \n", "Isaac | \n", "Davis | \n", "ffd1f67fff8581d26db24b85ce1d479a | \n", "53309f3853ff954ef7ed621b38501e28 | \n", "e2737b551ebe7a0f4842f3f11bc2aa87 | \n", "1600 | \n", "1 | \n", "55 | \n", "
16 | \n", "Grace | \n", "Thomas | \n", "8d54befa39a3f13bea178f38a8fc67de | \n", "99a0625ae373ff242d7ed9c76930b836 | \n", "8aba9728ea64663867b50a17c10bf729 | \n", "1700 | \n", "0 | \n", "43 | \n", "
17 | \n", "Kayden | \n", "Thomas | \n", "e71770b14ccf5aa8587750c5c5318f4a | \n", "779c725caf15e67c16b59536eaa5b862 | \n", "92610a6913a995c2d9f5e08bfcd6c105 | \n", "1800 | \n", "1 | \n", "48 | \n", "
18 | \n", "Peyton | \n", "Bergeron | \n", "b7299528a41c8f5baf74ecc541b7aa4e | \n", "060783327b0e977a61614fa2129a7328 | \n", "68321411ad37a3dccabe7902620ef7d0 | \n", "1900 | \n", "0 | \n", "58 | \n", "
19 | \n", "Evelyn | \n", "Johnston | \n", "95473fc56071e41d16b3b769a07d17ad | \n", "a22af5a670e749c4e8529a840088c372 | \n", "83d71d2e14d5de862d8bcd28c23c5417 | \n", "2000 | \n", "0 | \n", "29 | \n", "