Skip to content
This repository was archived by the owner on Feb 21, 2019. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
2dec81a
fix how 'next_inspection' could be null
crccheck Jan 31, 2015
4b421c3
document timing info for a data import
crccheck Jan 31, 2015
813925c
upgrade to django 1.7
crccheck Jan 31, 2015
e18111e
make parallel s3 upload the default
crccheck Jan 31, 2015
0be456d
shutup, django
crccheck Jan 31, 2015
624ae6c
update scraper for django 1.7 syntax
crccheck Jan 31, 2015
e60227c
silence s3 put so overall output is easier to follow
crccheck Mar 20, 2015
d517ae4
tweak postgres dump/restore docs
crccheck Jun 12, 2015
ab92185
switch to migrate instead of syncdb
crccheck Jun 12, 2015
c9b4d5c
tweak readme, delete heroku deploy instructions
crccheck Jun 12, 2015
1a97a6b
combine to one requirements.txt for simplicity
crccheck Jun 12, 2015
aaa25a2
Document import timing
Jun 12, 2015
d406eca
add a progress bar to the import
crccheck Jun 12, 2015
b0ed76c
bump django yet again to 1.8
crccheck Jun 12, 2015
4c50544
switch to dj-obj-update package to handle model update_or_create
crccheck Jun 12, 2015
9d322b3
add more progress bar indicators to data loaders
crccheck Jun 14, 2015
9b07f9c
reenable geopy
crccheck Jun 14, 2015
929b7ba
delete deprecated url templatetag
crccheck Jun 14, 2015
2d330ff
remove procfile since this ain't goin' on heroku
crccheck Jun 14, 2015
974f703
fix get_queryset/get_query_set
crccheck Jun 14, 2015
3ae9c19
workflow tweaks
crccheck Apr 1, 2016
4e19626
Refactor site download to be all Makefile
crccheck Apr 1, 2016
3f2d174
switch to regular aws-cli instead of s3-parallel-put since it's broken
crccheck Oct 28, 2016
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,12 @@ example_project/local_settings.py

# Randomly generated files
*.log
*.pid
*.pot
*.pyc
.DS_Store
.sass-cache
site/
._*


# Files generated by Heroku
Expand Down
88 changes: 31 additions & 57 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,63 +1,47 @@
PROJECT=./example_project
MANAGE=python $(PROJECT)/manage.py
SITE_URL=localhost:8000
PORT=14327

help:
@echo "make commands:"
@echo " make help - this help"
@echo " make test - run test suite"
@echo " make resetdb - drop and recreate the database"
@echo " make scrape - get data and import"
@echo " make import - import data"
@echo " make site - spider $(SITE_URL) and save pages locally"
@echo " make upload - sync spidered pages to S3"
@echo "  make serve       - serve the spidered pages locally (on port 8088)"
help: ## Shows this help
@echo "$$(grep -h '#\{2\}' $(MAKEFILE_LIST) | sed 's/: #\{2\} / /' | column -t -s ' ')"


# TODO actually write some tests
test:
$(MANAGE) test tx_elevators


resetdb:
resetdb: ## Reset the dev database
$(MANAGE) reset_db --router=default --noinput
$(MANAGE) syncdb --noinput
$(MANAGE) migrate --noinput


# Backup the local database
#
# To restore
# cat tx_elevators-2014-08-31.dump | \
# docker run --rm --link postgis:postgis -t crccheck/postgis \
# pg_restore -U docker -h postgis --dbname elevators
dumpdb:
docker run --rm --link postgis:postgis -t crccheck/postgis \
pg_dump -U docker -h postgis -p 5432 -Fc elevators > tx_elevators-$$(date +"%Y-%m-%d").dump

# Dump building geocodes
#
# Note that `geocode` will still re-lookup bad addresses
#
# To restore: `django loadgeo data/geocoding.csv`
dumpgeo:
dumpgeo: ## Dump building geo data
$(MANAGE) dumpgeo > data/geocoding.csv

scrape:
scrape: ## Scrape new data
cd data && $(MAKE) $(MFLAGS) clean elevator_data_file.csv
python tx_elevators/scripts/scrape.py data/elevator_data_file.csv
@echo "should geocode the top 1000 too: $(MANAGE) geocode"


# timing for trivial import real 1m51.994s
# timing for a fresh import real 4m15.279s
import:
python tx_elevators/scripts/scrape.py data/elevator_data_file.csv


dbpush:
test $(SCP_DUMP)
test $(SCP_URL)
pg_dump -Fc --no-acl --no-owner tx_elevators > tx_elevators.dump
scp tx_elevators.dump $(SCP_DUMP)
heroku pgbackups:restore DATABASE $(SCP_URL)
rm tx_elevators.dump
web/start:
$(MANAGE) collectstatic --noinput
DEBUG=0 $(MANAGE) runserver $(PORT) --nothreading --noreload & echo $$! > web.pid
sleep 1

web/stop: web.pid
# pkill -P $$(cat web.pid)
kill $$(cat web.pid)
rm web.pid


# FINISHED --2013-04-01 00:10:54--
Expand All @@ -67,29 +51,19 @@ dbpush:
# FINISHED --2014-11-01 16:38:55--
# Total wall clock time: 9m 4s
# Downloaded: 25615 files, 120M in 0.8s (150 MB/s)
site:
bin/download_site.sh
#
# FINISHED --2016-04-01 04:48:54--
# Total wall clock time: 4m 59s
# Downloaded: 26757 files, 126M in 0.1s (971 MB/s)
site: web/start
mkdir -p ._site
cd ._site && wget -r localhost:$(PORT) --force-html -e robots=off -nH -nv --max-redirect 0 || true
@$(MAKE) web/stop

serve:
cd site && python -m SimpleHTTPServer 8088
cd ._site && python -m SimpleHTTPServer 8088

# 24340 files uploaded.
# 3 files skipped.
# real 200m23.933s

# 25611 files uploaded.
# 2662 files skipped.
# real 122m28.098s
upload:
LOGGING=WARN DEBUG=0 $(MANAGE) sync_s3 --dir site --gzip

# requires installing https://github.com/twpayne/s3-parallel-put
# uses 8 threads by default
#
# INFO:s3-parallel-put[statter-12800]:put 137686194 bytes in 28270 files in 697.4 seconds (197436 bytes/s, 40.5 files/s)
upload2:
cd site && s3-parallel-put --bucket=${AWS_BUCKET_NAME} \
--grant public-read --header "Cache-Control:max-age=2592000" --gzip .


.PHONY: help test resetdb scrape pushdb site upload serve
aws s3 sync ._site s3://$(AWS_BUCKET_NAME)/ \
--cache-control "max-age=2592000" \
--acl "public-read"
1 change: 0 additions & 1 deletion Procfile

This file was deleted.

34 changes: 34 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
TX Elevators
============

Dev Setup
---------

Installing Requirements:

pip install -r requirements.txt


Using Postgresql instead of Sqlite as your database:

export DATABASE_URL='postgres:///tx_elevators'


Getting Data
------------

If you don't have a database set up, `DEBUG=1 make resetdb` will create one for
you. Running `make scrape` will download a fresh copy of the CSV and import the
data. Afterwards, you can run `manage.py geocode` to geocode the data.


Deploying to S3
---------------

Partial instructions for deploying to a [hosted site on S3]:

1. Make sure you're not in debug mode.
2. Make sure this project is running locally on `http://localhost:8000`.
3. Run `make site upload`

[hosted site on S3]: http://docs.aws.amazon.com/AmazonS3/latest/dev/WebsiteHosting.html
62 changes: 0 additions & 62 deletions README.rst

This file was deleted.

28 changes: 0 additions & 28 deletions bin/download_site.sh

This file was deleted.

7 changes: 3 additions & 4 deletions data/Makefile
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
CSV = https://www.license.state.tx.us/DownLoadableContent/Elevator/elevator_data_file.csv

# Source Elevator Data CSV:
# https://www.license.state.tx.us/ElevatorSearch/HelpPage.asp#data
elevator_data_file.csv:
wget https://www.license.state.tx.us/DownLoadableContent/Elevator/elevator_data_file.csv
mv $@ $@.orig
cat $@.orig | csvsort > $@
curl $(CSV) | csvsort > $@


sample_elevator_data_file.csv: elevator_data_file.csv
Expand All @@ -12,7 +12,6 @@ sample_elevator_data_file.csv: elevator_data_file.csv

clean:
rm -f elevator_data_file.csv
rm -f elevator_data_file.csv.orig


.PHONY: clean
6 changes: 3 additions & 3 deletions example_project/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,8 @@ def project_dir(*paths):
LOGGING = {
'version': 1,
'disable_existing_loggers': False,
# Log everything
'root': {
'level': env.get('LOGGING', 'DEBUG'),
'level': env.get('LOGGING', 'WARNING'),
'handlers': ['console'],
},
'filters': {
Expand Down Expand Up @@ -180,7 +179,6 @@ def project_dir(*paths):
# extra apps used for development
INSTALLED_APPS += [
'django_extensions',
'debug_toolbar',

'django.contrib.sessions',
'django.contrib.auth',
Expand All @@ -193,6 +191,8 @@ def project_dir(*paths):
'django.contrib.messages.middleware.MessageMiddleware',
]

# STFU DJANGO, STOP COMPLAINING
TEST_RUNNER = 'django.test.runner.DiscoverRunner'

try:
from .local_settings import *
Expand Down
11 changes: 0 additions & 11 deletions requirements-dev.txt

This file was deleted.

12 changes: 9 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
Django==1.6.5
Django==1.8.2

dj-database-url>=0.2.1
dj-database-url==0.3.0
project_runpy
psycopg2>=2.4.5
gunicorn==0.17.2

boto==2.36.0
geopy==0.97.1
django-extensions==1.5.5
factory-boy
tqdm==1.0
dj-obj-update==0.2.0
6 changes: 5 additions & 1 deletion tx_elevators/management/commands/loadgeo.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import logging

from django.core.management.base import BaseCommand
from tqdm import tqdm


class Command(BaseCommand):
Expand All @@ -17,8 +18,11 @@ def handle(self, path, *args, **options):
logger = logging.getLogger(__name__)

with open(path) as csvfile:
for total, row in enumerate(csvfile, start=1):
pass
csvfile.seek(0)
reader = csv.reader(csvfile)
for row in reader:
for row in tqdm(reader, total=total, leave=True):
elbi, latitude, longitude = row
building = Building.objects.filter(elbi=elbi).update(
latitude=latitude,
Expand Down
Loading