diff --git a/go.mod b/go.mod index db850a1..8646472 100644 --- a/go.mod +++ b/go.mod @@ -2,21 +2,21 @@ module github.com/UTDNebula/api-tools go 1.24.0 +replace github.com/UTDNebula/nebula-api/api v0.0.0-20251202050932-54b3ff061b27 => ..\nebula-api\api + require ( github.com/PuerkitoBio/goquery v1.8.1 github.com/UTDNebula/nebula-api/api v0.0.0-20251202050932-54b3ff061b27 //points to the compound-key branch of the nebula-api. - github.com/chromedp/cdproto v0.0.0-20250120090109-d38428e4d9c8 + github.com/chromedp/cdproto v0.0.0-20250120090109-d38428e4d9c8 github.com/chromedp/chromedp v0.12.1 github.com/google/go-cmp v0.7.0 github.com/joho/godotenv v1.5.1 github.com/valyala/fastjson v1.6.4 - go.mongodb.org/mongo-driver v1.17.3 - golang.org/x/net v0.43.0 + go.mongodb.org/mongo-driver v1.17.4 + golang.org/x/net v0.47.0 google.golang.org/genai v1.32.0 ) - - require ( cel.dev/expr v0.19.2 // indirect cloud.google.com/go v0.118.3 // indirect @@ -29,7 +29,6 @@ require ( github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.25.0 // indirect github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.51.0 // indirect github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.51.0 // indirect - github.com/KyleBanks/depth v1.2.1 // indirect github.com/andybalholm/cascadia v1.3.1 // indirect github.com/bytedance/gopkg v0.1.3 // indirect github.com/bytedance/sonic v1.14.1 // indirect @@ -42,27 +41,10 @@ require ( github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/gabriel-vasile/mimetype v1.4.10 // indirect - github.com/getsentry/sentry-go v0.33.0 // indirect - github.com/getsentry/sentry-go/gin v0.33.0 // indirect github.com/gin-contrib/sse v1.1.0 // indirect github.com/gin-gonic/gin v1.10.1 // indirect github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect - github.com/go-openapi/jsonpointer v0.22.0 // indirect - 
github.com/go-openapi/jsonreference v0.21.1 // indirect - github.com/go-openapi/spec v0.21.0 // indirect - github.com/go-openapi/swag v0.24.1 // indirect - github.com/go-openapi/swag/cmdutils v0.24.0 // indirect - github.com/go-openapi/swag/conv v0.24.0 // indirect - github.com/go-openapi/swag/fileutils v0.24.0 // indirect - github.com/go-openapi/swag/jsonname v0.24.0 // indirect - github.com/go-openapi/swag/jsonutils v0.24.0 // indirect - github.com/go-openapi/swag/loading v0.24.0 // indirect - github.com/go-openapi/swag/mangling v0.24.0 // indirect - github.com/go-openapi/swag/netutils v0.24.0 // indirect - github.com/go-openapi/swag/stringutils v0.24.0 // indirect - github.com/go-openapi/swag/typeutils v0.24.0 // indirect - github.com/go-openapi/swag/yamlutils v0.24.0 // indirect github.com/go-playground/locales v0.14.1 // indirect github.com/go-playground/universal-translator v0.18.1 // indirect github.com/go-playground/validator/v10 v10.27.0 // indirect @@ -89,9 +71,6 @@ require ( github.com/pelletier/go-toml/v2 v2.2.4 // indirect github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect github.com/stretchr/testify v1.11.1 // indirect - github.com/swaggo/files v1.0.1 // indirect - github.com/swaggo/gin-swagger v1.6.1 // indirect - github.com/swaggo/swag v1.16.6 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/ugorji/go/codec v1.3.0 // indirect github.com/xdg-go/pbkdf2 v1.0.0 // indirect @@ -108,14 +87,12 @@ require ( go.opentelemetry.io/otel/sdk/metric v1.34.0 // indirect go.opentelemetry.io/otel/trace v1.34.0 // indirect golang.org/x/arch v0.21.0 // indirect - golang.org/x/crypto v0.42.0 // indirect - golang.org/x/mod v0.28.0 // indirect + golang.org/x/crypto v0.45.0 // indirect golang.org/x/oauth2 v0.28.0 // indirect - golang.org/x/sync v0.17.0 // indirect - golang.org/x/sys v0.36.0 // indirect - golang.org/x/text v0.29.0 // indirect + golang.org/x/sync v0.18.0 // indirect + golang.org/x/sys v0.38.0 // 
indirect + golang.org/x/text v0.31.0 // indirect golang.org/x/time v0.10.0 // indirect - golang.org/x/tools v0.36.0 // indirect google.golang.org/api v0.224.0 // indirect google.golang.org/genproto v0.0.0-20250303144028-a0af3efb3deb // indirect google.golang.org/genproto/googleapis/api v0.0.0-20250303144028-a0af3efb3deb // indirect diff --git a/go.sum b/go.sum index 946358f..955b664 100644 --- a/go.sum +++ b/go.sum @@ -28,14 +28,8 @@ github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0 github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.51.0/go.mod h1:SZiPHWGOOk3bl8tkevxkoiwPgsIl6CwrWcbwjfHZpdM= github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.51.0 h1:6/0iUd0xrnX7qt+mLNRwg5c0PGv8wpE8K90ryANQwMI= github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.51.0/go.mod h1:otE2jQekW/PqXk1Awf5lmfokJx4uwuqcj1ab5SpGeW0= -github.com/KyleBanks/depth v1.2.1 h1:5h8fQADFrWtarTdtDudMmGsC7GPbOAu6RVB3ffsVFHc= -github.com/KyleBanks/depth v1.2.1/go.mod h1:jzSb9d0L43HxTQfT+oSA1EEp2q+ne2uh6XgeJcm8brE= github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM= github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ= -github.com/UTDNebula/nebula-api/api v0.0.0-20251018005009-dd2dbf5b78dc h1:SHhxrjeG4/mIwSiY8Tx0u2IQ7xfr9rq+FSeqSE1Fcbc= -github.com/UTDNebula/nebula-api/api v0.0.0-20251018005009-dd2dbf5b78dc/go.mod h1:YSzlxyHwsPqohD61L16N87D2J4en8bmwsKm78qgyF7s= -github.com/UTDNebula/nebula-api/api v0.0.0-20251202050932-54b3ff061b27 h1:CMJgNHhCmLfPHPB5WpxYCJKar7AtmeiDAmmDR3P67HU= -github.com/UTDNebula/nebula-api/api v0.0.0-20251202050932-54b3ff061b27/go.mod h1:YSzlxyHwsPqohD61L16N87D2J4en8bmwsKm78qgyF7s= github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c= github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA= 
github.com/bytedance/gopkg v0.1.3 h1:TPBSwH8RsouGCBcMBktLt1AymVo2TVsBVCY4b6TnZ/M= @@ -71,10 +65,6 @@ github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2 github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/gabriel-vasile/mimetype v1.4.10 h1:zyueNbySn/z8mJZHLt6IPw0KoZsiQNszIpU+bX4+ZK0= github.com/gabriel-vasile/mimetype v1.4.10/go.mod h1:d+9Oxyo1wTzWdyVUPMmXFvp4F9tea18J8ufA774AB3s= -github.com/getsentry/sentry-go v0.33.0 h1:YWyDii0KGVov3xOaamOnF0mjOrqSjBqwv48UEzn7QFg= -github.com/getsentry/sentry-go v0.33.0/go.mod h1:C55omcY9ChRQIUcVcGcs+Zdy4ZpQGvNJ7JYHIoSWOtE= -github.com/getsentry/sentry-go/gin v0.33.0 h1:8oH2ydD/EeKlDMoENhCtq3rJ7EcQtjfdzrMM1Ku2J3I= -github.com/getsentry/sentry-go/gin v0.33.0/go.mod h1:uwqds9mMjBCn/+WcgtX0z+mBq487macTzDxNljM+/g8= github.com/gin-contrib/sse v1.1.0 h1:n0w2GMuUpWDVp7qSpvze6fAu9iRxJY4Hmj6AmBOU05w= github.com/gin-contrib/sse v1.1.0/go.mod h1:hxRZ5gVpWMT7Z0B0gSNYqqsSCNIJMjzvm6fqCz9vjwM= github.com/gin-gonic/gin v1.10.1 h1:T0ujvqyCSqRopADpgPgiTT63DUQVSfojyME59Ei63pQ= @@ -84,36 +74,6 @@ github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= -github.com/go-openapi/jsonpointer v0.22.0 h1:TmMhghgNef9YXxTu1tOopo+0BGEytxA+okbry0HjZsM= -github.com/go-openapi/jsonpointer v0.22.0/go.mod h1:xt3jV88UtExdIkkL7NloURjRQjbeUgcxFblMjq2iaiU= -github.com/go-openapi/jsonreference v0.21.1 h1:bSKrcl8819zKiOgxkbVNRUBIr6Wwj9KYrDbMjRs0cDA= -github.com/go-openapi/jsonreference v0.21.1/go.mod h1:PWs8rO4xxTUqKGu+lEvvCxD5k2X7QYkKAepJyCmSTT8= -github.com/go-openapi/spec v0.21.0 h1:LTVzPc3p/RzRnkQqLRndbAzjY0d0BCL72A6j3CdL9ZY= -github.com/go-openapi/spec v0.21.0/go.mod h1:78u6VdPw81XU44qEWGhtr982gJ5BWg2c0I5XwVMotYk= 
-github.com/go-openapi/swag v0.24.1 h1:DPdYTZKo6AQCRqzwr/kGkxJzHhpKxZ9i/oX0zag+MF8= -github.com/go-openapi/swag v0.24.1/go.mod h1:sm8I3lCPlspsBBwUm1t5oZeWZS0s7m/A+Psg0ooRU0A= -github.com/go-openapi/swag/cmdutils v0.24.0 h1:KlRCffHwXFI6E5MV9n8o8zBRElpY4uK4yWyAMWETo9I= -github.com/go-openapi/swag/cmdutils v0.24.0/go.mod h1:uxib2FAeQMByyHomTlsP8h1TtPd54Msu2ZDU/H5Vuf8= -github.com/go-openapi/swag/conv v0.24.0 h1:ejB9+7yogkWly6pnruRX45D1/6J+ZxRu92YFivx54ik= -github.com/go-openapi/swag/conv v0.24.0/go.mod h1:jbn140mZd7EW2g8a8Y5bwm8/Wy1slLySQQ0ND6DPc2c= -github.com/go-openapi/swag/fileutils v0.24.0 h1:U9pCpqp4RUytnD689Ek/N1d2N/a//XCeqoH508H5oak= -github.com/go-openapi/swag/fileutils v0.24.0/go.mod h1:3SCrCSBHyP1/N+3oErQ1gP+OX1GV2QYFSnrTbzwli90= -github.com/go-openapi/swag/jsonname v0.24.0 h1:2wKS9bgRV/xB8c62Qg16w4AUiIrqqiniJFtZGi3dg5k= -github.com/go-openapi/swag/jsonname v0.24.0/go.mod h1:GXqrPzGJe611P7LG4QB9JKPtUZ7flE4DOVechNaDd7Q= -github.com/go-openapi/swag/jsonutils v0.24.0 h1:F1vE1q4pg1xtO3HTyJYRmEuJ4jmIp2iZ30bzW5XgZts= -github.com/go-openapi/swag/jsonutils v0.24.0/go.mod h1:vBowZtF5Z4DDApIoxcIVfR8v0l9oq5PpYRUuteVu6f0= -github.com/go-openapi/swag/loading v0.24.0 h1:ln/fWTwJp2Zkj5DdaX4JPiddFC5CHQpvaBKycOlceYc= -github.com/go-openapi/swag/loading v0.24.0/go.mod h1:gShCN4woKZYIxPxbfbyHgjXAhO61m88tmjy0lp/LkJk= -github.com/go-openapi/swag/mangling v0.24.0 h1:PGOQpViCOUroIeak/Uj/sjGAq9LADS3mOyjznmHy2pk= -github.com/go-openapi/swag/mangling v0.24.0/go.mod h1:Jm5Go9LHkycsz0wfoaBDkdc4CkpuSnIEf62brzyCbhc= -github.com/go-openapi/swag/netutils v0.24.0 h1:Bz02HRjYv8046Ycg/w80q3g9QCWeIqTvlyOjQPDjD8w= -github.com/go-openapi/swag/netutils v0.24.0/go.mod h1:WRgiHcYTnx+IqfMCtu0hy9oOaPR0HnPbmArSRN1SkZM= -github.com/go-openapi/swag/stringutils v0.24.0 h1:i4Z/Jawf9EvXOLUbT97O0HbPUja18VdBxeadyAqS1FM= -github.com/go-openapi/swag/stringutils v0.24.0/go.mod h1:5nUXB4xA0kw2df5PRipZDslPJgJut+NjL7D25zPZ/4w= -github.com/go-openapi/swag/typeutils v0.24.0 
h1:d3szEGzGDf4L2y1gYOSSLeK6h46F+zibnEas2Jm/wIw= -github.com/go-openapi/swag/typeutils v0.24.0/go.mod h1:q8C3Kmk/vh2VhpCLaoR2MVWOGP8y7Jc8l82qCTd1DYI= -github.com/go-openapi/swag/yamlutils v0.24.0 h1:bhw4894A7Iw6ne+639hsBNRHg9iZg/ISrOVr+sJGp4c= -github.com/go-openapi/swag/yamlutils v0.24.0/go.mod h1:DpKv5aYuaGm/sULePoeiG8uwMpZSfReo1HR3Ik0yaG8= github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= @@ -197,12 +157,6 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= -github.com/swaggo/files v1.0.1 h1:J1bVJ4XHZNq0I46UU90611i9/YzdrF7x92oX1ig5IdE= -github.com/swaggo/files v1.0.1/go.mod h1:0qXmMNH6sXNf+73t65aKeB+ApmgxdnkQzVTAj2uaMUg= -github.com/swaggo/gin-swagger v1.6.1 h1:Ri06G4gc9N4t4k8hekMigJ9zKTFSlqj/9paAQCQs7cY= -github.com/swaggo/gin-swagger v1.6.1/go.mod h1:LQ+hJStHakCWRiK/YNYtJOu4mR2FP+pxLnILT/qNiTw= -github.com/swaggo/swag v1.16.6 h1:qBNcx53ZaX+M5dxVyTrgQ0PJ/ACK+NzhwcbieTt+9yI= -github.com/swaggo/swag v1.16.6/go.mod h1:ngP2etMK5a0P3QBizic5MEwpRmluJZPHjXcMoj4Xesg= github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= github.com/ugorji/go/codec v1.3.0 h1:Qd2W2sQawAfG8XSvzwhBeoGq71zXOC/Q1E9y/wUcsUA= @@ -218,8 +172,8 @@ github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gi github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78 h1:ilQV1hzziu+LLM3zUTJ0trRztfwgjqKnBWNtSRkbmwM= 
github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78/go.mod h1:aL8wCCfTfSfmXjznFBSZNN13rSJjlIOI1fUNAtF7rmI= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -go.mongodb.org/mongo-driver v1.17.3 h1:TQyXhnsWfWtgAhMtOgtYHMTkZIfBTpMTsMnd9ZBeHxQ= -go.mongodb.org/mongo-driver v1.17.3/go.mod h1:Hy04i7O2kC4RS06ZrhPRqj/u4DTYkFDAAccj+rVKqgQ= +go.mongodb.org/mongo-driver v1.17.4 h1:jUorfmVzljjr0FLzYQsGP8cgN/qzzxlY9Vh0C9KFXVw= +go.mongodb.org/mongo-driver v1.17.4/go.mod h1:Hy04i7O2kC4RS06ZrhPRqj/u4DTYkFDAAccj+rVKqgQ= go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= go.opentelemetry.io/contrib/detectors/gcp v1.34.0 h1:JRxssobiPg23otYU5SbWtQC//snGVIM3Tx6QRzlQBao= @@ -244,24 +198,22 @@ golang.org/x/arch v0.21.0 h1:iTC9o7+wP6cPWpDWkivCvQFGAHDQ59SrSxsLPcnkArw= golang.org/x/arch v0.21.0/go.mod h1:dNHoOeKiyja7GTvF9NJS1l3Z2yntpQNzgrjh1cU103A= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.42.0 h1:chiH31gIWm57EkTXpwnqf8qeuMUi0yekh6mT2AvFlqI= -golang.org/x/crypto v0.42.0/go.mod h1:4+rDnOTJhQCx2q7/j6rAN5XDw8kPjeaXEUR2eL94ix8= +golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q= +golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/mod v0.28.0 h1:gQBtGhjxykdjY9YhZpSlZIsbnaE2+PgjfLWUQTnoZ1U= -golang.org/x/mod v0.28.0/go.mod h1:yfB/L0NOf/kmEbXjzCPOx1iK1fRutOydrCMsqRhEBxI= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod 
h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE= -golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg= +golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY= +golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU= golang.org/x/oauth2 v0.28.0 h1:CrgCKl8PPAVtLnU3c+EDw6x11699EWlsDeWNWKdIOkc= golang.org/x/oauth2 v0.28.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= -golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I= +golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -270,8 +222,8 @@ golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k= -golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= +golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= @@ -281,15 +233,13 @@ golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= -golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= +golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM= +golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM= golang.org/x/time v0.10.0 h1:3usCWA8tQn0L8+hFJQNgzpWbd89begxN66o1Ojdn5L4= golang.org/x/time v0.10.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg= -golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod 
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/api v0.224.0 h1:Ir4UPtDsNiwIOHdExr3fAj4xZ42QjK7uQte3lORLJwU= google.golang.org/api v0.224.0/go.mod h1:3V39my2xAGkodXy0vEqcEtkqgw2GtrFL5WuBZlCTCOQ= diff --git a/main.go b/main.go index 7d0af33..9ba21b6 100644 --- a/main.go +++ b/main.go @@ -36,8 +36,6 @@ func main() { // Flag for profile scraping scrapeProfiles := flag.Bool("profiles", false, "Alongside -scrape, signifies that professor profiles should be scraped.") - // Flag for soc scraping - scrapeOrganizations := flag.Bool("organizations", false, "Alongside -scrape, signifies that SOC organizations should be scraped.") // Flag for calendar scraping and parsing cometCalendar := flag.Bool("cometCalendar", false, "Alongside -scrape or -parse, signifies that the Comet Calendar should be scraped/parsed.") // Flag for astra scraping and parsing @@ -106,8 +104,6 @@ func main() { log.Panic("No term specified for coursebook scraping! Use -term to specify.") } scrapers.ScrapeCoursebook(*term, *startPrefix, *outDir, *resume) - case *scrapeOrganizations: - scrapers.ScrapeOrganizations(*outDir) case *cometCalendar: scrapers.ScrapeCometCalendar(*outDir) case *astra: diff --git a/parser/cometCalendarParser.go b/parser/cometCalendarParser.go index fd23056..4575b6c 100644 --- a/parser/cometCalendarParser.go +++ b/parser/cometCalendarParser.go @@ -1,3 +1,7 @@ +/* + This file contains the code for the comet calendar events parser. 
+*/ + package parser import ( @@ -8,14 +12,16 @@ import ( "regexp" "slices" "strings" + "time" + "github.com/UTDNebula/api-tools/scrapers" "github.com/UTDNebula/api-tools/utils" "github.com/UTDNebula/nebula-api/api/schema" ) // Some events have only the building name, not the abbreviation // Maps building names to their abbreviations -var buildingAbbreviations = map[string]string{ +var DefaultBuildings = map[string]string{ "Activity Center": "AB", "Activity Center Bookstore": "ACB", "Administration": "AD", @@ -74,7 +80,7 @@ var buildingAbbreviations = map[string]string{ } // Valid building abreviations for checking -var validAbbreviations []string = []string{ +var DefaultValid []string = []string{ "AB", "ACB", "AD", @@ -146,6 +152,11 @@ func ParseCometCalendar(inDir string, outDir string) { } multiBuildingMap := make(map[string]map[string]map[string][]schema.Event) + // Some events have only the building name, not the abbreviation + buildingAbbreviations, validAbbreviations, err := getLocationAbbreviations(inDir) + if err != nil { + panic(err) + } for _, event := range allEvents { @@ -239,3 +250,52 @@ func ParseCometCalendar(inDir string, outDir string) { utils.WriteJSON(fmt.Sprintf("%s/cometCalendar.json", outDir), result) } + +// getLocationAbbreviations dynamically retrieves all of the location abbreviations +func getLocationAbbreviations(inDir string) (map[string]string, []string, error) { + // Get the locations from the map scraper + var mapFile []byte + + mapFile, err := os.ReadFile(inDir + "/mapLocations.json") + if err != nil { + if os.IsNotExist(err) { + // Scrape the data if it doesn't exist yet and then get the map file + scrapers.ScrapeMapLocations(inDir) + time.Sleep(2 * time.Second) + ParseMapLocations(inDir, inDir) + time.Sleep(2 * time.Second) + + // If reading the locations fails again, it's not because the locations are unscraped + mapFile, err = os.ReadFile(inDir + "/mapLocations.json") + if err != nil { + return nil, nil, err + } + } else { + return 
nil, nil, err + } + } + + var locations []schema.MapBuilding + if err = json.Unmarshal(mapFile, &locations); err != nil { + return nil, nil, err + } + + // Process the abbreviations + buildingsAbbreviations := make(map[string]string, 0) // Maps building names to their abbreviations + validAbbreviations := make([]string, 0) // Valid building abbreviations for checking + + for _, location := range locations { + // Trim the trailing acronym from the name + trimmedName := strings.Split(*location.Name, " (")[0] + // Fall back to an empty abbreviation for locations that have no acronym + abbreviation := "" + if location.Acronym != nil { + abbreviation = *location.Acronym + } + + buildingsAbbreviations[trimmedName] = abbreviation + validAbbreviations = append(validAbbreviations, abbreviation) + } + + return buildingsAbbreviations, validAbbreviations, nil +} diff --git a/parser/parser_test.go b/parser/parser_test.go index 35b92c3..7ffca8e 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -361,7 +361,7 @@ func TestParse(t *testing.T) { if outputProfessor, ok := ProfessorsByKey[key]; ok { diff := cmp.Diff(expectedProfessor, outputProfessor, - cmpopts.IgnoreFields(schema.Professor{}, "Id"), + cmpopts.IgnoreFields(schema.Professor{}, "Id", "Key", "Section_keys"), cmp.Transformer("Sections", func(sections []primitive.ObjectID) []string { result := make([]string, 0, len(sections)) for _, id := range sections { @@ -411,7 +411,7 @@ func TestParse(t *testing.T) { t.Run(key, func(t *testing.T) { if outputSection, ok := SectionsByClass[key]; ok { diff := cmp.Diff(expectedSection, outputSection, - cmpopts.IgnoreFields(schema.Section{}, "Id", "Key", "Course_key"), + cmpopts.IgnoreFields(schema.Section{}, "Id", "Key", "Course_key", "Professor_keys"), cmp.Transformer("Professors", func(profIds []primitive.ObjectID) []string { result := make([]string, 0, len(profIds)) for _, id := range profIds { diff --git a/parser/professorParser.go b/parser/professorParser.go index 40eb3e5..0360ada 100644 --- 
a/parser/professorParser.go +++ b/parser/professorParser.go @@ -10,10 +10,10 @@ import ( "go.mongodb.org/mongo-driver/bson/primitive" ) -func parseProfessors(sectionId primitive.ObjectID, rowInfo map[string]*goquery.Selection) []primitive.ObjectID { +func parseProfessors(sectionId schema.SectionKey, rowInfo map[string]*goquery.Selection) []schema.ProfessorKey { professorText := utils.TrimWhitespace(rowInfo["Instructor(s):"].Text()) professorMatches := personRegexp.FindAllStringSubmatch(professorText, -1) - var profRefs []primitive.ObjectID = make([]primitive.ObjectID, 0, len(professorMatches)) + var profRefs []schema.ProfessorKey = make([]schema.ProfessorKey, 0, len(professorMatches)) for _, match := range professorMatches { nameStr := utils.TrimWhitespace(match[1]) @@ -29,10 +29,15 @@ func parseProfessors(sectionId primitive.ObjectID, rowInfo map[string]*goquery.S profKey := firstName + lastName + professorKey := schema.ProfessorKey { + First_name: firstName, + Last_name: lastName, + } + prof, profExists := Professors[profKey] if profExists { - prof.Sections = append(prof.Sections, sectionId) - profRefs = append(profRefs, prof.Id) + prof.Section_keys = append(prof.Section_keys, sectionId) + profRefs = append(profRefs, professorKey) continue } @@ -40,10 +45,11 @@ func parseProfessors(sectionId primitive.ObjectID, rowInfo map[string]*goquery.S prof.Id = primitive.NewObjectID() prof.First_name = firstName prof.Last_name = lastName + prof.Key = professorKey prof.Titles = []string{utils.TrimWhitespace(match[2])} prof.Email = utils.TrimWhitespace(match[3]) - prof.Sections = []primitive.ObjectID{sectionId} - profRefs = append(profRefs, prof.Id) + prof.Section_keys = []schema.SectionKey{sectionId} + profRefs = append(profRefs, professorKey) Professors[profKey] = prof ProfessorIDMap[prof.Id] = profKey } diff --git a/parser/sectionParser.go b/parser/sectionParser.go index b150276..14bc835 100644 --- a/parser/sectionParser.go +++ b/parser/sectionParser.go @@ -71,7 +71,7 @@ 
func parseSection(rowInfo map[string]*goquery.Selection, classInfo map[string]st Section_number: sectionNumber, Course_key: courseKey, Academic_session: session, - Professors: parseProfessors(id, rowInfo), + Professor_keys: parseProfessors(sectionKey, rowInfo), Teaching_assistants: getTeachingAssistants(rowInfo), Internal_class_number: classNum, Instruction_mode: getInstructionMode(classInfo), diff --git a/parser/validator.go b/parser/validator.go index a5aff4c..bc2ba45 100644 --- a/parser/validator.go +++ b/parser/validator.go @@ -6,7 +6,6 @@ import ( "github.com/UTDNebula/api-tools/utils" "github.com/UTDNebula/nebula-api/api/schema" - "go.mongodb.org/mongo-driver/bson/primitive" ) // Main validation, putting everything together @@ -29,6 +28,11 @@ func validate() { courseByKey[course.Key] = course } + profByKey := make(map[schema.ProfessorKey]*schema.Professor) + for _, professor := range Professors { + profByKey[professor.Key] = professor + } + log.Printf("\nValidating courses...") courseKeys := utils.GetMapKeys(Courses) for i := range len(courseKeys) { @@ -54,7 +58,7 @@ func validate() { valDuplicateSections(section1, section2) } // Make sure section isn't referencing any nonexistent professors, and that section-professor references are consistent both ways - valSectionReferenceProf(section1, Professors, ProfessorIDMap) + valSectionReferenceProf(section1, profByKey) // Make sure section isn't referencing a nonexistant course valSectionReferenceCourse(section1, courseByKey) @@ -106,7 +110,7 @@ func valCourseReference(course *schema.Course, sections map[schema.SectionKey]*s // Validate if the sections are duplicate func valDuplicateSections(section1 *schema.Section, section2 *schema.Section) { - if section1.Key == section2.Key && section1.Academic_session == section2.Academic_session { + if section1.Key == section2.Key { log.Print("Duplicate section found!") log.Printf("Section 1: %v\n\nSection 2: %v", section1, section2) log.Panic("Sections failed to validate!") 
@@ -114,20 +118,20 @@ func valDuplicateSections(section1 *schema.Section, section2 *schema.Section) { } // Validate section reference to professor -func valSectionReferenceProf(section *schema.Section, profs map[string]*schema.Professor, profIDMap map[primitive.ObjectID]string) { - for _, profID := range section.Professors { - professorKey, exists := profIDMap[profID] +func valSectionReferenceProf(section *schema.Section, profs map[schema.ProfessorKey]*schema.Professor) { + for _, profKey := range section.Professor_keys { + professor, exists := profs[profKey] // validate if the section references to some prof not in the parsed professors if !exists { log.Printf("Nonexistent professor reference found for section ID %s!", section.Id) - log.Printf("Referenced professor ID: %s", profID) + log.Printf("Referenced professor key: %v", profKey) log.Panic("Sections failed to validate!") } // validate if the referenced professor references back to section - if !slices.Contains(profs[professorKey].Sections, section.Id) { + if !slices.Contains(professor.Section_keys, section.Key) { log.Printf("Inconsistent professor reference found for section ID %s! The section references the professor, but not vice-versa!", section.Id) - log.Printf("Referenced professor ID: %s", profID) + log.Printf("Referenced professor key: %v", professor.Key) log.Panic("Sections failed to validate!") } } @@ -146,7 +150,7 @@ func valSectionReferenceCourse(section *schema.Section, coursesByKey map[schema. 
// Validate if the professors are duplicate func valDuplicateProfs(prof1 *schema.Professor, prof2 *schema.Professor) { - if prof1.First_name == prof2.First_name && prof1.Last_name == prof2.Last_name && prof1.Profile_uri == prof2.Profile_uri { + if prof1.Key == prof2.Key && prof1.Profile_uri == prof2.Profile_uri { log.Printf("Duplicate professor found!") log.Printf("Professor 1: %v\n\nProfessor 2: %v", prof1, prof2) log.Panic("Professors failed to validate!") diff --git a/parser/validator_test.go b/parser/validator_test.go index ef0f503..badd024 100644 --- a/parser/validator_test.go +++ b/parser/validator_test.go @@ -10,7 +10,6 @@ import ( "testing" "github.com/UTDNebula/nebula-api/api/schema" - "go.mongodb.org/mongo-driver/bson/primitive" ) // Globals for testing these validation units @@ -19,7 +18,16 @@ var testSections []*schema.Section var testProfessors []*schema.Professor // Map index of test sections to test courses -var indexMap map[int]int +var sectionCourseMap map[int]int + +// Map professor to sections +var profSectionMap map[int][]int + +// Map sections to professor +var sectionProfMap map[int][]int + +// Map courses to sections +var courseSectionMap map[int][]int func init() { // Parse the test courses @@ -52,8 +60,94 @@ func init() { panic(err) } - // The correct mapping - indexMap = map[int]int{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 4} + // The correct mapping between sections and courses + sectionCourseMap = map[int]int{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 4} + + // Mapping between professor and sections + profSectionMap = map[int][]int{0: {4, 5}, 1: {4, 5}, 2:{0}, 3:{1}} + + // Reverse mappings + courseSectionMap = map[int][]int{} + for sectionIndex, courseIndex := range sectionCourseMap { + courseSectionMap[courseIndex] = append(courseSectionMap[courseIndex], sectionIndex) + } + + sectionProfMap = map[int][]int{} + for profIndex, sections := range profSectionMap { + for _, sectionIndex := range sections { + sectionProfMap[sectionIndex] = 
append(sectionProfMap[sectionIndex], profIndex) + } + } + + // Set up keys for courses + for i := range testCourses { + testCourses[i].Key = schema.CourseKey { + Course_number: testCourses[i].Course_number, + Catalog_year: testCourses[i].Catalog_year, + Subject_prefix: testCourses[i].Subject_prefix, + } + + sectionKeys := []schema.SectionKey{} + for _, sectionIndex := range courseSectionMap[i] { + sectionKey := schema.SectionKey { + Section_number: testSections[sectionIndex].Section_number, + Term: testSections[sectionIndex].Academic_session.Name, + Course_number: testCourses[i].Course_number, + Catalog_year: testCourses[i].Catalog_year, + Subject_prefix: testCourses[i].Subject_prefix, + } + sectionKeys = append(sectionKeys, sectionKey) + } + testCourses[i].Section_keys = sectionKeys + } + + // Set up keys for sections + for i := range testSections { + testSections[i].Key = schema.SectionKey { + Section_number: testSections[i].Section_number, + Term: testSections[i].Academic_session.Name, + Course_number: testCourses[sectionCourseMap[i]].Course_number, + Catalog_year: testCourses[sectionCourseMap[i]].Catalog_year, + Subject_prefix: testCourses[sectionCourseMap[i]].Subject_prefix, + } + + testSections[i].Course_key = schema.CourseKey { + Course_number: testCourses[sectionCourseMap[i]].Course_number, + Catalog_year: testCourses[sectionCourseMap[i]].Catalog_year, + Subject_prefix: testCourses[sectionCourseMap[i]].Subject_prefix, + } + + professorKeys := []schema.ProfessorKey{} + for _, professorIndex := range sectionProfMap[i] { + professorKey := schema.ProfessorKey { + First_name: testProfessors[professorIndex].First_name, + Last_name: testProfessors[professorIndex].Last_name, + } + professorKeys = append(professorKeys, professorKey) + } + testSections[i].Professor_keys = professorKeys + } + + // Set up keys for professors + for i := range testProfessors { + testProfessors[i].Key = schema.ProfessorKey { + First_name: testProfessors[i].First_name, + Last_name: 
testProfessors[i].Last_name, + } + + sectionKeys := []schema.SectionKey{} + for _, sectionIndex := range profSectionMap[i] { + sectionKey := schema.SectionKey { + Section_number: testSections[sectionIndex].Section_number, + Term: testSections[sectionIndex].Academic_session.Name, + Course_number: testCourses[sectionCourseMap[sectionIndex]].Course_number, + Catalog_year: testCourses[sectionCourseMap[sectionIndex]].Catalog_year, + Subject_prefix: testCourses[sectionCourseMap[sectionIndex]].Subject_prefix, + } + sectionKeys = append(sectionKeys, sectionKey) + } + testProfessors[i].Section_keys = sectionKeys + } } // Test duplicate courses. Designed for fail cases @@ -175,13 +269,15 @@ func TestCourseReferencePass(t *testing.T) { // Test section reference to professor, designed for pass case // TestSectionReferenceProfPass ensures section professor references are mutual. func TestSectionReferenceProfPass(t *testing.T) { - // Build profIDMap & profs - profIDMap := make(map[primitive.ObjectID]string) - profs := make(map[string]*schema.Professor) + // Build profs maps + profs := make(map[schema.ProfessorKey]*schema.Professor) for _, professor := range testProfessors { - profIDMap[professor.Id] = professor.First_name + professor.Last_name - profs[professor.First_name+professor.Last_name] = professor + profKey := schema.ProfessorKey { + First_name: professor.First_name, + Last_name: professor.Last_name, + } + profs[profKey] = professor } var logBuffer bytes.Buffer @@ -189,7 +285,6 @@ func TestSectionReferenceProfPass(t *testing.T) { defer func() { logOutput := logBuffer.String() - if logOutput != "" { t.Errorf("Expected nothing printed in log") } @@ -199,21 +294,22 @@ func TestSectionReferenceProfPass(t *testing.T) { }() for _, section := range testSections { - valSectionReferenceProf(section, profs, profIDMap) + valSectionReferenceProf(section, profs) } } // Test section reference to professors, designed for fail case // TestSectionReferenceProfFail catches missing 
professor back-references. func TestSectionReferenceProfFail(t *testing.T) { - - profIDMap := make(map[primitive.ObjectID]string) - profs := make(map[string]*schema.Professor) + profs := make(map[schema.ProfessorKey]*schema.Professor) for i, professor := range testProfessors { if i != 0 { - profIDMap[professor.Id] = professor.First_name + professor.Last_name - profs[professor.First_name+professor.Last_name] = professor + profKey := schema.ProfessorKey { + First_name: professor.First_name, + Last_name: professor.Last_name, + } + profs[profKey] = professor } } @@ -222,10 +318,9 @@ func TestSectionReferenceProfFail(t *testing.T) { defer func() { logOutput := logBuffer.String() - for _, msg := range []string{ "Nonexistent professor reference found for section ID ObjectID(\"67d07ee0c972c18731e23bea\")!", - "Referenced professor ID: ObjectID(\"67d07ee0c972c18731e23beb\")", + "Referenced professor key: {Naim Bugra Ozel}", } { if !strings.Contains(logOutput, msg) { t.Errorf("The function didn't log correct message. 
Expected \"%v\"", msg) @@ -242,7 +337,7 @@ func TestSectionReferenceProfFail(t *testing.T) { }() for _, section := range testSections { - valSectionReferenceProf(section, profs, profIDMap) + valSectionReferenceProf(section, profs) } } @@ -259,7 +354,6 @@ func TestSectionReferenceCourse(t *testing.T) { defer func() { logOutput := logBuffer.String() - if logOutput != "" { t.Errorf("Expected nothing printed in log") } @@ -376,3 +470,93 @@ func testDuplicatePass(objType string, ix1 int, ix2 int, t *testing.T) { } // fail = "missing" means it lacks one sections +// fail = "modified" means one section's course reference has been modified +// For course-section, no longer needed +// func testCourseReferenceFail(fail string, courseIx int, sectionIx int, t *testing.T) { +// sectionMap := make(map[primitive.ObjectID]*schema.Section) + +// var sectionID, originalID primitive.ObjectID // used to store IDs of modified sections + +// // Build the failed section map based on fail type +// switch fail { +// case "missing": +// // Misses a section +// for i, section := range testSections { +// if sectionIx != i { +// sectionMap[section.Id] = section +// } else { +// sectionID = section.Id // Nonexistent ID referenced by course +// } +// } +// case "modified": +// // One section doesn't reference to correct courses +// for i, section := range testSections { +// sectionMap[section.Id] = section +// if sectionIx == i { +// // Save the section ID and original course reference to be restored later on +// sectionID = section.Id +// originalID = section.Course_reference + +// // Modified part +// sectionMap[section.Id].Course_reference = primitive.NewObjectID() +// } +// } +// } + +// // Expected msgs +// var expectedMsgs []string + +// // The course that references nonexistent stuff +// var failCourse *schema.Course + +// if fail == "missing" { +// failCourse = testCourses[courseIx] + +// expectedMsgs = []string{ +// fmt.Sprintf("Nonexistent section reference found for %v%v!", 
failCourse.Subject_prefix, failCourse.Course_number), +// fmt.Sprintf("Referenced section ID: %s\nCourse ID: %s", sectionID, failCourse.Id), +// } +// } else { +// failCourse = testCourses[courseIx] +// failSection := testSections[sectionIx] + +// expectedMsgs = []string{ +// fmt.Sprintf("Inconsistent section reference found for %v%v! The course references the section, but not vice-versa!", +// failCourse.Subject_prefix, failCourse.Course_number), +// fmt.Sprintf("Referenced section ID: %s\nCourse ID: %s\nSection course reference: %s", +// failSection.Id, failCourse.Id, failSection.Course_reference), +// } +// } + +// // Buffer to capture the output +// var logBuffer bytes.Buffer +// log.SetOutput(&logBuffer) + +// defer func() { +// logOutput := logBuffer.String() + +// for _, msg := range expectedMsgs { +// if !strings.Contains(logOutput, msg) { +// t.Errorf("The function didn't log correct message. Expected \"%v\"", msg) +// } +// } + +// // Restore to original course reference of modified section (if needed) +// if fail == "modified" { +// sectionMap[sectionID].Course_reference = originalID +// } + +// if r := recover(); r == nil { +// t.Errorf("The function didn't panic") +// } else { +// if r != "Courses failed to validate!" 
{ +// t.Errorf("The function panic the wrong message") +// } +// } +// }() + +// // Run func +// for _, course := range testCourses { +// valCourseReference(course, sectionMap) +// } +// } \ No newline at end of file diff --git a/scrapers/astra.go b/scrapers/astra.go index 849f2d8..0eb0a15 100644 --- a/scrapers/astra.go +++ b/scrapers/astra.go @@ -46,16 +46,13 @@ func ScrapeAstra(outDir string) { time.Sleep(500 * time.Millisecond) cancel() // Don't need chromedp anymore - // Starting date date := time.Now() + endDate := date.Add(time.Hour * 24 * 365) // Start on previous date to make sure we have today's data, regardless of what timezone the scraper is in date = date.Add(time.Hour * -24) - // Stop condition - lt10EventsCount := 0 - - // Run until 90 days of no events - for lt10EventsCount < 90 { + // Run for a year + for ; date.Before(endDate); date = date.Add(time.Hour * 24) { formattedDate := date.Format("2006-01-02") log.Printf("Scraping %s...", formattedDate) @@ -85,14 +82,6 @@ func ScrapeAstra(outDir string) { if numEvents >= MAX_EVENTS_PER_DAY { log.Panic("ERROR: Max events per day exceeded!") } - if numEvents < 10 { - lt10EventsCount += 1 - if lt10EventsCount > 30 { - log.Printf("There have been %d days in a row with fewer than 10 events.", lt10EventsCount) - } - } else { - lt10EventsCount = 0 - } // Add to record comma := "," @@ -101,7 +90,6 @@ func ScrapeAstra(outDir string) { firstLoop = false } days = fmt.Sprintf("%s%s\"%s\":%s", days, comma, formattedDate, stringBody) - date = date.Add(time.Hour * 24) } log.Printf("Scraped Astra up to %s!", date.Format("2006-01-02")) diff --git a/scrapers/cometCalendar.go b/scrapers/cometCalendar.go index 26e42af..3a4f613 100644 --- a/scrapers/cometCalendar.go +++ b/scrapers/cometCalendar.go @@ -1,5 +1,5 @@ /* - This file contains the code for the events scraper. + This file contains the code for the comet calendar events scraper. 
*/ package scrapers @@ -19,9 +19,11 @@ import ( "go.mongodb.org/mongo-driver/bson/primitive" ) +const CAL_URL string = "https://calendar.utdallas.edu/api/2/events" + // RawEvent mirrors the nested event payload returned by the calendar API. type RawEvent struct { - Event map[string]interface{} `json:"event"` + Event map[string]any `json:"event"` } // APICalendarResponse models the calendar API pagination envelope. @@ -31,172 +33,201 @@ type APICalendarResponse struct { Date map[string]string `json:"date"` } -// ScrapeCometCalendar retrieves calendar events through the API and writes normalized JSON output. +// ScrapeCometCalendar retrieves calendar events through the API func ScrapeCometCalendar(outDir string) { err := os.MkdirAll(outDir, 0777) if err != nil { panic(err) } - cli := http.Client{Timeout: 15 * time.Second} + client := http.Client{Timeout: 15 * time.Second} var calendarData APICalendarResponse // Get the total number of pages log.Printf("Getting the number of pages...") - if err := scrapeAndUnmarshal(&cli, 0, &calendarData); err != nil { + + if err := callAndUnmarshal(&client, 0, &calendarData); err != nil { panic(err) } numPages := calendarData.Page["total"] log.Printf("The number of pages is %d!\n\n", numPages) - var events []schema.Event + var calendarEvents []schema.Event for page := range numPages { log.Printf("Scraping events of page %d...", page+1) - if err := scrapeAndUnmarshal(&cli, page+1, &calendarData); err != nil { + if err := callAndUnmarshal(&client, page+1, &calendarData); err != nil { panic(err) } - for _, rawEvent := range calendarData.Events { - // Parse the time - eventInstance := toMap(toMap(toSlice(rawEvent.Event["event_instances"])[0])["event_instance"]) - startTime := parseTime(toString(eventInstance["start"])) - endTime := startTime - if toString(eventInstance["end"]) != "" { - endTime = parseTime(toString(eventInstance["end"])) - } - - // Parse location - location := strings.Trim(fmt.Sprintf("%s, %s", 
toString(rawEvent.Event["location_name"]), toString(rawEvent.Event["room_number"])), " ,") - - // Parse the event types, event topic, and event target audience - filters := toMap(rawEvent.Event["filters"]) - eventTypes := []string{} - eventTopics := []string{} - targetAudiences := []string{} - - rawTypes := toSlice(filters["event_types"]) - for _, rawType := range rawTypes { - eventTypes = append(eventTypes, toString(toMap(rawType)["name"])) - } - - rawAudiences := toSlice(filters["event_target_audience"]) - for _, audience := range rawAudiences { - targetAudiences = append(targetAudiences, toString(toMap(audience)["name"])) - } - - rawTopics := toSlice(filters["event_topic"]) - for _, topic := range rawTopics { - eventTopics = append(eventTopics, toString(toMap(topic)["name"])) - } - - // Parse the event departments, and tags - departments := []string{} - tags := []string{} - - rawTags := toSlice(rawEvent.Event["tags"]) - for _, tag := range rawTags { - tags = append(tags, tag.(string)) - } - - rawDeparments := toSlice(rawEvent.Event["departments"]) - for _, deparment := range rawDeparments { - departments = append(departments, toMap(deparment)["name"].(string)) - } - - // Parse the contact info, =ote that some events won't have contact phone number - rawContactInfo := toMap(rawEvent.Event["custom_fields"]) - contactInfo := [3]string{} - for i, infoField := range []string{ - "contact_information_name", "contact_information_email", "contact_information_phone", - } { - contactInfo[i] = toString(rawContactInfo[infoField]) - } - - events = append(events, schema.Event{ + // Parse all necessary info + startTime, endTime := getTime(rawEvent) + eventTypes, targetAudiences, eventTopics := getFilters(rawEvent) + departments, tags := getDepartmentsAndTags(rawEvent) + contactInfo := getContactInfo(rawEvent) + + calendarEvents = append(calendarEvents, schema.Event{ Id: primitive.NewObjectID(), - Summary: toString(rawEvent.Event["title"]), - Location: location, + Summary: 
convert[string](rawEvent.Event["title"]), + Location: getEventLocation(rawEvent), StartTime: startTime, EndTime: endTime, - Description: toString(rawEvent.Event["description_text"]), + Description: convert[string](rawEvent.Event["description_text"]), EventType: eventTypes, TargetAudience: targetAudiences, Topic: eventTopics, EventTags: tags, - EventWebsite: toString(rawEvent.Event["url"]), + EventWebsite: convert[string](rawEvent.Event["url"]), Department: departments, ContactName: contactInfo[0], ContactEmail: contactInfo[1], ContactPhoneNumber: contactInfo[2], }) } + log.Printf("Scraped events of page %d successfully!\n", page+1) } - if err := utils.WriteJSON(fmt.Sprintf("%s/cometCalendarScraped.json", outDir), events); err != nil { + writePath := fmt.Sprintf("%s/cometCalendarScraped.json", outDir) + if err := utils.WriteJSON(writePath, calendarEvents); err != nil { panic(err) } - log.Printf("Finished scraping %d events successfully!\n\n", len(events)) + + log.Printf("Finished scraping %d events successfully!\n\n", len(calendarEvents)) } // scrapeAndUnmarshal fetches a calendar page and decodes it into data. 
-func scrapeAndUnmarshal(client *http.Client, page int, data *APICalendarResponse) error { +func callAndUnmarshal(client *http.Client, page int, data *APICalendarResponse) error { // Call API to get the byte data - calendarUrl := fmt.Sprintf("https://calendar.utdallas.edu/api/2/events?days=365&pp=100&page=%d", page) - req, err := http.NewRequest("GET", calendarUrl, nil) + calendarUrl := fmt.Sprintf("%s?days=365&pp=100&page=%d", CAL_URL, page) + request, err := http.NewRequest("GET", calendarUrl, nil) if err != nil { return err } - res, err := client.Do(req) + request.Header = http.Header{ + "Content-type": {"application/json"}, + "Accept": {"application/json"}, + } + + response, err := client.Do(request) if err != nil { return err } - if res != nil && res.StatusCode != 200 { - return fmt.Errorf("ERROR: Non-200 status is returned, %s", res.Status) + if response != nil && response.StatusCode != 200 { + return fmt.Errorf("ERROR: Non-200 status is returned, %s", response.Status) } + defer response.Body.Close() // Unmarshal bytes to the response data buffer := bytes.Buffer{} - if _, err = buffer.ReadFrom(res.Body); err != nil { + if _, err = buffer.ReadFrom(response.Body); err != nil { return err } - res.Body.Close() if err = json.Unmarshal(buffer.Bytes(), &data); err != nil { return err } + return nil } -// toSlice attempts to convert data into a slice of interface{}. 
-func toSlice(data interface{}) []interface{} { - if array, ok := data.([]interface{}); ok { - return array +// getTime parses the start and end time of the event +func getTime(event RawEvent) (time.Time, time.Time) { + instance := convert[map[string]any]( + convert[map[string]any]( + convert[[]any](event.Event["event_instances"])[0])["event_instance"]) + + // Converts RFC3339 timestamp string to time.Time + startTime, err := time.Parse(time.RFC3339, convert[string](instance["start"])) + if err != nil { + panic(err) } - return nil + + var endTime time.Time + if convert[string](instance["end"]) != "" { + endTime, err = time.Parse(time.RFC3339, convert[string](instance["end"])) + if err != nil { + panic(err) + } + } else { + endTime = startTime + } + + return startTime, endTime +} + +// getEventLocation parses the location of the event +func getEventLocation(event RawEvent) string { + building := convert[string](event.Event["location_name"]) + room := convert[string](event.Event["room_number"]) + location := strings.Trim(fmt.Sprintf("%s, %s", building, room), " ,") + + return location } -// toMap attempts to convert data into a map keyed by string. 
-func toMap(data interface{}) map[string]interface{} { - if dataMap, ok := data.(map[string]interface{}); ok { - return dataMap +// getFilters parses the types, topics, and target audiences +func getFilters(event RawEvent) ([]string, []string, []string) { + types := []string{} + audiences := []string{} + topics := []string{} + + filters := convert[map[string]any](event.Event["filters"]) + + rawTypes := convert[[]any](filters["event_types"]) + for _, rawType := range rawTypes { + types = append(types, convert[string](convert[map[string]any](rawType)["name"])) } - return nil + + rawAudiences := convert[[]any](filters["event_target_audience"]) + for _, audience := range rawAudiences { + audiences = append(audiences, convert[string](convert[map[string]any](audience)["name"])) + } + + rawTopics := convert[[]any](filters["event_topic"]) + for _, topic := range rawTopics { + topics = append(topics, convert[string](convert[map[string]any](topic)["name"])) + } + + return types, audiences, topics } -// toString returns the string form of data or empty string when nil. -func toString(data interface{}) string { - if data != nil { - if dataString, ok := data.(string); ok { - return dataString - } +// getDepartmentsAndTags parses the departments, and tags +func getDepartmentsAndTags(event RawEvent) ([]string, []string) { + departments := []string{} + tags := []string{} + + rawTags := convert[[]any](event.Event["tags"]) + for _, tag := range rawTags { + tags = append(tags, convert[string](tag)) + } + + rawDeparments := convert[[]any](event.Event["departments"]) + for _, deparment := range rawDeparments { + departments = append(departments, convert[string](convert[map[string]any](deparment)["name"])) } - return "" + + return departments, tags } -// parseTime converts an RFC3339 timestamp string to a time.Time. 
-func parseTime(stringTime string) time.Time { - parsedTime, err := time.Parse(time.RFC3339, stringTime) - if err != nil { - panic(err) +// getContactInfo parses the contact info. +func getContactInfo(event RawEvent) [3]string { + // Note that some events won't have contact phone number + contactInfo := [3]string{} + + rawContactInfo := convert[map[string]any](event.Event["custom_fields"]) + for i, infoField := range []string{ + "contact_information_name", + "contact_information_email", + "contact_information_phone", + } { + contactInfo[i] = convert[string](rawContactInfo[infoField]) + } + + return contactInfo +} + +// convert() attempts to convert data into types for this scraper +func convert[T []any | map[string]any | string](data any) T { + if newTypedData, ok := data.(T); ok { + return newTypedData } - return parsedTime + var zeroValue T + return zeroValue } diff --git a/scrapers/organizations.go b/scrapers/organizations.go deleted file mode 100644 index 46aa833..0000000 --- a/scrapers/organizations.go +++ /dev/null @@ -1,302 +0,0 @@ -/* - This file contains the code for the student organization scraper. 
-*/ - -package scrapers - -import ( - "bufio" - "context" - "encoding/base64" - "encoding/csv" - "encoding/json" - "fmt" - "io" - "log" - "net/url" - "os" - "path/filepath" - "regexp" - "strings" - "time" - - "github.com/UTDNebula/api-tools/utils" - "github.com/UTDNebula/nebula-api/api/schema" - "github.com/chromedp/cdproto/browser" - "github.com/chromedp/cdproto/network" - "github.com/chromedp/chromedp" - "go.mongodb.org/mongo-driver/bson/primitive" -) - -const ( - socBaseUrl = `https://cometmail.sharepoint.com` - socLoginUrl = socBaseUrl + `/sites/StudentOrganizationCenterSP/Lists/Student%20Organization%20Directory/All%20Items%20gallery.aspx` - localPartCharClass = `[:alnum:]!#$%&'*+/=?^_` + "`" + `{|}~-` - subdomainPattern = `([[:alnum:]]([[:alnum:]-]*[[:alnum:]])?\.)+` - topdomainPattern = `[[:alnum:]]([[:alnum:]-]*[[:alnum:]])?` -) - -var ( - baseUrlStruct, _ = url.Parse(socBaseUrl) - localPartPattern = fmt.Sprintf(`[%[1]s]+(\.[%[1]s]+)*`, localPartCharClass) - emailRegex = regexp.MustCompile(fmt.Sprintf(`%s@%s%s`, localPartPattern, subdomainPattern, topdomainPattern)) -) - -// ScrapeOrganizations authenticates with SharePoint and exports the student organization directory CSV. 
-func ScrapeOrganizations(outdir string) { - log.Println("Scraping SOC ...") - ctx, cancel := utils.InitChromeDp() - defer cancel() - - if err := loginToSoc(ctx); err != nil { - panic(err) - } - if err := scrapeData(ctx, outdir); err != nil { - panic(err) - } -} - -func loginToSoc(ctx context.Context) error { - log.Println("Logging into SOC ...") - netID, err := utils.GetEnv("LOGIN_NETID") - if err != nil { - return err - } - password, err := utils.GetEnv("LOGIN_PASSWORD") - if err != nil { - return err - } - - _, err = chromedp.RunResponse(ctx, - network.ClearBrowserCookies(), - chromedp.Navigate(socLoginUrl), - chromedp.SendKeys(`input[type="email"]`, netID+"@utdallas.edu"), - chromedp.Click(`input[type="submit"]`), - chromedp.SendKeys(`input[type="password"]`, password), - // wait for sign in button to load (regular WaitVisible and WaitReady methods do not work) - chromedp.Sleep(1*time.Second), - chromedp.Click(`input[type="submit"]`), - chromedp.Sleep(2*time.Second), - chromedp.Click(`button.auth-button`), - chromedp.WaitReady(`body`), - ) - - return err -} - -func scrapeData(ctx context.Context, outdir string) error { - log.Println("Scraping data ...") - // download file method adapted from https://github.com/chromedp/examples/blob/master/download_file/main.go - timedCtx, cancel := context.WithTimeout(ctx, time.Minute) - defer cancel() - - done := make(chan string, 1) - // listen for download events - chromedp.ListenTarget(timedCtx, func(v interface{}) { - ev, ok := v.(*browser.EventDownloadProgress) - if !ok { - return - } - if ev.State == browser.DownloadProgressStateCompleted { - // stop listening for further download events and send guid - cancel() - done <- ev.GUID - close(done) - } - }) - - tempDir, _ := filepath.Abs(filepath.Join(outdir, "tmp")) - utils.VPrintf("Downloading CSV to %s ...", tempDir) - if err := os.MkdirAll(tempDir, 0755); err != nil { - return err - } - if err := chromedp.Run(ctx, - chromedp.Sleep(1*time.Second), - 
chromedp.Click(`button[name="Export"]`, chromedp.NodeReady), - browser.SetDownloadBehavior(browser.SetDownloadBehaviorBehaviorAllowAndName).WithDownloadPath(tempDir).WithEventsEnabled(true), - chromedp.Sleep(1*time.Second), - chromedp.Click(`button[name="Export to CSV"]`, chromedp.NodeReady), - ); err != nil { - return err - } - - // get GUID of download and reconstruct path - guid := <-done - guidPath := filepath.Join(tempDir, guid) - defer func() { - // remove temp file and directory - os.Remove(guidPath) - }() - - outPath := filepath.Join(outdir, "organizations.json") - - if err := processCsv(ctx, guidPath, outPath); err != nil { - return err - } - - return nil -} - -func processCsv(ctx context.Context, inputPath string, storageFilePath string) error { - // open csv for reading - csvFile, err := os.Open(inputPath) - if err != nil { - return err - } - - // init csv reader - bufReader := bufio.NewReader(csvFile) - // discard headers - if _, _, err := bufReader.ReadLine(); err != nil { - return err - } - csvReader := csv.NewReader(bufReader) - - // write to json - storageFile, err := os.Create(storageFilePath) - if err != nil { - return err - } - encoder := json.NewEncoder(bufio.NewWriter(storageFile)) - encoder.SetIndent("", "\t") - - var orgs []*schema.Organization - // process each row of csv - for i := 1; true; i++ { - entry, err := csvReader.Read() - if err != nil { - if err == io.EOF { - break - } - return err - } - - utils.VPrintf("Processing row %d", i) - org, err := parseCsvRecord(ctx, entry) - if err != nil { - return err - } - - orgs = append(orgs, org) - } - - // Write JSON to file - if err = encoder.Encode(orgs); err != nil { - return err - } - - if err := csvFile.Close(); err != nil { - return err - } - - if err := storageFile.Close(); err != nil { - return err - } - - return nil -} - -func parseCsvRecord(ctx context.Context, entry []string) (*schema.Organization, error) { - // initial cleaning - for i, v := range entry { - v = strings.ReplaceAll(v, 
"\u0026", "") - v = strings.TrimSpace(v) - entry[i] = v - } - - imageData, err := retrieveImage(ctx, entry[5]) - if err != nil { - utils.VPrintf("Error retrieving image for %s: %v", entry[0], err) - } - return &schema.Organization{ - Id: primitive.NewObjectID(), - Title: entry[0], - Categories: parseCategories(entry[1]), - Description: entry[2], - President_name: entry[3], - Emails: parseEmails(entry[4]), - Picture_data: imageData, - }, nil -} - -func parseCategories(cats string) []string { - cats = strings.TrimLeft(cats, "[") - cats = strings.TrimRight(cats, "]") - // strange character appears in csv; need to remove it - cats = strings.ReplaceAll(cats, `"`, "") - // split by comma - catsArray := strings.Split(cats, ",") - // strip whitespace from ends - for j, v := range catsArray { - catsArray[j] = strings.TrimSpace(v) - } - - return catsArray -} - -func parseEmails(emails string) []string { - return emailRegex.FindAllString(emails, -1) -} - -func retrieveImage(ctx context.Context, imageUri string) (string, error) { - if imageUri == "" { - return "", nil - } - - urlStruct, err := url.Parse(imageUri) - if err != nil { - return "", err - } - - requestUrl := baseUrlStruct.ResolveReference(urlStruct).String() - - //log.Printf("loading image %s", requestUrl) - // method adapted from https://github.com/chromedp/examples/blob/master/download_image/main.go - - ctx, cancel := context.WithTimeout(ctx, 10*time.Second) - defer cancel() - - done := make(chan bool) - - // this will be used to capture the request id for matching network events - var requestID network.RequestID - - // listen for network requests and choose desired - chromedp.ListenTarget(ctx, func(v interface{}) { - switch ev := v.(type) { - case *network.EventRequestWillBeSent: - if ev.Request.URL == requestUrl { - requestID = ev.RequestID - } - case *network.EventLoadingFinished: - if ev.RequestID == requestID { - close(done) - } - } - }) - - if err := chromedp.Run(ctx, chromedp.Navigate(requestUrl)); err != 
nil { - log.Printf("Error navigating to %s: %v", requestUrl, err) - return "", err - } - - // wait for image request to finish - <-done - //log.Printf("Done retrieving image from %s", requestUrl) - - var buf []byte - if err := chromedp.Run(ctx, chromedp.ActionFunc(func(ctx context.Context) error { - var err error - buf, err = network.GetResponseBody(requestID).Do(ctx) - if err != nil { - log.Printf("Error getting response body for %s: %v", requestUrl, err) - } - return err - })); err != nil { - return "", err - } - - encoded := base64.StdEncoding.EncodeToString(buf) - // get response body - return encoded, nil -} diff --git a/scrapers/profiles.go b/scrapers/profiles.go index 59ec59d..0844e2f 100644 --- a/scrapers/profiles.go +++ b/scrapers/profiles.go @@ -287,7 +287,7 @@ func ScrapeProfiles(outDir string) { Profile_uri: link, Image_uri: imageUri, Office_hours: []schema.Meeting{}, - Sections: []primitive.ObjectID{}, + Section_keys: []schema.SectionKey{}, }) utils.VPrintf("Scraped profile for %s %s!", firstName, lastName) diff --git a/uploader/testdata/uploader.go/case_basic/courses.json b/uploader/testdata/uploader.go/case_basic/courses.json new file mode 100644 index 0000000..720d623 --- /dev/null +++ b/uploader/testdata/uploader.go/case_basic/courses.json @@ -0,0 +1,25 @@ +[ + { + "_id": "67d07ee0c972c18731e23bee", + "subject_prefix": "BA", + "course_number": "1320", + "title": "Business in a Global World", + "description": "BA 1320 - Business in a Global World (3 semester credit hours) This course provides a primer on the impacts of globalization on business.", + "enrollment_reqs": "BA 1320 Repeat Restriction", + "school": "Naveen Jindal School of Management", + "credit_hours": "3", + "class_level": "Undergraduate", + "activity_type": "Lecture", + "grading": "Graded - Undergraduate", + "internal_course_number": "015444", + "prerequisites": null, + "corequisites": null, + "co_or_pre_requisites": null, + "sections": ["67d07ee0c972c18731e23bef"], + 
"lecture_contact_hours": "3", + "laboratory_contact_hours": "0", + "offering_frequency": "S", + "catalog_year": "24", + "attributes": null + } +] \ No newline at end of file diff --git a/uploader/testdata/uploader.go/case_basic/professors.json b/uploader/testdata/uploader.go/case_basic/professors.json new file mode 100644 index 0000000..79806d9 --- /dev/null +++ b/uploader/testdata/uploader.go/case_basic/professors.json @@ -0,0 +1,15 @@ +[ + { + "_id": "67d07ee0c972c18731e23bf0", + "first_name": "Peter", + "last_name": "Lewin", + "titles": ["Primary Instructor"], + "email": "plewin@utdallas.edu", + "phone_number": "", + "office": {"building": "", "room": "", "map_uri": ""}, + "profile_uri": "", + "image_uri": "", + "office_hours": null, + "sections": ["67d07ee0c972c18731e23bef"] + } +] \ No newline at end of file diff --git a/uploader/testdata/uploader.go/case_basic/sections.json b/uploader/testdata/uploader.go/case_basic/sections.json new file mode 100644 index 0000000..d1b9b43 --- /dev/null +++ b/uploader/testdata/uploader.go/case_basic/sections.json @@ -0,0 +1,32 @@ +[ + { + "_id": "67d07ee0c972c18731e23bef", + "section_number": "501", + "course_reference": "67d07ee0c972c18731e23bee", + "section_corequisites": null, + "academic_session": { + "name": "25S", + "start_date": "2025-01-21T00:00:00-06:00", + "end_date": "2025-05-16T00:00:00-05:00" + }, + "professors": ["67d07ee0c972c18731e23bf0"], + "teaching_assistants": [], + "internal_class_number": "27195", + "instruction_mode": "Face-to-Face", + "meetings": [ + { + "start_date": "2025-01-21T00:00:00-06:00", + "end_date": "2025-05-09T00:00:00-05:00", + "meeting_days": ["Tuesday", "Thursday"], + "start_time": "5:30pm", + "end_time": "6:45pm", + "modality": "", + "location": {"building": "JSOM", "room": "12.218", "map_uri": "https://locator.utdallas.edu/SOM_12.218"} + } + ], + "core_flags": ["080", "090"], + "syllabus_uri": "https://dox.utdallas.edu/syl153033", + "grade_distribution": [], + "attributes": null + } +] 
\ No newline at end of file diff --git a/uploader/testdata/uploader.go/case_edge/courses.json b/uploader/testdata/uploader.go/case_edge/courses.json new file mode 100644 index 0000000..17977d8 --- /dev/null +++ b/uploader/testdata/uploader.go/case_edge/courses.json @@ -0,0 +1,25 @@ +[ + { + "_id": "67d07ee0c972c18731e23bf4", + "subject_prefix": "AERO", + "course_number": "3320", + "title": "- Recitation", + "description": "- ()", + "enrollment_reqs": "", + "school": "Undergraduate Studies", + "credit_hours": "Non-Enroll", + "class_level": "Undergraduate", + "activity_type": "Laboratory - No Lab Fee", + "grading": "Graded - Undergraduate", + "internal_course_number": "000243", + "prerequisites": null, + "corequisites": null, + "co_or_pre_requisites": null, + "sections": ["67d07ee0c972c18731e23bf5"], + "lecture_contact_hours": "", + "laboratory_contact_hours": "", + "offering_frequency": "", + "catalog_year": "24", + "attributes": null + } +] \ No newline at end of file diff --git a/uploader/testdata/uploader.go/case_edge/professors.json b/uploader/testdata/uploader.go/case_edge/professors.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/uploader/testdata/uploader.go/case_edge/professors.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/uploader/testdata/uploader.go/case_edge/sections.json b/uploader/testdata/uploader.go/case_edge/sections.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/uploader/testdata/uploader.go/case_edge/sections.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/uploader/testdata/uploader.go/case_multiple/courses.json b/uploader/testdata/uploader.go/case_multiple/courses.json new file mode 100644 index 0000000..c91310d --- /dev/null +++ b/uploader/testdata/uploader.go/case_multiple/courses.json @@ -0,0 +1,25 @@ +[ + { + "_id": "67d07ee0c972c18731e23be9", + "subject_prefix": "ACCT", + "course_number": "2301", + "title": "Introductory Financial Accounting", + "description": "ACCT 
2301 - Introductory Financial Accounting (3 semester credit hours) An introduction to financial reporting...", + "enrollment_reqs": "ACCT 2301 Repeat Restriction", + "school": "Naveen Jindal School of Management", + "credit_hours": "3", + "class_level": "Undergraduate", + "activity_type": "Lecture", + "grading": "Graded - Undergraduate", + "internal_course_number": "000061", + "prerequisites": null, + "corequisites": null, + "co_or_pre_requisites": null, + "sections": ["67d07ee0c972c18731e23bea", "67d07ee0c972c18731e23bed"], + "lecture_contact_hours": "3", + "laboratory_contact_hours": "0", + "offering_frequency": "S", + "catalog_year": "24", + "attributes": null + } +] \ No newline at end of file diff --git a/uploader/testdata/uploader.go/case_multiple/professors.json b/uploader/testdata/uploader.go/case_multiple/professors.json new file mode 100644 index 0000000..b6f627b --- /dev/null +++ b/uploader/testdata/uploader.go/case_multiple/professors.json @@ -0,0 +1,15 @@ +[ + { + "_id": "67d07ee0c972c18731e23beb", + "first_name": "Naim Bugra", + "last_name": "Ozel", + "titles": ["Primary Instructor (50%)"], + "email": "nbo150030@utdallas.edu", + "phone_number": "", + "office": {"building": "", "room": "", "map_uri": ""}, + "profile_uri": "", + "image_uri": "", + "office_hours": null, + "sections": ["67d07ee0c972c18731e23bea", "67d07ee0c972c18731e23bed"] + } +] \ No newline at end of file diff --git a/uploader/testdata/uploader.go/case_multiple/sections.json b/uploader/testdata/uploader.go/case_multiple/sections.json new file mode 100644 index 0000000..8bd812a --- /dev/null +++ b/uploader/testdata/uploader.go/case_multiple/sections.json @@ -0,0 +1,66 @@ +[ + { + "_id": "67d07ee0c972c18731e23bea", + "section_number": "003", + "course_reference": "67d07ee0c972c18731e23be9", + "section_corequisites": null, + "academic_session": { + "name": "25S", + "start_date": "2025-01-21T00:00:00-06:00", + "end_date": "2025-05-16T00:00:00-05:00" + }, + "professors": 
["67d07ee0c972c18731e23beb"], + "teaching_assistants": [ + {"first_name": "Galymzhan", "last_name": "Tazhibayev", "role": "Teaching Assistant", "email": "gxt230023@utdallas.edu"} + ], + "internal_class_number": "27706", + "instruction_mode": "Face-to-Face", + "meetings": [ + { + "start_date": "2025-01-21T00:00:00-06:00", + "end_date": "2025-05-09T00:00:00-05:00", + "meeting_days": ["Tuesday", "Thursday"], + "start_time": "10:00am", + "end_time": "11:15am", + "modality": "", + "location": {"building": "JSOM", "room": "2.717", "map_uri": "https://locator.utdallas.edu/SOM_2.717"} + } + ], + "core_flags": [], + "syllabus_uri": "https://dox.utdallas.edu/syl152555", + "grade_distribution": [], + "attributes": null + }, + { + "_id": "67d07ee0c972c18731e23bed", + "section_number": "001", + "course_reference": "67d07ee0c972c18731e23be9", + "section_corequisites": null, + "academic_session": { + "name": "25S", + "start_date": "2025-01-21T00:00:00-06:00", + "end_date": "2025-05-16T00:00:00-05:00" + }, + "professors": ["67d07ee0c972c18731e23beb"], + "teaching_assistants": [ + {"first_name": "Galymzhan", "last_name": "Tazhibayev", "role": "Teaching Assistant", "email": "gxt230023@utdallas.edu"} + ], + "internal_class_number": "26595", + "instruction_mode": "Face-to-Face", + "meetings": [ + { + "start_date": "2025-01-21T00:00:00-06:00", + "end_date": "2025-05-09T00:00:00-05:00", + "meeting_days": ["Tuesday", "Thursday"], + "start_time": "8:30am", + "end_time": "9:45am", + "modality": "", + "location": {"building": "JSOM", "room": "2.717", "map_uri": "https://locator.utdallas.edu/SOM_2.717"} + } + ], + "core_flags": [], + "syllabus_uri": "https://dox.utdallas.edu/syl152552", + "grade_distribution": [], + "attributes": null + } +] \ No newline at end of file diff --git a/uploader/testdata/uploader.go/case_relationship/courses.json b/uploader/testdata/uploader.go/case_relationship/courses.json new file mode 100644 index 0000000..5a3cf56 --- /dev/null +++ 
b/uploader/testdata/uploader.go/case_relationship/courses.json @@ -0,0 +1,13 @@ +[ + { + "_id": "67d07ee0c972c18731e23be9", + "subject_prefix": "ACCT", + "course_number": "2301", + "title": "Introductory Financial Accounting", + "school": "Naveen Jindal School of Management", + "credit_hours": "3", + "class_level": "Undergraduate", + "activity_type": "Lecture", + "sections": ["67d07ee0c972c18731e23bea", "67d07ee0c972c18731e23bed"] + } +] \ No newline at end of file diff --git a/uploader/testdata/uploader.go/case_relationship/professors.json b/uploader/testdata/uploader.go/case_relationship/professors.json new file mode 100644 index 0000000..896f8f4 --- /dev/null +++ b/uploader/testdata/uploader.go/case_relationship/professors.json @@ -0,0 +1,18 @@ +[ + { + "_id": "67d07ee0c972c18731e23beb", + "first_name": "Naim Bugra", + "last_name": "Ozel", + "titles": ["Primary Instructor (50%)"], + "email": "nbo150030@utdallas.edu", + "sections": ["67d07ee0c972c18731e23bea", "67d07ee0c972c18731e23bed"] + }, + { + "_id": "67d07ee0c972c18731e23bec", + "first_name": "Jieying", + "last_name": "Zhang", + "titles": ["Primary Instructor (50%)"], + "email": "jxz146230@utdallas.edu", + "sections": ["67d07ee0c972c18731e23bea", "67d07ee0c972c18731e23bed"] + } +] \ No newline at end of file diff --git a/uploader/testdata/uploader.go/case_relationship/sections.json b/uploader/testdata/uploader.go/case_relationship/sections.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/uploader/testdata/uploader.go/case_relationship/sections.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/uploader/testdata/uploader.go/case_sorting/courses.json b/uploader/testdata/uploader.go/case_sorting/courses.json new file mode 100644 index 0000000..f36b1e0 --- /dev/null +++ b/uploader/testdata/uploader.go/case_sorting/courses.json @@ -0,0 +1,38 @@ +[ + { + "_id": "67d07ee0c972c18731e23be9", + "subject_prefix": "ACCT", + "course_number": "2301", + "title": "Introductory Financial 
Accounting", + "school": "Naveen Jindal School of Management", + "credit_hours": "3", + "class_level": "Undergraduate", + "activity_type": "Lecture", + "catalog_year": "24", + "sections": [] + }, + { + "_id": "67d07ee0c972c18731e23bee", + "subject_prefix": "BA", + "course_number": "1320", + "title": "Business in a Global World", + "school": "Naveen Jindal School of Management", + "credit_hours": "3", + "class_level": "Undergraduate", + "activity_type": "Lecture", + "catalog_year": "23", + "sections": [] + }, + { + "_id": "67d07ee0c972c18731e23bf1", + "subject_prefix": "BIOL", + "course_number": "6111", + "title": "Graduate Research Presentation", + "school": "School of Natural Sciences and Mathematics", + "credit_hours": "1", + "class_level": "Graduate", + "activity_type": "Lecture", + "catalog_year": "24", + "sections": [] + } +] \ No newline at end of file diff --git a/uploader/testdata/uploader.go/case_sorting/professors.json b/uploader/testdata/uploader.go/case_sorting/professors.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/uploader/testdata/uploader.go/case_sorting/professors.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/uploader/testdata/uploader.go/case_sorting/sections.json b/uploader/testdata/uploader.go/case_sorting/sections.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/uploader/testdata/uploader.go/case_sorting/sections.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/uploader/uploader.go b/uploader/uploader.go index 1bb4015..49f9f17 100644 --- a/uploader/uploader.go +++ b/uploader/uploader.go @@ -28,10 +28,15 @@ import ( var filesToUpload [3]string = [3]string{"courses.json", "professors.json", "sections.json"} +// Wrapped for testability - can be replaced with mock in unit tests +var connectDBFunc = func() *mongo.Client { + return connectDB() +} + // Upload sends parsed JSON files to MongoDB and refreshes static aggregations. 
func Upload(inDir string, replace bool, staticOnly bool) { //Connect to mongo - client := connectDB() + client := connectDBFunc() // Get 5 minute context ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
diff --git a/uploader/uploader_test.go b/uploader/uploader_test.go
new file mode 100644
index 0000000..c975f60
--- /dev/null
+++ b/uploader/uploader_test.go
@@ -0,0 +1,79 @@
+package uploader
+
+import (
+	"os"
+	"path/filepath"
+	"testing"
+
+	"go.mongodb.org/mongo-driver/mongo"
+)
+
+// TestUpload runs Upload over every fixture directory, in static-only and
+// full-replace mode, with the database connector stubbed out.
+//
+// The stub returns a nil client, so Upload is expected to panic once it
+// touches the database; that panic is recovered and logged. The test itself
+// asserts that each fixture directory exists and that Upload obtains its
+// client through connectDBFunc exactly once per call.
+func TestUpload(t *testing.T) {
+	// Swap in the stub; put the real connector back when the test ends.
+	originalConnectDB := connectDBFunc
+	defer func() { connectDBFunc = originalConnectDB }()
+
+	connectCalls := 0
+	connectDBFunc = func() *mongo.Client {
+		connectCalls++
+		return nil
+	}
+
+	// The fixtures live under testdata/uploader.go/<case>.
+	fixtureRoot := filepath.Join("testdata", "uploader.go")
+
+	fixtures := []struct {
+		label string
+		dir   string
+	}{
+		{label: "Case Basic", dir: "case_basic"},
+		{label: "Case Edge", dir: "case_edge"},
+		{label: "Case Multiple", dir: "case_multiple"},
+		{label: "Case Relationship", dir: "case_relationship"},
+		{label: "Case Sorting", dir: "case_sorting"},
+	}
+
+	modes := []struct {
+		label      string
+		replace    bool
+		staticOnly bool
+	}{
+		{label: "static only mode", replace: false, staticOnly: true},
+		{label: "full upload with replace", replace: true, staticOnly: false},
+	}
+
+	for _, fx := range fixtures {
+		inDir := filepath.Join(fixtureRoot, fx.dir)
+
+		for _, mode := range modes {
+			t.Run(fx.label+": "+mode.label, func(t *testing.T) {
+				// A wrong path must fail loudly rather than be masked by the
+				// recover below.
+				if _, err := os.Stat(inDir); err != nil {
+					t.Fatalf("fixture directory %q is not usable: %v", inDir, err)
+				}
+
+				connectCalls = 0
+
+				// Deferred so the check runs whether Upload returns or panics.
+				defer func() {
+					if r := recover(); r != nil {
+						t.Logf("Expected panic when database operations are attempted: %v", r)
+					}
+					if connectCalls != 1 {
+						t.Errorf("connectDBFunc called %d times, want 1", connectCalls)
+					}
+				}()
+
+				Upload(inDir, mode.replace, mode.staticOnly)
+			})
+		}
+	}
+}