Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ const LanguagesMap = new Map([
lang_subcode: null,
readable_name: 'Brahui',
native_name: 'Brahui',
lang_direction: 'ltr',
lang_direction: 'rtl',
},
],
[
Expand Down Expand Up @@ -1130,7 +1130,7 @@ const LanguagesMap = new Map([
lang_subcode: null,
readable_name: 'Northern Pashto',
native_name: 'Northern Pashto',
lang_direction: 'ltr',
lang_direction: 'rtl',
},
],
[
Expand Down Expand Up @@ -1471,7 +1471,7 @@ const LanguagesMap = new Map([
lang_subcode: null,
readable_name: 'Southern Balochi',
native_name: 'Southern Balochi',
lang_direction: 'ltr',
lang_direction: 'rtl',
},
],
[
Expand Down Expand Up @@ -1626,7 +1626,7 @@ const LanguagesMap = new Map([
lang_subcode: null,
readable_name: 'Uighur; Uyghur',
native_name: 'Uy\u01a3urq\u0259, \u0626\u06c7\u064a\u063a\u06c7\u0631\u0686\u06d5\u200e',
lang_direction: 'ltr',
lang_direction: 'rtl',
},
],
[
Expand Down Expand Up @@ -2540,6 +2540,17 @@ const LanguagesMap = new Map([
lang_direction: 'rtl',
},
],
[
'prs',
{
id: 'prs',
lang_code: 'prs',
lang_subcode: null,
readable_name: 'Dari',
native_name: '\u062f\u0631\u06cc',
lang_direction: 'rtl',
},
],
[
'arq',
{
Expand Down Expand Up @@ -2604,7 +2615,7 @@ const LanguagesMap = new Map([
readable_name: 'Kashmiri',
native_name:
'\u0915\u0936\u094d\u092e\u0940\u0930\u0940, \u0643\u0634\u0645\u064a\u0631\u064a\u200e',
lang_direction: 'ltr',
lang_direction: 'rtl',
},
],
[
Expand Down Expand Up @@ -2740,7 +2751,7 @@ const LanguagesMap = new Map([
readable_name: 'Sindhi',
native_name:
'\u0938\u093f\u0928\u094d\u0927\u0940, \u0633\u0646\u068c\u064a\u060c \u0633\u0646\u062f\u06be\u06cc\u200e',
lang_direction: 'ltr',
lang_direction: 'rtl',
},
],
[
Expand Down Expand Up @@ -2784,7 +2795,7 @@ const LanguagesMap = new Map([
lang_subcode: null,
readable_name: 'Punjabi',
native_name: '\u0a2a\u0a70\u0a1c\u0a3e\u0a2c\u0a40',
lang_direction: 'ltr',
lang_direction: 'rtl',
},
],
[
Expand Down Expand Up @@ -3390,6 +3401,7 @@ export const LanguagesNames = {
HE: 'he',
UR: 'ur',
AR: 'ar',
PRS: 'prs',
ARQ: 'arq',
FA: 'fa',
PS: 'ps',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
const MasteryModels = new Set([
'do_all',
'm_of_n',
'pre_post_test',
'num_correct_in_a_row_2',
'num_correct_in_a_row_3',
'num_correct_in_a_row_5',
Expand All @@ -15,6 +16,7 @@ export const MasteryModelsList = Array.from(MasteryModels);
export const MasteryModelsNames = {
DO_ALL: 'do_all',
M_OF_N: 'm_of_n',
PRE_POST_TEST: 'pre_post_test',
NUM_CORRECT_IN_A_ROW_2: 'num_correct_in_a_row_2',
NUM_CORRECT_IN_A_ROW_3: 'num_correct_in_a_row_3',
NUM_CORRECT_IN_A_ROW_5: 'num_correct_in_a_row_5',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,13 @@
},
methods: {
languageText(item) {
// VAutocomplete eagerly evaluates getText(internalValue) as a fallback arg to
// getValue, even when that fallback isn't needed. In multiple mode, internalValue
// is an Array, so languageText receives the array directly. Return early to avoid
// calling .split() on undefined.
if (Array.isArray(item)) {
return '';
}
const firstNativeName = item.native_name.split(',')[0].trim();
return this.$tr('languageItemText', { language: firstNativeName, code: item.id });
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,4 +81,15 @@ describe('languageDropdown', () => {
const item = { native_name: '', id: 'de' };
expect(wrapper.vm.languageText(item)).toBe(' (de)');
});

it('returns empty string when called with an array (multiple mode VAutocomplete internal call)', () => {
const wrapper = shallowMount(LanguageDropdown, {
mocks: {
$tr: (key, params) => `${params.language} (${params.code})`,
},
});
// VAutocomplete eagerly evaluates getText(internalValue) as a fallback to getValue.
// In multiple mode, internalValue is an Array, so languageText receives the array.
expect(wrapper.vm.languageText(['en', 'fr'])).toBe('');
});
});
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import csv
import io
import logging
import time
import uuid
from pathlib import Path
from typing import Optional
from typing import Tuple

from django.core.management.base import BaseCommand
from django.db.models import Exists
Expand All @@ -9,20 +14,101 @@
from django.db.models import Q
from django.db.models.expressions import F
from django_cte import With
from le_utils.constants import content_kinds

from contentcuration.models import Channel
from contentcuration.models import ContentNode
from contentcuration.models import License


logger = logging.getLogger(__name__)


class LicensingFixesLookup(object):
    """
    Consolidates logic for reading and processing the licensing fixes from the CSV.

    Rows are indexed by ``"<channel id hex>:<kind>"``; a row with an empty kind acts as the
    channel-wide fallback for that channel. Call :meth:`load` before :meth:`get_info`.
    """

    def __init__(self):
        # "<channel id hex>:<kind>" -> CSV row dict (augmented with "license_id" by load())
        self._lookup = {}
        # License primary key -> License instance
        self._license_lookup = {}

    def load(self, fp: io.TextIOWrapper):
        """
        Loads the data from the CSV file, and the necessary license data from the database.

        :param fp: An open text file handle for the licensing fixes CSV
        :raises ValueError: If the CSV names a license that does not exist in the database
        """
        reader = csv.DictReader(fp)
        license_names = set()

        # create a lookup index by channel ID from the CSV data
        for row in reader:
            lookup_key = f"{uuid.UUID(row['channel_id']).hex}:{row.get('kind', '')}"
            self._lookup[lookup_key] = row
            if row["license_name"]:
                license_names.add(row["license_name"])

        # load all licenses, regardless of whether they are named in the CSV
        license_lookup_by_name = {}
        for lic in License.objects.all():
            self._license_lookup[lic.id] = lic
            license_lookup_by_name[lic.license_name] = lic
            license_names.discard(lic.license_name)

        # ensure we've found all the licenses
        if license_names:
            raise ValueError(f"Could not find all licenses: {license_names}")

        # we now are certain all licenses are found; rows that do not name a license get an
        # explicit None so that every row carries a "license_id" key
        for info in self._lookup.values():
            license_name = info["license_name"]
            info["license_id"] = (
                license_lookup_by_name[license_name].id if license_name else None
            )

    def get_info(
        self,
        channel_id: str,
        kind: str,
        license_id: Optional[int],
        license_description: Optional[str],
        copyright_holder: Optional[str],
    ) -> Tuple[Optional[int], Optional[str], Optional[str]]:
        """
        Determines the complete licensing metadata, given the current metadata, and comparing it
        with what would make the node complete.

        :param channel_id: The channel the node was sourced from
        :param kind: The content kind of the node
        :param license_id: The current license_id of the node
        :param license_description: The current license_description of the node
        :param copyright_holder: The current copyright_holder of the node
        :return: A tuple of (license_id, license_description, copyright_holder) to use on the node
        """
        # first check kind-specific metadata, fallback to channel-wide (no kind)
        info = self._lookup.get(f"{channel_id}:{kind}", None)
        if info is None:
            info = self._lookup.get(f"{channel_id}:", None)

        if info is None:
            logger.warning(f"Failed to find licensing info for channel: {channel_id}")
            return license_id, license_description, copyright_holder

        if not license_id:
            # a row without a license name has no license to offer
            license_id = info.get("license_id")

        if not license_id:
            return None, license_description, copyright_holder

        license_obj = self._license_lookup.get(license_id)
        if license_obj is None:
            # without the license record we cannot tell which fields it requires, so leave
            # the description and copyright holder untouched
            logger.warning(f"Failed to find license: {license_id}")
            return license_id, license_description, copyright_holder

        if license_obj.is_custom and not license_description:
            license_description = info["license_description"]

        if license_obj.copyright_holder_required and not copyright_holder:
            copyright_holder = info["copyright_holder"]

        return license_id, license_description, copyright_holder


class Command(BaseCommand):
"""
Audits nodes that have imported content from public channels and whether the imported content
has a missing source node.

TODO: this does not yet FIX them
has a missing source node. We've determined that pretty much all of these have incomplete
licensing data
"""

def handle(self, *args, **options):
Expand Down Expand Up @@ -71,32 +157,27 @@ def handle(self, *args, **options):

logger.info("=== Iterating over private destination channels. ===")
channel_count = 0
total_node_count = 0

with open("fix_missing_import_sources.csv", "w", newline="") as csv_file:
csv_writer = csv.DictWriter(
csv_file,
fieldnames=[
"channel_id",
"channel_name",
"contentnode_id",
"contentnode_title",
"public_channel_id",
"public_channel_name",
"public_channel_deleted",
],
)
csv_writer.writeheader()
total_fixed = 0
lookup = LicensingFixesLookup()

command_dir = Path(__file__).parent
csv_path = command_dir / "licensing_fixes_lookup.csv"

with csv_path.open("r", encoding="utf-8", newline="") as csv_file:
lookup.load(csv_file)

for channel in destination_channels.iterator():
node_count = self.handle_channel(csv_writer, channel)
# skip using an iterator here, to limit transaction duration to `handle_channel`
for channel in destination_channels:
node_count = self.handle_channel(lookup, channel)

if node_count > 0:
total_node_count += node_count
channel_count += 1
if node_count > 0:
total_fixed += node_count
channel_count += 1

logger.info("=== Done iterating over private destination channels. ===")
logger.info(f"Found {total_node_count} nodes across {channel_count} channels.")
logger.info(
f"Fixed incomplete licensing data on {total_fixed} nodes across {channel_count} channels."
)
logger.info(f"Finished in {time.time() - start}")

def get_public_cte(self) -> With:
Expand All @@ -110,7 +191,15 @@ def get_public_cte(self) -> With:
name="public_cte",
)

def handle_channel(self, csv_writer: csv.DictWriter, channel: dict) -> int:
def handle_channel(self, lookup: LicensingFixesLookup, channel: dict) -> int:
"""
Goes through the nodes of the channel, that were imported from public channels, but no
longer have a valid source node. For each node, it applies license metadata as necessary

:param lookup: The lookup utility to pull licensing data from
:param channel: The channel to fix
:return: The total node count that are now marked complete as a result of the fixes
"""
public_cte = self.get_public_cte()
channel_id = channel["id"]
channel_name = channel["name"]
Expand All @@ -127,6 +216,7 @@ def handle_channel(self, csv_writer: csv.DictWriter, channel: dict) -> int:
public_channel_name=public_cte.col.name,
public_channel_deleted=public_cte.col.deleted,
)
.exclude(kind=content_kinds.TOPIC)
.filter(
Q(public_channel_deleted=True)
| ~Exists(
Expand All @@ -136,29 +226,51 @@ def handle_channel(self, csv_writer: csv.DictWriter, channel: dict) -> int:
)
)
)
.values(
"public_channel_id",
"public_channel_name",
"public_channel_deleted",
contentnode_id=F("id"),
contentnode_title=F("title"),
)
)

# Count and log results
node_count = missing_source_nodes.count()
processed = 0
was_complete = 0
unfixed = 0
now_complete = 0

# TODO: this will be replaced with logic to correct the missing source nodes
if node_count > 0:
def _log():
logger.info(
f"{channel_id}:{channel_name}\t{node_count} node(s) with missing source nodes."
f"Fixing {channel_id}:{channel_name}\ttotal: {node_count}; before: {was_complete} unfixed: {unfixed}; after: {now_complete};"
)
row_dict = {
"channel_id": channel_id,
"channel_name": channel_name,
}
for node_dict in missing_source_nodes.iterator():
row_dict.update(node_dict)
csv_writer.writerow(row_dict)

return node_count

if node_count > 0:
for node in missing_source_nodes.iterator():
# determine the new license metadata
license_id, license_description, copyright_holder = lookup.get_info(
node.original_channel_id,
node.kind_id,
node.license_id,
node.license_description,
node.copyright_holder,
)

# if there isn't a license, there's nothing to do
if not license_id:
unfixed += 1
# cannot fix
continue

if node.complete:
was_complete += 1

# apply updates
node.license_id = license_id
node.license_description = license_description
node.copyright_holder = copyright_holder
if not node.mark_complete():
now_complete += 1
node.save()
processed += 1
if processed % 100 == 0:
_log()

_log()

return now_complete - was_complete
Loading
Loading