releasenotes: add similarity check function to compare incoming notes
author Rishabh Madan <rishabhmadan96@gmail.com>
Sat, 05 Aug 2017 05:25:36 +0530
changeset 33698 3748098d072a
parent 33697 4d1e79945c2e
child 33699 50c44dee741a
releasenotes: add similarity check function to compare incoming notes. It is possible that incoming note fragments have content similar to the existing release notes. In the case of a bug fix, we look for a matching issueNNNN in the existing notes. For other, general cases, the fuzzywuzzy library is used to compute a similarity score. If the score is above a certain threshold, the fragment is ignored; otherwise it is added. Because the score can be misleading for short commit messages, the similarity function is used only if the length of the string (in words) is above a certain value. The patch adds tests covering this usage. Combining incoming notes still needs improvement: we could use an interactive mode for adding notes, perhaps when the similarity falls within a certain range.
hgext/releasenotes.py
tests/test-releasenotes-formatting.t
tests/test-releasenotes-merging.t
tests/test-releasenotes-parsing.t
--- a/hgext/releasenotes.py	Sat Jul 29 14:06:26 2017 +0530
+++ b/hgext/releasenotes.py	Sat Aug 05 05:25:36 2017 +0530
@@ -46,6 +46,7 @@
 ]
 
 RE_DIRECTIVE = re.compile('^\.\. ([a-zA-Z0-9_]+)::\s*([^$]+)?$')
+RE_ISSUE = r'\bissue ?[0-9]{4,6}(?![0-9])\b'
 
 BULLET_SECTION = _('Other Changes')
 
@@ -92,6 +93,8 @@
         This is used to combine multiple sources of release notes together.
         """
         for section in other:
+            existingnotes = converttitled(self.titledforsection(section)) + \
+                convertnontitled(self.nontitledforsection(section))
             for title, paragraphs in other.titledforsection(section):
                 if self.hastitledinsection(section, title):
                     # TODO prompt for resolution if different and running in
@@ -100,16 +103,32 @@
                              (title, section))
                     continue
 
-                # TODO perform similarity comparison and try to match against
-                # existing.
+                incoming_str = converttitled([(title, paragraphs)])[0]
+                if section == 'fix':
+                    issue = getissuenum(incoming_str)
+                    if issue:
+                        if findissue(ui, existingnotes, issue):
+                            continue
+
+                if similar(ui, existingnotes, incoming_str):
+                    continue
+
                 self.addtitleditem(section, title, paragraphs)
 
             for paragraphs in other.nontitledforsection(section):
                 if paragraphs in self.nontitledforsection(section):
                     continue
 
-                # TODO perform similarily comparison and try to match against
-                # existing.
+                incoming_str = convertnontitled([paragraphs])[0]
+                if section == 'fix':
+                    issue = getissuenum(incoming_str)
+                    if issue:
+                        if findissue(ui, existingnotes, issue):
+                            continue
+
+                if similar(ui, existingnotes, incoming_str):
+                    continue
+
                 self.addnontitleditem(section, paragraphs)
 
 class releasenotessections(object):
@@ -136,6 +155,77 @@
 
         return None
 
+def converttitled(titledparagraphs):
+    """
+    Flatten titled notes into one string per note.
+
+    ``titledparagraphs`` is iterated as (title, paragraphs) pairs. The title
+    is not part of the result; every line of every paragraph is joined with
+    single spaces, so paragraph breaks are not preserved.
+    """
+    return [' '.join(line for para in paragraphs for line in para)
+            for _title, paragraphs in titledparagraphs]
+
+def convertnontitled(nontitledparagraphs):
+    """
+    Flatten non-titled bullets into one string per bullet.
+
+    Each bullet is iterated as a sequence of paragraphs and each paragraph
+    as a sequence of lines; all lines of a bullet are joined with single
+    spaces.
+    """
+    flattened = []
+    for bullet in nontitledparagraphs:
+        words = [line for para in bullet for line in para]
+        flattened.append(' '.join(words))
+    return flattened
+
+def getissuenum(incoming_str):
+    """
+    Return the first issue reference found in ``incoming_str``.
+
+    The text is searched case-insensitively with RE_ISSUE. The matched text
+    is returned exactly as it appears in the note, or None when there is no
+    match.
+    """
+    found = re.search(RE_ISSUE, incoming_str, re.IGNORECASE)
+    return found.group() if found else None
+
+def findissue(ui, existing, issue):
+    """
+    Return True if ``issue`` is already referenced in the existing notes.
+
+    ``issue`` is an issue reference such as "issue4567". Only its number is
+    compared, case-insensitively and allowing the optional space after
+    "issue", so "Issue 4567" matches "issue4567" while "issue1234" does not
+    match "issue12345". A message is written to ``ui`` when a match is found.
+    """
+    digits = re.search(r'[0-9]+', issue)
+    if digits:
+        # A plain substring test would treat issue1234 as a hit on
+        # issue12345 and would miss differently-cased or spaced references.
+        pattern = re.compile(r'\bissue ?%s(?![0-9])' % digits.group(),
+                             re.IGNORECASE)
+        found = any(pattern.search(s) for s in existing)
+    else:
+        # no number to compare; keep the literal containment check
+        found = any(issue in s for s in existing)
+    if found:
+        ui.write(_('"%s" already exists in notes; ignoring\n') % issue)
+    return found
+
+def similar(ui, existing, incoming_str):
+    """
+    Return True if ``incoming_str`` resembles a note in ``existing``.
+
+    Notes of ten words or fewer are never compared and always yield False.
+    Longer notes are passed to similaritycheck(); when it reports that the
+    note cannot be merged, a message is written to ``ui`` and True is
+    returned.
+    """
+    if len(incoming_str.split()) <= 10:
+        return False
+    if similaritycheck(incoming_str, existing):
+        return False
+    ui.write(_('"%s" already exists in notes file; ignoring\n')
+             % incoming_str)
+    return True
+
+def similaritycheck(incoming_str, existingnotes):
+    """
+    Returns true when note fragment can be merged to existing notes.
+
+    A fragment can be merged when its fuzzywuzzy ``token_set_ratio`` score
+    against every existing note is 75 or lower. fuzzywuzzy is a third-party
+    package; when it cannot be imported no comparison is possible and the
+    fragment is reported as mergeable instead of aborting the command.
+    """
+    try:
+        import fuzzywuzzy.fuzz as fuzz
+        # NOTE(review): Mercurial may defer imports (demandimport), so the
+        # attribute is touched here to surface ImportError inside the try.
+        scorer = fuzz.token_set_ratio
+    except ImportError:
+        # similarity detection is best effort without the optional library
+        return True
+    return not any(scorer(incoming_str, bullet) > 75
+                   for bullet in existingnotes)
+
 def getcustomadmonitions(repo):
     ctx = repo['.']
     p = config.config()
--- a/tests/test-releasenotes-formatting.t	Sat Jul 29 14:06:26 2017 +0530
+++ b/tests/test-releasenotes-formatting.t	Sat Aug 05 05:25:36 2017 +0530
@@ -1,3 +1,5 @@
+#require fuzzywuzzy
+
   $ cat >> $HGRCPATH << EOF
   > [extensions]
   > releasenotes=
--- a/tests/test-releasenotes-merging.t	Sat Jul 29 14:06:26 2017 +0530
+++ b/tests/test-releasenotes-merging.t	Sat Aug 05 05:25:36 2017 +0530
@@ -1,3 +1,5 @@
+#require fuzzywuzzy
+
   $ cat >> $HGRCPATH << EOF
   > [extensions]
   > releasenotes=
@@ -158,3 +160,122 @@
   
   * this is fix3.
 
+  $ cd ..
+
+Ignores commit messages containing issueNNNN based on issue number.
+
+  $ hg init simple-fuzzrepo
+  $ cd simple-fuzzrepo
+  $ touch fix1
+  $ hg -q commit -A -l - << EOF
+  > commit 1
+  > 
+  > .. fix::
+  > 
+  >    Resolved issue4567.
+  > EOF
+
+  $ cat >> $TESTTMP/issue-number-notes << EOF
+  > Bug Fixes
+  > =========
+  > 
+  > * Fixed issue1234 related to XYZ.
+  > 
+  > * Fixed issue4567 related to ABC.
+  > 
+  > * Fixed issue3986 related to PQR.
+  > EOF
+
+  $ hg releasenotes -r . $TESTTMP/issue-number-notes
+  "issue4567" already exists in notes; ignoring
+
+  $ cat $TESTTMP/issue-number-notes
+  Bug Fixes
+  =========
+  
+  * Fixed issue1234 related to XYZ.
+  
+  * Fixed issue4567 related to ABC.
+  
+  * Fixed issue3986 related to PQR.
+
+  $ cd ..
+
+Adds short commit messages (10 words or fewer) without fuzzy
+comparison unless there is an exact match.
+
+  $ hg init tempdir
+  $ cd tempdir
+  $ touch feature1
+  $ hg -q commit -A -l - << EOF
+  > commit 1
+  > 
+  > .. feature::
+  > 
+  >    Adds a new feature 1.
+  > EOF
+
+  $ hg releasenotes -r . $TESTTMP/short-sentence-notes
+
+  $ touch feature2
+  $ hg -q commit -A -l - << EOF
+  > commit 2
+  > 
+  > .. feature::
+  > 
+  >    Adds a new feature 2.
+  > EOF
+
+  $ hg releasenotes -r . $TESTTMP/short-sentence-notes
+  $ cat $TESTTMP/short-sentence-notes
+  New Features
+  ============
+  
+  * Adds a new feature 1.
+  
+  * Adds a new feature 2.
+
+  $ cd ..
+
+Ignores commit messages based on fuzzy comparison.
+
+  $ hg init fuzznotes
+  $ cd fuzznotes
+  $ touch fix1
+  $ hg -q commit -A -l - << EOF
+  > commit 1
+  > 
+  > .. fix::
+  > 
+  >    This is a fix with another line.
+  >    And it is a big one.
+  > EOF
+
+  $ cat >> $TESTTMP/fuzz-ignore-notes << EOF
+  > Bug Fixes
+  > =========
+  > 
+  > * Fixed issue4567 by improving X.
+  > 
+  > * This is the first line. This is next line with one newline.
+  > 
+  >   This is another line written after two newlines. This is going to be a big one.
+  > 
+  > * This fixes another problem.
+  > EOF
+
+  $ hg releasenotes -r . $TESTTMP/fuzz-ignore-notes
+  "This is a fix with another line. And it is a big one." already exists in notes file; ignoring
+
+  $ cat $TESTTMP/fuzz-ignore-notes
+  Bug Fixes
+  =========
+  
+  * Fixed issue4567 by improving X.
+  
+  * This is the first line. This is next line with one newline.
+  
+    This is another line written after two newlines. This is going to be a big
+    one.
+  
+  * This fixes another problem.
--- a/tests/test-releasenotes-parsing.t	Sat Jul 29 14:06:26 2017 +0530
+++ b/tests/test-releasenotes-parsing.t	Sat Aug 05 05:25:36 2017 +0530
@@ -1,3 +1,5 @@
+#require fuzzywuzzy
+
   $ cat >> $HGRCPATH << EOF
   > [extensions]
   > releasenotes=