From df982fac17d0c6bc1fa7bde44bb6554157e7ebc7 Mon Sep 17 00:00:00 2001 From: Gisle Aune Date: Thu, 20 Aug 2020 20:58:16 +0200 Subject: [PATCH] add IrcCloud log importer. --- graph2/resolvers/log.go | 7 +- graph2/schema/types/Log.gql | 8 ++ models/log-importer.go | 4 +- services/logs.go | 40 +++++++- services/parsers/irccloud.go | 161 ++++++++++++++++++++++++++++++ services/parsers/irccloud_test.go | 154 ++++++++++++++++++++++++++++ services/parsers/mirclike.go | 4 +- services/parsers/mirclike_test.go | 4 + 8 files changed, 377 insertions(+), 5 deletions(-) create mode 100644 services/parsers/irccloud.go create mode 100644 services/parsers/irccloud_test.go diff --git a/graph2/resolvers/log.go b/graph2/resolvers/log.go index 7c996a2..8ece5e2 100644 --- a/graph2/resolvers/log.go +++ b/graph2/resolvers/log.go @@ -55,7 +55,12 @@ func (r *mutationResolver) ImportLog(ctx context.Context, input graphcore.LogImp tz = parsedTZ } - return r.s.Logs.Import(ctx, input.Importer, date, tz, input.ChannelName, input.Data) + sessionThreshold := time.Hour * 12 + if input.SessionThresholdMs != nil { + sessionThreshold = time.Duration(*input.SessionThresholdMs) * time.Millisecond + } + + return r.s.Logs.Import(ctx, input.Importer, date, tz, input.ChannelName, sessionThreshold, input.Data) } func (r *mutationResolver) SplitLog(ctx context.Context, input graphcore.LogSplitInput) (*models.Log, error) { diff --git a/graph2/schema/types/Log.gql b/graph2/schema/types/Log.gql index 5aa125b..52184a0 100644 --- a/graph2/schema/types/Log.gql +++ b/graph2/schema/types/Log.gql @@ -132,6 +132,9 @@ input LogImportInput { "The date of the log, if not provided in the log body." date: Time + "Session threshold in milliseconds: the IrcCloud importer starts a new log when two consecutive posts are at least this far apart. Defaults to 12 hours when omitted." + sessionThresholdMs: Int + "The log body itself." data: String! } @@ -171,4 +174,9 @@ enum LogImporter { Forum log: This importer parses the format on the forum. The displayed log, not the post source. """ ForumLog + + """ + IRCCloud: This importer parses irccloud exports. 
+ """ + IrcCloud } \ No newline at end of file diff --git a/models/log-importer.go b/models/log-importer.go index 199407d..f1348af 100644 --- a/models/log-importer.go +++ b/models/log-importer.go @@ -14,12 +14,14 @@ const ( LogImporterMircLike LogImporter = "MircLike" // LogImporterForumLog is a value of LogImporter LogImporterForumLog LogImporter = "ForumLog" + // LogImporterIrcCloud is a value of LogImporter + LogImporterIrcCloud LogImporter = "IrcCloud" ) // IsValid returns true if the underlying string is one of the correct values. func (e LogImporter) IsValid() bool { switch e { - case LogImporterForumLog, LogImporterMircLike: + case LogImporterForumLog, LogImporterMircLike, LogImporterIrcCloud: return true } return false diff --git a/services/logs.go b/services/logs.go index 157f1fb..d9e234b 100644 --- a/services/logs.go +++ b/services/logs.go @@ -112,7 +112,7 @@ func (s *LogService) Create(ctx context.Context, title, description, channelName } // Import creates new logs from common formats. -func (s *LogService) Import(ctx context.Context, importer models.LogImporter, date time.Time, tz *time.Location, channelName string, data string) ([]*models.Log, error) { +func (s *LogService) Import(ctx context.Context, importer models.LogImporter, date time.Time, tz *time.Location, channelName string, sessionThreshold time.Duration, data string) ([]*models.Log, error) { if err := s.authService.CheckPermission(ctx, "add", &models.Log{}); err != nil { return nil, err } @@ -174,13 +174,48 @@ func (s *LogService) Import(ctx context.Context, importer models.LogImporter, da } for _, parsed := range parseResults { + parsed.Log.EventName = eventName + parsed.Log.ChannelName = channelName + log, err := s.logs.Insert(ctx, parsed.Log) if err != nil { return nil, err } + + for _, post := range parsed.Posts { + post.LogID = log.ShortID + } + + posts, err := s.posts.InsertMany(ctx, parsed.Posts...) 
+ if err != nil { + _ = s.logs.Delete(ctx, *log) + + return nil, err + } + + s.changeService.Submit(ctx, models.ChangeModelLog, "add", true, changekeys.Listed(log), log) + s.changeService.Submit(ctx, models.ChangeModelPost, "add", true, changekeys.Listed(log, posts), log, posts) + + results = append(results, log) + } + + } + case models.LogImporterIrcCloud: + { + parseResults, err := parsers.IRCCloudLogs(data, tz, sessionThreshold) + if err != nil { + return nil, err + } + + for _, parsed := range parseResults { parsed.Log.EventName = eventName parsed.Log.ChannelName = channelName + log, err := s.logs.Insert(ctx, parsed.Log) + if err != nil { + return nil, err + } + for _, post := range parsed.Posts { post.LogID = log.ShortID } @@ -194,8 +229,11 @@ func (s *LogService) Import(ctx context.Context, importer models.LogImporter, da s.changeService.Submit(ctx, models.ChangeModelLog, "add", true, changekeys.Listed(log), log) s.changeService.Submit(ctx, models.ChangeModelPost, "add", true, changekeys.Listed(log, posts), log, posts) + + results = append(results, log) } } + default: { return nil, errors.New("Invalid importer: " + importer.String()) diff --git a/services/parsers/irccloud.go b/services/parsers/irccloud.go new file mode 100644 index 0000000..21d343a --- /dev/null +++ b/services/parsers/irccloud.go @@ -0,0 +1,161 @@ +package parsers + +import ( + "errors" + "fmt" + "git.aiterp.net/rpdata/api/models" + "strings" + "time" +) + +var ErrSkip = errors.New("parsers: skip this post") + +func IRCCloudLogs(data string, location *time.Location, threshold time.Duration) ([]ParsedLog, error) { + lines := strings.Split(data, "\n") + pos := 0 + results := make([]ParsedLog, 0, 8) + for pos < len(lines) { + log, n, err := IRCCloudLog(lines[pos:], location, threshold) + if err != nil { + if err == ErrEmptyLog { + pos += n + continue + } + + return nil, err + } + + pos += n + results = append(results, *log) + } + + return results, nil +} + +// IRCCloudLog parses the log and 
returns the things that can be gleaned from them. +func IRCCloudLog(lines []string, location *time.Location, threshold time.Duration) (*ParsedLog, int, error) { + posts := make([]*models.Post, 0, len(lines)) + prev := (*models.Post)(nil) + amount := 0 + + for _, line := range lines { + line = strings.Trim(line, "\r\t  ") + if len(line) < 1 { + amount += 1 + continue + } + + post, err := IRCCloudPost(line, location) + if err == ErrSkip { + amount += 1 + continue + } else if err != nil { + return nil, -1, err + } + + if prev != nil { + if post.Time.Sub(prev.Time) >= threshold { + break + } + + post.Position = prev.Position + 1 + } else { + post.Position = 1 + } + + posts = append(posts, &post) + prev = &post + amount += 1 + } + + if len(posts) == 0 { + return nil, amount, ErrEmptyLog + } + + log := models.Log{ + Date: posts[0].Time, + } + + return &ParsedLog{log, posts}, amount, nil +} + +// IRCCloudPost parses a single line of an IRCCloud export into a post; the timestamp is read in the tz location. +// It returns ErrSkip for lines that hold neither an action nor a message, and a *ParseError for malformed lines. 
+func IRCCloudPost(line string, tz *time.Location) (models.Post, error) { + // Do basic validation + line = strings.Trim(line, "  \t\n\r") + if len(line) == 0 || !strings.HasPrefix(line, "[") { + return models.Post{}, &ParseError{ + Line: line, + Problem: "no timestamp", + } + } + + // Parse timestamp + tsEndIndex := strings.IndexByte(line, ']') + if tsEndIndex == -1 || len(line) < tsEndIndex+5 { + return models.Post{}, &ParseError{ + Line: line, + Problem: "incomplete timestamp", + } + } + tsStr := line[1:tsEndIndex] + + ts, err := time.ParseInLocation("2006-01-02 15:04:05", tsStr, tz) + if err != nil { + return models.Post{}, &ParseError{ + Line: line, + Problem: fmt.Sprintf("Could not parse date: %s", err.Error()), + } + } + + if strings.HasPrefix(line[tsEndIndex+2:], "—") { + split := strings.SplitN(line[tsEndIndex+6:], " ", 2) + if len(split) == 1 { + return models.Post{}, &ParseError{ + Line: line, + Problem: "post is empty", + } + } + + post := models.Post{ + ID: "UNASSIGNED", + LogID: "UNASSIGNED", + Time: ts, + Kind: "action", + Nick: strings.Trim(strings.TrimLeft(split[0], "+@!~"), "\u001F"), + Text: split[1], + } + + if strings.HasPrefix(post.Nick, "=") { + post.Kind = "scene" + } + + return post, nil + } else if line[tsEndIndex+2] == '<' { + split := strings.SplitN(line[tsEndIndex+2:], " ", 2) + if len(split) == 1 { + return models.Post{}, &ParseError{ + Line: line, + Problem: "post is empty", + } + } + + post := models.Post{ + ID: "UNASSIGNED", + LogID: "UNASSIGNED", + Time: ts, + Kind: "text", + Nick: strings.Trim(strings.TrimLeft(split[0][1:len(split[0])-1], "+@!~"), "\u001F"), + Text: split[1], + } + + if strings.HasPrefix(post.Nick, "=") { + post.Kind = "scene" + } + + return post, nil + } else { + return models.Post{}, ErrSkip + } +} diff --git a/services/parsers/irccloud_test.go b/services/parsers/irccloud_test.go new file mode 100644 index 0000000..3b64933 --- /dev/null +++ b/services/parsers/irccloud_test.go @@ -0,0 +1,154 @@ +package parsers + 
+import ( + "fmt" + "github.com/stretchr/testify/assert" + "testing" + "time" +) + +func TestIRCCloudPost(t *testing.T) { + table := []struct { + Input string + TS string + Kind string + Nick string + Text string + Skipped bool + }{ + { + "[2016-07-18 12:57:17] — \u001FCharacter_Name\u001F does things.", + "2016-07-18 12:57:17", "action", "Character_Name", "does things.", + false, + }, + { + "[2016-07-18 13:04:06] — Character keeps their mouth shut again for the time beings.", + "2016-07-18 13:04:06", "action", "Character", "keeps their mouth shut again for the time beings.", + false, + }, + { + "[2016-07-18 12:34:42] \"I can keep it professional.\"", + "2016-07-18 12:34:42", "text", "Victoria_Steels", "\"I can keep it professional.\"", + false, + }, + { + "[2016-07-18 10:41:10] * UserZzz → User", + "", "", "", "", + true, + }, + { + "[2016-07-18 12:53:55] → Someone joined", + "", "", "", "", + true, + }, + { + "[2016-07-18 22:14:15] ⇐ SomeoneElse quit (s@example.com): Ping timeout: 547 seconds", + "", "", "", "", + true, + }, + } + + for i, row := range table { + t.Run(fmt.Sprintf("Row_%d", i), func(t *testing.T) { + post, err := IRCCloudPost(row.Input, time.UTC) + if err != nil && err != ErrSkip { + t.Fatal("Could not parse post:", err) + } + + if row.Skipped { + if err == ErrSkip { + return + } + + t.Fatal("Row should be skipped") + } + if !row.Skipped && err == ErrSkip { + t.Fatal("Row should not be skipped, but is") + } + + assert.Equal(t, row.TS, post.Time.Format("2006-01-02 15:04:05"), "Timestamps should match.") + assert.Equal(t, row.Kind, post.Kind, "Kinds should match.") + assert.Equal(t, row.Nick, post.Nick, "Kinds should match.") + assert.Equal(t, row.Text, post.Text, "Kinds should match.") + }) + } +} + +func TestIRCCloudLog(t *testing.T) { + type logLine struct { + TS string + Kind string + Nick string + Text string + } + + logs := [][]logLine{ + { + {"2011-09-23 15:37:16", "action", "Jason_Wolfe", "sheds a bit of the tension he was harboring as 
Markus extends his hand."}, + {"2011-09-23 15:41:15", "action", "Markus_Vasquez", "gives another nod, but there's no smile from him."}, + {"2011-09-23 15:47:22", "action", "Jason_Wolfe", "fights the temptation to shove his hand back in his pocket as he notices Markus."}, + {"2011-09-23 15:47:22", "action", "Jason_Wolfe", "warmer to wear."}, + {"2011-09-23 15:47:32", "text", "Dante", "((you're*))"}, + {"2011-09-23 15:48:45", "action", "Markus_Vasquez", "grunts as they walk toward the door, shaking his head slightly."}, + }, + { + {"2011-09-25 21:03:55", "scene", "=Scene=", "It's early evening on the 11th of November, and the weather is clear."}, + {"2011-09-25 21:03:55", "scene", "=Scene=", "café on the eastern edge of town."}, + {"2011-09-25 21:08:33", "action", "Sofia_Tennhausen", "is sat on a small table outside the cafe, a cup of lukewarm coffee in her hand."}, + }, + } + + results, err := IRCCloudLogs(testLog, time.UTC, time.Hour*6) + if err != nil { + t.Fatal("Parse", err) + } + + if len(results) != len(logs) { + t.Fatal("Two logs expected, got", len(logs)) + } + + for i, log := range logs { + result := results[i] + + if len(result.Posts) != len(log) { + t.Error(len(log), "posts expected in log", i, "but got", len(result.Posts)) + for _, post := range result.Posts { + t.Log(*post) + } + + return + } + + for j, post := range result.Posts { + t.Run(fmt.Sprintf("Log_%d_Post_%d", i, j), func(t *testing.T) { + assert.Equal(t, log[j].TS, post.Time.Format("2006-01-02 15:04:05"), "Timestamps should match.") + assert.Equal(t, log[j].Kind, post.Kind, "Kinds should match.") + assert.Equal(t, log[j].Nick, post.Nick, "Kinds should match.") + assert.Equal(t, log[j].Text, post.Text, "Kinds should match.") + }) + } + + } +} + +var testLog = ` +[2011-09-23 15:37:16] — Jason_Wolfe sheds a bit of the tension he was harboring as Markus extends his hand. +[2011-09-23 15:41:15] — Markus_Vasquez gives another nod, but there's no smile from him. 
+ + +[2011-09-23 15:47:22] — Jason_Wolfe fights the temptation to shove his hand back in his pocket as he notices Markus. +[2011-09-23 15:47:22] — Jason_Wolfe warmer to wear. +[2011-09-23 15:47:32] ((you're*)) +[2011-09-23 15:48:45] — Markus_Vasquez grunts as they walk toward the door, shaking his head slightly. +[2011-09-23 18:00:28] ⇐ Tyranniac quit (Tyranniac@example.com): Ping timeout: 260 seconds +[2011-09-23 18:58:39] → Tyranniac joined (Tyranniac@example.com) +[2011-09-23 18:58:39] * ChanServ set +v Tyranniac +[2011-09-24 01:45:21] → Hobo joined (Hobo@example.com) +[2011-09-24 11:37:07] ⇐ Hobo quit (Hobo@example.com): +[2011-09-24 21:01:34] ⇐ Tyranniac quit (Tyranniac@example.com): Ping timeout: 264 seconds +[2011-09-24 23:00:32] → Tyranniac joined (Tyranniac@example.com) +[2011-09-25 20:54:43] * Bowe → Sofia_Tennhausen +[2011-09-25 21:03:55] <=Scene=> It's early evening on the 11th of November, and the weather is clear. +[2011-09-25 21:03:55] <=Scene=> café on the eastern edge of town. +[2011-09-25 21:08:33] — Sofia_Tennhausen is sat on a small table outside the cafe, a cup of lukewarm coffee in her hand. 
+` diff --git a/services/parsers/mirclike.go b/services/parsers/mirclike.go index f96a293..7c034e2 100644 --- a/services/parsers/mirclike.go +++ b/services/parsers/mirclike.go @@ -129,7 +129,7 @@ func MircPost(line string, date time.Time, prev models.Post) (models.Post, error LogID: "UNASSIGNED", Time: ts, Kind: "action", - Nick: strings.TrimLeft(split[0], "+@!~"), + Nick: strings.Trim(strings.TrimLeft(split[0], "+@!~\u001F"), "\u001F"), Text: split[1], Position: prev.Position + 1, } @@ -153,7 +153,7 @@ func MircPost(line string, date time.Time, prev models.Post) (models.Post, error LogID: "UNASSIGNED", Time: ts, Kind: "text", - Nick: strings.TrimLeft(split[0][1:len(split[0])-1], "+@!~"), + Nick: strings.Trim(strings.TrimLeft(split[0][1:len(split[0])-1], "+@!~"), "\u001F"), Text: split[1], Position: prev.Position + 1, } diff --git a/services/parsers/mirclike_test.go b/services/parsers/mirclike_test.go index 26608a0..83169c1 100644 --- a/services/parsers/mirclike_test.go +++ b/services/parsers/mirclike_test.go @@ -30,6 +30,10 @@ func TestMircPost(t *testing.T) { "[13:36:59] Things said.", "13:36:59", "text", "Stuff", "Things said.", }, + { + "[13:36:59] <\u001FStuff\u001F> Things said.", + "13:36:59", "text", "Stuff", "Things said.", + }, { "[23:59] <=Scene=> Scenery and such.", "23:59:00", "scene", "=Scene=", "Scenery and such.",